In [None]:
import io
import math
import os
import warnings
from base64 import b64decode, b64encode

import cv2
import IPython.display as idisplay
import matplotlib.pyplot as plt
import numpy as np
import requests
from google.colab.output import eval_js
from PIL import Image, ImageDraw

In [None]:
# JavaScript helper that is injected into the cell output before a capture.
# It defines an async function `takePhoto(quality)` which:
#   1. appends a <video> element fed by the webcam (getUserMedia) and plays it,
#   2. resizes the output iframe so the video is visible,
#   3. waits until the user clicks the video,
#   4. draws the current frame onto a canvas, stops the first video track and
#      removes the <video> element,
#   5. returns the frame as a JPEG data URL encoded with the given `quality`.
# The string below is runtime code executed by the browser; keep it verbatim.
VIDEO_JS = idisplay.Javascript('''
async function takePhoto(quality) {
  // Create a video and play it.
  const video = document.createElement('video')
  document.body.appendChild(video)
  video.srcObject = await navigator.mediaDevices.getUserMedia({video: true})
  await video.play()
  // Resize the output to fit the video element.
  google.colab.output.setIframeHeight(document.documentElement.scrollHeight, true)
  // Wait for video to be clicked.
  await new Promise((resolve) => video.onclick = resolve)
  const canvas = document.createElement('canvas')
  canvas.width = video.videoWidth
  canvas.height = video.videoHeight
  canvas.getContext('2d').drawImage(video, 0, 0)
  video.srcObject.getVideoTracks()[0].stop()
  video.remove()
  // return capture image
  return canvas.toDataURL('image/jpeg', quality)
}
''')

In [None]:
class Camera:
    """Captures still frames from the browser webcam inside a Colab cell.

    Args:
        quality: Value passed through as the `quality` argument of the
            JavaScript `takePhoto()` helper (used there for JPEG encoding).
    """

    def __init__(self, quality: float = 0.8):
        self._quality = quality

    def take_picture(self):
        """Show the webcam preview, wait for a click, and return the frame.

        Returns:
            The captured frame opened with `PIL.Image.open`.
        """
        # The helper must be rendered into the cell output before it is called.
        display(VIDEO_JS)
        data_url = eval_js(f'takePhoto({self._quality})')
        # A data URL is "<header>,<base64 payload>"; only the payload is decoded.
        jpeg_bytes = b64decode(data_url.split(',')[1])
        return Image.open(io.BytesIO(jpeg_bytes))

In [None]:
# Smoke-test the camera: capture one frame (click the video preview to take
# the shot) and show it with matplotlib.
cam = Camera()
input_image = cam.take_picture()
plt.imshow(input_image)
plt.show()

In [None]:
# Credentials must never be committed in a notebook: read the Vision API key
# from the environment instead (in Colab: `%env GCP_VISION_API_KEY=...` or
# `os.environ[...] = getpass.getpass()` in a cell that is not saved with output).
# NOTE(review): the key that was previously hardcoded here has been exposed and
# should be revoked/rotated in the Google Cloud console.
API_KEY = os.environ.get("GCP_VISION_API_KEY", "")
if not API_KEY:
    warnings.warn(
        "GCP_VISION_API_KEY is not set; requests to the Vision API will be rejected."
    )
GCP_VISION_API_ENDPOINT = f"https://vision.googleapis.com/v1/images:annotate?key={API_KEY}"

class Robot:
    """A robot whose only sensor is the browser webcam.

    Args:
        name: Human-readable name of the robot, stored as `self.name`.

    Attributes are documented inline in `__init__`.
    """

    # Seconds to wait for the Vision API; without a timeout `requests` can
    # block the notebook cell indefinitely on a stalled connection.
    REQUEST_TIMEOUT_S = 30

    def __init__(self, name: str):
        # Previously accepted but discarded; keep it so the robot is identifiable.
        self.name = name
        # Sensor registry keyed by sensor kind.
        self.sensors = {
            "camera": Camera()
        }

    @staticmethod
    def _to_base64_jpeg(pil_image) -> str:
        """Encode a PIL image as JPEG and return the bytes as base64 text."""
        buffer = io.BytesIO()
        # Encode with PIL itself: routing the RGB array through cv2.imencode
        # (which assumes BGR channel order) uploaded an image with red and blue
        # swapped. JPEG cannot store alpha, hence the explicit RGB conversion.
        # quality=95 matches OpenCV's default JPEG quality used before.
        pil_image.convert("RGB").save(buffer, format="JPEG", quality=95)
        return b64encode(buffer.getvalue()).decode("utf-8")

    def detect_face(self):
        """Take a picture and submit it to the Vision API for face detection.

        Returns:
            A tuple `(pil_image, api_response)`: the captured PIL image and the
            decoded JSON body of the `images:annotate` response. The HTTP
            status is not checked, so an API error is returned as its JSON body.

        Raises:
            requests.exceptions.RequestException: on connection failure or when
                the API does not answer within `REQUEST_TIMEOUT_S` seconds.
        """
        pil_image = self.sensors["camera"].take_picture()
        request_body = {
            "requests": [{
                "image": {
                    "content": self._to_base64_jpeg(pil_image)
                },
                "features": [{
                    "maxResults": 10,
                    "type": "FACE_DETECTION"
                }]
            }]
        }
        response = requests.post(
            url=GCP_VISION_API_ENDPOINT,
            json=request_body,
            headers={
                'Content-Type': 'application/json'
            },
            timeout=self.REQUEST_TIMEOUT_S,
        )
        return pil_image, response.json()
        


In [None]:
# Build a robot, capture a frame through its camera and send it to the Vision
# API; `pil_image` is the captured frame, `api_response` the decoded JSON reply.
my_robot = Robot(name="wall-e")
pil_image, api_response = my_robot.detect_face()
print(api_response)

In [None]:
# Draw a red outline around every detected face on a copy of the captured frame.
# "faceAnnotations" is absent from the response when no face was found, so fall
# back to an empty list instead of raising KeyError.
face_annotations = api_response["responses"][0].get("faceAnnotations", [])
num_faces = len(face_annotations)
# Work on a copy so the original capture stays untouched and the cell can be re-run.
canvas_image = pil_image.copy()
image_drawer = ImageDraw.Draw(canvas_image)
for face_annotation in face_annotations:
    vertices = face_annotation["boundingPoly"]["vertices"]
    # The JSON omits zero-valued coordinates (e.g. a face touching the image
    # edge), so a missing "x" or "y" means 0.
    points = [(vertex.get("x", 0), vertex.get("y", 0)) for vertex in vertices]
    image_drawer.polygon(points, outline="red")
canvas_image