In [None]:
from IPython.display import display, Javascript
from google.colab.output import eval_js
from base64 import b64decode, b64encode

import numpy as np
from PIL import Image

import io
import cv2  as cv # OpenCV library

In [None]:
# Display the version string of the imported OpenCV build (last expression of the cell).
cv.__version__

'4.1.2'

In [None]:
# Mount Google Drive at /content/drive so that files stored there are reachable by path.
from google.colab import drive
drive.mount("/content/drive")

In [None]:
from google.colab.output import eval_js

def VideoCapture():
  """Inject the browser-side JavaScript helpers used for webcam streaming.

  Calling this function only *defines* three JavaScript functions in the cell
  output (via ``display(Javascript(...))``); it does not start the camera.
  The functions are meant to be invoked from Python with ``eval_js``:

  * ``create()``  - appends a ``<video>`` element, requests a camera stream
    through ``navigator.mediaDevices.getUserMedia`` (``facingMode:
    "environment"``), starts playback, creates a canvas sized to the video
    frame, and appends an ``<img>`` element used to show processed frames.
  * ``capture()`` - draws the current video frame onto the canvas and resolves
    with it as a JPEG data URL (quality ``0.20``).
  * ``showimg(imgb64)`` - sets the ``<img>`` source to a data URL built from
    the given base64 string.

  The JavaScript variables (``video``, ``canvas``, ``img``, ...) are assigned
  without declarations, which is how ``capture()`` and ``showimg()`` reach the
  elements that ``create()`` built; ``create()`` must therefore run first.

  Returns:
    None.
  """
  js = Javascript('''
    async function create(){
      div = document.createElement('div');
      document.body.appendChild(div);

      video = document.createElement('video');
      video.setAttribute('playsinline', '');

      div.appendChild(video);
      stream = await navigator.mediaDevices.getUserMedia({video: {facingMode: "environment"}});
      video.srcObject = stream;

      await video.play();

      canvas =  document.createElement('canvas');
      canvas.width = video.videoWidth;
      canvas.height = video.videoHeight;
      canvas.getContext('2d').drawImage(video, 0, 0);

      div_out = document.createElement('div');
      document.body.appendChild(div_out);
      img = document.createElement('img');
      div_out.appendChild(img);
    }

    async function capture(){
        return await new Promise(function(resolve, reject){
            pendingResolve = resolve;
            canvas.getContext('2d').drawImage(video, 0, 0);
            result = canvas.toDataURL('image/jpeg', 0.20);

            pendingResolve(result);
        })
    }

    function showimg(imgb64){
        img.src = "data:image/jpg;base64," + imgb64;
    }

  ''')
  # Rendering the Javascript object is what actually runs the script in the browser.
  display(js)

In [None]:
def b64_to_bytes(byte):
  """Decode a base64 data URL into a NumPy image array.

  Args:
    byte: String of the form ``"<header>,<base64 payload>"`` (for example the
      JPEG data URL produced by the JavaScript ``capture()`` helper). Only the
      part after the first comma is decoded.

  Returns:
    ``np.ndarray`` built from the decoded image as opened by PIL.
  """
  payload = byte.split(',')[1]  # drop the "data:...;base64" header
  stream = io.BytesIO(b64decode(payload))
  return np.array(Image.open(stream))

def bytes_to_b64(image):
  """Encode an image array as a base64 JPEG string.

  Args:
    image: Array accepted by ``PIL.Image.fromarray``.

  Returns:
    ``str`` holding the base64 text of the JPEG-encoded image (no data-URL
    header is prepended).
  """
  jpeg_stream = io.BytesIO()
  Image.fromarray(image).save(jpeg_stream, 'jpeg')
  # getvalue() yields the whole buffer regardless of the current stream position.
  return b64encode(jpeg_stream.getvalue()).decode('utf-8')

In [None]:
# Load the Haar cascades with cv.CascadeClassifier().
# The constructor does not raise when the XML file is missing or unreadable: it
# returns an *empty* classifier that only fails later, inside detectMultiScale,
# with an opaque assertion error. Check right away so a wrong path or an
# unmounted Drive is reported where it happens.
FACE_CASCADE_PATH = "/content/drive/MyDrive/mam4_CV/haarcascade_frontalface_alt.xml"
EYE_CASCADE_PATH = '/content/drive/MyDrive/mam4_CV/haarcascade_eye.xml'

face_cascade = cv.CascadeClassifier(FACE_CASCADE_PATH)
eye_cascade = cv.CascadeClassifier(EYE_CASCADE_PATH)

for _cascade_path, _cascade in ((FACE_CASCADE_PATH, face_cascade),
                                (EYE_CASCADE_PATH, eye_cascade)):
  if _cascade.empty():
    raise IOError(
        "Could not load Haar cascade from {!r}; check the path and that "
        "Google Drive is mounted.".format(_cascade_path))

In [None]:
def detect(img, cascade):
    """Run a cascade detector and return boxes as corner coordinates.

    Args:
        img: Image handed unchanged to ``cascade.detectMultiScale``.
        cascade: Object exposing ``detectMultiScale`` (e.g. a
            ``cv.CascadeClassifier``).

    Returns:
        An empty list when nothing is detected; otherwise the detector's
        array with columns 2 and 3 converted in place from width/height to
        right/bottom coordinates, i.e. rows of ``(x1, y1, x2, y2)``.
    """
    boxes = cascade.detectMultiScale(
        img,
        scaleFactor=1.3,
        minNeighbors=4,
        minSize=(30, 30),
        flags=cv.CASCADE_SCALE_IMAGE,
    )
    if len(boxes) > 0:
        # (x, y, w, h) -> (x1, y1, x2, y2): add the top-left corner to the size.
        boxes[:, 2:] += boxes[:, :2]
        return boxes
    return []

In [None]:
def detect_faces(img, cascades):
  """Detect faces in an RGB image with the given cascade.

  The image is converted from RGB to grayscale and histogram-equalized before
  being passed to ``detect``.

  Args:
    img: RGB image (it is converted with ``cv.COLOR_RGB2GRAY``).
    cascades: The cascade forwarded to ``detect`` (a single classifier,
      despite the plural name).

  Returns:
    Whatever ``detect`` returns for the preprocessed image.
  """
  equalized = cv.equalizeHist(cv.cvtColor(img, cv.COLOR_RGB2GRAY))
  return detect(equalized, cascades)

In [None]:
def compute_optimized_search_region(previous, img_height, img_width, margin):
  """Grow the previous face box by ``margin`` and clip it to the image.

  Note: the arithmetic treats ``previous`` as corner coordinates
  ``(x1, y1, x2, y2)`` - indices 2 and 3 are compared against the image width
  and height - not as ``(x, y, w, h)``.

  Args:
    previous: Previous bounding box, indexable with 0..3.
    img_height: Image height, upper bound for the bottom coordinate.
    img_width: Image width, upper bound for the right coordinate.
    margin: How far beyond the previous box to extend the search on each side.

  Returns:
    Tuple ``(x1, y1, x2, y2)`` of the enlarged region; the left/top values are
    clamped to be >= 0 and the right/bottom values to the image size.
  """
  left = max(previous[0] - margin, 0)
  top = max(previous[1] - margin, 0)
  right = min(previous[2] + margin, img_width)
  bottom = min(previous[3] + margin, img_height)
  return (left, top, right, bottom)

In [None]:
# Define the browser-side helpers, then enable the camera and start streaming.
VideoCapture()
eval_js('create()')

# Grab one frame up front only to learn the frame size used for the boundary constraints.
byte = eval_js('capture()')
im = b64_to_bytes(byte)
(img_height, img_width) = im.shape[0], im.shape[1]

previous_bbox = None  # (x1, y1, x2, y2) of the face found in the previous frame, in full-image coordinates
margin = 40 # You can change this value as you want

# The loop streams until the cell is interrupted; the interrupt is caught so the cell ends cleanly.
try:
  while True:
    b64_im = eval_js('capture()') # Take a capture
    im = b64_to_bytes(b64_im) # Convert the capture from a base64 data URL to an image array

    # Default: search the whole frame, so detections need no coordinate shift.
    region_n = None
    offset_x, offset_y = 0, 0
    region_to_use = im

    if previous_bbox is not None: # A face was detected in the previous frame
      # Restrict the search to the previous box grown by `margin`, clipped to the real frame size.
      region_n = compute_optimized_search_region(previous_bbox, img_height, img_width, margin)
      region_n = tuple(int(v) for v in region_n) # plain ints for slicing and drawing
      offset_x, offset_y = region_n[0], region_n[1]
      region_to_use = im[region_n[1]:region_n[3], region_n[0]:region_n[2]]

    # Detect BEFORE drawing anything: region_to_use is a view into `im`, so a rectangle
    # drawn first would put its border pixels into the detector's input.
    faces = detect_faces(region_to_use, face_cascade)

    if region_n is not None:
      # Red rectangle around the sub region, drawn on 'im' (the image that gets displayed).
      cv.rectangle(im, (region_n[0], region_n[1]), (region_n[2], region_n[3]), (255,0,0), 3)

    if len(faces) == 1: # Track only when exactly one face is detected
      x1, y1, x2, y2 = (int(v) for v in faces[0])
      # Detections are relative to region_to_use; shift them back to full-image coordinates.
      previous_bbox = (x1 + offset_x, y1 + offset_y, x2 + offset_x, y2 + offset_y)
      # Green rectangle around the detected face in the original image 'im'.
      cv.rectangle(im, (previous_bbox[0], previous_bbox[1]), (previous_bbox[2], previous_bbox[3]), (0,255,0), 3)
    else:
      # No face or several faces: fall back to searching the entire image in the next frame.
      previous_bbox = None

    # Convert the annotated image to base64 and show it in the browser through the JS helper.
    eval_js('showimg("{}")'.format(bytes_to_b64(im)))
except KeyboardInterrupt:
  # Interrupting the cell is the intended way to stop streaming.
  pass

<IPython.core.display.Javascript object>

KeyboardInterrupt: ignored