In [1]:
# This notebook takes live video and samples frames to feed to a trained image recognition model and displays the results

In [2]:
# Import video libraries
import os
from IPython.display import display, Javascript, Image, clear_output
# from google.colab.output import eval_js
from base64 import b64decode, b64encode
import cv2
import numpy as np
import pandas as pd
import PIL
import io
import html
import time
from tensorflow import keras


In [3]:
# JavaScript to properly create our live video stream using our webcam as input
def video_stream():
  """Register the browser-side JavaScript used to capture webcam frames.

  Displays a ``Javascript`` object that defines four functions in the
  notebook's output frame:

    * ``createDom``  - builds the preview widget (status line, <video>,
      overlay <img>, instruction text), requests the camera through
      ``navigator.mediaDevices.getUserMedia`` with ``facingMode:
      "environment"``, and starts the animation-frame loop. It returns
      early when the widget already exists.
    * ``onAnimationFrame`` - when a capture is pending, draws the current
      video frame onto a 640x480 canvas and resolves the pending promise
      with a JPEG data URL (quality 0.8), or with "" once shutdown was
      requested.
    * ``removeDom`` - stops the first video track and removes the widget.
    * ``stream_frame(label, imgData)`` - the entry point called from
      Python. It updates the status label and the overlay image when the
      respective argument is non-empty, waits for the next captured frame
      and returns an object with the keys ``create``, ``show``,
      ``capture`` (elapsed milliseconds) and ``img`` (the data URL). If
      the user clicked the video, the overlay or the instruction text,
      it tears the widget down and returns '' instead.

  The camera is not opened by this function itself; ``createDom`` only
  runs on the first ``stream_frame`` call.
  """
  js = Javascript('''
    var video;
    var div = null;
    var stream;
    var captureCanvas;
    var imgElement;
    var labelElement;

    var pendingResolve = null;
    var shutdown = false;

    function removeDom() {
       stream.getVideoTracks()[0].stop();
       video.remove();
       div.remove();
       video = null;
       div = null;
       stream = null;
       imgElement = null;
       captureCanvas = null;
       labelElement = null;
    }

    function onAnimationFrame() {
      if (!shutdown) {
        window.requestAnimationFrame(onAnimationFrame);
      }
      if (pendingResolve) {
        var result = "";
        if (!shutdown) {
          captureCanvas.getContext('2d').drawImage(video, 0, 0, 640, 480);
          result = captureCanvas.toDataURL('image/jpeg', 0.8)
        }
        var lp = pendingResolve;
        pendingResolve = null;
        lp(result);
      }
    }

    async function createDom() {
      if (div !== null) {
        return stream;
      }

      div = document.createElement('div');
      div.style.border = '2px solid black';
      div.style.padding = '3px';
      div.style.width = '100%';
      div.style.maxWidth = '600px';
      document.body.appendChild(div);

      const modelOut = document.createElement('div');
      modelOut.innerHTML = "<span>Status:</span>";
      labelElement = document.createElement('span');
      labelElement.innerText = 'No data';
      labelElement.style.fontWeight = 'bold';
      modelOut.appendChild(labelElement);
      div.appendChild(modelOut);

      video = document.createElement('video');
      video.style.display = 'block';
      video.width = div.clientWidth - 6;
      video.setAttribute('playsinline', '');
      video.onclick = () => { shutdown = true; };
      stream = await navigator.mediaDevices.getUserMedia(
          {video: { facingMode: "environment"}});
      div.appendChild(video);

      imgElement = document.createElement('img');
      imgElement.style.position = 'absolute';
      imgElement.style.zIndex = 1;
      imgElement.onclick = () => { shutdown = true; };
      div.appendChild(imgElement);

      const instruction = document.createElement('div');
      instruction.innerHTML =
          '<span style="color: red; font-weight: bold;">' +
          'When finished, click here or on the video to stop this demo</span>';
      div.appendChild(instruction);
      instruction.onclick = () => { shutdown = true; };

      video.srcObject = stream;
      await video.play();

      captureCanvas = document.createElement('canvas');
      captureCanvas.width = 640; //video.videoWidth;
      captureCanvas.height = 480; //video.videoHeight;
      window.requestAnimationFrame(onAnimationFrame);

      return stream;
    }
    async function stream_frame(label, imgData) {
      if (shutdown) {
        removeDom();
        shutdown = false;
        return '';
      }

      var preCreate = Date.now();
      stream = await createDom();

      var preShow = Date.now();
      if (label != "") {
        labelElement.innerHTML = label;
      }

      if (imgData != "") {
        var videoRect = video.getClientRects()[0];
        imgElement.style.top = videoRect.top + "px";
        imgElement.style.left = videoRect.left + "px";
        imgElement.style.width = videoRect.width + "px";
        imgElement.style.height = videoRect.height + "px";
        imgElement.src = imgData;
      }

      var preCapture = Date.now();
      var result = await new Promise(function(resolve, reject) {
        pendingResolve = resolve;
      });
      shutdown = false;

      return {'create': preShow - preCreate,
              'show': preCapture - preShow,
              'capture': Date.now() - preCapture,
              'img': result};
    }
    ''')

  # Send the script to the front end so stream_frame() becomes callable there
  display(js)

def video_frame(label, bbox):
  """Ask the browser for the next webcam frame and push the current overlay.

  Params:
          label: text shown in the status line above the video ('' keeps the
                 current text)
          bbox: overlay image as a data URL string ('' keeps the current overlay)
  Returns:
          Whatever the JavaScript ``stream_frame`` call evaluates to: an object
          with the keys 'create', 'show', 'capture' and 'img', or '' once the
          user has asked to stop.
  """
  import json  # local import: only needed here to quote the JS arguments

  # The notebook-level import of eval_js is commented out, so fall back to the
  # Colab helper only when no eval_js is already available in the namespace.
  try:
    run_js = eval_js
  except NameError:
    from google.colab.output import eval_js as run_js

  # json.dumps yields a properly escaped, double-quoted JS string literal, so
  # quotes or backslashes in the arguments cannot break out of the call.
  call = 'stream_frame({}, {})'.format(json.dumps(str(label)), json.dumps(str(bbox)))
  return run_js(call)


In [4]:
# Load the two models used by the capture loops below.

# Haar cascade shipped with OpenCV, used to locate frontal faces in a frame
face_cascade = cv2.CascadeClassifier(
    os.path.join(cv2.data.haarcascades, 'haarcascade_frontalface_default.xml')
)

# Keras emotion classifier; the file is looked up relative to the working directory
emotion_model = keras.models.load_model('emotion_model.h5')

# Names used to turn the classifier's argmax index into text
# (presumably in the same order as the model's output classes - TODO confirm)
emotion_labels = [
    "Angry",
    "Fear",
    "Happy",
    "Neutral",
    "Sad",
    "Surprise",
]

OSError: No file or directory found at emotion_model.h5

In [4]:
# Helper functions that convert between the browser's base64 data URLs and OpenCV / PIL images
def js_to_image(js_reply):
  """
  Decode a base64 data URL sent by the browser into an OpenCV image.

  Params:
          js_reply: data URL string of the form '<header>,<base64 payload>'
                    holding the captured webcam frame
  Returns:
          img: result of cv2.imdecode in colour mode (OpenCV BGR image)
  """
  # everything between the first and second comma is the base64 payload
  encoded_payload = js_reply.split(',')[1]
  raw_buffer = np.frombuffer(b64decode(encoded_payload), dtype=np.uint8)
  return cv2.imdecode(raw_buffer, flags=cv2.IMREAD_COLOR)

def bbox_to_bytes(bbox_array):
  """
  Encode an RGBA overlay array as a PNG data URL for the browser.

  Params:
          bbox_array: Numpy array (pixels) containing rectangle to overlay on video stream.
  Returns:
        str: 'data:image/png;base64,<payload>' string usable as an <img> src
  """
  # `import PIL` on its own does not load the PIL.Image submodule; importing it
  # explicitly keeps this helper from depending on another library having
  # imported it first.
  import PIL.Image

  png_buffer = io.BytesIO()
  PIL.Image.fromarray(bbox_array, 'RGBA').save(png_buffer, format='png')
  # base64 output is pure ASCII, so decoding cannot fail
  encoded = b64encode(png_buffer.getvalue()).decode('ascii')
  return 'data:image/png;base64,{}'.format(encoded)


In [5]:
# Snapshot bookkeeping shared by the capture cells below
snapshot_dir = 'snapshots'  # folder that receives the saved frames
os.makedirs(snapshot_dir, exist_ok=True)

# Paths of the most recent snapshots, and the collected emotion records
last_snapshots, emotion_data = [], []

# Reference point for timing the next snapshot
last_snapshot_time = time.time()


In [8]:
# This block just takes images from the live video and stores them.
# Inference is done in the block below.

# start streaming video from webcam
video_stream()
# label for video
label_html = 'Capturing...'
# initialize bounding box to empty
bbox = ''
# Reset the snapshot bookkeeping here so that re-running this cell starts
# clean: stale paths left in last_snapshots by an earlier run would otherwise
# collide with the restarted counter and be removed twice.
count = 0
last_snapshots = []
last_snapshot_time = time.time()

while True:
    js_reply = video_frame(label_html, bbox)
    # '' means the user asked to stop; an empty 'img' is returned when the stop
    # click landed while a capture was still pending, so there is no frame to decode.
    if not js_reply or not js_reply["img"]:
        break

    # convert JS response to OpenCV Image
    img = js_to_image(js_reply["img"])

    # create transparent overlay for bounding box
    bbox_array = np.zeros([480,640,4], dtype=np.uint8)

    # grayscale image for face detection (cv2.imdecode yields BGR, not RGB)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # get face region coordinates
    faces = face_cascade.detectMultiScale(gray)

    # draw an enlarged box around every detected face
    for (x,y,w,h) in faces:
        # Convert coordinates to integers before drawing the rectangle.
        # NOTE(review): the top-left corner is scaled by 0.7, so the margin
        # depends on where the face is in the frame rather than on its size -
        # confirm whether x - 0.3 * w / y - 0.3 * h was intended.
        x1 = int(.7 * x)
        y1 = int(.7 * y)
        x2 = int(x + (1.3 * w))
        y2 = int(y + (1.3 * h))
        bbox_array = cv2.rectangle(bbox_array,(x1, y1),(x2, y2),(255,0,0),2)

    # make every drawn pixel opaque and leave the rest transparent
    bbox_array[:,:,3] = (bbox_array.max(axis = 2) > 0 ).astype(int) * 255
    # convert overlay of bbox into bytes
    bbox_bytes = bbox_to_bytes(bbox_array)
    # update bbox so next frame gets new overlay
    bbox = bbox_bytes

    # Check if 5 seconds have passed
    current_time = time.time()
    if current_time - last_snapshot_time >= 5:
        count += 1
        last_snapshot_time = current_time
        snapshot_path = os.path.join(snapshot_dir, f'snapshot_{count}.jpg')
        cv2.imwrite(snapshot_path, img)
        last_snapshots.append(snapshot_path)
        # keep only the 5 most recent snapshot files on disk
        if len(last_snapshots) > 5:
            os.remove(last_snapshots.pop(0))

        print(f'Snapshot saved: {snapshot_path}')


<IPython.core.display.Javascript object>

Snapshot saved: snapshots/snapshot_1.jpg
Snapshot saved: snapshots/snapshot_2.jpg
Snapshot saved: snapshots/snapshot_3.jpg
Snapshot saved: snapshots/snapshot_4.jpg


In [13]:
# This block takes images from the live video and runs inference on them.

# start streaming video from webcam
video_stream()
# label for video
label_html = 'Capturing...'
# initialize bounding box to empty
bbox = ''
# Initialize a counter
count = 0
# Initialize a list to store the last 7 snapshots
last_snapshots = []
# Initialize a list to store the last 7 emotions
last_emotions = []
# Start the emotion log fresh so re-running this cell does not mix runs
emotion_data = []
# Initialize a variable to store the last snapshot time
last_snapshot_time = time.time()
# Most recent prediction; drawn under each face box until a newer one replaces
# it. Defined before the loop so the overlay code never reads an unset name.
predicted_emotion = ""

while True:
    js_reply = video_frame(label_html, bbox)
    # '' means the user asked to stop; an empty 'img' is returned when the stop
    # click landed while a capture was still pending, so there is no frame to decode.
    if not js_reply or not js_reply["img"]:
        break

    # convert JS response to OpenCV Image
    img = js_to_image(js_reply["img"])

    # create transparent overlay for bounding box
    bbox_array = np.zeros([480, 640, 4], dtype=np.uint8)

    # grayscale image for face detection (cv2.imdecode yields BGR, not RGB)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # get face region coordinates
    faces = face_cascade.detectMultiScale(gray)

    # Model input built from this frame; stays None when no face is detected so
    # that a stale face from an earlier frame is never classified.
    input_image = None

    for (x, y, w, h) in faces:

        # Extract face ROI
        face_roi = gray[y:y+h, x:x+w]

        # Preprocess the face ROI for the emotion model
        resized_face = cv2.resize(face_roi, (80, 80))  # Resize to match model input
        normalized_face = resized_face / 255.0  # Normalize pixel values
        input_image = np.expand_dims(normalized_face, axis=0)  # Add batch dimension
        input_image = np.expand_dims(input_image, axis=-1)  # Add channel dimension for grayscale

        # Convert coordinates to integers before drawing the rectangle.
        # NOTE(review): the top-left corner is scaled by 0.7, so the margin
        # depends on where the face is in the frame rather than on its size -
        # confirm whether x - 0.3 * w / y - 0.3 * h was intended.
        x1 = int(.7 * x)
        y1 = int(.7 * y)
        x2 = int(x + (1.3 * w))
        y2 = int(y + (1.3 * h))
        bbox_array = cv2.rectangle(bbox_array, (x1, y1), (x2, y2), (255, 0, 0), 2)
        # label the box with the latest prediction (empty until the first one)
        cv2.putText(bbox_array, predicted_emotion, (x1, y2 + 20), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 0, 0), 2)

    # make every drawn pixel opaque and leave the rest transparent
    bbox_array[:, :, 3] = (bbox_array.max(axis=2) > 0).astype(int) * 255
    # convert overlay of bbox into bytes
    bbox_bytes = bbox_to_bytes(bbox_array)
    # update bbox so next frame gets new overlay
    bbox = bbox_bytes

    # Check if 5 seconds have passed
    current_time = time.time()
    if current_time - last_snapshot_time >= 5:
        count += 1
        last_snapshot_time = current_time
        snapshot_path = os.path.join(snapshot_dir, f'snapshot_{count}.jpg')
        cv2.imwrite(snapshot_path, img)
        last_snapshots.append(snapshot_path)
        # keep only the 7 most recent snapshot files on disk
        if len(last_snapshots) > 7:
            os.remove(last_snapshots.pop(0))

        # Perform emotion prediction only when this frame contained a face
        # (with several faces, the last one detected is classified)
        if input_image is not None:
            prediction = emotion_model.predict(input_image)
            predicted_emotion_index = int(np.argmax(prediction))
            predicted_emotion = emotion_labels[predicted_emotion_index]

            # Store the last 7 emotions
            last_emotions.append(predicted_emotion)
            if len(last_emotions) > 7:
                last_emotions.pop(0)

            # Log the prediction together with the time of its snapshot
            emotion_data.append({'timestamp': current_time, 'emotion': predicted_emotion})

        # Clear previous print statements
        clear_output(wait=True)

        # Print the last 7 emotions
        print(last_emotions)

        # Save the log; explicit columns keep the CSV header even while empty
        emotion_df = pd.DataFrame(emotion_data, columns=['timestamp', 'emotion'])
        emotion_df.to_csv('emotion_data.csv', index=False)


['Angry', 'Angry', 'Angry', 'Angry', 'Angry', 'Angry', 'Angry']
