<a href="https://colab.research.google.com/github/daniel-gonzalez-cedre/school/blob/master/nd/TA/discernment-lectures/computer-vision.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Computer Vision Course (CSE 40535/60535)
# University of Notre Dame, Fall 2023
# _______________________________________________________________________________
# Adam Czajka, Andrey Kuehlkamp, Siamul Khan, Thomas Summe, September 2017 - 2023

# Load the packages
import cv2
import numpy as np
import os
import sys
import matplotlib.pyplot as plt
import pandas as pd
import plotly.graph_objs as go
import plotly.offline as py

from scipy.stats import gaussian_kde as kde
from google.colab import output
from google.colab.patches import cv2_imshow

In [None]:
from IPython.display import display, Javascript, Image
from google.colab.output import eval_js
from google.colab.patches import cv2_imshow
from base64 import b64decode, b64encode
import PIL
import io

# function to convert the JavaScript object into an OpenCV image
def js_to_image(js_reply):
  """
  Decode a webcam frame received from JavaScript into an OpenCV image.

  Params:
          js_reply: string whose part after the first comma is the
                    base64-encoded image (e.g. a "data:image/jpeg;base64,..." URL)
  Returns:
          img: image decoded by OpenCV in colour mode (BGR channel order)
  """
  # everything after the comma is the base64 payload
  encoded = js_reply.split(',')[1]
  # view the decoded bytes as a flat uint8 buffer, which is what imdecode expects
  raw = np.frombuffer(b64decode(encoded), dtype=np.uint8)
  # let OpenCV parse the compressed image into a 3-channel colour array
  return cv2.imdecode(raw, flags=cv2.IMREAD_COLOR)

# function to convert an OpenCV rectangle bounding-box image into a base64 byte string to be overlaid on the video stream
def to_bytes(bbox_array):
  """
  Encode an RGBA pixel array as a PNG data URL.

  Params:
          bbox_array: Numpy array (pixels) containing rectangle to overlay on video stream.
                      It is interpreted as an 'RGBA' image.
  Returns:
        bytes: string of the form 'data:image/png;base64,<base64 PNG>'
  """
  # Import the submodule explicitly: a bare `import PIL` does not load
  # PIL.Image, so relying on it only works if some other package has
  # already imported the submodule.
  import PIL.Image

  # convert array into PIL image
  bbox_PIL = PIL.Image.fromarray(bbox_array, 'RGBA')
  # format bbox into png, written to an in-memory buffer
  with io.BytesIO() as iobuf:
    bbox_PIL.save(iobuf, format='png')
    encoded = b64encode(iobuf.getvalue()).decode('ascii')

  # format return string
  bbox_bytes = 'data:image/png;base64,{}'.format(encoded)

  return bbox_bytes

# @title Webcam live stream code (just run this cell)
# JavaScript to properly create our live video stream using our webcam as input
def video_stream():
  """
  Display the JavaScript that implements the webcam live stream.

  The script defines, on the browser side:
    - createDom(): builds a bordered container with a status label, a <video>
      element fed by getUserMedia (facingMode "environment"), an absolutely
      positioned <img> used as the overlay, and a red instruction line.
      Clicking the video, the overlay or the instruction sets `shutdown`.
    - onAnimationFrame(): when a frame request is pending, draws the video
      onto a 640x480 canvas and resolves the request with a JPEG data URL
      (quality 0.8), or with "" if `shutdown` is set. It stops rescheduling
      itself once `shutdown` is set.
    - stream_frame(label, imgData): if `shutdown` is set on entry, removes the
      DOM elements, stops the video track and returns ''. Otherwise it updates
      the status label (when `label` is non-empty) and the overlay image (when
      `imgData` is non-empty), waits for the next frame and returns an object
      with the keys 'create', 'show', 'capture' (Date.now() differences) and
      'img' (the frame, or "" if `shutdown` was set while waiting).
    - removeDom(): stops the first video track and clears the elements.

  Nothing is returned; the script is only sent to the notebook output.
  """
  js = Javascript('''
    var video;
    var div = null;
    var stream;
    var captureCanvas;
    var imgElement;
    var labelElement;

    var pendingResolve = null;
    var shutdown = false;

    function removeDom() {
       stream.getVideoTracks()[0].stop();
       video.remove();
       div.remove();
       video = null;
       div = null;
       stream = null;
       imgElement = null;
       captureCanvas = null;
       labelElement = null;
    }

    function onAnimationFrame() {
      if (!shutdown) {
        window.requestAnimationFrame(onAnimationFrame);
      }
      if (pendingResolve) {
        var result = "";
        if (!shutdown) {
          captureCanvas.getContext('2d').drawImage(video, 0, 0, 640, 480);
          result = captureCanvas.toDataURL('image/jpeg', 0.8)
        }
        var lp = pendingResolve;
        pendingResolve = null;
        lp(result);
      }
    }

    async function createDom() {
      if (div !== null) {
        return stream;
      }

      div = document.createElement('div');
      div.style.border = '2px solid black';
      div.style.padding = '3px';
      div.style.width = '100%';
      div.style.maxWidth = '600px';
      document.body.appendChild(div);

      const modelOut = document.createElement('div');
      modelOut.innerHTML = "<span>Status:</span>";
      labelElement = document.createElement('span');
      labelElement.innerText = 'No data';
      labelElement.style.fontWeight = 'bold';
      modelOut.appendChild(labelElement);
      div.appendChild(modelOut);

      video = document.createElement('video');
      video.style.display = 'block';
      video.width = div.clientWidth - 6;
      video.setAttribute('playsinline', '');
      video.onclick = () => { shutdown = true; };
      stream = await navigator.mediaDevices.getUserMedia(
          {video: { facingMode: "environment"}});
      div.appendChild(video);

      imgElement = document.createElement('img');
      imgElement.style.position = 'absolute';
      imgElement.style.zIndex = 1;
      imgElement.onclick = () => { shutdown = true; };
      div.appendChild(imgElement);

      const instruction = document.createElement('div');
      instruction.innerHTML =
          '<span style="color: red; font-weight: bold;">' +
          'When finished, click here or on the video to stop this demo</span>';
      div.appendChild(instruction);
      instruction.onclick = () => { shutdown = true; };

      video.srcObject = stream;
      await video.play();

      captureCanvas = document.createElement('canvas');
      captureCanvas.width = 640; //video.videoWidth;
      captureCanvas.height = 480; //video.videoHeight;
      window.requestAnimationFrame(onAnimationFrame);

      return stream;
    }
    async function stream_frame(label, imgData) {
      if (shutdown) {
        removeDom();
        shutdown = false;
        return '';
      }

      var preCreate = Date.now();
      stream = await createDom();

      var preShow = Date.now();
      if (label != "") {
        labelElement.innerHTML = label;
      }

      if (imgData != "") {
        var videoRect = video.getClientRects()[0];
        imgElement.style.top = videoRect.top + "px";
        imgElement.style.left = videoRect.left + "px";
        imgElement.style.width = videoRect.width + "px";
        imgElement.style.height = videoRect.height + "px";
        imgElement.src = imgData;
      }

      var preCapture = Date.now();
      var result = await new Promise(function(resolve, reject) {
        pendingResolve = resolve;
      });
      shutdown = false;

      return {'create': preShow - preCreate,
              'show': preCapture - preShow,
              'capture': Date.now() - preCapture,
              'img': result};
    }
    ''')

  # Send the script to the notebook output so its functions become callable
  display(js)


def video_frame(label, bbox):
  """
  Request one webcam frame from the browser-side `stream_frame` function.

  Params:
          label: text placed in the status line of the streaming widget
                 (an empty string leaves the current text unchanged)
          bbox:  overlay image as a data URL, or '' for no overlay update
  Returns:
          data: whatever `stream_frame` returns, as converted by eval_js
  """
  import json

  # Encode the arguments as JavaScript string literals instead of pasting
  # them between double quotes: a quote, backslash or newline in either
  # value would otherwise break (or alter) the evaluated call.
  call = 'stream_frame({}, {})'.format(json.dumps(str(label)),
                                       json.dumps(str(bbox)))
  data = eval_js(call)
  return data


### Main task: Detecting and tracking M&Ms of various colors

In [None]:
video_stream()

# Label for video streaming window
label_html = 'Capturing...'

# Initialize the overlay to an empty string (no overlay is sent with the first request)
bbox = ''
count = 0

# Rescale factor applied to every captured frame (use < 1 if the input image is too large)
res_scale = 0.5

# Ignore bounding boxes smaller than "minObjectSize"
minObjectSize = 10

# Structuring element shared by the morphological operations below
kernel = np.ones((5, 5), np.uint8)

###########################################################################
# The ranges below define which Hue, Saturation and Value (HSV) triples
# are accepted for each object.
#
# If the detection is not perfect, scroll down and use the "HSV selection"
# code to find better bounds for H, S and V.
#
# NOTE(review): the green hue range (10-65) contains the yellow hue range
# (16-32), so a yellow candy may also be reported as green; tune as needed.
colors = ["Blue M&M", "Green M&M", "Yellow M&M"]
hsv_bounds = [
    (np.array([95, 220, 25]), np.array([110, 255, 200])),   # blue M&M
    (np.array([10, 150, 50]), np.array([65, 240, 200])),    # green M&M
    (np.array([16, 200, 85]), np.array([32, 255, 255])),    # yellow M&M
]

while True:
    js_reply = video_frame(label_html, bbox)
    # An empty reply means the stream was shut down. The reply can also carry
    # an empty image (stop click registered while a frame was pending); there
    # is nothing to decode in that case either.
    if not js_reply or not js_reply["img"]:
        break

    img = js_to_image(js_reply["img"])
    img = cv2.resize(img, (0, 0), fx=res_scale, fy=res_scale)
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)

    # One transparent RGBA overlay per frame, sized like the rescaled frame.
    # It is shared by all colors so that every detection ends up in the
    # overlay, and it starts empty so boxes from earlier frames do not linger.
    bbox_array = np.zeros([img.shape[0], img.shape[1], 4], dtype=np.uint8)

    # Iterate over all colors (we have three) and the corresponding HSV ranges
    for color, (lower, upper) in zip(colors, hsv_bounds):

        # Binary mask telling us where pixels within the range are located
        mask = cv2.inRange(hsv, lower, upper)

        # Resulting binary image may have large number of small objects.
        # You may check different morphological operations to remove these unnecessary
        # elements. You may need to check your ROI defined in step 1 to
        # determine how many pixels your object may have.
        mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel=kernel)
        mask = cv2.morphologyEx(mask, cv2.MORPH_DILATE, kernel=kernel)

        # Find connected components. The label image is binarized rather than
        # cast to uint8, because a cast would wrap labels >= 256 (256 -> 0)
        # and drop those components.
        cc = cv2.connectedComponents(mask)
        ccimg = (cc[1] > 0).astype(np.uint8)

        # Find contours of these objects ([-2:] copes with both the 2- and
        # 3-value return conventions of findContours)
        contours, hierarchy = cv2.findContours(ccimg,
                                               cv2.RETR_TREE,
                                               cv2.CHAIN_APPROX_SIMPLE)[-2:]

        # Nothing of this color was found in the frame
        if not contours:
            continue

        # use the biggest object to draw a rectangle
        c = max(contours, key=cv2.contourArea)
        x, y, w, h = cv2.boundingRect(c)

        # do not show very small objects
        if w > minObjectSize or h > minObjectSize:
            bbox_array = cv2.rectangle(bbox_array, (x, y), (x+w, y+h), (0, 255, 0), 3)
            # the same rectangle is drawn on the frame itself (img is not displayed in this cell)
            cv2.rectangle(img, (x, y), (x+w, y+h), (0, 255, 0), 3)
            cv2.putText(bbox_array,                 # image
                        color,                      # text
                        (x, y-10),                  # start position
                        cv2.FONT_HERSHEY_SIMPLEX,   # font
                        0.5,                        # size
                        (0, 255, 0),                # color
                        1,                          # thickness
                        cv2.LINE_AA)                # type of line

    # make every drawn pixel opaque and leave the rest fully transparent
    bbox_array[:, :, 3] = (bbox_array.max(axis=2) > 0).astype(np.uint8) * 255

    # convert overlay into bytes so the next frame request shows it
    bbox = to_bytes(bbox_array)

### HSV Selection

#### Step 1: Capture an image with your object (M&M candy) shown to the camera

In [None]:
video_stream()

# label for video
label_html = 'Capturing...'

count = 0

# The overlay never changes, so it is rendered and encoded once, before the
# loop, instead of once per frame.
overlay_pixels = np.zeros([480, 640, 4], dtype=np.uint8)
font = cv2.FONT_HERSHEY_SIMPLEX
cv2.putText(overlay_pixels, 'click to capture', (15, 25), font, 1, (0, 255, 0), 2, cv2.LINE_AA)
# make the text opaque and everything else fully transparent
overlay_pixels[:, :, 3] = (overlay_pixels.max(axis=2) > 0).astype(np.uint8) * 255
# convert overlay into bytes
svm_overlay = to_bytes(overlay_pixels)

# Most recent successfully decoded frame (None until one arrives)
last_frame = None

while True:
    js_reply = video_frame(label_html, svm_overlay)
    # An empty reply means the stream was shut down. The reply can also carry
    # an empty image (stop click registered while a frame was pending); keep
    # the previous frame in that case instead of failing on the decode.
    if not js_reply or not js_reply["img"]:
        break

    # convert JS response to OpenCV image
    last_frame = js_to_image(js_reply["img"])

if last_frame is None:
    print("No frame was captured - run this cell again.")
else:
    img = last_frame
    # show the capture with the channels reordered from BGR to RGB
    plt.imshow(img[:, :, [2, 1, 0]]);

#### Step 2: Get HSV and RGB images from your webcam screenshot

In [None]:
def _pixel_table(image):
    """Return a DataFrame with one row per pixel of a 3-channel image.

    Columns: 'y' and 'x' are the row and column of the pixel, 'z0'..'z2' are
    its three channel values. Rows follow the row-major order of the image.
    """
    rows, cols = np.indices(image.shape[:2])
    channels = image.reshape(-1, 3)
    return pd.DataFrame({
        'y': rows.ravel(),
        'x': cols.ravel(),
        'z0': channels[:, 0],
        'z1': channels[:, 1],
        'z2': channels[:, 2],
    })


# Per-pixel table of the screenshot converted to HSV
img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
df = _pixel_table(img_hsv)

# Per-pixel table of the screenshot with channels reordered from BGR to RGB
img_rgb = img[:, :, [2, 1, 0]]
df_rgb = _pixel_table(img_rgb)

#### Step 3: Run the code below and use the **box selection tool** in the upper right of the output to select the desired image region. After a few seconds you should see histograms of HSV (and RGB) values, which you can use to define good color ranges for your object tracker.

In [None]:
output.enable_custom_widget_manager()

# Screenshot in RGB channel order, plus two invisible markers placed at
# opposite corners of the image.
rgb_image = go.Image(z=img[:, :, [2, 1, 0]])
corner_markers = go.Scatter(y=[0, img.shape[0]], x=[0, img.shape[1]],
                            mode='markers', opacity=0)
f = go.FigureWidget([corner_markers, rgb_image])

# NOTE: despite its name, `scatter` is the second trace of `f`, i.e. the image
scatter = f.data[1]


def _flat_curve(name, **style):
    """Return a filled curve over 0..255 whose values are all zero."""
    return go.Scatter(x=np.array(range(256)), y=np.zeros(256),
                      fill='tozeroy', name=name, **style)


# Histogram placeholders for the HSV channels
t = go.FigureWidget([_flat_curve(channel)
                     for channel in ('Hue', 'Saturation', 'Value')])

# Histogram placeholders for the RGB channels, each drawn in its own color
t_rgb = go.FigureWidget([_flat_curve(shade.capitalize(), marker={'color': shade})
                         for shade in ('red', 'green', 'blue')])


def selection_fn(trace, points, selector):
    """Update the HSV and RGB curves for the pixels inside the selected box.

    Params:
            trace, points: supplied by the selection event, not used here
            selector: object providing `xrange` and `yrange`, the horizontal
                      and vertical extent of the selection
    Reads the per-pixel tables `df` (HSV) and `df_rgb` (RGB) and writes the
    'y' values of the three traces in `t` and in `t_rgb`.
    """
    # Round the box to whole pixels; sort so the comparison below works
    # whichever end of the range is reported first.
    xr = np.sort(np.rint(selector.xrange).astype(int))
    yr = np.sort(np.rint(selector.yrange).astype(int))
    locs = (df['x'] >= xr[0]) & (df['x'] <= xr[1]) & (df['y'] >= yr[0]) & (df['y'] <= yr[1])

    levels = np.arange(256)

    def density(values):
        """Estimate the distribution of `values` over the levels 0..255."""
        values = np.asarray(values, dtype=float)
        try:
            return kde(values)(levels)
        except (ValueError, np.linalg.LinAlgError):
            # gaussian_kde cannot handle fewer than two samples or samples
            # without any spread (e.g. a fully saturated channel); fall back
            # to a normalized histogram instead of failing inside the callback.
            if values.size == 0:
                return np.zeros(256)
            counts = np.bincount(values.astype(int), minlength=256)[:256]
            return counts / values.size

    for index, column in enumerate(('z0', 'z1', 'z2')):
        t.data[index]['y'] = density(df.loc[locs, column])
        t_rgb.data[index]['y'] = density(df_rgb.loc[locs, column])


# Run selection_fn whenever a selection is made on the image figure
scatter.on_selection(selection_fn)

# Show the image first, then the HSV and RGB curves
for widget in (f, t, t_rgb):
    display(widget)

# Both curve figures share the same title font
title_font = dict(
    family="Courier New, monospace",
    size=18,
    color="RebeccaPurple"
)

for widget, title in ((t, "HSV"), (t_rgb, "RGB")):
    widget.update_layout(title=title, font=title_font)

#### MediaPipe demos

I encourage you to run [these MediaPipe demos](https://mediapipe-studio.webapps.google.com/home) in your browser. They demonstrate a few fundamental computer vision tasks: image classification, face detection and landmarking, body pose detection, hand gesture recognition, and others.