It is highly recommended to use a powerful **GPU**. You can get one for free by uploading this notebook to [Google Colab](https://colab.research.google.com/notebooks/intro.ipynb).
<table align="center">
 <td align="center"><a target="_blank" href="https://colab.research.google.com/github/ezponda/intro_deep_learning/blob/main/class/CNN/YOLO_webcam.ipynb">
        <img src="https://colab.research.google.com/img/colab_favicon_256px.png"  width="50" height="50" style="padding-bottom:5px;" />Run in Google Colab</a></td>
  <td align="center"><a target="_blank" href="https://github.com/ezponda/intro_deep_learning/blob/main/class/CNN/YOLO_webcam.ipynb">
        <img src="https://github.githubassets.com/images/modules/logos_page/GitHub-Mark.png"  width="50" height="50" style="padding-bottom:5px;" />View Source on GitHub</a></td>
</table>

This notebook is based on the [GluonCV webcam demo](https://cv.gluon.ai/build/examples_detection/demo_webcam.html).

You need to install GluonCV and OpenCV. [Installation guide](https://cv.gluon.ai/install.html)
```shell
# for mxnet
pip install --upgrade mxnet
# for pytorch
pip install torch==1.6.0+cpu torchvision==0.7.0+cpu -f https://download.pytorch.org/whl/torch_stable.html

pip install --upgrade gluoncv
```

In [None]:
#!pip install --upgrade mxnet
#!pip install torch==1.6.0+cpu torchvision==0.7.0+cpu -f https://download.pytorch.org/whl/torch_stable.html
#!pip install --upgrade gluoncv

In [None]:
import time
import gluoncv as gcv
from gluoncv.utils import try_import_cv2
cv2 = try_import_cv2()
import mxnet as mx
%matplotlib inline

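We can use `ssd_512_mobilenet1.0_voc` or any other [model](https://cv.gluon.ai/model_zoo/detection.html), for example `ssd_512_resnet50_v1_voc`, `yolo3_darknet53_voc`, `faster_rcnn_fpn_syncbn_resnest269_coco` or `faster_rcnn_fpn_resnet50_v1b_coco`. The cells below preprocess images with the SSD preset (`gcv.data.transforms.presets.ssd.transform_test`); if you switch to a YOLO or Faster R-CNN model, use the matching preset (`presets.yolo` or `presets.rcnn`) instead.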

In [None]:
# Name of the pretrained detector to fetch from the GluonCV model zoo
MODEL_NAME = 'ssd_512_mobilenet1.0_voc'
net = gcv.model_zoo.get_model(MODEL_NAME, pretrained=True)
# Hybridize the network (the original notebook does this "for faster speed")
net.hybridize()

## Single Image

In [None]:
## Detect one image
import tensorflow as tf
from matplotlib import pyplot as plt
import numpy as np
def read_image(image_path, target_size=None):
    """Read the image stored at ``image_path`` into a ``uint8`` NumPy array.

    Args:
        image_path: path of the image file, passed to Keras' ``load_img``.
        target_size: forwarded unchanged to ``load_img`` (``None`` by default).

    Returns:
        The result of ``img_to_array`` cast to ``np.uint8``.
    """
    keras_image = tf.keras.preprocessing.image
    loaded = keras_image.load_img(image_path, target_size=target_size)
    # img_to_array output is cast so downstream code receives 8-bit pixel values
    return keras_image.img_to_array(loaded).astype(np.uint8)

# Sample picture used to try the detector on a still image
url = 'https://akm-img-a-in.tosshub.com/indiatoday/images/story/201812/dogs_and_cats.jpeg?TAxD19DTCFE7WiSYLUdTu446cfW4AbuW&size=770:433'
# Fetch the file through Keras' download helper, then decode it with read_image
image_path = tf.keras.utils.get_file("dog-cat2.jpg", url)
image = read_image(image_path)

# Show the picture without axis ticks
fig, ax = plt.subplots()
ax.imshow(image)
ax.set_xticks([])
ax.set_yticks([])
plt.show()

In [None]:
import mxnet as mx
# Wrap the image in an MXNet NDArray (PyTorch alternative: torch.from_numpy(image).long())
frame = mx.nd.array(image)
# SSD test-time preset: yields the network input and an image to draw the boxes on
rgb_nd, frame = gcv.data.transforms.presets.ssd.transform_test(
    frame, short=512, max_size=700)

# Forward pass through the detector
class_IDs, scores, bounding_boxes = net(rgb_nd)

# Draw the detections of the first batch element on the frame
img = gcv.utils.viz.cv_plot_bbox(
    frame,
    bounding_boxes[0],
    scores[0],
    class_IDs[0],
    class_names=net.classes,
)
# Alternative ways to show the result:
#   gcv.utils.viz.cv_plot_image(img)
#   from google.colab.patches import cv2_imshow; cv2_imshow(img)

# Display with matplotlib, hiding the axis ticks
fig, ax = plt.subplots(figsize=(10, 10))
ax.imshow(img)
ax.set_xticks([])
ax.set_yticks([])
plt.show()

## Web cam Local

In [None]:
# Load the webcam handler (device index 0)
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    # Fail here with a clear message instead of a cryptic cvtColor error
    # in the detection loop when no camera is available (e.g. on Colab).
    cap.release()
    raise RuntimeError(
        "Could not open webcam 0. If you are running on Colab, "
        "use the 'Web cam Colab' section instead."
    )
time.sleep(1) ### letting the camera autofocus

### Detection loop
The detection loop consists of four phases:

- loading the webcam frame

- pre-processing the image

- running the image through the network

- updating the output with the resulting predictions

In [None]:
axes = None  # not used by this loop; kept because the original cell defines it
NUM_FRAMES = 200 # you can change this
for i in range(NUM_FRAMES):
    # Load frame from the camera
    ret, frame = cap.read()
    if not ret or frame is None:
        # The grab failed (camera unplugged, busy, or not opened):
        # stop instead of passing None to cv2.cvtColor.
        print("Could not read frame {} from the webcam; stopping.".format(i))
        break

    # Image pre-processing: OpenCV delivers BGR, the preset is fed RGB uint8
    frame = mx.nd.array(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)).astype('uint8')
    rgb_nd, frame = gcv.data.transforms.presets.ssd.transform_test(frame, short=512, max_size=700)

    # Run frame through network
    class_IDs, scores, bounding_boxes = net(rgb_nd)

    # Display the result
    img = gcv.utils.viz.cv_plot_bbox(frame, bounding_boxes[0], scores[0], class_IDs[0], class_names=net.classes)
    gcv.utils.viz.cv_plot_image(img)
    # Give the OpenCV window 1 ms to process GUI events and repaint
    cv2.waitKey(1)

In [None]:
# Release the capture handle opened with cv2.VideoCapture
cap.release()
# Close the OpenCV windows created while displaying the frames
cv2.destroyAllWindows()

## Web cam Colab

In [None]:
'''
## Camera Capture
Using a webcam to capture images for processing on the runtime.
Source: https://colab.research.google.com/notebooks/snippets/advanced_outputs.ipynb#scrollTo=2viqYx97hPMi
'''

from IPython.display import display, Javascript
from google.colab.output import eval_js
from base64 import b64decode

def take_photo(filename='photo.jpg', quality=0.8, num_frames=5, interval_ms=250):
  """Capture webcam frames in the browser and send each one to Python.

  Injects a JavaScript ``takePhoto`` function into the cell output and runs it
  with ``eval_js``. For every captured frame the JavaScript calls the Colab
  callback registered under the name ``'notebook.run_algo'`` with the frame
  encoded as a JPEG data URI.

  params:
    filename: unused; kept so existing calls keep working.
    quality: JPEG quality passed to ``canvas.toDataURL``.
    num_frames: number of frames to capture (default 5, as before).
    interval_ms: pause between two captures in milliseconds (default 250).

  returns:
    None
  """
  import json  # local import: only needed to serialise the arguments for JS

  js = Javascript('''
    async function takePhoto(quality, numFrames, intervalMs) {
      const div = document.createElement('div');
      const video = document.createElement('video');
      video.style.display = 'block';
      const stream = await navigator.mediaDevices.getUserMedia({video: true});

      try {
        // show the video in the HTML element
        document.body.appendChild(div);
        div.appendChild(video);
        video.srcObject = stream;
        await video.play();

        // Resize the output to fit the video element.
        google.colab.output.setIframeHeight(document.documentElement.scrollHeight, true);

        // prints the logs to cell
        const jsLog = function(abc) {
          document.querySelector("#output-area").appendChild(document.createTextNode(`${abc}... `));
        }

        // Wait for Capture to be clicked.
        // await new Promise((resolve) => capture.onclick = resolve);

        for (let i = 0; i < numFrames; i++) {
          const canvas = document.createElement('canvas');
          canvas.width = video.videoWidth;
          canvas.height = video.videoHeight;
          canvas.getContext('2d').drawImage(video, 0, 0);
          // declared locally so the data URI does not leak into the global scope
          const img = canvas.toDataURL('image/jpeg', quality);

          // show each captured image
          // let imgTag = document.createElement('img');
          // imgTag.src = img;
          // div.appendChild(imgTag);

          jsLog(i + "sending")
          // Call a python function and send this image.
          // NOTE(review): deliberately not awaited -- presumably the kernel stays
          // busy inside eval_js until takePhoto resolves, so awaiting the callback
          // here could block forever; confirm before changing.
          google.colab.kernel.invokeFunction('notebook.run_algo', [img], {});
          jsLog(i + "SENT")
          // wait intervalMs milliseconds before the next capture
          await new Promise(resolve => setTimeout(resolve, intervalMs));
        }
      } finally {
        // always stop the video stream, even if a capture step throws
        stream.getVideoTracks()[0].stop();
      }
    }
    ''')
  display(js) # make the provided JavaScript part of the cell
  # call the takePhoto() JavaScript function; json.dumps renders the Python
  # values as JavaScript literals
  eval_js('takePhoto({}, {}, {})'.format(
      json.dumps(quality), json.dumps(num_frames), json.dumps(interval_ms)))

In [None]:
import IPython
from google.colab import output
from google.colab.patches import cv2_imshow

import time
import sys
import numpy as np
import cv2

from PIL import Image
from io import BytesIO
import base64
import logging

def data_uri_to_img(uri):
    """Decode a base64 image data URI into a ``uint8`` NumPy array.

    The second comma-separated field of ``uri`` is taken as the base64
    payload, decoded strictly (``validate=True``), opened with PIL and
    converted to an array.

    Returns:
        The image as ``np.ndarray`` of dtype ``uint8``, or ``None`` when any
        step fails (the exception is logged).
    """
    try:
        payload = uri.split(',')[1]
        raw_bytes = base64.b64decode(payload, validate=True)
        # wrap the bytes in a file-like object so PIL can open them
        pil_image = Image.open(BytesIO(raw_bytes))
        return np.array(pil_image, dtype=np.uint8)
    except Exception as err:
        logging.exception(err)
        print('\n')
    return None


def run_algo(imgB64):
    """
    Run object detection on one frame sent from the browser and display it.

    In Colab this function is registered as the 'notebook.run_algo' callback
    and gets invoked by the JavaScript once per captured frame.

    params:
      imgB64: the frame as a base64 data URI (decoded with data_uri_to_img)

    returns:
      None. Errors during detection are logged, not raised.
    """
    image = data_uri_to_img(imgB64)
    if image is None:
        print("At run_algo(): image is None.")
        return
    try:
        # Run detection

        # Image pre-processing.
        # The array comes from PIL, which decodes JPEG frames to RGB, so no
        # BGR->RGB conversion is applied here (that step is only needed for
        # frames read with cv2.VideoCapture). Converting would feed the
        # detector channel-swapped input.
        frame = mx.nd.array(image).astype('uint8')
        rgb_nd, frame = gcv.data.transforms.presets.ssd.transform_test(frame, short=512, max_size=700)

        # Run frame through network
        class_IDs, scores, bounding_boxes = net(rgb_nd)

        # Display the result
        img = gcv.utils.viz.cv_plot_bbox(frame, bounding_boxes[0], scores[0], class_IDs[0], class_names=net.classes)
        # cv2_imshow expects BGR channel order, so convert only for display
        # and show the annotated image returned by cv_plot_bbox.
        cv2_imshow(cv2.cvtColor(img, cv2.COLOR_RGB2BGR))
    except Exception as e:
        # best-effort callback: log the failure and keep processing later frames
        logging.exception(e)

# register this function, so JS code could call this
# (the name must match the one the JavaScript in take_photo passes to
# google.colab.kernel.invokeFunction)
output.register_callback('notebook.run_algo', run_algo)

# put the JS code in cell and run it
# (done after the registration so the callback exists when frames are sent)
take_photo()