In [1]:
"""
Important Stuff (the settings you are most likely to change)
"""

# Path to the weights file that is loaded into the object detection network
WEIGHTS = 'weights/laptop.h5'

# Index of the webcam to capture from; most laptops expose the built-in camera as 0
DEVICE = 0

"""
User Interface Stuff (You can ignore this)
"""
import IPython
import ipywidgets as widgets
from IPython.display import display

# Handle to the running IPython shell (the capture loop uses it to pump kernel events)
ipython = IPython.get_ipython()

# Shared widget style: size each description label to its text
style = {'description_width': 'initial'}


def _make_checkbox(label):
    """Return a checkbox that starts ticked and carries the given label."""
    return widgets.Checkbox(value=True, description=label, style=style)


def _make_threshold_slider(label, initial):
    """Return a full-width slider over [0, 1] in 0.01 steps, starting at `initial`."""
    return widgets.FloatSlider(
        value=initial,
        min=0,
        max=1,
        step=0.01,
        description=label,
        layout=widgets.Layout(width='100%'),
        style=style,
    )


# Image widget that the capture loop fills with PNG-encoded frames
w_image = widgets.Image(
    format='png',
    width=224,
    height=224,
    layout=widgets.Layout(width='100%'),
)

# Toggle for the class-confidence heatmap overlay
w_heatmap = _make_checkbox('Overlay heatmap on image')

# Bounding-box drawing toggle and the confidence a grid cell needs to produce a box
w_bbox = _make_checkbox('Show Bounding Boxes (BBoxes)')
w_bbox_thresh = _make_threshold_slider('BBox Confidence Threshold', 0.99)

# Box-merging toggle and its threshold slider
w_merge = _make_checkbox('Merge close BBoxes')
w_merge_thresh = _make_threshold_slider('BBox Merge Threshold', 0.75)

# Stack everything vertically: image on top, controls underneath
vbox = widgets.VBox([
    w_image,
    w_heatmap,
    w_bbox,
    w_bbox_thresh,
    w_merge,
    w_merge_thresh,
])

display(vbox)

VBox(children=(Image(value=b'', height='224', layout="Layout(width='100%')", width='224'), Checkbox(value=True…

In [2]:
# Load the model
from model import MobileDetectNetModel
import numpy as np
import cv2  

# Build the detection network; complete_model() comes from the local model.py module
keras_model = MobileDetectNetModel.complete_model()
# Load the weights file named by WEIGHTS; by_name=True asks for name-based layer matching
# NOTE(review): presumably layers with no match in the file keep their initial weights — confirm
keras_model.load_weights(WEIGHTS, by_name=True)

# Inference wrapper; the capture loop calls tf_engine.infer(batch) on it
# NOTE(review): tf_engine() is implemented in model.py and is not visible from this notebook
tf_engine = keras_model.tf_engine()

W1004 13:12:18.242512 4571030976 deprecation.py:506] From /Users/carroll/anaconda3/envs/keras/lib/python3.7/site-packages/tensorflow/python/ops/init_ops.py:1251: calling VarianceScaling.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
W1004 13:12:22.458685 4571030976 deprecation_wrapper.py:119] From /Users/carroll/Git/interactive-cv-workshop/notebooks/object_detection/model.py:21: The name tf.keras.backend.get_session is deprecated. Please use tf.compat.v1.keras.backend.get_session instead.

W1004 13:12:22.459688 4571030976 deprecation_wrapper.py:119] From /Users/carroll/Git/interactive-cv-workshop/notebooks/object_detection/model.py:22: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

W1004 13:12:22.781317 4571030976 deprecation.py:323] From /Users/carroll/Git/interactiv

In [None]:
# Side length (pixels) of the square image fed to the network
INPUT_SIZE = 224
# Number of grid cells per side that are scanned in the network output
GRID_SIZE = 7

# Open the webcam selected by DEVICE in the configuration cell
cap = cv2.VideoCapture(DEVICE)

try:
    if not cap.isOpened():
        raise RuntimeError('Could not open webcam with index {}'.format(DEVICE))

    while True:
        # Have to call this to get update values from sliders / dropdowns
        ipython.kernel.do_one_iteration()

        # Read the frame from the camera
        ret, frame = cap.read()
        if not ret or frame is None:
            # A failed grab yields no image; skip it rather than crash on frame.copy()
            continue

        img_original = frame
        img_draw = img_original.copy()
        img_resize = cv2.resize(img_original, (INPUT_SIZE, INPUT_SIZE))

        # Factors that map network-input coordinates back onto the full-size frame
        scale_width = img_original.shape[1] / INPUT_SIZE
        scale_height = img_original.shape[0] / INPUT_SIZE

        # The network expects the image to be scaled between -1 and 1,
        # but most images are scaled between 0 and 255 normally.
        #
        # We divide by 127.5 to scale between 0 and 2, and subtract one to
        # be between -1 and 1
        img_input = (img_resize / 127.5) - 1

        # The neural network expects a "batch" of images as an input.
        # This converts our single image with a shape of (224, 224, 3) to (1, 224, 224, 3).
        # The 1 at the beginning is called the batch dimension.
        batch = np.expand_dims(img_input, axis=0)

        # Do the actual inference
        bboxes, classes = tf_engine.infer(batch)

        # Collect one box per grid cell whose confidence reaches the slider threshold.
        # Box values are multiplied by INPUT_SIZE to get network-input pixel coordinates.
        bbox_thresh = w_bbox_thresh.value
        rectangles = []
        for y in range(GRID_SIZE):
            for x in range(GRID_SIZE):
                if classes[0, y, x, 0] >= bbox_thresh:
                    rectangles.append(
                        [int(bboxes[0, y, x, i] * INPUT_SIZE) for i in range(4)])

        if w_heatmap.value:
            # Upscale the confidence grid to the frame size and colour it
            cls_img = cv2.resize((classes[0]*255).astype(np.uint8),
                                 (img_draw.shape[1], img_draw.shape[0]),
                                 interpolation=cv2.INTER_AREA)
            cls_cmap = cv2.applyColorMap(cls_img, cv2.COLORMAP_JET)
            # NOTE(review): cls_img and cls_cmap look to be uint8 here, so this product
            # would wrap modulo 256 before the float cast — confirm this blend is intended
            cls_add = (img_draw).astype(np.float32) + (np.expand_dims(cls_img, axis=-1)*cls_cmap).astype(np.float32)
            # Renormalise to 0..255; skip when everything is zero to avoid dividing by zero
            peak = np.max(cls_add)
            if peak > 0:
                img_draw = (255*(cls_add / peak)).astype(np.uint8)

        # Merge similar boxes using the merge slider (not the confidence slider);
        # there is nothing to merge when no box passed the confidence threshold
        if w_merge.value and rectangles:
            rectangles, merges = cv2.groupRectangles(rectangles, 1, eps=w_merge_thresh.value)

        if w_bbox.value:
            for rect in rectangles:
                # rect entries are used as two corner points (x1, y1) and (x2, y2)
                cv2.rectangle(img_draw,
                              (int(rect[0]*scale_width), int(rect[1]*scale_height)),
                              (int(rect[2]*scale_width), int(rect[3]*scale_height)),
                              (0, 255, 0), 5)

        # Visualization Code: push the annotated frame to the image widget as PNG
        encoded_ok, img_png = cv2.imencode('.png', img_draw)
        if encoded_ok:
            w_image.value = img_png.tobytes()

except KeyboardInterrupt:
    # Interrupting the kernel is the normal way to stop the live loop
    pass
finally:
    # Always free the camera so the cell can be run again
    cap.release()
