# OpenVINO Demo: Real Time Detection

## Let's detect objects in real time using a cam feed!

Continuing on from the last demo, let's build on what we learned and detect things in real time! We will redo essentially everything we did before, but instead of doing inference on a single image, we do inference on each image that comes from a camera feed and display the drawn-on image. Inference is very fast on Intel hardware, so it looks like we are detecting objects in real time.

From the beginning, let's create functions that will help us out later. After we import all the libraries/packages we need, let's define our first function which will pre-process our image.

In [None]:
# Loading required packages
import cv2
import numpy as np
import matplotlib.pyplot as plt
from openvino.inference_engine import IENetwork
from openvino.inference_engine import IEPlugin
import time

%matplotlib inline

# CHANGE THIS AS NEEDED
# OS selects the model and extension paths used below: 'linux' or 'windows'.
OS = 'linux'
# 'MYRIAD' targets a plugged-in Intel NCS 2 (FP16 model); set to 'CPU' to use
# the FP32 model with the CPU extension library instead.
dev = 'MYRIAD'

In [None]:
def pre_processing(obj_frame, input_shape):
    """Resize a frame to the network input size and reshape it into a blob.

    Args:
        obj_frame: Image array to feed to the network (presumably
            height x width x channels, as the ``(2, 0, 1)`` transpose
            implies; confirm against the capture source).
        input_shape: Four-element sequence ``(n, c, h, w)`` describing the
            network input.

    Returns:
        dict: ``'blob'`` holds the resized, transposed and reshaped array;
        ``'frame'`` holds the input frame object itself.
    """
    batch, channels, height, width = input_shape

    # cv2.resize expects the target size as (width, height).
    resized = cv2.resize(obj_frame, (width, height))

    # Bring the last axis to the front, then reshape to (n, c, h, w).
    blob = resized.transpose((2, 0, 1)).reshape((batch, channels, height, width))

    return {'blob': blob, 'frame': obj_frame}

Let's now define our second function which will return our net and useful information about it!

In [None]:
def construct_nn(path_to_xml, path_to_bin, dev, OS):
    """Load a model onto a device and return it with its I/O metadata.

    Args:
        path_to_xml: Path passed to ``IENetwork`` as ``model``.
        path_to_bin: Path passed to ``IENetwork`` as ``weights``.
        dev: Device name handed to ``IEPlugin``; when it is ``'cpu'``
            (case-insensitive) a CPU extension library is registered first.
        OS: ``'windows'`` (case-insensitive) selects the Windows extension
            path; any other value selects the Linux one.

    Returns:
        dict: ``'net'`` (the loaded executable network), ``'input_layer'`` and
        ``'output_layer'`` (the first input/output names of the network) and
        ``'shape'`` (the shape of that first input).
    """
    network = IENetwork(model=path_to_xml, weights=path_to_bin)
    # Only the first input and the first output of the network are used.
    input_name = next(iter(network.inputs))
    output_name = next(iter(network.outputs))
    input_shape = network.inputs[input_name].shape

    # NOTE(review): both extension paths are machine-specific install
    # locations; adjust them for the local OpenVINO installation.
    cpu_extension = (
        'C:\\Users\\freyes\\Documents\\Intel\\OpenVINO\\inference_engine_samples_build\\intel64\\Release\\cpu_extension.dll'
        if OS.lower() == 'windows'
        else '/opt/intel/openvino/deployment_tools/inference_engine/lib/intel64/libcpu_extension_avx2.so'
    )

    plugin = IEPlugin(device=dev)
    if dev.lower() == 'cpu':
        # The extension library is registered for the CPU device only.
        plugin.add_cpu_extension(cpu_extension)
    executable = plugin.load(network=network, num_requests=1)

    return {
        'net': executable,
        'input_layer': input_name,
        'output_layer': output_name,
        'shape': input_shape,
    }

Let's now define our third function, which will process the detection results and draw a bounding box around each object detected in the image.

In [None]:
def draw_bb(obj_det, obj_frame):
    initial_w = obj_frame.shape[1]
    initial_h = obj_frame.shape[0]
    green = (0, 255, 0)

    for obj in obj_det[0][0]:
        # Draw only objects when probability more than specified threshold
        if obj[2] > 0.5:
            xmin = int(obj[3] * initial_w)
            ymin = int(obj[4] * initial_h)
            xmax = int(obj[5] * initial_w)
            ymax = int(obj[6] * initial_h)
            class_id = int(obj[1])

            # Draw box and label\class_id
            color = (min(class_id * 12.5, 255), min(class_id * 7, 255), min(class_id * 5, 255))
            cv2.rectangle(obj_frame, (xmin, ymin), (xmax, ymax), color, 2)
            cv2.putText(obj_frame, str(class_id), (xmin, ymin), cv2.FONT_HERSHEY_SIMPLEX, 0.8, green, 2, cv2.LINE_AA)
    

Finally, let's put the pieces together!

In [None]:
# Object Detection Section
def main(OS, dev):
    """Run live object detection on camera 0 until 'q' is pressed.

    Loads the gesture model (FP32 when ``dev`` is ``'cpu'``, FP16 otherwise),
    then repeatedly grabs a frame, runs inference, draws the detections plus
    timing information on the frame and shows it in a window.

    Args:
        OS: ``'linux'`` or ``'windows'`` (case-insensitive); selects the path
            separator style of the model files. Any other value prints a
            message and returns without doing anything.
        dev: Device name forwarded to ``construct_nn``.

    Returns:
        None.
    """
    path_to_objxml = None
    path_to_objbin = None

    fp = 'fp32' if dev.lower() == 'cpu' else 'fp16'
    platform = OS.lower()
    if platform == 'linux':
        path_to_objxml = 'gesture_optimized/' + fp + '/frozen_inference_graph.xml'
        path_to_objbin = 'gesture_optimized/' + fp + '/frozen_inference_graph.bin'
    elif platform == 'windows':
        # Fixed: this branch used the undefined name `os` instead of `OS`.
        path_to_objxml = 'gesture_optimized\\' + fp + '\\frozen_inference_graph.xml'
        path_to_objbin = 'gesture_optimized\\' + fp + '\\frozen_inference_graph.bin'
    else:
        print("Need to have either linux or windows!")
        return

    # net, input_layer, output_layer, shape
    net_dict = construct_nn(path_to_objxml, path_to_objbin, dev, OS)
    obj_exec_net = net_dict['net']
    input_shape = net_dict['shape']
    input_layer = net_dict['input_layer']
    output_layer = net_dict['output_layer']

    vs = cv2.VideoCapture(0)
    if not vs.isOpened():
        vs.release()
        print("Could not open the camera (device 0).")
        return

    print("Click on the window and press q to exit the application.")
    font = cv2.FONT_HERSHEY_SIMPLEX
    try:
        while True:
            ret, vframe = vs.read()
            if not ret:
                # A failed read yields no usable frame; stop instead of
                # passing it on to the resize step.
                print("No frame received from the camera; stopping.")
                break

            image_dict = pre_processing(vframe, input_shape)
            inpBlob = image_dict['blob']

            # Starting timer (covers inference and drawing of the boxes)
            start = time.perf_counter()
            obj_res = obj_exec_net.infer({input_layer: inpBlob})

            obj_detections = obj_res[output_layer]

            draw_bb(obj_detections, image_dict['frame'])

            # Time stamp
            stamp = time.perf_counter() - start  # elapsed time in seconds
            # Guard against a zero interval on coarse timers.
            fps = 1 / stamp if stamp > 0 else float('inf')
            cv2.putText(vframe, "Time: " + str(stamp), (30, 30), font, 0.8, (255, 0, 0), 2, cv2.LINE_AA)
            cv2.putText(vframe, "FPS: " + str(fps), (30, 60), font, 0.8, (255, 0, 0), 2, cv2.LINE_AA)
            cv2.imshow("Frame", image_dict['frame'])

            key = cv2.waitKey(1) & 0xFF
            if key == ord('q'):
                break
    finally:
        # do a bit of cleanup, even if the loop exits through an exception
        vs.release()
        cv2.destroyAllWindows()

# Start the demo with the OS and dev values set earlier in this file.
main(OS, dev)

# Congratulations!

We now know how to detect objects in real time!