# Object Detection Python\* Demo

This demo showcases Object Detection with Sync and Async API.

Async API usage can improve the overall frame rate of the application: rather than waiting for inference to complete,
the app can continue doing things on the host while the accelerator is busy.
Specifically, this demo keeps the number of Infer Requests that you have set using the `-nireq` flag.
While some of the Infer Requests are processed by the Inference Engine (IE), the other ones can be filled with new frame data
and asynchronously started, or the next output can be taken from a completed Infer Request and displayed.

The technique can be generalized to any available parallel slack, for example, doing inference and simultaneously
encoding the resulting (previous) frames or running further inference, like some emotion detection on top of
the face detection results.
There are important performance caveats though, for example the tasks that run in parallel should try to avoid
oversubscribing the shared compute resources.
For example, if the inference is performed on the FPGA and the CPU is essentially idle,
then it makes sense to do things on the CPU in parallel. But if the inference is performed, say, on the GPU,
then there is little to gain from doing the (resulting video) encoding on the same GPU in parallel,
because the device is already busy.

This and other performance implications and tips for the Async API are covered in the
[Optimization Guide](https://docs.openvinotoolkit.org/latest/_docs_optimization_guide_dldt_optimization_guide.html).

Other demo objectives are:
* Video as input support via OpenCV\*
* Visualization of the resulting bounding boxes and text labels (from the `.labels` file)
  or class number (if no file is provided)



In [None]:
# Create a single-file upload widget; the next cell reads the chosen file from `uploader.value`.
import ipywidgets as widgets
uploader = widgets.FileUpload(multiple=False)
# Bare expression as the last statement of the cell so the notebook renders the widget.
uploader

In [None]:
# Local file the uploaded video is written to; later cells open it with OpenCV.
input_filename = 'uploaded_video.mp4'

uploaded = uploader.value
if not uploaded:
    # Fail with an actionable message instead of a bare StopIteration/IndexError.
    raise RuntimeError('No file has been uploaded: choose a video in the upload widget and re-run this cell')

# FileUpload.value is a {filename: {...}} dict in ipywidgets 7 and a tuple of
# per-file dicts (with 'name' and 'content' keys) in ipywidgets 8; support both.
if isinstance(uploaded, dict):
    uploaded_filename = next(iter(uploaded))
    content = uploaded[uploaded_filename]['content']
else:
    uploaded_file = uploaded[0]
    uploaded_filename = uploaded_file['name']
    content = uploaded_file['content']

with open(input_filename, 'wb') as f:
    f.write(content)

In [None]:
# Notebook configuration: edit these values before running the cells below.

# Path to the model's .xml file; it is wrapped in `Path` and handed to `get_model`.
model_xml = '/home/lena/omz_models/public/yolo-v3-tf/FP16/yolo-v3-tf.xml'
# Alternative to the upload cell: point `input_filename` at a local video instead.
#input_filename = '/home/lena/data/supersupershort_10fps.mp4'
# Model family understood by `get_model`: 'ssd', 'yolo', 'faceboxes', 'centernet' or 'retina'.
architecture_type = 'yolo'
# Presumably the number of infer requests for the async pipeline -- TODO confirm where it is consumed.
num_infer_requests = 3
# When True, the input is re-opened once it runs out of frames instead of stopping.
loop = False
# Detections whose score does not exceed this value are not drawn.
prob_threshold=0.5
# NOTE(review): not referenced in the visible cells; presumably meant for the `monitors` module.
utilization_monitors=''

In [None]:
import colorsys
import logging
import os.path 
import random
import sys
from argparse import ArgumentParser, SUPPRESS
from time import perf_counter
import matplotlib.pyplot as plt
import cv2
import numpy as np
from openvino.inference_engine import IECore
from IPython.display import clear_output
from pathlib import Path

# Three directory levels above the current working directory
# (expected to be the Open Model Zoo checkout root -- TODO confirm).
open_model_zoo_path = os.path.abspath(
    os.path.join(os.curdir, os.pardir, os.pardir, os.pardir)
)
base_model_dir = os.curdir  # Models will be downloaded into the `intel` folder in this directory
omz_cache_dir = os.path.expanduser("~/open_model_zoo_cache")

# Make the shared demo helpers (models, pipelines, monitors, ...) importable.
sys.path.append(
    os.path.join(open_model_zoo_path, "demos", "common", "python")
)

from models import *
import monitors
from pipelines import AsyncPipeline
from performance_metrics import PerformanceMetrics


In [None]:
class ColorPalette:
    """Palette of `n` RGB colors chosen to be far apart from each other.

    Colors are selected greedily in HSV space: every new color is the one,
    out of a batch of random candidates, whose distance to the closest
    already-selected color is the largest. Indexing wraps around, so any
    integer class id maps to a color.
    """

    def __init__(self, n, rng=None):
        """Build the palette.

        Args:
            n: Number of colors to generate; must be positive.
            rng: Optional `random.Random`-like generator. When omitted, a
                generator with a fixed seed is used so the palette is the
                same on every run.
        """
        assert n > 0

        generator = random.Random(0xACE) if rng is None else rng

        batch_size = 100
        chosen = [(1.0, 1.0, 1.0)]
        while len(chosen) < n:
            batch = []
            for _ in range(batch_size):
                # Draw order (hue, saturation, value) matters for reproducibility.
                hue = generator.random()
                saturation = generator.uniform(0.8, 1.0)
                value = generator.uniform(0.5, 1.0)
                batch.append((hue, saturation, value))
            scores = [self.min_distance(chosen, candidate) for candidate in batch]
            chosen.append(batch[np.argmax(scores)])

        self.palette = [self.hsv2rgb(*color) for color in chosen]

    @staticmethod
    def dist(c1, c2):
        """Return the squared distance between two HSV triples (hue is cyclic)."""
        hue_gap = abs(c1[0] - c2[0])
        # Hue wraps around at 1.0, so take the shorter way around the circle.
        dh = min(hue_gap, 1 - hue_gap) * 2
        ds = abs(c1[1] - c2[1])
        dv = abs(c1[2] - c2[2])
        return dh * dh + ds * ds + dv * dv

    @classmethod
    def min_distance(cls, colors_set, color_candidate):
        """Return the distance from `color_candidate` to its nearest color in `colors_set`."""
        return np.min([cls.dist(existing, color_candidate) for existing in colors_set])

    @staticmethod
    def hsv2rgb(h, s, v):
        """Convert HSV components in [0, 1] to an (R, G, B) tuple of 0-255 integers."""
        red, green, blue = colorsys.hsv_to_rgb(h, s, v)
        return round(red * 255), round(green * 255), round(blue * 255)

    def __getitem__(self, n):
        """Return the color for index `n`, wrapping around the palette length."""
        index = n % len(self.palette)
        return self.palette[index]

    def __len__(self):
        """Return the number of colors in the palette."""
        return len(self.palette)


def get_model(ie, model, architecture_type, labels, keep_aspect_ratio=False, prob_threshold=0.5):
    """Instantiate the model wrapper that corresponds to `architecture_type`.

    Args:
        ie: Inference Engine core object forwarded to the wrapper.
        model: Model description forwarded to the wrapper.
        architecture_type: One of 'ssd', 'yolo', 'faceboxes', 'centernet', 'retina'.
        labels: Labels argument forwarded to wrappers that accept one.
        keep_aspect_ratio: Forwarded to the 'ssd' and 'yolo' wrappers only.
        prob_threshold: Forwarded as `threshold` to every wrapper except 'ssd'.

    Returns:
        The constructed wrapper object.

    Raises:
        RuntimeError: If `architecture_type` is not one of the supported names.
    """
    if architecture_type == 'ssd':
        return SSD(ie, model, labels=labels, keep_aspect_ratio_resize=keep_aspect_ratio)

    if architecture_type == 'yolo':
        return YOLO(ie, model, labels=labels,
                    threshold=prob_threshold, keep_aspect_ratio=keep_aspect_ratio)

    if architecture_type == 'faceboxes':
        return FaceBoxes(ie, model, threshold=prob_threshold)

    if architecture_type == 'centernet':
        return CenterNet(ie, model, labels=labels, threshold=prob_threshold)

    if architecture_type == 'retina':
        return RetinaFace(ie, model, threshold=prob_threshold)

    raise RuntimeError(f'No model type or invalid model type (-at) provided: {architecture_type}')


def put_highlighted_text(frame, message, position, font_face, font_scale, color, thickness):
    """Draw `message` on `frame` twice: a slightly thicker white pass, then the colored text on top."""
    strokes = (
        ((255, 255, 255), thickness + 1),  # white border
        (color, thickness),
    )
    for stroke_color, stroke_thickness in strokes:
        cv2.putText(frame, message, position, font_face, font_scale, stroke_color, stroke_thickness)


def get_plugin_configs(device, num_streams, num_threads):
    """Build the plugin configuration dictionary from the user's performance settings.

    Args:
        device: Target device string such as 'CPU', 'GPU' or 'HETERO:CPU,GPU'.
            Devices are detected by substring match.
        num_streams: Either a stream count (digit string or positive int) that is
            applied to every CPU/GPU device named in `device`, or a comma-separated
            list of '<device>:<nstreams>' pairs. A falsy value (None, '', 0)
            means no stream settings are produced.
        num_threads: Number of CPU threads, or None to leave it unset.

    Returns:
        A dict with string keys and values. A stream count that is not
        positive is translated to the '<DEVICE>_THROUGHPUT_AUTO' value.

    Raises:
        ValueError: If a '<device>:<nstreams>' entry has no ':' separator, or
            the stream count of a device in use is not an integer.
    """
    config_user_specified = {}

    devices_nstreams = {}
    if num_streams:
        # Accept plain integers as well as strings.
        num_streams = str(num_streams)
        if num_streams.isdigit():
            # The loop variable must not shadow `device`, otherwise the
            # membership test compares each name with itself.
            devices_nstreams = {name: num_streams for name in ('CPU', 'GPU') if name in device}
        else:
            devices_nstreams = dict(entry.split(':', 1) for entry in num_streams.split(','))

    if 'CPU' in device and num_threads is not None:
        config_user_specified['CPU_THREADS_NUM'] = str(num_threads)

    for name in ('CPU', 'GPU'):
        if name in device and name in devices_nstreams:
            nstreams = devices_nstreams[name]
            config_user_specified[name + '_THROUGHPUT_STREAMS'] = \
                nstreams if int(nstreams) > 0 else name + '_THROUGHPUT_AUTO'

    return config_user_specified


def draw_detections(frame, detections, palette, labels, threshold, draw_landmarks=False):
    """Draw boxes, captions and (optionally) landmarks of confident detections on `frame`.

    Args:
        frame: Image array; `frame.shape[:2]` is read as (height, width) and the
            drawing is done in place through OpenCV.
        detections: Iterable of objects exposing `score`, `xmin`, `ymin`, `xmax`,
            `ymax`, `id` and, when `draw_landmarks` is set, `landmarks`.
        palette: Object indexable by class id that yields the drawing color.
        labels: Optional sequence of class names indexed by class id. When it is
            missing or has no entry for a class id, '#<class id>' is shown.
        threshold: Only detections with `score` strictly above this value are drawn.
        draw_landmarks: When True, every point in `detection.landmarks` is drawn
            as a small circle.

    Returns:
        The same `frame` object that was passed in.
    """
    size = frame.shape[:2]
    for detection in detections:
        if detection.score > threshold:
            # Clamp the box to the image bounds.
            xmin = max(int(detection.xmin), 0)
            ymin = max(int(detection.ymin), 0)
            xmax = min(int(detection.xmax), size[1])
            ymax = min(int(detection.ymax), size[0])
            class_id = int(detection.id)
            color = palette[class_id]
            # Valid indices are 0 .. len(labels) - 1; anything else falls back
            # to the numeric form instead of raising IndexError.
            det_label = labels[class_id] if labels and 0 <= class_id < len(labels) else '#{}'.format(class_id)
            cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), color, 2)
            cv2.putText(frame, '{} {:.1%}'.format(det_label, detection.score),
                        (xmin, ymin - 7), cv2.FONT_HERSHEY_COMPLEX, 0.6, color, 1)
            if draw_landmarks:
                for landmark in detection.landmarks:
                    cv2.circle(frame, landmark, 2, (0, 255, 255), 2)
    return frame


def print_raw_results(size, detections, labels, threshold):
    """Print a table of the detections whose score is above `threshold`.

    Args:
        size: (height, width) pair used to clamp the box coordinates.
        detections: Iterable of objects exposing `score`, `xmin`, `ymin`, `xmax`,
            `ymax` and `id`.
        labels: Optional sequence of class names indexed by class id. When it is
            missing or has no entry for a class id, '#<class id>' is printed.
        threshold: Only detections with `score` strictly above this value are printed.
    """
    print(' Class ID | Confidence | XMIN | YMIN | XMAX | YMAX ')
    for detection in detections:
        if detection.score > threshold:
            # Clamp the box to the image bounds.
            xmin = max(int(detection.xmin), 0)
            ymin = max(int(detection.ymin), 0)
            xmax = min(int(detection.xmax), size[1])
            ymax = min(int(detection.ymax), size[0])
            class_id = int(detection.id)
            # Valid indices are 0 .. len(labels) - 1; anything else falls back
            # to the numeric form instead of raising IndexError.
            det_label = labels[class_id] if labels and 0 <= class_id < len(labels) else '#{}'.format(class_id)
            print('{:^9} | {:10f} | {:4} | {:4} | {:4} | {:4} '
                  .format(det_label, detection.score, xmin, ymin, xmax, ymax))

In [None]:
ie = IECore()

# Hand the configured threshold to the model wrapper so it agrees with the
# value used later when drawing, instead of silently relying on the default.
model = get_model(ie, model=Path(model_xml), architecture_type=architecture_type, labels=None,
                  prob_threshold=prob_threshold)
# Arguments: target device, number of throughput streams, number of CPU threads.
plugin_config = get_plugin_configs('CPU', '5', '3')

In [None]:
# Landmarks are only drawn for the 'retina' architecture.
has_landmarks = architecture_type == 'retina'

# Size the request pool from the notebook configuration rather than a hard-coded number.
detector_pipeline = AsyncPipeline(ie, model, plugin_config, device='CPU', max_num_requests=num_infer_requests)

cap = cv2.VideoCapture(input_filename)

next_frame_id = 0
next_frame_id_to_show = 0

palette = ColorPalette(len(model.labels) if model.labels else 100)

metrics = PerformanceMetrics()


def show_result(results):
    """Draw the detections of one finished frame, update the metrics and display it inline."""
    objects, frame_meta = results
    frame = frame_meta['frame']
    start_time = frame_meta['start_time']

    # Uncomment to dump the raw detections as text:
    # if len(objects):
    #     print_raw_results(frame.shape[:2], objects, model.labels, prob_threshold)

    # OpenCV delivers BGR frames while matplotlib expects RGB.
    frame = draw_detections(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB), objects, palette, model.labels,
                            prob_threshold, has_landmarks)
    metrics.update(start_time, frame)
    clear_output(wait=True)
    plt.imshow(frame)
    plt.show()


try:
    while cap.isOpened():
        if detector_pipeline.callback_exceptions:
            raise detector_pipeline.callback_exceptions[0]
        # Process all completed requests
        results = detector_pipeline.get_result(next_frame_id_to_show)
        if results:
            show_result(results)
            next_frame_id_to_show += 1

        if detector_pipeline.is_ready():
            # Get new image/frame
            start_time = perf_counter()
            ret, frame = cap.read()
            if not ret:
                if loop:
                    # Start over from the beginning of the same input.
                    cap.open(input_filename)
                else:
                    # Releasing the capture makes cap.isOpened() false and ends the loop.
                    cap.release()
                continue

            # Submit for inference
            detector_pipeline.submit_data(frame, next_frame_id, {'frame': frame, 'start_time': start_time})
            next_frame_id += 1

        else:
            # Wait for empty request
            detector_pipeline.await_any()
finally:
    # Do not keep the video file open if the loop is interrupted or fails.
    cap.release()

detector_pipeline.await_all()

# Process completed requests
while detector_pipeline.has_completed_request():
    results = detector_pipeline.get_result(next_frame_id_to_show)
    if results:
        show_result(results)

    next_frame_id_to_show += 1

metrics.print_total()
#print(presenter.reportMeans())

## Demo Output

The demo uses matplotlib to display the resulting frame with detections (rendered as bounding boxes and labels, if provided).
The demo reports:

* **FPS**: average rate of video frame processing (frames per second).
* **Latency**: average time required to process one frame (from reading the frame to displaying the results).

You can use both of these metrics to measure application-level performance.


## See Also
* [Using Open Model Zoo demos](../README.md)
* [Model Optimizer](https://docs.openvinotoolkit.org/latest/_docs_MO_DG_Deep_Learning_Model_Optimizer_DevGuide.html)
* [Model Downloader](../../tools/downloader/README.md)