# Object Detection: TensorFlow vs OpenVINO
We will use the following notebook to compare model performance on TensorFlow before and after conversion to Intermediate Representation (IR) with Model Optimizer and OpenVINO Toolkit. Some of the code on this notebook was reused and modified from Object Detection From TF1 Saved Model: https://tensorflow-object-detection-api-tutorial.readthedocs.io/en/latest/auto_examples/plot_object_detection_saved_model_tf1.html.

# Object Detection with TensorFlow

## Object detection imports
Here are the imports from the object detection module.

In [8]:
import numpy as np
import os, os.path
import six.moves.urllib as urllib
import sys
import tarfile
import tensorflow as tf
import zipfile

from distutils.version import StrictVersion
from collections import defaultdict
from io import StringIO
from matplotlib import pyplot as plt
import matplotlib
import time

from PIL import Image
import glob
# This is needed since the notebook is stored in the object_detection folder.
sys.path.append("..")
from object_detection.utils import ops as utils_ops

if StrictVersion(tf.__version__) < StrictVersion('1.9.0'):
  raise ImportError('Please upgrade your TensorFlow installation to v1.9.* or later!')


## Env setup

In [9]:
# This is needed to display the images.
%matplotlib inline
print("TF",tf.__version__)

TF 1.15.4


## Model preparation 

## Variables and Prepare Images 
Any model exported using the `export_inference_graph.py` tool can be loaded here simply by changing `PATH_TO_FROZEN_GRAPH` to point to a new .pb file.  

In this app we will use "SSD Lite with MobilenetV2 COCO" model [here](http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v2_coco_2018_03_29.tar.gz). Other frozen topologies from TensorFlow supported by OpenVINO OpenVINO can be found in the [website](https://docs.openvinotoolkit.org/2019_R3.1/_docs_MO_DG_prepare_model_convert_model_Convert_Model_From_TensorFlow.html).

In this section the image paths are prepared and imported for use with TensorFlow and reuse for OpenVINO Inference Engine once inference phase is reached.

In [10]:
# What model to use
MODEL_NAME = '../model/ssdlite_mobilenet_v2_coco_2018_05_09'

# Path to frozen detection graph. This is the actual model that is used for the object detection.
PATH_TO_FROZEN_GRAPH = MODEL_NAME + '/frozen_inference_graph.pb'

## Load the frozen Tensorflow model into memory

In [11]:
detection_graph = tf.Graph()
with detection_graph.as_default():
  od_graph_def = tf.GraphDef()
  with tf.gfile.GFile(PATH_TO_FROZEN_GRAPH, 'rb') as fid:
    serialized_graph = fid.read()
    od_graph_def.ParseFromString(serialized_graph)
    tf.import_graph_def(od_graph_def, name='')

In [13]:
# Function to load a frame into numpy array
def load_image_into_numpy_array(image):
  (im_width, im_height) = image.size
  return np.array(image.getdata()).reshape(
      (im_height, im_width, 3)).astype(np.uint8)

## Detection

In [15]:
# Two different paths used for data images, as we will inference on classified frames by a person (frames w/people, frames w/out people)
PATH_TO_TEST_IMAGES_DIR = 'data/person/'
TEST_IMAGE_PATHS = []
valid_images = [".jpg"]
for f in os.listdir(PATH_TO_TEST_IMAGES_DIR):
    ext = os.path.splitext(f)[1]
    if ext.lower() not in valid_images:
        continue
    TEST_IMAGE_PATHS.append(os.path.join(PATH_TO_TEST_IMAGES_DIR,f))

PATH_TO_TEST_IMAGES_DIR2 = 'data/no-person/'
TEST_IMAGE_PATHS2 = []
valid_images = [".jpg"]
for f in os.listdir(PATH_TO_TEST_IMAGES_DIR2):
    ext = os.path.splitext(f)[1]
    if ext.lower() not in valid_images:
        continue
    TEST_IMAGE_PATHS2.append(os.path.join(PATH_TO_TEST_IMAGES_DIR2,f))

In [16]:
inference_time_avg = 0
def run_inference_for_single_image(IMAGE_PATHS):
    itime = 0
    start_inference = 0
    graph = detection_graph
    with graph.as_default():
        with tf.Session() as sess:
            scores_people = []
            duration = time.time()
            inference_time = 0
            inf_time = 0
            for image_path in IMAGE_PATHS:
                image = Image.open(image_path)
                image_np = load_image_into_numpy_array(image)
                # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
                image_np_expanded = np.expand_dims(image_np, axis=0)
                # Get handles to input and output tensors
                ops = tf.get_default_graph().get_operations()
                all_tensor_names = {output.name for op in ops for output in op.outputs}
                tensor_dict = {}
                for key in ['detection_scores']:
                    tensor_name = key + ':0'
                    if tensor_name in all_tensor_names:
                        tensor_dict[key] = tf.get_default_graph().get_tensor_by_name(
                        tensor_name)

                image_tensor = tf.get_default_graph().get_tensor_by_name('image_tensor:0')
                expanded = np.expand_dims(image, 0)
                # Run inference
                start_inference = time.time()
                output_dict = sess.run(tensor_dict,
                                     feed_dict={image_tensor: expanded})
                itime += (time.time() - start_inference)
                # all outputs are float32 numpy arrays
                output_dict['detection_scores'] = output_dict['detection_scores'][0]
                scores_people.append(output_dict['detection_scores'][0])
    return scores_people,itime

In [17]:
duration = time.time()
scores_people,inf_time = run_inference_for_single_image(TEST_IMAGE_PATHS)
duration = time.time() - duration

In [18]:
duration2 = time.time()
scores_nopeople,inf_time2 = run_inference_for_single_image(TEST_IMAGE_PATHS2)
duration2 = time.time() - duration2

In [19]:
inference_time_tot = inf_time + inf_time2
inference_time_avg = ((inference_time_tot/(len(TEST_IMAGE_PATHS)+len(TEST_IMAGE_PATHS2)))/2)*1000

## Calculate Frozen Model Accuracy

In [21]:
confidence = 0.10
tp = 0 # true positives
fn = 0 # false negatives
for score in scores_people:
    if(score >= confidence):
        tp = tp + 1
    if(score < confidence):
        fn = fn + 1
        
tn = 0 #true negatives
fp = 0 #false positives 
for score in scores_nopeople:
    if(score >= confidence):
        fp = fp + 1
    if(score < confidence):
        tn = tn + 1

In [22]:
people = np.array(scores_people)
people_frames = np.array(TEST_IMAGE_PATHS) 
nopeople = np.array(scores_nopeople)
nopeople_frames = np.array(TEST_IMAGE_PATHS2) 

output = np.column_stack((people_frames,people))
output2 = np.column_stack((nopeople_frames,nopeople))

np.savetxt("results/people-results-tf.csv", output, delimiter=',', header="Frame,Score", fmt='%s')
np.savetxt("results/no-people-results-tf.csv", output2, delimiter=',', header="Frame,Score", fmt='%s')

test = len(TEST_IMAGE_PATHS) + len(TEST_IMAGE_PATHS2)
accuracy = ((tp + tn) / test)*100

## Results of TensorFlow Frozen Model


In [23]:
#print("TP: {}  |  FN: {}  |  FP: {}  |  TN: {}".format(tp,fn,fp,tn))
print("Inference Time:\t {:.02f} sec".format(inference_time_tot))
print("Avg. Inference:\t {:.02f} ms".format((inference_time_avg)))
print("Accuracy:\t {:.2f}% \nFPS:\t\t {:.2f}".format(accuracy,test/(duration + duration2 - inference_time_tot)))

Inference Time:	 36.45 sec
Avg. Inference:	 13.07 ms
Accuracy:	 92.04% 
FPS:		 4.79


# Object Detection with OpenVINO

## Imports

In [24]:
import sys
import os
from argparse import ArgumentParser, SUPPRESS
import cv2
import time
import logging as log
from openvino.inference_engine import IENetwork, IEPlugin

In [25]:
def run_inference(image_paths, device):
    try:
        log.basicConfig(format="[ %(levelname)s ] %(message)s", level=log.INFO, stream=sys.stdout)
        
        # variables
        avg_inf_time = 0
        cpu_extension = "/opt/intel/openvino/deployment_tools/inference_engine/lib/intel64/libcpu_extension_avx2.so"
        device = device
        frames_processed = 0
        model_path = "../model/ssdlite_mobilenet_v2_coco_2018_05_09/ssdlite_mobilenet_v2_coco.xml"
        model_xml = model_path
        model_bin = os.path.splitext(model_xml)[0] + ".bin"
        plugin_dir = None
        prob_threshold = 0.0
        
        # Plugin initialization for specified device and load extensions library if specified
        log.info("Initializing plugin for {} device...".format(device))
        plugin = IEPlugin(device=device, plugin_dirs=plugin_dir)
        
        if cpu_extension and device == 'CPU':
            plugin.add_cpu_extension(cpu_extension)
        # Read IR
        log.info("Reading IR...")
        net = IENetwork(model=model_xml, weights=model_bin)

        if plugin.device == "CPU":
            supported_layers = plugin.get_supported_layers(net)
            not_supported_layers = [l for l in net.layers.keys() if l not in supported_layers]
            if len(not_supported_layers) != 0:
                log.error("Following layers are not supported by the plugin for specified device {}:\n {}".
                          format(plugin.device, ', '.join(not_supported_layers)))
                log.error("Please try to specify cpu extensions library path in demo's command line parameters using -l "
                          "or --cpu_extension command line argument")
                sys.exit(1)
                
        assert len(net.inputs.keys()) == 1, "App supports only single input topologies"
        assert len(net.outputs) == 1, "App supports only single output topologies"
        input_blob = next(iter(net.inputs))
        out_blob = next(iter(net.outputs))
        log.info("Loading IR to the plugin...")
        exec_net = plugin.load(network=net, num_requests=2)
        # Read and pre-process input image
        n, c, h, w = net.inputs[input_blob].shape
        del net
        total_inf_start = time.time()
        inf_scores = []
        log.info("Starting inference requests in sync mode...")
        for image_path in image_paths:
            
            input_stream = image_path
            assert os.path.isfile(image_path), "Specified input file doesn't exist"
            
            cap = cv2.VideoCapture(input_stream)

            cur_request_id = 0

            render_time = 0

            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break
                initial_w = cap.get(3)
                initial_h = cap.get(4)

                in_frame = cv2.resize(frame, (w, h))
                in_frame = in_frame.transpose((2, 0, 1))  # Change data layout from HWC to CHW
                in_frame = in_frame.reshape((n, c, h, w))
                inf_start = time.time()
                exec_net.start_async(request_id=cur_request_id, inputs={input_blob: in_frame})
                if exec_net.requests[cur_request_id].wait(-1) == 0:

                    # Parse detection results of the current request
                    res = exec_net.requests[cur_request_id].outputs[out_blob]
                    # Append score of inference request result
                    inf_scores.append(res[0][0][0,2])
                    avg_inf_time += (time.time() - inf_start)
        
        log.info("Inference requests completed")
        
        # Report performance stats
        total_inf_end = time.time()
        total_det_time = total_inf_end - total_inf_start
        inf_time_message = "Total detection time on {}: {:.3f} sec".format(device,total_det_time)
        print(inf_time_message)
        print("Avg. inference time: {} ms".format((avg_inf_time/len(image_paths))*1000))
        
        return inf_scores,total_det_time,avg_inf_time;

    except ValueError: 
        sys.exit(1)

## Run Inference with Inference Engine
Here we begin inferencing with device of choice. Possible device options if present on system are:
{CPU, GPU, MYRIAD, HDDL}

In [26]:
device = "CPU"
print("person data baseline: ")
scores_people2, detection_time3, avg_inf3 = run_inference(TEST_IMAGE_PATHS, device)
print("\nno-person data baseline:")
scores_nopeople2, detection_time4, avg_inf4 = run_inference(TEST_IMAGE_PATHS2, device)

person data baseline: 
[ INFO ] Initializing plugin for CPU device...
[ INFO ] Reading IR...
[ INFO ] Loading IR to the plugin...
[ INFO ] Starting inference requests in sync mode...
[ INFO ] Inference requests completed
Total detection time on CPU: 10.574 sec
Avg. inference time: 6.987771987915039 ms

no-person data baseline:
[ INFO ] Initializing plugin for CPU device...
[ INFO ] Reading IR...
[ INFO ] Loading IR to the plugin...
[ INFO ] Starting inference requests in sync mode...
[ INFO ] Inference requests completed
Total detection time on CPU: 3.493 sec
Avg. inference time: 7.1150015952975245 ms


## Calculate Optimized Model Accuracy

In [27]:
tp2 = 0 # true positives
fn2 = 0 # false negatives
for score in scores_people2:
    if(score >= confidence):
        tp2 = tp2 + 1
    if(score < confidence):
        fn2 = fn2 + 1
        
tn2 = 0 #true negatives
fp2 = 0 #false positives 
for score in scores_nopeople2:
    if(score >= confidence): # Threshold of .25
        fp2 = fp2 + 1
    if(score < confidence):
        tn2 = tn2 + 1
        

people2 = np.array(scores_people2)
people_frames2 = np.array(TEST_IMAGE_PATHS) 
nopeople2 = np.array(scores_nopeople2)
nopeople_frames2 = np.array(TEST_IMAGE_PATHS2) 
test2 = len(TEST_IMAGE_PATHS) + len(TEST_IMAGE_PATHS2)
accuracy2 = ((tp2 + tn2) / test2)*100

output3 = np.column_stack((people_frames2,people2))
output4 = np.column_stack((nopeople_frames2,nopeople2))

np.savetxt("results/people-results-openvino-cpu.csv", output3, delimiter=',', header="Frame,Score", fmt='%s')
np.savetxt("results/no-people-results-openvino-cpu.csv", output4, delimiter=',', header="Frame,Score", fmt='%s')

inference_time_avg2 = (avg_inf3 + avg_inf4)/2

fps = test/(duration + duration2 - inference_time_tot)
fps2 = test2/(detection_time3 + detection_time4)

## Results of Optimized Model with OpenVINO

In [28]:
#print("TP: {}  |  FN: {}  |  FP: {}  |  TN: {}".format(tp,fn,fp,tn))
print("Inference Time:\t {:.02f} sec\nAvg. Inference:  {:.2f} ms".format((detection_time3 + detection_time4),(avg_inf3 + avg_inf4)/2))   
print("Accuracy:\t {:.2f}% \nFPS:\t\t {:.2f}".format((accuracy2),test2/(detection_time3 + detection_time4)))

Inference Time:	 14.07 sec
Avg. Inference:  4.89 ms
Accuracy:	 88.95% 
FPS:		 99.09


## Frozen Model and Intemmediate Representation File Sizes

In [29]:
# Get model file sizes
file_size_pb = 0
file_size_xml = 0
file_size_bin = 0
file_stats = os.stat("../model/ssdlite_mobilenet_v2_coco_2018_05_09/frozen_inference_graph.pb")
file_size_pb = file_stats.st_size / (1024 * 1024)
file_stats = os.stat("../model/ssdlite_mobilenet_v2_coco_2018_05_09/ssdlite_mobilenet_v2_coco.xml")
file_size_xml = file_stats.st_size / (1024 * 1024)
file_stats = os.stat("../model/ssdlite_mobilenet_v2_coco_2018_05_09/ssdlite_mobilenet_v2_coco.bin")
file_size_bin = file_stats.st_size / (1024 * 1024)
print("Model File Size (MB):\t{:.02f}\t{:.02f}\t{:.02f}".format(file_size_pb,file_size_xml + file_size_bin, ((file_size_xml + file_size_bin-file_size_pb)/(file_size_pb)*-100)))

Model File Size (MB):	18.99	17.19	9.46


## Results Comparison

In [30]:
res_headers = "Metric\tTensorFlow\tOpenVINO\t%Difference"
res_inf = "Inference Time (s):\t{:.02f}\t{:.02f}\t{:.02f}".format((inference_time_tot),(detection_time3 + detection_time4),(((detection_time3 + detection_time4)-inference_time_tot)/(inference_time_tot)*-100))
res_inf2 = "Avg. Inference (ms):\t{:.02f}\t{:.02f}\t{:.02f}".format(inference_time_avg,inference_time_avg2,((inference_time_avg2-inference_time_avg)/(inference_time_avg)*-100))
res_acc = "Accuracy (%):\t{:.02f}\t{:.02f}\t{:.02f}".format(accuracy,accuracy2,((accuracy2-accuracy)/accuracy*-100))
res_fps = "FPS:\t{:.2f}\t{:.2f}\t{:.02f}".format(fps,fps2,((fps2-fps)/fps)*100)
res_size = "Model File Size (MB):\t{:.02f}\t{:.02f}\t{:.02f}".format(file_size_pb,file_size_xml + file_size_bin, ((file_size_xml + file_size_bin-file_size_pb)/(file_size_pb)*-100))

In [31]:
res_inf = np.array(res_inf)
res_inf2 = np.array(res_inf2)
res_acc = np.array(res_acc)
res_fps = np.array(res_fps)
res_size = np.array(res_size)

output = np.row_stack((res_inf,res_inf2,res_acc,res_fps,res_size))

np.savetxt("results/tf-openvino-metrics.csv", output, delimiter=',', header=res_headers, fmt='%s')
dir_path = os.getcwd()
print("Find results stored in {}/tf-openvino-metrics.csv".format(dir_path))

Find results stored in /home/leavitia/udacity/people-counter-app/resources/tf-openvino-metrics.csv


In [32]:
print(res_headers,"\n",res_inf,"\n",res_inf2,"\n",res_acc,"\n",res_fps,"\n",res_size)

Metric	TensorFlow	OpenVINO	%Difference 
 Inference Time (s):	36.45	14.07	61.41 
 Avg. Inference (ms):	13.07	4.89	62.58 
 Accuracy (%):	92.04	88.95	3.35 
 FPS:	4.79	99.09	1970.12 
 Model File Size (MB):	18.99	17.19	9.46
