# Vehicle detection
Welcome to the object detection inference walkthrough!  This notebook will walk you step by step through the process of using a pre-trained model to detect objects in an image. Make sure to follow the [installation instructions](https://github.com/tensorflow/models/blob/master/object_detection/g3doc/installation.md) before you start.

# Imports

In [1]:
import numpy as np
import os
import sys
import tensorflow as tf

from collections import defaultdict
from io import StringIO
from matplotlib import pyplot as plt
from PIL import Image

## Env setup

In [2]:
# This is needed to display the images.
%matplotlib inline

In [3]:
## Show the tensorflow version

In [4]:
from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())


[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 6150007027338401143
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 1823563776
locality {
  bus_id: 1
}
incarnation: 12897577544258497738
physical_device_desc: "device: 0, name: Quadro M1000M, pci bus id: 0000:01:00.0, compute capability: 5.0"
]


## Object detection imports
Here are the imports from the object detection module.

In [5]:
from utils import label_map_util

from utils import visualization_utils as vis_util

# Model preparation 

## Variables

Any model exported using the `export_inference_graph.py` tool can be loaded here simply by changing `PATH_TO_CKPT` to point to a new .pb file.  

By default we use an "SSD with Mobilenet" model here. See the [detection model zoo](https://github.com/tensorflow/models/blob/master/object_detection/g3doc/detection_model_zoo.md) for a list of other models that can be run out-of-the-box with varying speeds and accuracies.




In [1]:

CKPT = 'pre_trained_models/ssd_mobilenet_v1_coco_2017_11_17/frozen_inference_graph.pb'
PATH_TO_LABELS = 'data/mscoco_label_map.pbtxt'

NUM_CLASSES = 90

## Load a (frozen) Tensorflow model into memory.


[Median](https://blog.metaflow.fr/tensorflow-how-to-freeze-a-model-and-serve-it-with-a-python-api-d4f3596b3adc)

In [7]:
detection_graph = tf.Graph()

with detection_graph.as_default():
    
  od_graph_def = tf.GraphDef()

  with tf.gfile.GFile(CKPT, 'rb') as fid:
        
    serialized_graph = fid.read()
    od_graph_def.ParseFromString(serialized_graph)
    tf.import_graph_def(od_graph_def, name='')  

## Loading label map
Label maps map indices to category names, so that when our convolution network predicts `5`, we know that this corresponds to `airplane`.  Here we use internal utility functions, but anything that returns a dictionary mapping integers to appropriate string labels would be fine

In [8]:
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
category_index = label_map_util.create_category_index(categories)
print(category_index)

{1: {'id': 1, 'name': 'person'}, 2: {'id': 2, 'name': 'bicycle'}, 3: {'id': 3, 'name': 'car'}, 4: {'id': 4, 'name': 'motorcycle'}, 5: {'id': 5, 'name': 'airplane'}, 6: {'id': 6, 'name': 'bus'}, 7: {'id': 7, 'name': 'train'}, 8: {'id': 8, 'name': 'truck'}, 9: {'id': 9, 'name': 'boat'}, 10: {'id': 10, 'name': 'traffic light'}, 11: {'id': 11, 'name': 'fire hydrant'}, 13: {'id': 13, 'name': 'stop sign'}, 14: {'id': 14, 'name': 'parking meter'}, 15: {'id': 15, 'name': 'bench'}, 16: {'id': 16, 'name': 'bird'}, 17: {'id': 17, 'name': 'cat'}, 18: {'id': 18, 'name': 'dog'}, 19: {'id': 19, 'name': 'horse'}, 20: {'id': 20, 'name': 'sheep'}, 21: {'id': 21, 'name': 'cow'}, 22: {'id': 22, 'name': 'elephant'}, 23: {'id': 23, 'name': 'bear'}, 24: {'id': 24, 'name': 'zebra'}, 25: {'id': 25, 'name': 'giraffe'}, 27: {'id': 27, 'name': 'backpack'}, 28: {'id': 28, 'name': 'umbrella'}, 31: {'id': 31, 'name': 'handbag'}, 32: {'id': 32, 'name': 'tie'}, 33: {'id': 33, 'name': 'suitcase'}, 34: {'id': 34, 'name'

## Helper code

In [9]:
def load_image_into_numpy_array(image):
  (im_width, im_height) = image.size
  return np.array(image.getdata()).reshape(
      (im_height, im_width, 3)).astype(np.uint8)

In [10]:
def extrac_detected_objects(objects, prob, boxes, img_height, img_width):
    detected_object_dict = []
    
    for i in range(len(objects)):
        y_min = round(boxes[i][0]*img_height)
        x_min = round(boxes[i][1]*img_width)
        y_max = round(boxes[i][2]*img_height)
        x_max = round(boxes[i][3]*img_width)
        position = [x_max, x_min, y_max, y_min]
        detected_object_dict.append({'type':objects[i], 'probability':prob[i], 'position':position})
        # Follow the autotation styple of the bosch dataset.
        
    return detected_object_dict

# Vehicle detection

In [11]:
from glob import glob

In [12]:
import cv2
import time

In [13]:
filename = 'project_video.mp4'

In [16]:
cap = cv2.VideoCapture(filename)
desired_frame = 10
cap.set(1, desired_frame)

True

In [17]:
video_width = cap.get(3)   # float
video_height = cap.get(4) # float

In [18]:
print("Video size {} x {} ".format(video_width, video_height))

Video size 1280.0 x 720.0 


In [20]:
def pipeline(image):
    with detection_graph.as_default():
        with tf.Session(graph=detection_graph) as sess:
            while (True):
                
                image_np = image
                # Definite input and output Tensors for detection_graph
                image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')

                # Each box represents a part of the image where a particular object was detected.
                detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')

                # Each score represent how level of confidence for each of the objects.
                # Score is shown on the result image, together with the class label.
                detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
                detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
                num_detections = detection_graph.get_tensor_by_name('num_detections:0')


                # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
                image_np_expanded = np.expand_dims(image_np, axis=0)

                time0 = time.time()

                # Actual detection.
                (boxes, scores, classes, num) = sess.run(
                  [detection_boxes, detection_scores, detection_classes, num_detections],
                  feed_dict={image_tensor: image_np_expanded})

                time1 = time.time()
                #print("Time in milliseconds", (time1 - time0) * 1000)

                # Visualization of the results of a detection.P
                vis_util.visualize_boxes_and_labels_on_image_array(
                  image_np,
                  np.squeeze(boxes),
                  np.squeeze(classes).astype(np.int32),
                  np.squeeze(scores),
                  category_index,    #    category_index: a dict containing category dictionaries (each holding
                                            #category index `id` and category name `name`) keyed by category indices.
                  use_normalized_coordinates=True,
                  line_thickness=3)

                # Retrun found objects
                min_score_thresh = 0.5  #threshold same as in "visualize_boxes_and_lables_on_image_array"
                objects = [category_index.get(value)['name'] for index,value in enumerate(classes[0]) if scores[0,index] > min_score_thresh]
                prob = [scores[0,index] for index,value in enumerate(classes[0]) if scores[0,index] > min_score_thresh]
                boxes = [np.squeeze(boxes)[index] for index,value in enumerate(classes[0]) if scores[0,index] > min_score_thresh]

                # resize back to the coordinate
                width, height = video_width, video_height      # 1280 * 720 for Bosch_dataset  

                detected_object_dict = []
                detected_object_dict = extrac_detected_objects(objects, prob, boxes, width, height)

                return image_np


In [21]:
from moviepy.editor import VideoFileClip
from IPython.display import HTML

In [22]:
inpfile='project_video.mp4'
outfile='project_video_out_SSD.mp4'
clip = VideoFileClip(inpfile)
out_clip = clip.fl_image(pipeline) 
%time out_clip.write_videofile(outfile, audio=False)

[MoviePy] >>>> Building video project_video_out.mp4
[MoviePy] Writing video project_video_out.mp4


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉| 1260/1261 [55:53<00:02,  2.55s/it]


[MoviePy] Done.
[MoviePy] >>>> Video ready: project_video_out.mp4 

Wall time: 55min 54s
