# Object Detection Demo
Welcome to the object detection inference walkthrough!  This notebook will walk you step by step through the process of using a pre-trained model to detect objects in an image. Make sure to follow the [installation instructions](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/installation.md) before you start.

# Imports

In [1]:
import numpy as np
import os
import six.moves.urllib as urllib
import sys
import tarfile
import tensorflow as tf
import zipfile
import time

import pprint

from distutils.version import StrictVersion
from collections import defaultdict
from io import StringIO
from matplotlib import pyplot as plt
from PIL import Image

# This is needed since the notebook is stored in the object_detection folder.
sys.path.append(".")
#from object_detection.utils import ops as utils_ops

#if StrictVersion(tf.__version__) < StrictVersion('1.12.0'):
#    raise ImportError('Please upgrade your TensorFlow installation to v1.12.*.')


# Model preparation 

### Variables

In [2]:
#Inference configs
IMAGE_PATH = '/home/johann/Pictures/images_compl'
DESTINATION_PATH = '/home/johann/Pictures/dest_images'
IMAGE_DIMS = [500, 500]
FIGURE_DIMS = tuple(IMAGE_DIMS.copy())
BATCH_NUM = 20
THRESHOLD = 20

# What model to download.
MODEL_NAME = 'ssd_mobilenet_v1_0.75_depth_300x300_coco14_sync_2018_07_03'
MODEL_FILE = MODEL_NAME + '.tar.gz'
INFERENCE_GRAPH_PATH = MODEL_NAME + '/frozen_inference_graph.pb'

#Download url
DOWNLOAD_BASE = 'http://download.tensorflow.org/models/object_detection/'

VISUALIZE = True

### Model download

In [3]:
if not os.path.isfile(MODEL_FILE):
    print("Downloading model file")
    opener = urllib.request.URLopener()
    opener.retrieve(DOWNLOAD_BASE + MODEL_FILE, MODEL_FILE)
else:
    print("Model file already exists")
    
if not os.path.isfile(INFERENCE_GRAPH_PATH):
    tar_file = tarfile.open(MODEL_FILE)
    for file in tar_file.getmembers():
        file_name = os.path.basename(file.name)
        if 'frozen_inference_graph.pb' in file_name:
            tar_file.extract(file, os.getcwd())
else:
    print('Model file already extracted')

Model file already exists
Model file already extracted


## Loading label map
Label maps map indices to category names, so that when our convolution network predicts `5`, we know that this corresponds to `airplane`.  Here we use internal utility functions, but anything that returns a dictionary mapping integers to appropriate string labels would be fine

In [4]:
CATEGORY_INDEX = {}
f = open("object_detection/mscoco_label_map.pbtxt", "r")
for line in f:
    if 'item' in line:
        name = f.readline().split()[1]
        real_id = int(f.readline().split()[1])
        real_name = f.readline().split()[1]
        CATEGORY_INDEX[real_id] =  {'id': real_id, 'name': real_name}

In [5]:
print(CATEGORY_INDEX)

{1: {'name': 'person', 'id': 1}, 2: {'name': 'bicycle', 'id': 2}, 3: {'name': 'car', 'id': 3}, 4: {'name': 'motorcycle', 'id': 4}, 5: {'name': 'airplane', 'id': 5}, 6: {'name': 'bus', 'id': 6}, 7: {'name': 'train', 'id': 7}, 8: {'name': 'truck', 'id': 8}, 9: {'name': 'boat', 'id': 9}, 10: {'name': 'traffic', 'id': 10}, 11: {'name': 'fire', 'id': 11}, 13: {'name': 'stop', 'id': 13}, 14: {'name': 'parking', 'id': 14}, 15: {'name': 'bench', 'id': 15}, 16: {'name': 'bird', 'id': 16}, 17: {'name': 'cat', 'id': 17}, 18: {'name': 'dog', 'id': 18}, 19: {'name': 'horse', 'id': 19}, 20: {'name': 'sheep', 'id': 20}, 21: {'name': 'cow', 'id': 21}, 22: {'name': 'elephant', 'id': 22}, 23: {'name': 'bear', 'id': 23}, 24: {'name': 'zebra', 'id': 24}, 25: {'name': 'giraffe', 'id': 25}, 27: {'name': 'backpack', 'id': 27}, 28: {'name': 'umbrella', 'id': 28}, 31: {'name': 'handbag', 'id': 31}, 32: {'name': 'tie', 'id': 32}, 33: {'name': 'suitcase', 'id': 33}, 34: {'name': 'frisbee', 'id': 34}, 35: {'name'

### Create dataset as tensors

In [6]:
def create_dataset(path, image_dims, batch_size = 32):
    import pathlib 
    import random 
    print(image_dims)

    
    data_root = pathlib.Path(path)
    image_uris = list(data_root.glob('**/*.jpg'))
    image_uris = [str(image) for image in image_uris]
    
    def preprocess_image(image):

        image = tf.image.decode_jpeg(image, channels=3)
        image = tf.image.resize_image_with_pad(image, image_dims[0], image_dims[1])
        #image /= 255  # normalize to [0,1] range

        return image
    
    def load_and_preprocess_image(path):
        
        image = tf.read_file(path)
        
        return preprocess_image(image)
    
    path_ds = tf.data.Dataset.from_tensor_slices(image_uris)
    #AUTOTUNE = tf.data.experimental.AUTOTUNE
    image_ds = path_ds.map(load_and_preprocess_image)
    
    return image_ds

### Create graph and connect tensors

In [7]:
def build_inference_graph(inference_graph_path, prepend_name):
    """Loads the inference graph and connects it to the input image.
    Args:
    image_tensor: The input image. uint8 tensor, shape=[1, None, None, 3]
    inference_graph_path: Path to the inference graph with embedded weights
    Returns:
    detected_boxes_tensor: Detected boxes. Float tensor,
        shape=[num_detections, 4]
    detected_scores_tensor: Detected scores. Float tensor,
        shape=[num_detections]
    detected_labels_tensor: Detected labels. Int64 tensor,
        shape=[num_detections]
    """
    with tf.gfile.Open(inference_graph_path, 'rb') as graph_def_file:
        graph_content = graph_def_file.read()
    graph_def = tf.GraphDef()
    graph_def.MergeFromString(graph_content)

    tf.import_graph_def(graph_def, name=prepend_name)

    g = tf.get_default_graph()
    #print([n.name for n in tf.get_default_graph().as_graph_def().node])
    
    tensor_dict = {}
    for key in ['num_detections', 'detection_boxes', 'detection_scores', 'detection_classes']:
        tensor_name = key + ':0'
        try:
            tensor_dict[key] = g.get_tensor_by_name(prepend_name + '/' + tensor_name)
        except:
            print("Something went horribly wrong when loading graph tensors")
            pass
        
    
    image_tensor = g.get_tensor_by_name(prepend_name + '/' + 'image_tensor:0')
    return {'out': tensor_dict, 'in': image_tensor}, g

# Threaded visualization

In [8]:
from threading import Thread
from object_detection.utils import visualization_utils as vis_util

vis_threads = []

def threaded_function(image_np, args_dict, counter):
    boxes = args_dict['detection_boxes']
    scores = args_dict['detection_scores']
    classes = np.array(args_dict['detection_classes'], np.int16)
    
    for i in range(len(image_np)):
        new_img = np.array(image_np[i], np.uint8)
        vis_util.visualize_boxes_and_labels_on_image_array(
          new_img,
          boxes[i],
          classes[i],
          scores[i],
          CATEGORY_INDEX,
          use_normalized_coordinates=True,
          line_thickness=8)
        plt.figure(figsize=FIGURE_DIMS)
        plt.imsave(DESTINATION_PATH + '/{}_{}.jpg'.format(str(counter), str(i)), new_img)
        plt.close()

In [9]:
def threaded_inference(tensor_dict, image_tensor, graph, image, counter):
    config=tf.ConfigProto(log_device_placement=True)
    with tf.Session(graph = graph, config = config) as sess:
        output_dict = sess.run(tensor_dict, feed_dict = {image_tensor: image})
        
        if VISUALIZE:
                thread = Thread(target = threaded_function, args = (image, output_dict, counter))
                thread.start()
                vis_threads.append(thread)
        

# Actual Inference

In [10]:
from tensorflow.python.client import device_lib

def get_available_gpus():
    local_device_protos = device_lib.list_local_devices()
    return [x.name for x in local_device_protos if (x.device_type == 'GPU' or x.device_type == 'CPU')]
    #return [x.name for x in local_device_protos if (x.device_type == 'GPU')]

In [11]:
print(get_available_gpus())

['/device:CPU:0', '/device:GPU:0']


In [12]:
ds = create_dataset(IMAGE_PATH, IMAGE_DIMS)
ds = ds.prefetch(BATCH_NUM*2).batch(BATCH_NUM)
it = ds.make_one_shot_iterator()

# This is needed to display the images.
%matplotlib inline

#config=tf.ConfigProto(log_device_placement=True)

with tf.Session() as sess:
    #tf.logging.info('Reading input from files and connecting to graph')
    #image_tensor = build_input(image_path)
    
    devices = get_available_gpus()
    
    tf.logging.info('Reading graph and building model specified for the different cluster devices')
    
    tf_dict = []
    tg_dict = []
    n = 0
    for d in devices:
        tensors, graph = build_inference_graph(INFERENCE_GRAPH_PATH, 'test')
        tf_dict.append(tensors)
        tg_dict.append(graph)
    
    sess.run(tf.global_variables_initializer())
    image_it = it.get_next()
    
    #threads = []
    
    i = 1
    j = 1
    try:
        while True:
            threads = []
            print('Step: ' + str(i))
            for idx in range(len(tf_dict)):
                loc_image = sess.run(image_it)
                thread = Thread(target = threaded_inference, 
                                args = (tf_dict[idx]['out'], tf_dict[idx]['in'], tg_dict[idx], loc_image, i))
                i += 1
                thread.start()
                threads.append(thread)
            
            for t in threads:
                t.join()
        '''
        while True:
            print('Step: ' + str(i))
            start = time.time()
            
            output_multi_device = []
            input_feed_dict = {}
            images = []
            
            for idx in tf_dict:
                
                output_multi_device.append(idx['out'])
                #print(idx['out'])
                loc_image = sess.run(image_it)
                print(idx['in'].name)
                input_feed_dict[idx['in']] = loc_image
                images.append(loc_image)
                
            outputs = sess.run(output_multi_device, input_feed_dict)
            #pprint.pprint(outputs)
            
            if VISUALIZE:
                for count in range(len(tf_dict)):
                    thread = Thread(target = threaded_function, args = (images[count], outputs[count], i, count))
                    thread.start()
                    threads.append(thread)
            i += 1
                
                    image = sess.run(image_it)
                    print('Time required for reading: ' + str(time.time() - start))

                    start = time.time()
                    output_dict = sess.run(tensor_dict, feed_dict = {image_tensor: image})
                    print('Time required for computation: ' + str(time.time() - start))
            
            if VISUALIZE:
                thread = Thread(target = threaded_function, args = (image, output_dict, i))
                thread.start()
                threads.append(thread)
                
            i += 1
        '''
    
    
    except tf.errors.OutOfRangeError:
        tf.logging.info('Finished processing records')
        for t in vis_threads:
            t.join()

[500, 500]
Instructions for updating:
Colocations handled automatically by placer.
INFO:tensorflow:Reading graph and building model specified for the different cluster devices
Step: 1
Step: 3
Step: 5
Step: 7
Step: 9
Step: 11
Step: 13
Step: 15
Step: 17
Step: 19
Step: 21
Step: 23
Step: 25
Step: 27
Step: 29
Step: 31
Step: 33
Step: 35
Step: 37
Step: 39
Step: 41
Step: 43


KeyboardInterrupt: 

In [None]:
tensor_dict, image_tensor = build_inference_graph(INFERENCE_GRAPH_PATH)


In [None]:
print(tensor_dict)