In [56]:
import os
import collections
import cv2
import sys
import time
import argparse
import multiprocessing
import numpy as np
import tensorflow as tf
from matplotlib import pyplot as plt

In [2]:
%matplotlib inline

# This is needed since the notebook is stored in the object_detection folder:
# the parent directory has to be on the import path.
# The membership check keeps the cell idempotent, so re-running it does not
# pile up duplicate ".." entries in sys.path.
if ".." not in sys.path:
    sys.path.append("..")
sys.path

['',
 '/Users/Evilown/anaconda3/lib/python36.zip',
 '/Users/Evilown/anaconda3/lib/python3.6',
 '/Users/Evilown/anaconda3/lib/python3.6/lib-dynload',
 '/Users/Evilown/anaconda3/lib/python3.6/site-packages',
 '/Users/Evilown/anaconda3/lib/python3.6/site-packages/aeosa',
 '/Users/Evilown/anaconda3/lib/python3.6/site-packages/IPython/extensions',
 '/Users/Evilown/.ipython',
 '..']

In [3]:
from utils import label_map_util
from utils import visualization_utils as vis_util

## Get Path

In [4]:
# Name of the pre-trained detector; it is also the name of the folder that
# holds the frozen graph inside Tensorflow_detection_model_zoo.
MODEL_NAME = 'ssd_mobilenet_v1_coco_2017_11_17'

# Every path below is resolved against the working directory at the time
# this cell runs.
CWD_PATH = os.getcwd()

# Frozen detection graph: the actual model used for the object detection.
PATH_TO_CKPT = os.path.join(
    CWD_PATH,
    'Tensorflow_detection_model_zoo',
    MODEL_NAME,
    'frozen_inference_graph.pb',
)

# Label map file: the strings used to put the correct label on each box.
PATH_TO_LABELS = os.path.join(CWD_PATH, 'data', 'mscoco_label_map.pbtxt')


## Loading label map
Label maps map indices to category names, so that when our convolutional network predicts `5`, we know that this corresponds to `airplane`. Here we use internal utility functions, but anything that returns a dictionary mapping integers to appropriate string labels would be fine.


In [5]:
# Upper bound on the number of categories taken from the label map.
NUM_CLASSES = 90

# Read the label map file, convert it to a list of categories, then build
# the index used later to look up a category by the predicted class id.
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(
    label_map,
    max_num_classes=NUM_CLASSES,
    use_display_name=True,
)
category_index = label_map_util.create_category_index(categories)

## Helper Code

In [6]:
def load_image_into_numpy_array(image):
    """Convert an image object into a (height, width, 3) uint8 array.

    Args:
        image: object exposing ``size`` as ``(width, height)`` and a
            ``getdata()`` method yielding the pixels (presumably a PIL
            image; the reshape requires exactly three values per pixel).

    Returns:
        ``numpy.ndarray`` of dtype ``uint8`` with shape (height, width, 3).
    """
    width, height = image.size
    pixels = np.array(image.getdata())
    # Rows come first in the array layout, hence (height, width, channels).
    return pixels.reshape((height, width, 3)).astype(np.uint8)

def save_nparray_into_img(array, name = 'saved_boxed_image'):
    """Save an image array as ``<name>.jpeg`` in the ``test_images`` folder.

    Args:
        array: image data accepted by ``PIL.Image.fromarray``.
        name: file name without extension; it is honoured now, whereas the
            previous implementation ignored it and always wrote
            ``saved_boxed_file.jpeg``.

    The destination folder is ``CWD_PATH/test_images``; it is created when
    missing. The printed message reports the exact file that was written.
    """
    # Local import: the notebook only imports PIL in a later cell, so this
    # keeps the helper usable regardless of cell execution order.
    from PIL import Image

    save_dir = os.path.join(CWD_PATH, 'test_images')
    os.makedirs(save_dir, exist_ok=True)
    save_path = os.path.join(save_dir, '{}.jpeg'.format(name))

    Image.fromarray(array).save(save_path)
    print('Image saved to {}'.format(save_path))

# Detection

In [72]:
def detect_objects(image_np, sess, detection_graph):
    ''' Run the detector on one image and visualize the detections.

        Input: image array, tf.Session(), the frozen TF graph
        Output: (image_np, boxes, scores, classes, num_detections); the last
                four are the raw values returned by sess.run. The return
                value of the visualization helper is not used, so image_np
                is presumably annotated in place. '''

    # The model expects a batch of images: [None,None,3] -> [1,None,None,3]
    batch = np.expand_dims(image_np, axis=0)

    # Input tensor of the detection graph.
    image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')

    # Output tensors, listed in the order they are unpacked below:
    #   boxes   - the part of the image where an object was detected
    #   scores  - level of confidence for each detection (shown on the
    #             result image together with the class label)
    #   classes - class id of each detection
    #   num     - number of detections
    output_names = (
        'detection_boxes:0',
        'detection_scores:0',
        'detection_classes:0',
        'num_detections:0',
    )
    fetches = [detection_graph.get_tensor_by_name(tensor_name)
               for tensor_name in output_names]

    # Actual detection.
    boxes, scores, classes, num_detections = sess.run(
        fetches, feed_dict={image_tensor: batch})

    # Visualization of the results of a detection.
    vis_util.visualize_boxes_and_labels_on_image_array(
        image_np,
        np.squeeze(boxes),
        np.squeeze(classes).astype(np.int32),
        np.squeeze(scores),
        category_index,
        use_normalized_coordinates=True,
        line_thickness=8)

    return image_np, boxes, scores, classes, num_detections

## Load a (frozen) Tensorflow model into memory.

In [8]:
# Build a dedicated graph and fill it from the frozen model file.
detection_graph = tf.Graph()
with detection_graph.as_default():
    # Read the serialized graph; the file is closed before parsing starts.
    with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
        serialized_graph = fid.read()

    od_graph_def = tf.GraphDef()
    od_graph_def.ParseFromString(serialized_graph)
    # Imported with an empty name so that lookups such as 'image_tensor:0'
    # elsewhere in the notebook need no prefix.
    tf.import_graph_def(od_graph_def, name='')

## On Image

In [13]:
# First test on still images.
# For the sake of simplicity a single image is used here:
# test_images/image0.jpg (widen the range below to process more files).
PATH_TO_TEST_IMAGES_DIR = 'test_images'
TEST_IMAGE_PATHS = [
    os.path.join(PATH_TO_TEST_IMAGES_DIR, 'image{}.jpg'.format(index))
    for index in range(1)
]

# Size, in inches, of the output figures (how large the resulting plot is).
IMAGE_SIZE = (12, 8)
# Shortcut to the first test image path.
a = TEST_IMAGE_PATHS[0]


In [16]:
from PIL import Image

# Sanity check: open every test image and print the size reported by the
# image object next to the shape of the converted array.
for image_path in TEST_IMAGE_PATHS:
    image = Image.open(image_path)
    image_np = load_image_into_numpy_array(image)
    print('image size: {}, image_np size: {}'.format(
        image.size,
        image_np.shape,
    ))

image size: (1352, 900), image_np size: (900, 1352, 3)


In [73]:
# Run the detector on every test image with one shared session and save the
# image returned by detect_objects.
with detection_graph.as_default(), tf.Session(graph=detection_graph) as sess:
    for image_path in TEST_IMAGE_PATHS:
        image = Image.open(image_path)
        image_np = load_image_into_numpy_array(image)

        (image_boxed, boxes, scores,
         classes, num_detection) = detect_objects(image_np, sess, detection_graph)
        print(image_boxed.shape)

        save_nparray_into_img(image_boxed)

(900, 1352, 3)
Image saved to /Users/Evilown/Desktop/object_detection/test_images


## Don't plot; output only the results scoring above the threshold

In [69]:
def box_N_class_output(image,
                       boxes,
                       classes,
                       scores,
                       category_index,
                       max_boxes_to_return=20,
                       min_score_thresh=.5,
                       agnostic_mode=False,
                       line_thickness=4):
    """Collect the detections scoring above a threshold, without drawing.

    Args:
        image: unused; kept so the signature stays unchanged.
        boxes: array with one detection per row along axis 0; each kept row
            is converted with ``.tolist()``.
        classes: class ids, parallel to ``boxes``.
        scores: scores parallel to ``boxes``, or ``None`` to keep every box
            regardless of score.
        category_index: mapping from class id to a dict with a ``'name'``
            entry; ids missing from it are reported as ``'N/A'``.
        max_boxes_to_return: inspect at most this many leading rows; a falsy
            value means all rows.
        min_score_thresh: a detection is kept only when its score is
            strictly greater than this value.
        agnostic_mode: unused; kept so the signature stays unchanged.
        line_thickness: unused; kept so the signature stays unchanged.

    Returns:
        ``(box_map, score_map, class_map, num_detected)``: three parallel
        lists (box coordinates, scores, class names) and their length. When
        ``scores`` is ``None`` every entry of ``score_map`` is ``None``.
    """
    box_map = []
    score_map = []
    class_map = []

    if not max_boxes_to_return:
        max_boxes_to_return = boxes.shape[0]

    for i in range(min(max_boxes_to_return, boxes.shape[0])):
        # scores=None is accepted by the filter, so it must not be indexed;
        # previously this case crashed when the score was appended.
        score = None if scores is None else scores[i]
        if score is not None and not score > min_score_thresh:
            continue

        box_map.append(boxes[i].tolist())

        class_id = classes[i]
        if class_id in category_index:
            class_map.append(category_index[class_id]['name'])
        else:
            class_map.append('N/A')

        score_map.append(score)

    num_detected = len(box_map)
    return box_map, score_map, class_map, num_detected

In [77]:
def detect_objects(image_np, sess, detection_graph):
    ''' Run the detector on one image and keep only confident detections.

        NOTE: this redefines the detect_objects declared earlier in the
        notebook; this version does not call the visualization helper.

        Input: image array, tf.Session(), the frozen TF graph
        Output: (image_np, boxes, scores, classes, num_detections), where the
                last four are the lists and count returned by
                box_N_class_output '''

    # The model expects a batch of images: [None,None,3] -> [1,None,None,3]
    batched_image = np.expand_dims(image_np, axis=0)

    # Input and output tensors of the detection graph.
    tensor_named = detection_graph.get_tensor_by_name
    input_tensor = tensor_named('image_tensor:0')
    # Part of the image where a particular object was detected.
    boxes_tensor = tensor_named('detection_boxes:0')
    # Level of confidence for each detected object.
    scores_tensor = tensor_named('detection_scores:0')
    classes_tensor = tensor_named('detection_classes:0')
    count_tensor = tensor_named('num_detections:0')

    # Actual detection; the raw detection count is not needed because the
    # count is recomputed after filtering.
    raw_boxes, raw_scores, raw_classes, _ = sess.run(
        [boxes_tensor, scores_tensor, classes_tensor, count_tensor],
        feed_dict={input_tensor: batched_image})

    # Selecting high possibility results.
    boxes, scores, classes, num_detections = box_N_class_output(
        image_np,
        np.squeeze(raw_boxes),
        np.squeeze(raw_classes).astype(np.int32),
        np.squeeze(raw_scores),
        category_index,
        max_boxes_to_return=20)

    return image_np, boxes, scores, classes, num_detections

In [117]:
# Run the detector on every test image with one shared session; the results
# of the last image stay available in the notebook namespace.
with detection_graph.as_default(), tf.Session(graph=detection_graph) as sess:
    for image_path in TEST_IMAGE_PATHS:
        image = Image.open(image_path)
        image_np = load_image_into_numpy_array(image)

        (image_boxed, boxes, scores,
         classes, num_detection) = detect_objects(image_np, sess, detection_graph)
        print(image_boxed.shape)

(900, 1352, 3)


In [118]:
# Attach the class label to each box as a fifth element. A new list is built
# from the four coordinates only, so re-running this cell cannot append the
# label a second time (the previous in-place append did).
boxes = [list(box[:4]) + [label] for box, label in zip(boxes, classes)]
boxes

[[0.5538768172264099,
  0.39422380924224854,
  0.5931246876716614,
  0.40913766622543335,
  'person'],
 [0.3829464316368103,
  0.34582412242889404,
  0.40220093727111816,
  0.3590298891067505,
  'kite'],
 [0.5741666555404663,
  0.057666998356580734,
  0.6233518123626709,
  0.0747537910938263,
  'person'],
 [0.07991442084312439,
  0.43740910291671753,
  0.16590245068073273,
  0.5006028413772583,
  'kite'],
 [0.26564282178878784,
  0.2011229395866394,
  0.3075351119041443,
  0.22309386730194092,
  'kite'],
 [0.6833807826042175,
  0.0784299373626709,
  0.8405881524085999,
  0.11782577633857727,
  'person'],
 [0.38510024547576904,
  0.43172216415405273,
  0.40073245763778687,
  0.44773054122924805,
  'kite'],
 [0.7606196403503418,
  0.15739655494689941,
  0.9369254112243652,
  0.20186904072761536,
  'person'],
 [0.5428125262260437,
  0.25604742765426636,
  0.5623460412025452,
  0.26740866899490356,
  'person'],
 [0.5870811343193054,
  0.026993142440915108,
  0.6204380393028259,
  0.0413380

In [119]:
# Shape check: one row per detection, and five entries per row once the
# label has been attached to the four coordinates.
np.shape(boxes)

(10, 5)

In [5]:
boxes = [[0.5538768172264099,
  0.39422380924224854,
  0.5931246876716614,
  0.40913766622543335,
  'person'],
 [0.3829464316368103,
  0.34582412242889404,
  0.40220093727111816,
  0.3590298891067505,
  'kite'],
 [0.5741666555404663,
  0.057666998356580734,
  0.6233518123626709,
  0.0747537910938263,
  'person'],
 [0.07991442084312439,
  0.43740910291671753,
  0.16590245068073273,
  0.5006028413772583,
  'kite'],
 [0.26564282178878784,
  0.2011229395866394,
  0.3075351119041443,
  0.22309386730194092,
  'kite'],
 [0.6833807826042175,
  0.0784299373626709,
  0.8405881524085999,
  0.11782577633857727,
  'person'],
 [0.38510024547576904,
  0.43172216415405273,
  0.40073245763778687,
  0.44773054122924805,
  'kite'],
 [0.7606196403503418,
  0.15739655494689941,
  0.9369254112243652,
  0.20186904072761536,
  'person'],
 [0.5428125262260437,
  0.25604742765426636,
  0.5623460412025452,
  0.26740866899490356,
  'person'],
 [0.5870811343193054,
  0.026993142440915108,
  0.6204380393028259,
  0.04133802652359009,
  'person']]
# Render the list as text and encode it to bytes (default encoding).
str(boxes).encode()

b"[[0.5538768172264099, 0.39422380924224854, 0.5931246876716614, 0.40913766622543335, 'person'], [0.3829464316368103, 0.34582412242889404, 0.40220093727111816, 0.3590298891067505, 'kite'], [0.5741666555404663, 0.057666998356580734, 0.6233518123626709, 0.0747537910938263, 'person'], [0.07991442084312439, 0.43740910291671753, 0.16590245068073273, 0.5006028413772583, 'kite'], [0.26564282178878784, 0.2011229395866394, 0.3075351119041443, 0.22309386730194092, 'kite'], [0.6833807826042175, 0.0784299373626709, 0.8405881524085999, 0.11782577633857727, 'person'], [0.38510024547576904, 0.43172216415405273, 0.40073245763778687, 0.44773054122924805, 'kite'], [0.7606196403503418, 0.15739655494689941, 0.9369254112243652, 0.20186904072761536, 'person'], [0.5428125262260437, 0.25604742765426636, 0.5623460412025452, 0.26740866899490356, 'person'], [0.5870811343193054, 0.026993142440915108, 0.6204380393028259, 0.04133802652359009, 'person']]"

In [122]:
import csv

# Write one CSV row per detection. The file is opened with newline='' as the
# csv module requires; otherwise extra blank lines appear between rows on
# platforms that translate line endings.
with open("boxes_with_classes.csv", "w", newline="") as f:
    w = csv.writer(f)
    w.writerows(boxes)

<br>

## On Video

In [53]:
# Import everything needed to edit/save/watch video clips
from moviepy.editor import VideoFileClip
from IPython.display import HTML

In [57]:
def process_image(image):
    ''' Per-frame callback for the video clips processed below.

        Input: one colour (3-channel) frame.
        Output: the image produced by detect_objects for that frame. Only
                the image is returned, because the caller needs a frame and
                not the extra detection results. '''
    with detection_graph.as_default():
        # NOTE(review): a new session is opened for every frame, which is
        # likely a large part of the per-frame cost; consider sharing one.
        with tf.Session(graph=detection_graph) as sess:
            result = detect_objects(image, sess, detection_graph)

    # detect_objects returns (image, boxes, scores, classes, num_detections);
    # a bare image is still accepted in case an older definition is active.
    if isinstance(result, tuple):
        return result[0]
    return result

In [60]:
# Run process_image over the 0-2 subclip of video1.mp4 and write the result
# to video1_out.mp4 without audio; %time reports how long the export takes.
white_output = 'video1_out.mp4'
clip1 = VideoFileClip("video1.mp4").subclip(0,2)
white_clip = clip1.fl_image(process_image) # NOTE: process_image expects color images!
%time white_clip.write_videofile(white_output, audio=False)

[MoviePy] >>>> Building video video1_out.mp4
[MoviePy] Writing video video1_out.mp4


 98%|█████████▊| 60/61 [02:49<00:02,  2.74s/it]


[MoviePy] Done.
[MoviePy] >>>> Video ready: video1_out.mp4 

CPU times: user 2min 55s, sys: 24 s, total: 3min 19s
Wall time: 2min 50s


In [63]:
# Embed the processed video (white_output) in the notebook as an HTML5 player.
HTML("""
<video width="760" height="340" controls>
  <source src="{0}">
</video>
""".format(white_output))

### More Videos

In [38]:
# Run process_image over the 0-2 subclip of cars.mp4 and write the result to
# cars_out.mp4 without audio; %time reports how long the export takes.
white_output1 = 'cars_out.mp4'
clip1 = VideoFileClip("cars.mp4").subclip(0,2)
white_clip = clip1.fl_image(process_image) # NOTE: process_image expects color images!
%time white_clip.write_videofile(white_output1, audio=False)

[MoviePy] >>>> Building video cars_out.mp4
[MoviePy] Writing video cars_out.mp4


100%|██████████| 60/60 [00:44<00:00,  1.38it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: cars_out.mp4 

CPU times: user 1min 25s, sys: 4.6 s, total: 1min 30s
Wall time: 44.4 s





In [39]:
# Embed the processed video (white_output1) in the notebook as an HTML5 player.
HTML("""
<video width="960" height="540" controls>
  <source src="{0}">
</video>
""".format(white_output1))

In [17]:
# Run process_image over the 0-1 subclip of fruits1.mp4 and write the result
# to fruits1_out.mp4 without audio; %time reports how long the export takes.
white_output2 = 'fruits1_out.mp4'
clip2 = VideoFileClip("fruits1.mp4").subclip(0,1)
white_clip = clip2.fl_image(process_image) # NOTE: process_image expects color images!
%time white_clip.write_videofile(white_output2, audio=False)

[MoviePy] >>>> Building video fruits1_out.mp4
[MoviePy] Writing video fruits1_out.mp4


100%|██████████| 30/30 [00:21<00:00,  1.42it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: fruits1_out.mp4 

CPU times: user 41.9 s, sys: 1.65 s, total: 43.6 s
Wall time: 21.3 s





In [18]:
# Embed the processed video (white_output2) in the notebook as an HTML5 player.
HTML("""
<video width="960" height="540" controls>
  <source src="{0}">
</video>
""".format(white_output2))

In [40]:
# Run process_image over the 12-14 subclip of dog.mp4 and write the result
# to dog_out.mp4 without audio; %time reports how long the export takes.
white_output3 = 'dog_out.mp4'
clip3 = VideoFileClip("dog.mp4").subclip(12,14)
white_clip = clip3.fl_image(process_image) # NOTE: process_image expects color images!
%time white_clip.write_videofile(white_output3, audio=False)

[MoviePy] >>>> Building video dog_out.mp4
[MoviePy] Writing video dog_out.mp4


100%|██████████| 60/60 [00:43<00:00,  1.43it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: dog_out.mp4 

CPU times: user 1min 25s, sys: 3.58 s, total: 1min 29s
Wall time: 43.7 s





In [41]:
# Embed the processed video (white_output3) in the notebook as an HTML5 player.
HTML("""
<video width="960" height="540" controls>
  <source src="{0}">
</video>
""".format(white_output3))

In [42]:
# Merge the three processed videos into a single file, in this order.
from moviepy.editor import VideoFileClip, concatenate_videoclips

clip1, clip2, clip3 = (
    VideoFileClip(video_path)
    for video_path in ("cars_out.mp4", "fruits1_out.mp4", "dog_out.mp4")
)
final_clip = concatenate_videoclips(
    [clip1, clip2, clip3],
    method="compose",
)
final_clip.write_videofile("my_concatenation.mp4", bitrate="5000k")

[MoviePy] >>>> Building video my_concatenation.mp4
[MoviePy] Writing video my_concatenation.mp4


100%|██████████| 150/150 [00:00<00:00, 189.12it/s]


[MoviePy] Done.
[MoviePy] >>>> Video ready: my_concatenation.mp4 



In [43]:
# NOTE(review): wildcard import; only VideoFileClip is used in this cell.
# Consider an explicit import once no other cell relies on the extra names.
from moviepy.editor import *
# Re-open the concatenated video and export it as a GIF file.
clip = VideoFileClip("my_concatenation.mp4")
clip.write_gif("final.gif")


[MoviePy] Building file final.gif with imageio


100%|██████████| 151/151 [00:11<00:00, 12.35it/s]
