# Demo of Gloves Detection in Video
In this notebook, I will walk through a demo to show how to use tensorflow object detection API to detect and segment boxer gloves in a short video.

To process frames of images in the vido, the python library moviepy is used. Install it first if necessary.

In [2]:
import os
import time
import argparse
import multiprocessing
import numpy as np
import tensorflow as tf
from matplotlib import pyplot as plt
from object_detection.utils import ops as utils_ops

In [3]:
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as vis_util

In [4]:
def model_detect(image_np, sess, detection_graph):
    # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
    image_np_expanded = np.expand_dims(image_np, axis=0)
    image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')

    # Each box represents a part of the image where a particular object was detected.
    boxes = detection_graph.get_tensor_by_name('detection_boxes:0')

    # Each score represent how level of confidence for each of the objects.
    # Score is shown on the result image, together with the class label.
    scores = detection_graph.get_tensor_by_name('detection_scores:0')
    classes = detection_graph.get_tensor_by_name('detection_classes:0')
    num_detections = detection_graph.get_tensor_by_name('num_detections:0')
    
    ############################################################################
    detection_boxes = tf.squeeze(boxes, [0])
    detection_masks = tf.squeeze(detection_graph.get_tensor_by_name('detection_masks:0'), [0])
    # Reframe is required to translate mask from box coordinates to image coordinates and fit the image size.
    real_num_detection = tf.cast(num_detections[0], tf.int32)
    detection_boxes = tf.slice(detection_boxes, [0, 0], [real_num_detection, -1])
    detection_masks = tf.slice(detection_masks, [0, 0, 0], [real_num_detection, -1, -1])
    detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
        detection_masks, detection_boxes, image_np_expanded.shape[1], image_np_expanded.shape[2])
    detection_masks_reframed = tf.cast(tf.greater(detection_masks_reframed, 0.5), tf.uint8)
    # Follow the convention by adding back the batch dimension
    detection_masks = tf.expand_dims(detection_masks_reframed, 0)
    ############################################################################
    
    # Actual detection.
    (boxes, scores, classes, num_detections, detection_masks) = sess.run(
        [boxes, scores, classes, num_detections, detection_masks],
        feed_dict={image_tensor: image_np_expanded})

    # Visualization of the results of a detection.
    image_copy = np.copy(image_np)
    vis_util.visualize_boxes_and_labels_on_image_array(
        image_copy,
        np.squeeze(boxes),
        np.squeeze(classes).astype(np.int32),
        np.squeeze(scores),
        category_index,
        instance_masks=detection_masks[0],
        use_normalized_coordinates=True,
        line_thickness=8)
    plt.figure(figsize=(12,8))
    plt.imshow(image_copy)
    return image_copy

In [6]:
PATH_TO_LABELS = '../annotations_boxgloves_train/label_map.pbtxt'
category_index = label_map_util.create_category_index_from_labelmap(PATH_TO_LABELS, use_display_name=True)
PATH_TO_FROZEN_GRAPH = '../exported_graphs_gloves/frozen_inference_graph.pb'
detection_graph = tf.Graph()
with detection_graph.as_default():
  od_graph_def = tf.GraphDef()
  with tf.gfile.GFile(PATH_TO_FROZEN_GRAPH, 'rb') as fid:
    serialized_graph = fid.read()
    od_graph_def.ParseFromString(serialized_graph)
    tf.import_graph_def(od_graph_def, name='')

In [7]:
def image_func(image):
    # NOTE: The output you return should be a color image (3 channel) for processing video below
    # you should return the final output (image with lines are drawn on lanes)
    with detection_graph.as_default():
        with tf.Session(graph=detection_graph) as sess:
            image_processed = model_detect(image, sess, detection_graph)
            return image_processed

In [8]:
# Import everything needed to edit/save/watch video clips
from moviepy.editor import VideoFileClip
from IPython.display import HTML

In [10]:
white_output = 'Boxing_fight_readme.mp4'
clip = VideoFileClip("Boxing.mp4").subclip(4,7)
white_clip = clip.fl_image(image_func) #NOTE: this function expects color images!!s
%time white_clip.write_videofile(white_output, audio=False)

t:   0%|          | 0/75 [00:00<?, ?it/s, now=None]

Moviepy - Building video Boxing_fight_readme.mp4.
Moviepy - Writing video Boxing_fight_readme.mp4



                                                                  

Moviepy - Done !
Moviepy - video ready Boxing_fight_readme.mp4
CPU times: user 1h 15min 23s, sys: 9min 57s, total: 1h 25min 21s
Wall time: 9h 10min 35s


In [12]:
HTML("""
<video width="800" height="450" controls>
  <source src="{0}">
</video>
""".format(white_output))