In [1]:
import os
import cv2
import tensorflow as tf
import numpy as np
import json

In [2]:
from utilities import *

In [3]:
MODEL_NAME = 'ssd_mobilenet_v1_coco_2017_11_17'
#LABEL_MAP_NAME = 'mscoco_label_map.pbtxt'
LABEL_MAP_NAME = 'mscoco_label_map.json'

CWD_PATH = os.getcwd()
MODELS_PATH = os.path.abspath(os.path.join(CWD_PATH, 'models'))
LABELS_PATH = os.path.abspath(os.path.join(CWD_PATH, 'labels'))
DATA_PATH = os.path.abspath(os.path.join(CWD_PATH, 'data'))

VIDEO_FILE = 'cows.mp4'
VIDEO_PATH = os.path.join(DATA_PATH, VIDEO_FILE)

MODEL_PATH = os.path.join(MODELS_PATH, MODEL_NAME, 'frozen_inference_graph.pb')
LABEL_MAP_PATH = os.path.join(LABELS_PATH, LABEL_MAP_NAME)

THRESHOLD = 0

In [4]:
with open(LABEL_MAP_PATH) as json_file:
    labels = json.load(json_file)

In [5]:
labels

{'1': {'id': 1, 'name': 'person'},
 '10': {'id': 10, 'name': 'traffic light'},
 '11': {'id': 11, 'name': 'fire hydrant'},
 '13': {'id': 13, 'name': 'stop sign'},
 '14': {'id': 14, 'name': 'parking meter'},
 '15': {'id': 15, 'name': 'bench'},
 '16': {'id': 16, 'name': 'bird'},
 '17': {'id': 17, 'name': 'cat'},
 '18': {'id': 18, 'name': 'dog'},
 '19': {'id': 19, 'name': 'horse'},
 '2': {'id': 2, 'name': 'bicycle'},
 '20': {'id': 20, 'name': 'sheep'},
 '21': {'id': 21, 'name': 'cow'},
 '22': {'id': 22, 'name': 'elephant'},
 '23': {'id': 23, 'name': 'bear'},
 '24': {'id': 24, 'name': 'zebra'},
 '25': {'id': 25, 'name': 'giraffe'},
 '27': {'id': 27, 'name': 'backpack'},
 '28': {'id': 28, 'name': 'umbrella'},
 '3': {'id': 3, 'name': 'car'},
 '31': {'id': 31, 'name': 'handbag'},
 '32': {'id': 32, 'name': 'tie'},
 '33': {'id': 33, 'name': 'suitcase'},
 '34': {'id': 34, 'name': 'frisbee'},
 '35': {'id': 35, 'name': 'skis'},
 '36': {'id': 36, 'name': 'snowboard'},
 '37': {'id': 37, 'name': 'spor

In [6]:
detection_graph = tf.Graph()
with detection_graph.as_default():
    od_graph_def = tf.GraphDef()
    with tf.gfile.GFile(MODEL_PATH, 'rb') as fid:
        serialized_graph = fid.read()
        od_graph_def.ParseFromString(serialized_graph)
        tf.import_graph_def(od_graph_def, name='')

In [10]:
def filter_boxes(min_score, boxes, scores, classes, categories=21):
    """Return boxes with a confidence >= `min_score`"""
    n = len(classes)
    idxs = []
    for i in range(n):
        if classes[i] == categories and scores[i]>=min_score:
            idxs.append(i)
    
    filtered_boxes = boxes[idxs, ...]
    filtered_scores = scores[idxs, ...]
    filtered_classes = classes[idxs, ...]
    return filtered_boxes, filtered_scores, filtered_classes

In [20]:
 def detect_objects(image_np, sess, detection_graph, threshold=0.95):
        # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
        image_np_expanded = np.expand_dims(image_np, axis=0)
        image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')

        # Each box represents a part of the image where a particular object was detected.
        boxes = detection_graph.get_tensor_by_name('detection_boxes:0')

        # Each score represent level of confidence for each of the objects.
        scores = detection_graph.get_tensor_by_name('detection_scores:0')
        classes = detection_graph.get_tensor_by_name('detection_classes:0')
        num_detections = detection_graph.get_tensor_by_name('num_detections:0')

        # Actual detection.
        (boxes, scores, classes, num_detections) = sess.run(
            [boxes, scores, classes, num_detections],
            feed_dict={image_tensor: image_np_expanded})
        
        print(classes)
        
        print(classes[0])
        print(scores[0])
        print(boxes[0])
        boxes, scores, classes, = filter_boxes(0.2, boxes[0], scores[0], classes[0], 21)

        
        return (boxes, scores, classes, num_detections)
        # filter for target classes within threshold
#         boxes = np.squeeze(boxes)
#         classes = np.squeeze(classes)
#         scores = np.squeeze(scores)
#         ix_scores = scores >= threshold
#         ix = ix_classes * ix_scores
#         
#         return tf_data

In [21]:
# WEBCAM
#video_stream = VideoStream(src=0).start() 
# VIDEO File
video_stream = VideoStream(src=VIDEO_PATH).start()

while video_stream.grabbed:
    frame, counter = video_stream.read()
    
    # do some zoo model detection on the frame
    with detection_graph.as_default():
        with tf.Session(graph=detection_graph) as sess:
            data = detect_objects(frame, sess, detection_graph)
    
    height = frame.shape[0]
    width = frame.shape[1]
    
    for item in data[0]:
        cv2.rectangle(
            frame,
            (int(width * item[1]), int(height * item[0])), 
            (int(width * item[3]), int(height * item[2])), 
            (0, 255, 0),
            1
        )
        
    
    cv2.imshow('frame', frame)
    
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
video_stream.stop()
cv2.destroyAllWindows()
cv2.waitKey(1000) # to autoclose window after a few seconds

[[ 1. 21. 19. 19. 20.  1. 20. 19. 21. 20. 21. 19. 19. 20.  1. 20. 19. 21.
  19. 20. 21. 20. 19. 19. 19.  1. 19. 20. 21. 20.  1.  1. 20. 18.  1.  1.
   1. 20.  1.  1. 21.  1. 21. 39. 19.  1. 21.  1. 21.  1. 19. 21. 39.  1.
  19.  1.  1. 20.  1.  1. 19. 19. 21.  1. 19.  1. 19.  1.  1. 20.  1. 19.
  21.  1.  1.  1.  1.  1. 19. 20.  1.  1. 18.  1.  1.  1. 19.  1.  1. 21.
  18.  1.  1. 21.  1.  1. 20.  1.  1.  1.]]
[ 1. 21. 19. 19. 20.  1. 20. 19. 21. 20. 21. 19. 19. 20.  1. 20. 19. 21.
 19. 20. 21. 20. 19. 19. 19.  1. 19. 20. 21. 20.  1.  1. 20. 18.  1.  1.
  1. 20.  1.  1. 21.  1. 21. 39. 19.  1. 21.  1. 21.  1. 19. 21. 39.  1.
 19.  1.  1. 20.  1.  1. 19. 19. 21.  1. 19.  1. 19.  1.  1. 20.  1. 19.
 21.  1.  1.  1.  1.  1. 19. 20.  1.  1. 18.  1.  1.  1. 19.  1.  1. 21.
 18.  1.  1. 21.  1.  1. 20.  1.  1.  1.]
[0.78468394 0.62755203 0.5415804  0.36666048 0.30852136 0.2615746
 0.26101127 0.24816875 0.16828595 0.1347526  0.13332808 0.10354618
 0.09106436 0.08888687 0.08742253 0.08683081 0

TypeError: only size-1 arrays can be converted to Python scalars