In [None]:
!pip install -U --pre tensorflow=="2.*"
!pip install tf_slim

In [None]:
!pip install pycocotools

In [None]:
import os
import pathlib


if "models" in pathlib.Path.cwd().parts:
  while "models" in pathlib.Path.cwd().parts:
    os.chdir('..')
elif not pathlib.Path('models').exists():
  !git clone --depth 1 https://github.com/tensorflow/models

In [None]:
%%bash
cd models/research/
protoc object_detection/protos/*.proto --python_out=.
cp object_detection/packages/tf2/setup.py .
python -m pip install .

### Imports

In [1]:
import numpy as np
import os
import six.moves.urllib as urllib
import sys
import tarfile
import tensorflow as tf
import zipfile
import cv2

from collections import defaultdict
from io import StringIO
from matplotlib import pyplot as plt
from PIL import Image
from IPython.display import display

In [2]:
from object_detection.utils import ops as utils_ops
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as vis_util

In [3]:
# patch tf1 into `utils.ops`
utils_ops.tf = tf.compat.v1

# Patch the location of gfile
tf.gfile = tf.io.gfile

## Loader

In [None]:
def load_model(model_name):
  base_url = 'http://download.tensorflow.org/models/object_detection/'
  model_file = model_name + '.tar.gz'
  model_dir = tf.keras.utils.get_file(
    fname=model_name, 
    origin=base_url + model_file,
    untar=True)

  model_dir = pathlib.Path(model_dir)/"saved_model"

  model = tf.saved_model.load(str(model_dir))

  return model

In [5]:
# List of the strings that is used to add correct label for each box.
PATH_TO_LABELS = '../models/research/object_detection/data/mscoco_label_map.pbtxt'
category_index = label_map_util.create_category_index_from_labelmap(PATH_TO_LABELS, use_display_name=True)

For the sake of simplicity we will test on 2 images:

In [None]:
# If you want to test the code with your images, just add path to the images to the TEST_IMAGE_PATHS.
PATH_TO_TEST_IMAGES_DIR = pathlib.Path('models/research/object_detection/test_images')
TEST_IMAGE_PATHS = sorted(list(PATH_TO_TEST_IMAGES_DIR.glob("*.jpg")))
TEST_IMAGE_PATHS

# Detection

Load an object detection model:

In [None]:
model_name = 'ssd_mobilenet_v1_coco_2017_11_17'
detection_model = load_model(model_name)

In [4]:
detection_model = tf.saved_model.load('../myModel')
category_index = {0: {'id': 0, 'name': 'empty'}, 1: {'id': 1, 'name': 'full'}}

In [5]:
def run_inference_for_single_image(model, image):
  image = np.asarray(image)
  input_tensor = tf.convert_to_tensor(image, dtype=tf.float32)
  input_tensor = input_tensor[tf.newaxis,...]

  # Run inference
#   model_fn = model.signatures['serving_default']
#   output_dict = model_fn(input_tensor)
  # when passing our loaded model
  output_dict = model.signatures[ 'detect' ](input_tensor)

  # All outputs are batches tensors.
  # Convert to numpy arrays, and take index [0] to remove the batch dimension.
  # We're only interested in the first num_detections.
  num_detections = int(output_dict.pop('num_detections'))
  output_dict = {key:value[0, :num_detections].numpy() 
                 for key,value in output_dict.items()}
  output_dict['num_detections'] = num_detections

  # detection_classes should be ints.
  output_dict['detection_classes'] = output_dict['detection_classes'].astype(np.int64)
   
  # Handle models with masks:
  if 'detection_masks' in output_dict:
    # Reframe the the bbox mask to the image size.
    detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
              output_dict['detection_masks'], output_dict['detection_boxes'],
               image.shape[0], image.shape[1])      
    detection_masks_reframed = tf.cast(detection_masks_reframed > 0.5,
                                       tf.uint8)
    output_dict['detection_masks_reframed'] = detection_masks_reframed.numpy()
    
  return output_dict

In [6]:
# Returns a list of the classes that are detected with above .5 certainty
def get_detected_classes(output_dict):
    detected = {}
    for i in range(0, output_dict['detection_scores'].size):
        score = output_dict['detection_scores'][i]
        classID = output_dict['detection_classes'][i]
        box = output_dict['detection_boxes'][i]
        # Calculating the center location of the bounding box
        xcenter = (box[1]+box[3])/2
        ycenter = (box[0]+box[2])/2
        center = (xcenter, ycenter)
        if score > .5:
            if category_index[classID]['name'] in detected:
                detected[category_index[classID]['name']].append(center)
            else:
                detected[category_index[classID]['name']] = [center]
            
    return detected

This is the code that activates your camera and does inference on each frame of the live video. Press 'q' to quit.

In [10]:
cap = cv2.VideoCapture(0) # or cap = cv2.VideoCapture("<video-path>")
# cap.set(3, 640)
# cap.set(4, 640) 
cv2.namedWindow("video")
cv2.startWindowThread()
def run_inference(model, cap):
    timer = 0
    frame = 0
    players = {1: Player(1, 'full', (0,0), 0), 2: Player(2, 'full', (0,0), 0)}
    while cap.isOpened():
        timer += 1
        ret, image_np = cap.read()
        # Printing detected classes every ten frames
        if timer%40 == 0:
            frame += 1
            image_np = cv2.resize(image_np, dsize=(640,640), interpolation=cv2.INTER_CUBIC)
            # Actual detection.
            output_dict = run_inference_for_single_image(model, image_np)
            track_players(get_detected_classes(output_dict), players, frame)
            # Visualization of the results of a detection.
            vis_util.visualize_boxes_and_labels_on_image_array(
                image_np,
                output_dict['detection_boxes'],
                output_dict['detection_classes'],
                output_dict['detection_scores'],
                category_index,
                instance_masks=output_dict.get('detection_masks_reframed', None),
                use_normalized_coordinates=True,
                line_thickness=8)
            display_text1 = 'Player 1: ' + str(players[1].score) + "; frames_toggled: " + str(players[1].frames_toggled) + " Cup: " + players[1].cup
            display_text2 = 'Player 2: ' + str(players[2].score) + "; frames_toggled: " + str(players[2].frames_toggled) + " Cup: " + players[2].cup
#             display_text1 = 'Sam: ' + str(players[1].score)
#             display_text2 = 'Harrison: ' + str(players[1].score)
            cv2.putText(image_np, 
                display_text1, 
                (50, 50), 
                cv2.FONT_HERSHEY_SIMPLEX, .5, 
                (0, 255, 255), 
                2, 
                cv2.LINE_4)
            cv2.putText(image_np, 
                display_text2, 
                (50, 75), 
                cv2.FONT_HERSHEY_SIMPLEX, .5, 
                (0, 255, 255), 
                2, 
                cv2.LINE_4)
            cv2.imwrite('timelapse/timelapse_frame_' + str(frame) + '.jpg', image_np)
            cv2.imshow('object_detection', cv2.resize(image_np, (640, 640)))
#             print(get_detected_classes(output_dict))
            print(players)
            
        if cv2.waitKey(25) & 0xFF == ord('q'):
            cap.release()
            
            cv2.waitKey(1)
            cv2.destroyAllWindows()
            cv2.waitKey(1)
            
            return output_dict

temp = run_inference(detection_model, cap)


{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2:

{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2:

{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2:

{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2:

{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2:

{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2:

{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2:

{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2:

{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2:

{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2:

{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2:

{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2:

{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2:

{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2:

{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2:

{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2:

{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2: <__main__.Player object at 0x7fa9f4a210d0>}
{1: <__main__.Player object at 0x7fa9fdc41430>, 2:

For player tracking we tried to use the location of each bounding box to try to track the players, but the model isn't accurate enough to have more than two players with any accuracy so we moved to a hardcoded 2 players. Then, because we had to reduce the framerate so much, we can't assume that movement of the cups will be small, instead, we assume that each player will keep their cup on their half of the table. We also looked at using the halfway point between the two cups at each fram as the bounding line, but when multiple boxes are predicted for a cup or an erroneous box is predicted, determining which is the ground truth cup is impossible.

In [7]:
FRAMES_UNTIL_TOGGLE = 3

class Player():
    _id = 0
    cup = 'FULL'
    score = 0
    frames_toggled = 0
    last_update = 0
    location = (0,0)
    
    def __init__(self, id_number, cup, loc, timestamp):
        self._id = id_number
        self.cup = cup
        self.location = loc
        self.last_update = timestamp
        
    def __str__(self):
        return "Player " + str(self._id) + "\n" + "score: " + str(self.score) + "\n" + "cup: " + self.cup + "\n" + "location: " + str(self.location) + "\n" + "last_update: " + str(self.last_update) + "\n"
        
        
def track_players(detections_dict, players, timestamp):
    player_boxes = allocate_boxes_to_players(detections_dict)
    # If no players then create from current output
    if len(players) == 0:
        for fill_status in detections_dict:
            for cup in detections_dict[fill_status]:
                player_id = len(players) + 1
                players[player_id] = Player(player_id, fill_status, cup, timestamp)
    else:
        for player_id in players:
            player = players[player_id]
            if 'full' in player_boxes[player_id]:
                # Assuming cup is full if detected as full
                if player.cup != 'full':
                    player.frames_toggled += 1
                    if player.frames_toggled == FRAMES_UNTIL_TOGGLE:
                        player.cup = 'full'
                        player.last_update = timestamp
                        player.frames_toggled = 0
                else:
                    # Set this back to 0 because we confirmed the current state of the glass
                    player.frames_toggled = 0
            elif 'empty' in player_boxes[player_id]:
                if player.cup != 'empty':
                    player.frames_toggled += 1
                    if player.frames_toggled == FRAMES_UNTIL_TOGGLE:
                        player.cup = 'empty'
                        player.last_update = timestamp
                        player.frames_toggled = 0
                        player.score += 1
                else:
                    # Set this back to 0 because we confirmed the current state of the glass
                    player.frames_toggled = 0
                        
def allocate_boxes_to_players(detections_dict):
    player_boxes = {1: [], 2: []}
    for fill_status in detections_dict:
        for cup in detections_dict[fill_status]:
            if cup[0] < .5:
                player_boxes[1].append(fill_status)
            else:
                player_boxes[2].append(fill_status)
    return player_boxes

## Collecting results on a dataset

In [8]:
# Utility function
from six import BytesIO
def load_image_into_numpy_array(path):
  """Load an image from file into a numpy array.

  Puts image into numpy array to feed into tensorflow graph.
  Note that by convention we put it into a numpy array with shape
  (height, width, channels), where channels=3 for RGB.

  Args:
    path: a file path.

  Returns:
    uint8 numpy array with shape (img_height, img_width, 3)
  """
  img_data = tf.io.gfile.GFile(path, 'rb').read()
  image = Image.open(BytesIO(img_data))
  (im_width, im_height) = image.size
  return np.array(image.getdata()).reshape(
      (im_height, im_width, 3)).astype(np.uint8)

In [29]:
# Loading images for inference
TEST_IMAGES_PATH = 'test_images'
arr = ['full', 'empty']
test_images_np = {}
for status in arr:
    test_images_np[status] = []
    dir_path = os.path.join(TEST_IMAGES_PATH, status)
    for name in os.listdir(dir_path):
        filepath = os.path.join(dir_path, name)
        if os.path.isfile(filepath) and filepath.endswith('.jpg'):
            test_images_np[status].append(load_image_into_numpy_array(filepath))

In [39]:
# Formatting images for inference
formatted_images_np = {}
for status in arr:
    formatted_images_np[status] = []
    for image_np in test_images_np[status]:
        image_np = cv2.resize(image_np, dsize=(640,640), interpolation=cv2.INTER_CUBIC)
        formatted_images_np[status].append(image_np)


In [41]:
# Running inference on test images
right_class = {}
wrong_class = {}
true_positive = {}
false_positive = {}
true_negative = {}
false_negative = {}
total = {}
for status in arr:
    right_class[status] = 0
    wrong_class[status] = 0
    true_positive[status] = 0
    false_positive[status] = 0
    true_negative[status] = 0
    false_negative[status] = 0
    total[status] = 0
    for image_np in formatted_images_np[status]:
        total[status] += 1
        output_dict = run_inference_for_single_image(detection_model, image_np)
        detections = get_detected_classes(output_dict)
        if status in detections:
            right_class[status] += 1
            if len(detections.keys()) > 1:
                wrong_class[status] += 1
        elif len(detections.keys()) > 0:
            wrong_class[status] += 1
    predicting_wrong_class = 1 - (wrong_class[status]/total[status])
    recall = right_class[status]/total[status]
    f1 = (precision + recall)/2
    print(status + '\nPrecision: ' + str(precision) + '\nRecall: ' + str(recall) + '\nF1: ' + str(f1) + '\n\n')

full
Precision: 0.45833333333333337
Recall: 0.5
F1: 0.4791666666666667


empty
Precision: 0.9333333333333333
Recall: 0.6
F1: 0.7666666666666666




## Gif inference

In [None]:
cap = cv2.VideoCapture('../cups.mov')
count = 1
while(cap.isOpened() and count < 150):
    ret, image_np = cap.read()
    image_np = cv2.resize(image_np, dsize=(640,640), interpolation=cv2.INTER_CUBIC)
    # Actual detection.
    output_dict = run_inference_for_single_image(detection_model, image_np)
    # Visualization of the results of a detection.
    vis_util.visualize_boxes_and_labels_on_image_array(
        image_np,
        output_dict['detection_boxes'],
        output_dict['detection_classes'],
        output_dict['detection_scores'],
        category_index,
        instance_masks=output_dict.get('detection_masks_reframed', None),
        use_normalized_coordinates=True,
        line_thickness=8)
    cv2.imwrite('gif/gif_frame_' + str(count) + '.jpg', image_np)
    count += 1
    
    
cap.release()
cv2.destroyAllWindows()
print('Done')

In [None]:
import imageio
import glob
from IPython.display import Image as IPyImage
imageio.plugins.freeimage.download()

anim_file = 'test.gif'

filenames = glob.glob('gif/gif_frame_*.jpg')
filenames = sorted(filenames)
last = -1
for filename in filenames:
  image = imageio.imread(filename)
  images.append(image)

imageio.mimsave(anim_file, images, 'GIF-FI', fps=30)

display(IPyImage(open(anim_file, 'rb').read()))