In [None]:
import matplotlib
import matplotlib.pyplot as plt

import os
import random
import io
import imageio
import glob
import scipy.misc
import numpy as np
from bs4 import BeautifulSoup
from six import BytesIO
from PIL import Image, ImageDraw, ImageFont
from IPython.display import display, Javascript
from IPython.display import Image as IPyImage

import tensorflow as tf

from object_detection.utils import label_map_util
from object_detection.utils import config_util
from object_detection.utils import visualization_utils as viz_utils
from object_detection.builders import model_builder

from create_and_restore_model import create_and_restore_model
%matplotlib inline

# ---- Inference configuration ------------------------------------------------
# Every value a reader may need to tune lives here; each name is assigned
# exactly once so that later cells cannot pick up a stale duplicate.

# NOTE(review): machine-specific absolute path -- point it at your own checkout
# of the TensorFlow models repository before running.
pipeline_config = '/home/evan/Desktop/Tensorflow/models/research/object_detection/configs/tf2/ssd_resnet50_v1_fpn_640x640_coco17_tpu-8.config'

# Placeholder checkpoint prefix: set it to the latest checkpoint and remove the
# extra '-' (presumably something like './fine_tuned_ckpts/ckpt-1').
checkpoint_path = './fine_tuned_ckpts/ckpt--1'

# Not referenced by the visible cells; presumably the native size of the
# captured frames -- TODO confirm.
(orig_width, orig_height) = (960, 544)

# Image size handed to create_and_restore_model.
(new_width, new_height) = (640, 640)

# Single-class setup: the only label is 'hand', with 1-based id 1.
hand_class_id = 1
num_classes = 1
CATEGORY_INDEX = {hand_class_id: {'id': hand_class_id, 'name': 'hand'}}

# Create model and restore weights from the fine-tuned checkpoint

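In this cell we build a single stage detection architecture (RetinaNet) and restore its weights from the fine-tuned checkpoint. Because `restore_classification_head` is set to `True`, the classification layer at the top is restored from the checkpoint as well, rather than being randomly initialized.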

In [None]:
# Build the detector and load its weights. All arguments come from the
# configuration cell so the model can never be built with values that differ
# from the ones the rest of the notebook uses.
# restore_classification_head=True presumably makes the helper load the
# fine-tuned classification head too -- confirm in create_and_restore_model.
detection_model = create_and_restore_model(
    pipeline_config=pipeline_config,
    checkpoint_path=checkpoint_path,
    new_height=new_height,
    new_width=new_width,
    hand_class_id=hand_class_id,
    num_classes=num_classes,
    restore_classification_head=True,
)

# Run inference!

In [None]:
import cv2

# Kalman filter with a 4-component state and a 2-component measurement.
kalman = cv2.KalmanFilter(4, 2)

# The measurement is the first two state components: a 2x4 matrix with ones
# at (0, 0) and (1, 1).
kalman.measurementMatrix = np.eye(2, 4, dtype=np.float32)

# Each step adds state[2] to state[0] and state[3] to state[1] and leaves
# state[2] and state[3] unchanged: identity plus ones on the second
# super-diagonal, i.e. at (0, 2) and (1, 3).
kalman.transitionMatrix = (
    np.eye(4, dtype=np.float32) + np.eye(4, k=2, dtype=np.float32))

# Uncorrelated process noise of 0.03 on every state component.
kalman.processNoiseCov = np.eye(4, dtype=np.float32) * 0.03

In [None]:
@tf.function
def detect(input_tensor):
  """Run the full detection pipeline of `detection_model` on one input.

  The input goes through the model's three stages in order: `preprocess`,
  `predict` and `postprocess`.

  Args:
    input_tensor: A [1, height, width, 3] Tensor of type tf.float32.
      Height and width are presumably free, with `preprocess` resizing the
      image to what the model needs -- TODO confirm against the model config.

  Returns:
    Whatever `detection_model.postprocess` returns. The tracking loop in this
    notebook reads the `detection_boxes`, `detection_classes` and
    `detection_scores` entries from it.
  """
  model_input, true_shapes = detection_model.preprocess(input_tensor)
  raw_predictions = detection_model.predict(model_input, true_shapes)
  detections = detection_model.postprocess(raw_predictions, true_shapes)
  return detections

def plot_detections(image_np,
                    boxes,
                    classes,
                    scores,
                    CATEGORY_INDEX,
                    figsize=(12, 16),
                    image_name=None):
  """Draw detection boxes and labels onto an image and return that image.

  Thin wrapper around
  `viz_utils.visualize_boxes_and_labels_on_image_array`. The annotations are
  drawn on `image_np` itself (no copy is made here), boxes are interpreted as
  normalized coordinates, and the score threshold is 0.0.

  Args:
    image_np: uint8 numpy array with shape (img_height, img_width, 3).
    boxes: a numpy array of shape [N, 4].
    classes: a numpy array of shape [N]. Class indices are 1-based and match
      the keys of `CATEGORY_INDEX`.
    scores: a numpy array of shape [N] or None. With scores=None the
      visualization utility presumably treats the boxes as groundtruth and
      draws them without classes or scores -- TODO confirm.
    CATEGORY_INDEX: a dict of category dictionaries (each holding category
      index `id` and category name `name`) keyed by category indices.
    figsize: accepted for backward compatibility; not used by this function.
    image_name: accepted for backward compatibility; not used by this
      function (nothing is written to disk).

  Returns:
    The `image_np` object that was passed in.
  """
  render_options = {
      'use_normalized_coordinates': True,
      'min_score_thresh': 0.0,
  }
  viz_utils.visualize_boxes_and_labels_on_image_array(
      image_np, boxes, classes, scores, CATEGORY_INDEX, **render_options)
  return image_np


In [None]:
def _box_center_and_size(box):
    """Split a [ymin, xmin, ymax, xmax] box into (center_x, center_y, height, width)."""
    ymin, xmin, ymax, xmax = box
    return (xmin + xmax) / 2.0, (ymin + ymax) / 2.0, ymax - ymin, xmax - xmin


def _box_from_center_and_size(center_x, center_y, height, width):
    """Build a float32 [ymin, xmin, ymax, xmax] box, clipped to the [0, 1] range."""
    half_h, half_w = height / 2.0, width / 2.0
    corners = np.array(
        [center_y - half_h, center_x - half_w,
         center_y + half_h, center_x + half_w], np.float32)
    # Boxes are drawn with normalized coordinates, so keep them inside the frame.
    return np.clip(corners, 0.0, 1.0)


# Detector confidence required before Kalman tracking starts.
MIN_TRACKING_SCORE = .9
# Once tracking, run the detector on every N-th frame and let the Kalman filter
# predict in between. 1 (the default) runs the detector on every frame.
DETECT_EVERY_N_FRAMES = 1
# Give up after this many consecutive failed reads instead of spinning forever.
MAX_FAILED_READS = 30

vid = cv2.VideoCapture(0) #use webcam
last_measurement = None    # last (x, y) centre fed to the filter; None until tracking starts
last_box = None            # box drawn on the previous frame
last_score = None          # score drawn on the previous frame
last_class = None          # class drawn on the previous frame
last_prediction = None     # last Kalman state prediction [x, y, vx, vy]
first_good_detection = False
label_id_offset = 1        # added to the model's class output to get CATEGORY_INDEX keys
kalman_counter = 1         # frames processed since the detector last ran
failed_reads = 0

try:
    while True:
        #take frames until escape button
        ret, frame = vid.read()
        if not ret:
            print('unable to read frame')
            failed_reads += 1
            if failed_reads >= MAX_FAILED_READS:
                break
            continue
        failed_reads = 0

        # Mask to the low byte: some platforms set extra high bits in the key code.
        k = cv2.waitKey(1) & 0xFF
        if k == 27: #escape
            break

        # Until tracking has started every frame goes through the detector.
        run_detector = (not first_good_detection
                        or kalman_counter >= DETECT_EVERY_N_FRAMES)
        if run_detector:
            print('ran model')
            # NOTE(review): OpenCV frames are BGR; if the model was trained on RGB
            # images the channels should be swapped first -- confirm against the
            # training pipeline.
            input_tensor = tf.convert_to_tensor(frame[np.newaxis, ...], dtype=tf.float32)
            detections = detect(input_tensor)
            # Only the first detection of the batch is used.
            box = detections['detection_boxes'][0][0].numpy() #[ymin, xmin, ymax, xmax]
            class_ = detections['detection_classes'][0][0].numpy()
            score = detections['detection_scores'][0][0].numpy()
            print(score)
            #bbox center coords, height, width
            x, y, h, w = _box_center_and_size(box)
            kalman_counter = 0

        #wait for at least .9 confidence detection, then use Kalman
        if not first_good_detection and score > MIN_TRACKING_SCORE:
            print('first good det')
            first_good_detection = True

        if first_good_detection:
            if last_measurement is None:
                # First tracked frame: seed the filter at the detected centre with
                # zero velocity; the raw detection box is drawn for this frame.
                initial_state = np.array([[x], [y], [0], [0]], np.float32)
                kalman.statePre = initial_state
                kalman.statePost = initial_state.copy()
            else:
                print('using Kalman')
                if run_detector:
                    # Only fold in a measurement when there is a fresh one.
                    kalman.correct(np.array([[x], [y]], np.float32))
                prediction = kalman.predict().reshape(4)
                # The state is [x, y, vx, vy], not a box: rebuild the box around
                # the predicted centre using the last measured height and width.
                box = _box_from_center_and_size(prediction[0], prediction[1], h, w)
                # The track keeps the class of the detection that started it.
                class_ = last_class
                last_prediction = prediction
            if run_detector:
                last_measurement = np.array([[x], [y]], np.float32)

        last_class = class_

        frame = plot_detections(
            frame,
            box[np.newaxis, :],
            np.array([class_.astype(np.uint32) + label_id_offset]),
            np.array([score]),
            CATEGORY_INDEX, figsize=(15, 20), image_name='frame with detections')
        cv2.imshow('detections on frame', frame)

        last_box = box
        last_score = score
        kalman_counter += 1
finally:
    # Always free the camera and close the window, even after an exception or
    # a kernel interrupt.
    vid.release()
    cv2.destroyAllWindows()