In [3]:
import cv2 as cv
import numpy as np
from motrackers.simple_tracker2 import Tracker
from motrackers.utils import select_videofile, select_tfmobilenet

In [4]:
video_file_path = select_videofile("..")
pbtxt_file_path, tfweights_path = select_tfmobilenet("..")
display(video_file_path, pbtxt_file_path, tfweights_path)

FileChooser(path='..', filename='', show_hidden='False')

FileChooser(path='..', filename='', show_hidden='False')

FileChooser(path='..', filename='', show_hidden='False')

#### Loading Object Detector Model

##### Tensorflow model for Object Detection and Tracking

Here, the SSD Object Detection Model is used.

For more details about single shot detection (SSD), refer the following:
 - **Liu, W., Anguelov, D., Erhan, D., Szegedy, C., Reed, S., Fu, C. Y., & Berg, A. C. (2016, October). Ssd: Single shot multibox detector. In European conference on computer vision (pp. 21-37). Springer, Cham.**
 - Research paper link: https://arxiv.org/abs/1512.02325
 - The pretrained model: https://github.com/opencv/opencv/wiki/TensorFlow-Object-Detection-API#use-existing-config-file-for-your-model

In [5]:
model_info = dict(
    config_path=pbtxt_file_path.selected,
    model_weights_path=tfweights_path.selected,
    object_names={0: 'background',
                  1: 'person', 2: 'bicycle', 3: 'car', 4: 'motorcycle', 5: 'airplane', 6: 'bus',
                  7: 'train', 8: 'truck', 9: 'boat', 10: 'traffic light', 11: 'fire hydrant',
                  13: 'stop sign', 14: 'parking meter', 15: 'bench', 16: 'bird', 17: 'cat',
                  18: 'dog', 19: 'horse', 20: 'sheep', 21: 'cow', 22: 'elephant', 23: 'bear',
                  24: 'zebra', 25: 'giraffe', 27: 'backpack', 28: 'umbrella', 31: 'handbag',
                  32: 'tie', 33: 'suitcase', 34: 'frisbee', 35: 'skis', 36: 'snowboard',
                  37: 'sports ball', 38: 'kite', 39: 'baseball bat', 40: 'baseball glove',
                  41: 'skateboard', 42: 'surfboard', 43: 'tennis racket', 44: 'bottle',
                  46: 'wine glass', 47: 'cup', 48: 'fork', 49: 'knife', 50: 'spoon',
                  51: 'bowl', 52: 'banana', 53: 'apple', 54: 'sandwich', 55: 'orange',
                  56: 'broccoli', 57: 'carrot', 58: 'hot dog', 59: 'pizza', 60: 'donut',
                  61: 'cake', 62: 'chair', 63: 'couch', 64: 'potted plant', 65: 'bed',
                  67: 'dining table', 70: 'toilet', 72: 'tv', 73: 'laptop', 74: 'mouse',
                  75: 'remote', 76: 'keyboard', 77: 'cell phone', 78: 'microwave', 79: 'oven',
                  80: 'toaster', 81: 'sink', 82: 'refrigerator', 84: 'book', 85: 'clock',
                  86: 'vase', 87: 'scissors', 88: 'teddy bear', 89: 'hair drier', 90: 'toothbrush'},
    confidence_threshold=0.5,
    threshold=0.4
)

net = cv.dnn.readNetFromTensorflow(model_info["model_weights_path"], model_info["config_path"])

In [6]:
np.random.seed(12345)

bbox_colors = {key: np.random.randint(0, 255, size=(3,)).tolist() for key in model_info['object_names'].keys()}

##### Instantiate the Tracker Class

In [8]:
tracker = Tracker(max_lost=5)

##### Initiate opencv video capture object

The `video_src` can take two values:
1. If `video_src=0`: OpenCV accesses the camera connected through USB
2. If `video_src='video_file_path'`: OpenCV will access the video file at the given path (can be MP4, AVI, etc format)

In [9]:
video_src = video_file_path.selected  # 0
cap = cv.VideoCapture(video_src)

##### Start object detection and tracking

In [10]:
(H, W) = (None, None)  # input image height and width for the network
writer = None
while(True):
    
    ok, image = cap.read()
    
    if not ok:
        print("Cannot read the video feed.")
        break
    
    if W is None or H is None: (H, W) = image.shape[:2]
    
    blob = cv.dnn.blobFromImage(image, size=(300, 300), swapRB=True, crop=False)
    net.setInput(blob)
    detections = net.forward()
    
    detections_bbox = []     # bounding box for detections
    boxes, confidences, classIDs = [], [], []
    
    for detection in detections[0, 0, :, :]:
        classID = detection[1]
        confidence = detection[2]
        if confidence > model_info['confidence_threshold']:
            box = detection[3:7] * np.array([W, H, W, H])
            (left, top, right, bottom) = box.astype("int")
            width = right - left + 1
            height = bottom - top + 1
            boxes.append([int(left), int(top), int(width), int(height)])
            confidences.append(float(confidence))
            classIDs.append(int(classID))
    
    indices = cv.dnn.NMSBoxes(boxes, confidences, model_info["confidence_threshold"], model_info["threshold"])
    
    if len(indices)>0:
        for i in indices.flatten():
            x, y, w, h = boxes[i][0], boxes[i][1], boxes[i][2], boxes[i][3]
            detections_bbox.append((x, y, x+w, y+h))
            clr = [int(c) for c in bbox_colors[classIDs[i]]]
            cv.rectangle(image, (x, y), (x+w, y+h), clr, 2)
            label = "{}:{:.4f}".format(model_info["object_names"][classIDs[i]], confidences[i])
            (label_width, label_height), baseLine = cv.getTextSize(label, cv.FONT_HERSHEY_SIMPLEX, 0.5, 2)
            y_label = max(y, label_height)
            
            cv.rectangle(
                image, (x, y_label-label_height),
                (x+label_width, y_label+baseLine), (255, 255, 255), cv.FILLED
            )
            
            cv.putText(image, label, (x, y_label), cv.FONT_HERSHEY_SIMPLEX, 0.5, clr, 2)
    
    objects = tracker.update(detections_bbox)           # update tracker based on the newly detected objects
    
    for (objectID, centroid) in objects.items():
        text = "ID {}".format(objectID)
        cv.putText(image, text, (centroid[0] - 10, centroid[1] - 10), cv.FONT_HERSHEY_SIMPLEX,
                    0.5, (0, 255, 0), 2)
        cv.circle(image, (centroid[0], centroid[1]), 4, (0, 255, 0), -1)
        
    cv.imshow("image", image)
    
    if cv.waitKey(1) & 0xFF == ord('q'):
        break
        
    if writer is None:
        fourcc = cv.VideoWriter_fourcc(*"MJPG")
        writer = cv.VideoWriter("output.avi", fourcc, 30, (W, H), True)
    writer.write(image)

writer.release()
cap.release()
cv.destroyWindow("image")

TypeError: update() missing 1 required positional argument: 'detection_scores'