In [1]:
import cv2 as cv
from scipy.spatial import distance
import numpy as np
from collections import OrderedDict
from motrackers import SimpleTracker
from motrackers.utils import select_videofile, select_yolo_model

In [2]:
# All file pickers start browsing from the parent directory.
browse_root = ".."

# One picker for the input video, three for the YOLO model files.
video_file_path = select_videofile(browse_root)
(yolo_weights_path, yolo_config_path, coco_names_path) = select_yolo_model(browse_root)

# Render the four pickers so the files can be chosen before running the next cells.
display(video_file_path, yolo_weights_path, yolo_config_path, coco_names_path)

FileChooser(path='..', filename='', show_hidden='False')

FileChooser(path='..', filename='', show_hidden='False')

FileChooser(path='..', filename='', show_hidden='False')

FileChooser(path='..', filename='', show_hidden='False')

#### Loading Object Detector Model

##### YOLO Object Detection and Tracking

Here, the YOLO Object Detection Model is used.

The pre-trained model comes from the following sources:
 - The object detector is taken from the following work:  
     **Redmon, J., & Farhadi, A. (2018). YOLOv3: An incremental improvement. arXiv preprint arXiv:1804.02767.**
 - The research paper describing YOLO object detection and its improvements can be found here: https://arxiv.org/abs/1804.02767
 - Refer to the following link for more details on the network: https://pjreddie.com/darknet/yolo/
 - Download the weights and configuration files and store them in a folder.
 - Weights: https://pjreddie.com/media/files/yolov3.weights

In [3]:
# Detector settings. The three paths come from the file pickers created earlier.
yolomodel = {
    "config_path": yolo_config_path.selected,
    "model_weights_path": yolo_weights_path.selected,
    "coco_names": coco_names_path.selected,
    "confidence_threshold": 0.5,   # minimum class score for a detection to be kept
    "threshold": 0.3,              # NMS threshold passed to cv.dnn.NMSBoxes
}

# Fail early with a readable message instead of an opaque OpenCV/IO error.
# NOTE(review): presumably `.selected` is None/empty until a file has been chosen in
# the widget -- confirm against the file chooser's API.
unselected = [
    key for key in ("config_path", "model_weights_path", "coco_names")
    if not yolomodel[key]
]
if unselected:
    raise ValueError("No file selected for: " + ", ".join(unselected))

# Load the Darknet network definition and its trained weights.
net = cv.dnn.readNetFromDarknet(yolomodel["config_path"], yolomodel["model_weights_path"])

# One class label per line; the context manager closes the file, and splitlines()
# also copes with Windows (CRLF) line endings.
with open(yolomodel["coco_names"]) as names_file:
    labels = names_file.read().strip().splitlines()

In [4]:
# Fixed seed so that each class gets the same box colour on every run.
np.random.seed(12345)

# Names of the network's output layers, which are the ones passed to net.forward().
# getUnconnectedOutLayers() yields 1-based layer indices. Depending on the OpenCV
# release it returns either an Nx1 array or a flat array, so flatten it first to
# support both layouts.
all_layer_names = net.getLayerNames()
out_layer_ids = np.asarray(net.getUnconnectedOutLayers()).flatten()
layer_names = [all_layer_names[int(idx) - 1] for idx in out_layer_ids]

# One random colour (3 integer components) per class label.
bbox_colors = np.random.randint(0, 255, size=(len(labels), 3))

['yolo_82', 'yolo_94', 'yolo_106']


##### Instantiate the Tracker Class

In [5]:
# Upper bound on how many times a tracked object may be counted as lost
# (handed to the tracker as `max_lost`).
maxLost = 5
tracker = SimpleTracker(max_lost=maxLost)

##### Initialize the OpenCV video capture object

The `video_src` variable can take one of two values:
1. If `video_src=0`: OpenCV reads from the camera connected through USB.
2. If `video_src='video_file_path'`: OpenCV reads the video file at the given path (MP4, AVI, or another supported format).

In [6]:
# Source of frames: the video file chosen in the picker (use 0 instead to read from a camera).
video_src = video_file_path.selected #0
cap = cv.VideoCapture(video_src)

# Fail here, with the offending source in the message, rather than letting the
# processing loop report a generic read failure later on.
if not cap.isOpened():
    raise IOError("Could not open video source: {!r}".format(video_src))

##### Start object detection and tracking

In [7]:
# Main loop: read frames, detect objects with YOLO, update the tracker, draw the
# results, show them in a window and record them to "output.avi".
# Press 'q' in the preview window to stop early.

(H, W) = (None, None)  # frame height and width, taken from the first frame
writer = None          # created lazily, once the frame size is known
window_shown = False   # True once the preview window has been created

# Record at the frame rate reported by the source; fall back to 30 when the
# backend reports nothing usable (0 or NaN).
fps = cap.get(cv.CAP_PROP_FPS)
if not (fps and fps > 0):
    fps = 30

try:
    while True:
        ok, image = cap.read()

        if not ok:
            print("Cannot read the video feed.")
            break

        if W is None or H is None:
            (H, W) = image.shape[:2]

        # The network outputs box coordinates relative to the frame size;
        # this factor converts them to pixels.
        frame_scale = np.array([W, H, W, H])

        blob = cv.dnn.blobFromImage(image, 1 / 255.0, (416, 416), swapRB=True, crop=False)
        net.setInput(blob)
        detections_layer = net.forward(layer_names)   # detect objects using object detection model

        detections_bbox = []     # (x1, y1, x2, y2) boxes handed to the tracker

        boxes, confidences, classIDs = [], [], []
        for out in detections_layer:
            for detection in out:
                # Entries 0-3 are the box (centre x, centre y, width, height);
                # entries from index 5 onwards are the per-class scores.
                scores = detection[5:]
                classID = np.argmax(scores)
                confidence = scores[classID]

                if confidence > yolomodel['confidence_threshold']:
                    box = detection[0:4] * frame_scale
                    (centerX, centerY, width, height) = box.astype("int")
                    # Convert the box centre to its top-left corner.
                    x = int(centerX - (width / 2))
                    y = int(centerY - (height / 2))

                    boxes.append([x, y, int(width), int(height)])
                    confidences.append(float(confidence))
                    classIDs.append(classID)

        # Non-maximum suppression removes overlapping boxes for the same object.
        idxs = cv.dnn.NMSBoxes(boxes, confidences, yolomodel["confidence_threshold"], yolomodel["threshold"])

        if len(idxs) > 0:
            # The shape of the returned index container differs between OpenCV
            # releases; flattening a NumPy view of it handles both.
            for i in np.asarray(idxs).flatten():
                (x, y) = (boxes[i][0], boxes[i][1])
                (w, h) = (boxes[i][2], boxes[i][3])
                detections_bbox.append((x, y, x+w, y+h))
                clr = [int(c) for c in bbox_colors[classIDs[i]]]
                cv.rectangle(image, (x, y), (x+w, y+h), clr, 2)
                cv.putText(image, "{}: {:.4f}".format(labels[classIDs[i]], confidences[i]),
                           (x, y-5), cv.FONT_HERSHEY_SIMPLEX, 0.5, clr, 2)

        objects = tracker.update(detections_bbox)           # update tracker based on the newly detected objects

        # `objects` maps each object ID to its centroid; label and mark each one.
        for (objectID, centroid) in objects.items():
            text = "ID {}".format(objectID)
            cv.putText(image, text, (centroid[0] - 10, centroid[1] - 10), cv.FONT_HERSHEY_SIMPLEX,
                       0.5, (0, 255, 0), 2)
            cv.circle(image, (centroid[0], centroid[1]), 4, (0, 255, 0), -1)

        cv.imshow("image", image)
        window_shown = True

        if cv.waitKey(1) & 0xFF == ord('q'):
            break

        if writer is None:
            fourcc = cv.VideoWriter_fourcc(*"MJPG")
            writer = cv.VideoWriter("output.avi", fourcc, fps, (W, H), True)
        writer.write(image)
finally:
    # Always release resources, including on errors or a kernel interrupt.
    # The writer may never have been created (e.g. the very first read failed),
    # and the window may never have been shown.
    if writer is not None:
        writer.release()
    cap.release()
    if window_shown:
        cv.destroyWindow("image")

Cannot read the video feed.
