#1. Darknet Clone
Set the Colab runtime type to GPU before running the cells below (Runtime => GPU).

In [None]:
# Clone the AlexeyAB darknet repository into ./darknet
!git clone https://github.com/AlexeyAB/darknet

In [None]:
%cd darknet

In [None]:
!ls -al

In [None]:
# Edit the darknet Makefile in place (sed -i), switching the OPENCV, GPU and
# CUDNN build flags from 0 to 1 so the build is compiled with them enabled
!sed -i 's/OPENCV=0/OPENCV=1/' Makefile
!sed -i 's/GPU=0/GPU=1/' Makefile
!sed -i 's/CUDNN=0/CUDNN=1/' Makefile

In [None]:
# Compile the darknet sources using the Makefile in the current directory
!make

In [None]:
# Download the YOLOv3 weights pretrained on the COCO dataset
# (saved as yolov3.weights in the current directory)
!wget https://pjreddie.com/media/files/yolov3.weights

#2. Tracking

In [None]:
import cv2
import IPython
import numpy as np
import time
import math
from google.colab.patches import cv2_imshow

In [None]:
# Minimum class score for a detection to be kept; the tracking loop also
# passes it to NMS as the score threshold
min_confidence = 0.5
# YOLOv3 weights, network config and class-name list
# (relative paths; the notebook has already changed into the darknet directory)
weight_file = 'yolov3.weights'
cfg_file = 'cfg/yolov3.cfg'
name_file = 'data/coco.names'

# Input video opened by the tracking loop; it must exist in the working directory
file_name = 'cabc30fc-e7726578.mp4'

In [None]:
# Load the YOLO network from the weights file and its config file
net = cv2.dnn.readNet(weight_file, cfg_file)

In [None]:
# Read the class labels (one per line) from the names file, stripping
# surrounding whitespace, then show them
with open(name_file, 'r') as names_fp:
    classes = [label.strip() for label in names_fp]
print(classes)

In [None]:
# Upload the input video from the local machine.
# Expected file: cabc30fc-e7726578.mp4 (the name assigned to file_name)

from google.colab import files
files.upload()

In [None]:
def writeFrame(img):
    """Append ``img`` to the output video, opening the writer on first use.

    Works on the module-level globals ``writer`` and ``output_name``:
    when ``writer`` is still ``None`` and ``output_name`` is set, an MJPG
    ``cv2.VideoWriter`` is created at 24 fps using the width and height of
    ``img`` and stored back into ``writer``. If no writer is available
    afterwards, the frame is silently dropped.

    Args:
        img: Frame to write; only ``img.shape[:2]`` (height, width) is read
            here before the frame is handed to the writer.
    """
    global writer
    frame_h, frame_w = img.shape[:2]

    # Lazily create the writer so its frame size matches the first frame.
    needs_writer = writer is None and output_name is not None
    if needs_writer:
        codec = cv2.VideoWriter_fourcc(*'MJPG')
        writer = cv2.VideoWriter(output_name, codec, 24, (frame_w, frame_h), True)

    if writer is None:
        return
    writer.write(img)

In [None]:
# Detect-then-track loop.
#
# YOLO detection runs on each frame until a suitable box (class index 2,
# wide enough, left edge inside the region of interest) is found. That box is
# then handed to a KCF tracker and every later frame is processed by the
# tracker only. Every processed frame is displayed and appended to the
# output video.
frame_count = 0
# initialize the video writer (created lazily by writeFrame on the first frame)
writer = None
output_name = 'output_tracking.avi'

detected = False
frame_mode = 'Tracking'
elapsed_time = 0
tracker = cv2.TrackerKCF_create()
trackers = cv2.MultiTracker_create()

# Names of the network's unconnected output layers, i.e. the layers whose
# results net.forward() must return.
output_layers = net.getUnconnectedOutLayersNames()

vs = cv2.VideoCapture(file_name)

try:
    while True:
        start_time = time.time()
        frame_count += 1
        ret, frame = vs.read()
        if not ret or frame is None:
            print('### No more frame ###')
            break

        if detected:
            frame_mode = 'Tracking'
            # NOTE(review): `success` is not acted upon; when tracking fails
            # the returned boxes are still drawn, as in the original flow.
            (success, boxes) = trackers.update(frame)
            for box in boxes:
                (x, y, w, h) = [int(v) for v in box]
                cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
        else:
            frame_mode = 'Detection'
            height, width = frame.shape[:2]
            # Detecting objects
            # https://docs.opencv.org/master/d6/d0f/group__dnn.html
            blob = cv2.dnn.blobFromImage(frame, 0.00392, (416, 416), (0, 0, 0), True, crop=False)

            net.setInput(blob)
            outs = net.forward(output_layers)

            confidences = []
            boxes = []

            for out in outs:
                for detection in out:
                    # Entries 0-3 are used as the box (center x, center y,
                    # width, height) relative to the frame size; entries from
                    # index 5 on are the per-class scores.
                    scores = detection[5:]
                    class_id = np.argmax(scores)
                    confidence = scores[class_id]
                    # Keep only class index 2 (expected to be 'car' in
                    # data/coco.names -- confirm against the printed list).
                    if (confidence > min_confidence) and (class_id == 2):
                        # Object detected
                        center_x = int(detection[0] * width)
                        center_y = int(detection[1] * height)
                        w = int(detection[2] * width)
                        h = int(detection[3] * height)

                        # Rectangle coordinates (top-left corner)
                        x = int(center_x - w / 2)
                        y = int(center_y - h / 2)

                        boxes.append([x, y, w, h])
                        confidences.append(float(confidence))

            # Region of Interest: the box's left edge must lie between 30% and
            # 60% of the frame width.
            roi_left = int(0.3 * width)
            roi_right = int(0.6 * width)

            # Non-maximum suppression; skipped when nothing was detected.
            indexes = cv2.dnn.NMSBoxes(boxes, confidences, min_confidence, 0.4) if boxes else []
            # Depending on the OpenCV version the result is a flat or an
            # (N, 1) array, so flatten it before iterating.
            kept = np.asarray(indexes, dtype=int).flatten()

            selected = None
            for i in kept:
                x, y, w, h = boxes[i]
                # Eliminate Small object(<50)
                if (w > 50) and (x > roi_left) and (x < roi_right):
                    # The last qualifying box becomes the tracking target.
                    selected = boxes[i]
                    cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 255), 5)

            # Switch to tracking only once a target exists; otherwise run
            # detection again on the next frame instead of failing.
            if selected is not None:
                trackers.add(tracker, frame, tuple(selected))
                detected = True

        cv2_imshow(frame)
        writeFrame(frame)
        frame_time = time.time() - start_time
        elapsed_time += frame_time
        print("[{}] Frame {} time {}".format(frame_mode, frame_count, frame_time))

    print("Elapsed time {}".format(elapsed_time))
finally:
    # Release the capture and finalize the output video even if the loop fails.
    vs.release()
    if writer is not None:
        writer.release()