# <font color='green'><b> Tracking of detected objects </b></font>

### Credits: Sergio Canu (https://pysource.com/blog/)

#Before starting:

download the object detection model from: https://lanzarotti.di.unimi.it/dnn_model.zip

save and unzip it in the same folder of this notebook

In [1]:
#@title ▶️ Base dir setup
import os, sys

# check if hosted (Google VM) or running on local server
if 'google.colab' in sys.modules:
  #@markdown Google Drive root folder - hosted by Google VM (adapt to your local paths)
  from google.colab import drive
  drive.mount('/content/drive', force_remount=False)
  base_dir = 'infoMM/' #@param {type: "string"}
  base_dir  = os.path.join('/content/drive/MyDrive/', base_dir)
  #!pip install pillow  --upgrade
else:
  
  # dirs
 
  vid_dir = 'VIDEO_IN/'
 

 
print("Current dir:", os.getcwd())

Current dir: c:\Users\eumir\Desktop\Università\Anno III\Semestre 1\Informazione Multimediale\VIDEO


In [2]:
import cv2
import numpy as np
from ipywidgets import interact
import ipywidgets as widgets 
import math
import matplotlib.pyplot as plt

In [3]:
#visualization of image sequence
def showVideo(I):
  
  n = len(I)

  def view_image(idx):
    plt.imshow(I[idx-1], interpolation='nearest', cmap='gray')
  
  interact(view_image, idx=widgets.IntSlider(min=1, max=n, step=1, value=1))

In [4]:
class ObjectDetection:
    def __init__(self, weights_path="./dnn_model/yolov4.weights", 
                 cfg_path="./dnn_model/yolov4.cfg"):
        print("Loading Object Detection")
        print("Running opencv dnn with YOLOv4")
        self.nmsThreshold = 0.4
        self.confThreshold = 0.5
        self.image_size = 608

        # Load Network
        net = cv2.dnn.readNet(weights_path, cfg_path)

        # Enable GPU CUDA
        net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
        net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)
        self.model = cv2.dnn_DetectionModel(net)

        self.classes = []
        self.load_class_names()
        self.colors = np.random.uniform(0, 255, size=(80, 3))

        self.model.setInputParams(size=(self.image_size, self.image_size), scale=1/255)

    def load_class_names(self, classes_path="./dnn_model/classes.txt"):

        with open(classes_path, "r") as file_object:
            for class_name in file_object.readlines():
                class_name = class_name.strip()
                self.classes.append(class_name)

        self.colors = np.random.uniform(0, 255, size=(80, 3))
        return self.classes

    def detect(self, frame):
        return self.model.detect(frame, nmsThreshold=self.nmsThreshold, confThreshold=self.confThreshold)

In [5]:
# Initialize Object Detection
od = ObjectDetection()

classes = od.load_class_names()
print(classes)

Loading Object Detection
Running opencv dnn with YOLOv4
['person', 'bicycle', 'car', 'motorbike', 'aeroplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'sofa', 'pottedplant', 'bed', 'diningtable', 'toilet', 'tvmonitor', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush', 'person', 'bicycle', 'car', 'motorbike', 'aeroplane', 'bus', 'train', 'truck',

In [6]:
cap = cv2.VideoCapture(vid_dir + "los_angeles.mp4")

In [7]:
#We load a sequence of images
cap = cv2.VideoCapture(vid_dir + 'IPPR/in000%3d.jpg')
cap.set(cv2.CAP_PROP_POS_FRAMES, 30)
 

True

In [8]:

# Initialize count
count = 0
center_points_prev_frame = []

tracking_objects = {}
track_id = 0
I=[]
 
length_tracking= 20 #track only on the first frames

while count <length_tracking: # Alternatively : True:
    ret, frame = cap.read()
    count += 1
 
    if not ret:
        break

    # Point current frame
    center_points_cur_frame = []

    # Detect objects on frame
    (class_ids, scores, boxes) = od.detect(frame)
    if class_ids.any():
      print("detected these kind of objects: " + classes[class_ids[0]])
  
    for box in boxes:
        (x, y, w, h) = box
        cx = int((x + x + w) / 2)
        cy = int((y + y + h) / 2)
        center_points_cur_frame.append((cx, cy))
        #print("FRAME N°", count, " ", x, y, w, h)

        # cv2.circle(frame, (cx, cy), 5, (0, 0, 255), -1)
        cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)

    # Only at the beginning we compare previous and current frame
    if count <= 2:
        for pt in center_points_cur_frame:
            for pt2 in center_points_prev_frame:
                distance = math.hypot(pt2[0] - pt[0], pt2[1] - pt[1])

                if distance < 20:
                    tracking_objects[track_id] = pt
                    track_id += 1
    else:

        tracking_objects_copy = tracking_objects.copy()
        center_points_cur_frame_copy = center_points_cur_frame.copy()

        for object_id, pt2 in tracking_objects_copy.items():
            object_exists = False
            for pt in center_points_cur_frame_copy:
                distance = math.hypot(pt2[0] - pt[0], pt2[1] - pt[1])

                # Update IDs position
                if distance < 20:
                    tracking_objects[object_id] = pt
                    object_exists = True
                    if pt in center_points_cur_frame:
                        center_points_cur_frame.remove(pt)
                    continue

            # Remove IDs lost
            if not object_exists:
                tracking_objects.pop(object_id)

        # Add new IDs found
        for pt in center_points_cur_frame:
            tracking_objects[track_id] = pt
            track_id += 1
    img_det = frame.copy()
    for object_id, pt in tracking_objects.items():
        img_det = cv2.circle(frame, pt, 5, (0, 0, 255), -1)
        img_det = cv2.putText(img_det, str(object_id), (pt[0], pt[1] - 7), 0, 1, (255, 0, 0), 5)
    I.append(img_det)

    print("Tracking objects")
    print(tracking_objects)

    print("CUR FRAME LEFT PTS")
    print(center_points_cur_frame)

    # Make a copy of the points
    center_points_prev_frame = center_points_cur_frame.copy()


Tracking objects
{}
CUR FRAME LEFT PTS
[(165, 139)]
Tracking objects
{0: (150, 135)}
CUR FRAME LEFT PTS
[(150, 135)]
Tracking objects
{0: (150, 135)}
CUR FRAME LEFT PTS
[]
Tracking objects
{0: (150, 129)}
CUR FRAME LEFT PTS
[]
Tracking objects
{0: (150, 129)}
CUR FRAME LEFT PTS
[]
Tracking objects
{0: (150, 129)}
CUR FRAME LEFT PTS
[]
Tracking objects
{0: (150, 129)}
CUR FRAME LEFT PTS
[]
Tracking objects
{0: (154, 119), 1: (124, 8)}
CUR FRAME LEFT PTS
[(124, 8)]
Tracking objects
{0: (154, 119), 1: (124, 8)}
CUR FRAME LEFT PTS
[]
Tracking objects
{0: (150, 116), 1: (124, 10)}
CUR FRAME LEFT PTS
[]
Tracking objects
{0: (150, 116), 1: (124, 10)}
CUR FRAME LEFT PTS
[]
Tracking objects
{0: (150, 116), 1: (124, 10)}
CUR FRAME LEFT PTS
[]
Tracking objects
{0: (150, 116), 1: (124, 10)}
CUR FRAME LEFT PTS
[]
Tracking objects
{0: (142, 106)}
CUR FRAME LEFT PTS
[]
Tracking objects
{0: (142, 106)}
CUR FRAME LEFT PTS
[]
Tracking objects
{0: (139, 105)}
CUR FRAME LEFT PTS
[]
Tracking objects
{0: (1

In [9]:
showVideo(I)

interactive(children=(IntSlider(value=1, description='idx', max=20, min=1), Output()), _dom_classes=('widget-i…

# <font color='green'>Exercise:</font>

- Make the cell above a function with signature:

`def tracking(video, length_Tracking, verbose= False):` 
such that the print instruction are conditioned on the value of verbose.
The function return the list of frame `I`

- Call the function in this way:
  - load the video in the folder IPPR, 
  - set the position frame to 30 (hint: `cap.set(cv2.CAP_PROP_POS_FRAMES, 30)`)
  - call the tracking function so that no print will be produced, and so that 20 frames will be processed. 
  - Finally show the result with the function `showVideo()`


In [24]:
def tracking(cap, length_Tracking, verbose=False):

    # Initialize count
    count = 0
    center_points_prev_frame = []

    tracking_objects = {}
    track_id = 0
    I=[]
    
    length_tracking= 20 #track only on the first frames

    while count <length_tracking: # Alternatively : True:
        ret, frame = cap.read()
        count += 1
    
        if not ret:
            break

        # Point current frame
        center_points_cur_frame = []

        # Detect objects on frame
        (class_ids, scores, boxes) = od.detect(frame)
        if class_ids.any():
            print("detected these kind of objects: " + classes[class_ids[0]])
    
        for box in boxes:
            (x, y, w, h) = box
            cx = int((x + x + w) / 2)
            cy = int((y + y + h) / 2)
            center_points_cur_frame.append((cx, cy))
            #print("FRAME N°", count, " ", x, y, w, h)

            # cv2.circle(frame, (cx, cy), 5, (0, 0, 255), -1)
            cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)

        # Only at the beginning we compare previous and current frame
        if count <= 2:
            for pt in center_points_cur_frame:
                for pt2 in center_points_prev_frame:
                    distance = math.hypot(pt2[0] - pt[0], pt2[1] - pt[1])

                    if distance < 20:
                        tracking_objects[track_id] = pt
                        track_id += 1
        else:

            tracking_objects_copy = tracking_objects.copy()
            center_points_cur_frame_copy = center_points_cur_frame.copy()

            for object_id, pt2 in tracking_objects_copy.items():
                object_exists = False
                for pt in center_points_cur_frame_copy:
                    distance = math.hypot(pt2[0] - pt[0], pt2[1] - pt[1])

                    # Update IDs position
                    if distance < 20:
                        tracking_objects[object_id] = pt
                        object_exists = True
                        if pt in center_points_cur_frame:
                            center_points_cur_frame.remove(pt)
                        continue

                # Remove IDs lost
                if not object_exists:
                    tracking_objects.pop(object_id)

            # Add new IDs found
            for pt in center_points_cur_frame:
                tracking_objects[track_id] = pt
                track_id += 1
        img_det = frame.copy()
        for object_id, pt in tracking_objects.items():
            img_det = cv2.circle(frame, pt, 5, (0, 0, 255), -1)
            img_det = cv2.putText(img_det, str(object_id), (pt[0], pt[1] - 7), 0, 1, (255, 0, 0), 5)
        I.append(img_det)

        if verbose != False:
            print("Tracking objects")
            print(tracking_objects)

            print("CUR FRAME LEFT PTS")
            print(center_points_cur_frame)

        # Make a copy of the points
        center_points_prev_frame = center_points_cur_frame.copy()

    return I


In [25]:
#load
cap = cv2.VideoCapture(vid_dir + 'IPPR/in000%3d.jpg')

#set the first frame to process to 30
cap.set(cv2.CAP_PROP_POS_FRAMES, 30)

#call the tracking function on 20 frames
I = tracking(cap, 20)

#show the result
showVideo(I)


interactive(children=(IntSlider(value=1, description='idx', max=20, min=1), Output()), _dom_classes=('widget-i…