**Object Tracking using YOLOv5-DeepSort**

## Installing requirements

In [None]:
!pip install -r https://raw.githubusercontent.com/ultralytics/yolov5/master/requirements.txt

In [None]:
import cv2
import numpy as np
import sys
import glob

import time
import torch

## Define YOLOv5s Detector

In [None]:
class YoloDetector():
    """YOLOv5s wrapper that turns frames into 'person' detections for a tracker.

    Typical use: ``results = detector.score_frame(frame)`` followed by
    ``frame, detections = detector.plot_boxes(results, frame, height, width)``.
    """

    def __init__(self):
        """Load the pretrained 'yolov5s' model from torch.hub and pick a device."""
        self.model = torch.hub.load('ultralytics/yolov5',
                                    'yolov5s',
                                    pretrained=True)
        self.classes = self.model.names
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        # Move the model once here rather than on every call to score_frame.
        self.model.to(self.device)
        print('Using Device: ', self.device)

    def score_frame(self, frame, downscale_factor=2):
        """Run the model on a downscaled copy of ``frame``.

        Args:
            frame: image array; ``frame.shape[0]`` is used as height and
                ``frame.shape[1]`` as width.
            downscale_factor: divisor applied to both dimensions before
                inference (default 2, the value previously hard-coded).

        Returns:
            ``(labels, cord)`` taken from ``results.xyxyn[0]``: ``labels`` is the
            last column, ``cord`` holds the remaining columns. ``plot_boxes``
            reads ``cord`` columns 0-3 as box corners expressed as fractions of
            the frame size and column 4 as the confidence score.
        """
        # Never ask cv2.resize for a zero-sized image.
        width = max(1, int(frame.shape[1] / downscale_factor))
        height = max(1, int(frame.shape[0] / downscale_factor))
        frame = cv2.resize(frame, (width, height))

        results = self.model(frame)

        predictions = results.xyxyn[0]
        labels, cord = predictions[:, -1], predictions[:, :-1]

        return labels, cord

    def class_to_label(self, x):
        """Map a numeric class id ``x`` to its name via ``self.classes``."""
        return self.classes[int(x)]

    def plot_boxes(self, results, frame, height, width, confidence=0.3):
        """Build tracker detections for every 'person' at or above ``confidence``.

        Despite the name, nothing is drawn: ``frame`` is returned unchanged.

        Args:
            results: ``(labels, cord)`` as returned by ``score_frame``.
            frame: passed through untouched.
            height: pixel height used to scale the fractional y coordinates.
            width: pixel width used to scale the fractional x coordinates.
            confidence: minimum score a detection needs to be kept.

        Returns:
            ``(frame, detections)`` where each detection is
            ``([left, top, box_width, box_height], score, 'person')`` in pixels,
            the shape this notebook feeds to ``DeepSort.update_tracks``.
        """
        labels, cord = results
        detections = []

        for label, row in zip(labels, cord):
            score = row[4]

            # The threshold is compared against `confidence` untouched; it must
            # not be reassigned inside the loop, otherwise one accepted
            # detection would change the cut-off for all the following ones.
            if not (score >= confidence):
                continue

            if self.class_to_label(label) != 'person':
                continue

            x1, y1 = int(row[0] * width), int(row[1] * height)
            x2, y2 = int(row[2] * width), int(row[3] * height)

            detections.append(([x1, y1, x2 - x1, y2 - y1], float(score), 'person'))

        return frame, detections

# Define DeepSort object tracker

## Installing an existing DeepSort implementation

In [None]:
!pip install deep-sort-realtime

Collecting deep-sort-realtime
  Downloading deep_sort_realtime-1.3.2-py3-none-any.whl.metadata (12 kB)
Downloading deep_sort_realtime-1.3.2-py3-none-any.whl (8.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.4/8.4 MB[0m [31m67.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: deep-sort-realtime
Successfully installed deep-sort-realtime-1.3.2


In [None]:
import os

# NOTE(review): presumably lets the process continue when the OpenMP runtime
# is loaded more than once (e.g. by several native libraries) — confirm.
os.environ.update(KMP_DUPLICATE_LIB_OK="TRUE")

> Downloading a sample video

In [None]:
# Fetch the sample clip and store it as test.mp4, the file name that the
# cv2.VideoCapture call in the tracking section opens.
!wget -O test.mp4 https://videos.pexels.com/video-files/854671/854671-hd_1280_720_50fps.mp4

--2024-09-02 18:01:43--  https://videos.pexels.com/video-files/854671/854671-hd_1280_720_50fps.mp4
Resolving videos.pexels.com (videos.pexels.com)... 104.18.66.220, 104.18.67.220, 2606:4700::6812:42dc, ...
Connecting to videos.pexels.com (videos.pexels.com)|104.18.66.220|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 20678601 (20M)
Saving to: ‘test.mp4’


2024-09-02 18:01:44 (54.3 MB/s) - ‘test.mp4’ saved [20678601/20678601]



### Creating an instance of the detector model and the DeepSort tracker

In [None]:
# Imported here rather than in the top import cell because the package is only
# installed by the pip cell earlier in this section.
from deep_sort_realtime.deepsort_tracker import DeepSort

# Tracker created with the library's default constructor arguments.
object_tracker = DeepSort()
# Constructing the detector loads the 'yolov5s' model through torch.hub
# (see YoloDetector.__init__) and prints the selected device.
detector = YoloDetector()

# Tracking

## Loading input & Initializing output

In [None]:
# Open the input clip and fail loudly if it is missing or unreadable, instead
# of silently producing an empty output file later on.
cap = cv2.VideoCapture('test.mp4')
if not cap.isOpened():
    raise RuntimeError("Could not open input video 'test.mp4'")

# Requested capture size. NOTE(review): this is presumably only honoured by
# camera backends and ignored for files — the writer below therefore uses the
# size the capture actually reports.
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)

frame_size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
              int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))

# Write the output at the source frame rate so its duration matches the input
# (the sample clip's file name says 50 fps). Fall back to the previous fixed
# 30 fps when the backend does not report a usable value.
source_fps = cap.get(cv2.CAP_PROP_FPS)
if not (source_fps > 0):
    source_fps = 30.0

fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter('out.mp4',
                      fourcc,
                      source_fps,
                      frame_size)

## Object (person) tracking from cam/video

In [None]:
# Detect people in every frame, update the DeepSort tracks, draw the confirmed
# tracks plus an FPS read-out, and append the annotated frame to the output.
# try/finally guarantees that the capture and the writer are released even if
# the loop raises or the cell is interrupted, so out.mp4 is always finalised.
try:
    while cap.isOpened():
        success, img = cap.read()

        if not success:
            break

        start = time.perf_counter()

        results = detector.score_frame(img)

        img, detections = detector.plot_boxes(results,
                                              img,
                                              height=img.shape[0],
                                              width=img.shape[1],
                                              confidence=0.25)

        tracks = object_tracker.update_tracks(detections, frame=img)

        for track in tracks:
            if not track.is_confirmed():
                continue

            track_id = track.track_id
            # Format the score whenever one is present; an explicit None check
            # keeps a legitimate 0.0 from being treated like a missing value.
            track_conf = track.det_conf
            if track_conf is not None:
                track_conf = f'{track_conf:.2f}'

            bbox = track.to_ltrb()
            left, top, right, bottom = (int(bbox[0]), int(bbox[1]),
                                        int(bbox[2]), int(bbox[3]))

            cv2.rectangle(img,
                          (left, top),
                          (right, bottom),
                          (0, 0, 255),
                          2)
            cv2.putText(img,
                        f"ID: {track_id} - CONF: {track_conf}",
                        (left, int(bbox[1] - 10)),
                        cv2.FONT_HERSHEY_SIMPLEX,
                        .5,
                        (0, 255, 0),
                        2)

        end = time.perf_counter()
        totalTime = end - start
        # Guard against a zero elapsed time on coarse clocks.
        fps = 1 / totalTime if totalTime > 0 else 0

        cv2.putText(img,
                    f'FPS: {int(fps)}',
                    (20, 70),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    1,
                    (0, 255, 0),
                    5)

        out.write(img)

        # ESC (key code 27) stops processing early.
        # NOTE(review): no window is shown by this notebook, so key presses
        # presumably never arrive here — confirm before relying on it.
        if cv2.waitKey(1) & 0xFF == 27:
            break
finally:
    cap.release()
    out.release()

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  wit