In [1]:
import torch
import cv2
import numpy as np
import sys
import glob
import time

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
class YoloDetector():
    """Thin wrapper around a YOLOv5 model loaded through ``torch.hub``.

    The detector runs inference on single frames and converts the raw
    predictions into the detection tuples
    ``([left, top, width, height], confidence, detection_class)`` that are
    handed to the tracker later in this notebook.
    """

    def __init__(self, model_name):
        """Load the model and choose the inference device.

        Args:
            model_name: Path to custom YOLOv5 weights. A falsy value
                (``None`` or ``""``) loads the pretrained ``yolov5s`` model.
        """
        # Honour ``model_name`` instead of always loading the stock weights.
        self.model = self.load_model(model_name)
        self.classes = self.model.names
        print(self.classes)

        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        print("Using Device:", self.device)

        # Move the model once here rather than on every scored frame.
        self.model.to(self.device)

    def load_model(self, model_name):
        """Return a YOLOv5 model fetched through ``torch.hub``.

        Args:
            model_name: Path to custom weights, or a falsy value to get the
                pretrained ``yolov5s`` model.

        Returns:
            The loaded model object.
        """
        if model_name:
            return torch.hub.load("ultralytics/yolov5", 'custom', path = model_name, force_reload = True)
        return torch.hub.load("ultralytics/yolov5", 'yolov5s', pretrained = True)

    def score_frame(self, frame, downscale_factor = 2):
        """Run the model on a downscaled copy of ``frame``.

        Args:
            frame: Image array; ``frame.shape[0]`` is used as the height and
                ``frame.shape[1]`` as the width.
            downscale_factor: Divisor applied to both dimensions before
                inference. Defaults to 2.

        Returns:
            ``(labels, cord)`` where ``labels`` is the last column of
            ``results.xyxyn[0]`` and ``cord`` holds all remaining columns.
            ``plot_boxes`` reads columns 0-3 of ``cord`` as box corners and
            column 4 as the confidence score.
        """
        width = int(frame.shape[1] / downscale_factor)
        height = int(frame.shape[0] / downscale_factor)
        frame = cv2.resize(frame, (width, height))

        results = self.model(frame)
        labels, cord = results.xyxyn[0][:, -1], results.xyxyn[0][:, :-1]

        return labels, cord

    def class_to_label(self, x):
        """Map a numeric class id to its name in ``self.classes``."""
        return self.classes[int(x)]

    def plot_boxes(self, results, frame, height, width, confidence = 0.3):
        """Turn model output into 'person' detections for the tracker.

        Despite its name this method draws nothing; ``frame`` is returned
        untouched so the caller can draw on it afterwards.

        Args:
            results: ``(labels, cord)`` pair as returned by ``score_frame``.
            frame: The image the detections belong to (passed through).
            height: Pixel height used to scale the y coordinates in ``cord``.
            width: Pixel width used to scale the x coordinates in ``cord``.
            confidence: Minimum score a row needs to be kept.

        Returns:
            ``(frame, detections)`` where each detection is the tuple
            ``([left, top, w, h], score, 'person')``.
        """
        labels, cord = results
        detections = []
        x_shape, y_shape = width, height

        for i in range(len(labels)):
            row = cord[i]
            # Keep the score in its own variable: the threshold parameter must
            # stay constant for every row.
            score = float(row[4])
            if score < confidence:
                continue
            if self.class_to_label(labels[i]) != 'person':
                continue

            x1, y1 = int(row[0] * x_shape), int(row[1] * y_shape)
            x2, y2 = int(row[2] * x_shape), int(row[3] * y_shape)

            # One tuple per detection; list.append accepts a single argument.
            detections.append(([x1, y1, x2 - x1, y2 - y1], score, 'person'))

        return frame, detections


In [3]:
# Open capture device 0 and request a 1280x720 frame size.
cap = cv2.VideoCapture(0)
for prop_id, requested in (
    (cv2.CAP_PROP_FRAME_WIDTH, 1280),
    (cv2.CAP_PROP_FRAME_HEIGHT, 720),
):
    cap.set(prop_id, requested)

# No custom weights path is supplied (model_name=None).
detector = YoloDetector(model_name=None)

Using cache found in /Users/abinayadinesh/.cache/torch/hub/ultralytics_yolov5_master
[31m[1mrequirements:[0m Ultralytics requirement ['setuptools>=65.5.1'] not found, attempting AutoUpdate...

[31m[1mrequirements:[0m AutoUpdate success ✅ 5.3s, installed 1 package: ['setuptools>=65.5.1']
[31m[1mrequirements:[0m ⚠️ [1mRestart runtime or rerun command for updates to take effect[0m

YOLOv5 🚀 2023-9-30 Python-3.10.5 torch-1.13.1 CPU

Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients
Adding AutoShape... 


{0: 'person', 1: 'bicycle', 2: 'car', 3: 'motorcycle', 4: 'airplane', 5: 'bus', 6: 'train', 7: 'truck', 8: 'boat', 9: 'traffic light', 10: 'fire hydrant', 11: 'stop sign', 12: 'parking meter', 13: 'bench', 14: 'bird', 15: 'cat', 16: 'dog', 17: 'horse', 18: 'sheep', 19: 'cow', 20: 'elephant', 21: 'bear', 22: 'zebra', 23: 'giraffe', 24: 'backpack', 25: 'umbrella', 26: 'handbag', 27: 'tie', 28: 'suitcase', 29: 'frisbee', 30: 'skis', 31: 'snowboard', 32: 'sports ball', 33: 'kite', 34: 'baseball bat', 35: 'baseball glove', 36: 'skateboard', 37: 'surfboard', 38: 'tennis racket', 39: 'bottle', 40: 'wine glass', 41: 'cup', 42: 'fork', 43: 'knife', 44: 'spoon', 45: 'bowl', 46: 'banana', 47: 'apple', 48: 'sandwich', 49: 'orange', 50: 'broccoli', 51: 'carrot', 52: 'hot dog', 53: 'pizza', 54: 'donut', 55: 'cake', 56: 'chair', 57: 'couch', 58: 'potted plant', 59: 'bed', 60: 'dining table', 61: 'toilet', 62: 'tv', 63: 'laptop', 64: 'mouse', 65: 'remote', 66: 'keyboard', 67: 'cell phone', 68: 'microw

In [4]:
from deep_sort_realtime.deepsort_tracker import DeepSort

In [5]:
# All DeepSort keyword settings gathered in one mapping so they can be tuned
# in a single place before the tracker is built.
tracker_settings = dict(
    max_age=5,  # how many frames the tracker will try to keep track of an id for
    nms_max_overlap=1,
    max_cosine_distance=0.3,
    nn_budget=None,
    override_track_class=None,
    embedder="mobilenet",
    half=True,
    bgr=True,
    embedder_gpu=True,
    embedder_model_name=None,
    embedder_wts=None,
    polygon=False,
    today=None,
)
object_tracker = DeepSort(**tracker_settings)

In [6]:
# Capture -> detect -> track -> display loop. Press "q" in the preview window
# to stop. The camera and the window are released in ``finally`` so they are
# cleaned up exactly once, even if a frame raises.
try:
    while cap.isOpened():
        success, img = cap.read()
        if not success or img is None:
            # No frame was delivered; stop instead of crashing on ``None``.
            break
        start = time.perf_counter()

        results = detector.score_frame(img)
        img, detections = detector.plot_boxes(
            results, img, height=img.shape[0], width=img.shape[1], confidence=0.3
        )
        # Extra drawing on ``img`` can be done here.

        # ``detections`` is a list of tuples of
        # ([left, top, w, h], confidence, detection_class).
        tracks = object_tracker.update_tracks(detections, frame=img)

        for track in tracks:
            if not track.is_confirmed():
                continue
            track_id = track.track_id
            bbox = track.to_ltrb()

            top_left = (int(bbox[0]), int(bbox[1]))
            bottom_right = (int(bbox[2]), int(bbox[3]))
            label_origin = (int(bbox[0]), int(bbox[1] - 10))

            cv2.rectangle(img, top_left, bottom_right, (0, 0, 255), 2)
            # putText requires fontScale and color after the font face.
            cv2.putText(img, "ID: " + str(track_id), label_origin,
                        cv2.FONT_HERSHEY_COMPLEX, 0.75, (0, 0, 255), 2)

        totalTime = time.perf_counter() - start
        # Guard against a zero-length interval on coarse clocks.
        fps = 1 / totalTime if totalTime > 0 else 0.0

        cv2.putText(img, f'FPS: {int(fps)}', (20, 70), cv2.FONT_HERSHEY_SIMPLEX, 1.5, (0, 255, 0), 2)
        cv2.imshow("img", img)

        # waitKey returns -1 (truthy) when no key is pressed, so compare the
        # key code explicitly rather than testing truthiness.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    cap.release()
    cv2.destroyAllWindows()

: 