In [1]:
import cv2
import torch
from deep_sort_realtime.deepsort_tracker import DeepSort
from ultralytics import YOLO

In [2]:
# DeepSORT algorithm for tracking (Kalman filter combined with feature extractor vectors)
deepSort = DeepSort()

# It needs an object detection algorithm -> YOLO
yolo = YOLO('yolo11n.pt')

In [None]:
capture = cv2.VideoCapture('football_video.mp4')

while capture.isOpened():
    ret, frame = capture.read()
    
    if not ret:
        break

    results = yolo(frame)
    detections = []
    boxes = results[0].boxes

    for box in boxes:
        confidence = float(box.conf[0])
        if confidence < 0.5:
            continue
        x1, y1, x2, y2 = map(int, box.xyxy[0].tolist())
        class_label = int(box.cls[0])
        # w -> x2-x1, h -> y2-y1
        detections.append([[x1, y1, (x2-x1), (y2-y1)], confidence, class_label])

    # DeepSORT
    tracks = deepSort.update_tracks(detections, frame = frame)

    for track in tracks:
        x, y, w, h = track.to_tlwh()
        track_id = track.track_id

        cv2.rectangle(frame, (int(x), int(y)), (int(x+w), int(y+h)), color = (0, 255, 0), thickness = 2) 
        cv2.putText(frame, f"ID: {track_id}", (int(x), int(y)-10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)

        cv2.imshow('DeepSORT', frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break


capture.release()
cv2.destroyAllWindows()


0: 384x640 3 persons, 1 sports ball, 70.5ms
Speed: 4.0ms preprocess, 70.5ms inference, 1.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 persons, 1 sports ball, 56.7ms
Speed: 2.2ms preprocess, 56.7ms inference, 1.2ms postprocess per image at shape (1, 3, 384, 640)
