In [1]:
%pip install ultralytics


Collecting ultralytics
  Downloading ultralytics-8.3.160-py3-none-any.whl.metadata (37 kB)
Collecting opencv-python>=4.6.0 (from ultralytics)
  Using cached opencv_python-4.11.0.86-cp37-abi3-win_amd64.whl.metadata (20 kB)
Collecting pyyaml>=5.3.1 (from ultralytics)
  Downloading PyYAML-6.0.2-cp313-cp313-win_amd64.whl.metadata (2.1 kB)
Collecting torch>=1.8.0 (from ultralytics)
  Downloading torch-2.7.1-cp313-cp313-win_amd64.whl.metadata (28 kB)
Collecting torchvision>=0.9.0 (from ultralytics)
  Downloading torchvision-0.22.1-cp313-cp313-win_amd64.whl.metadata (6.1 kB)
Collecting tqdm>=4.64.0 (from ultralytics)
  Downloading tqdm-4.67.1-py3-none-any.whl.metadata (57 kB)
Collecting py-cpuinfo (from ultralytics)
  Downloading py_cpuinfo-9.0.0-py3-none-any.whl.metadata (794 bytes)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.14-py3-none-any.whl.metadata (9.4 kB)
Collecting filelock (from torch>=1.8.0->ultralytics)
  Using cached filelock-3.18.0-p


[notice] A new release of pip is available: 24.3.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [3]:
%pip install ultralytics opencv-python numpy deep_sort_realtime


Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.3.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [None]:
import cv2
import numpy as np
from ultralytics import YOLO
from deep_sort_realtime.deepsort_tracker import DeepSort
import csv

# Load YOLOv11 model
model = YOLO("best.pt")

# Initialize DeepSORT
tracker = DeepSort(max_age=30)

# Load video
video_path = "15sec_input_720p.mp4"
cap = cv2.VideoCapture(video_path)

# Output video
output_path = "output_tracked.mp4"
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
fps = int(cap.get(cv2.CAP_PROP_FPS))
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

# Logging
log = []
frame_num = 0

# Define box filtering thresholds
MIN_BOX_AREA = 500     # minimum area for player box
MAX_BOX_AREA = width * height * 0.5  # avoid huge boxes
MIN_ASPECT = 0.2
MAX_ASPECT = 4.0

while True:
    ret, frame = cap.read()
    if not ret:
        break

    frame_num += 1
    results = model(frame)[0]

    detections = []
    for box in results.boxes:
        cls_id = int(box.cls)
        conf = float(box.conf)

        # Only proceed for confident detections
        if conf < 0.5:
            continue

        # Extract box
        x1, y1, x2, y2 = map(int, box.xyxy[0])
        w = x2 - x1
        h = y2 - y1
        area = w * h

        # Clip box within frame
        x1, y1 = max(0, x1), max(0, y1)
        x2, y2 = min(width, x2), min(height, y2)

        # Area and aspect ratio filter
        aspect = w / float(h + 1e-5)
        if area < MIN_BOX_AREA or area > MAX_BOX_AREA:
            continue
        if aspect < MIN_ASPECT or aspect > MAX_ASPECT:
            continue

        # Append for DeepSORT
        detections.append(([x1, y1, w, h], conf, 'player'))

    # Update DeepSORT
    tracks = tracker.update_tracks(detections, frame=frame)

    for track in tracks:
        if not track.is_confirmed():
            continue
        track_id = track.track_id
        l, t, r, b = track.to_ltrb()
        x1, y1, x2, y2 = int(l), int(t), int(r), int(b)

        # Draw bounding box and ID
        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
        cv2.putText(frame, f'ID {track_id}', (x1, y1 - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)

        log.append([frame_num, track_id, x1, y1, x2, y2])

    out.write(frame)

cap.release()
out.release()

# Save CSV
with open('tracking_log.csv', mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(['frame', 'track_id', 'x1', 'y1', 'x2', 'y2'])
    writer.writerows(log)

print("Clean tracking complete. Files saved:")
print(" - output_tracked.mp4")
print(" - tracking_log.csv")



0: 384x640 1 ball, 16 players, 2 referees, 1456.7ms
Speed: 5.1ms preprocess, 1456.7ms inference, 2.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 18 players, 2 referees, 1325.6ms
Speed: 3.6ms preprocess, 1325.6ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 ball, 16 players, 2 referees, 1279.8ms
Speed: 4.3ms preprocess, 1279.8ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 ball, 14 players, 2 referees, 1513.0ms
Speed: 2.6ms preprocess, 1513.0ms inference, 1.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 ball, 14 players, 2 referees, 1466.6ms
Speed: 2.1ms preprocess, 1466.6ms inference, 1.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 ball, 16 players, 2 referees, 1462.8ms
Speed: 2.3ms preprocess, 1462.8ms inference, 1.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 15 players, 2 referees, 1417.4ms
Speed: 3.6ms preprocess, 1417.4ms inference, 1.0ms 