In [None]:
# Install only the headless version of OpenCV
!pip install opencv-python-headless==4.9.0.80

# Install boxmot (includes DeepOcSort tracker and correct numpy version)
!pip install boxmot==12.0.1

# Install Ultralytics for the YOLO model
!pip install ultralytics

In [None]:
! kill -9 $(ps -A | grep python | awk '{print $1}')

In [1]:
import cv2
import numpy as np
from collections import defaultdict, deque
from pathlib import Path
from ultralytics import YOLO
from boxmot import DeepOcSort
import os

Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.


In [2]:
def track_humans(input_video, output_video):
    # Initialize the YOLO model for detecting humans
    human_detector = YOLO("yolo11x.pt")
    # Initialize the DeepOcSort tracker for tracking detected people
    human_tracker = DeepOcSort(
        asso_func="ciou",  # Association function for matching detections to tracks
        reid_weights=Path("osnet_x0_25_msmt17.pt"),  # Model weights for re-identification
        device="cuda:0",   # Run on GPU (cuda:0)
        half=True,         # Use half-precision for faster inference
        det_thresh=0.5,    # Detection confidence threshold
        max_age=30,        # Max frames to keep 'lost' tracks
        min_hits=3,        # Min detections before a new track is confirmed
        iou_threshold=0.3, # IOU threshold for matching
        delta_t=3,         # Tracker parameter (time window)
        inertia=0.2,       # Tracker smoothing parameter
        w_association_emb=0.5,
        alpha_fixed_emb=0.95,
        aw_param=0.5,
        embedding_off=False,
        cmc_off=False,
        aw_off=False,
        new_kf_off=False,
        use_cuda=True
    )

    # Open the input video file
    video_reader = cv2.VideoCapture(input_video)
    if not video_reader.isOpened():
        return  # Exit if the video could not be opened

    # Get video information: frame rate, width, height
    fps = video_reader.get(cv2.CAP_PROP_FPS)
    width = int(video_reader.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(video_reader.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    # Prepare to write the output video
    video_writer = cv2.VideoWriter(output_video, fourcc, fps, (width, height))

    # Set font sizes and thickness for annotations
    base_scale = height / 500
    base_thickness = max(1, int(height / 300))
    label_scale = base_scale * 0.6
    label_thickness = max(1, base_thickness - 1)

    # Initialize variables for tracking people
    human_history = defaultdict(lambda: deque(maxlen=60))  # Stores recent positions for each person
    last_positions = {}           # Stores last position for each ID to check for jumps
    position_threshold = 50       # Ignore sudden large jumps in position (pixels)
    frame_count = 0               # Frame counter

    while True:
        ret, frame = video_reader.read()
        if not ret:
            break  # End of video

        # Detect humans in the frame using YOLO
        human_results = human_detector(frame, conf=0.1)
        human_detections = []
        for box in human_results[0].boxes:
            bbox = box.xyxy.cpu().numpy()[0]      # Bounding box coordinates
            conf = float(box.conf.item())         # Detection confidence
            cls = int(box.cls.item())             # Class ID (0: person in COCO dataset)
            if cls == 0:  # Only keep person detections
                human_detections.append([bbox[0], bbox[1], bbox[2], bbox[3], conf, cls])

        # Convert detections to numpy array for the tracker
        human_detections = np.array(human_detections) if human_detections else np.empty((0, 6))

        # Track the detected humans using DeepOcSort
        try:
            human_tracks = human_tracker.update(human_detections, frame)
        except IndexError:
            human_tracks = []

        # Copy the frame to draw annotations
        annotated_frame = frame.copy()
        for track in human_tracks:
            bbox = track[:4]          # Bounding box coordinates
            track_id = int(track[4])  # Unique ID for the tracked person
            confidence = track[5] if len(track) > 5 else 1.0  # Tracking confidence

            # Calculate the center of the bounding box
            center = ((bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2)

            # Check for sudden large position changes (to avoid ID switches or errors)
            if track_id in last_positions:
                last_pos = last_positions[track_id]
                distance = np.linalg.norm(np.array(center) - np.array(last_pos))
                if distance > position_threshold:
                    continue  # Ignore if jump is too large

            # Save the current position
            last_positions[track_id] = center
            # Add the center to the trajectory history
            human_history[track_id].append(center)

            # Ignore bounding boxes that are too large (likely errors)
            bbox_width = bbox[2] - bbox[0]
            bbox_height = bbox[3] - bbox[1]
            bbox_area = bbox_width * bbox_height
            if bbox_area > (width * height) / 4:
                continue

            # Set color and thickness based on confidence
            color = (0, 255, 0) if confidence >= 0.5 else (0, 0, 255)
            thickness = 2 if confidence >= 0.5 else 1

            # Draw the bounding box around the person
            cv2.rectangle(annotated_frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), color, thickness)

            # Draw the tracking ID and confidence above the box
            cv2.putText(annotated_frame, f"ID:{track_id} ({confidence:.2f})",
                        (int(bbox[0]), int(bbox[1]) - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, label_scale, color, label_thickness)

            # Draw the movement trajectory for each tracked person
            if len(human_history[track_id]) >= 2:
                for i in range(1, len(human_history[track_id])):
                    pt1 = (int(human_history[track_id][i-1][0]), int(human_history[track_id][i-1][1]))
                    pt2 = (int(human_history[track_id][i][0]), int(human_history[track_id][i][1]))
                    # Assign a unique color to each track using the ID
                    track_color = (int(track_id * 50) % 255,
                                   int(track_id * 100) % 255,
                                   int(track_id * 150) % 255)
                    cv2.line(annotated_frame, pt1, pt2, track_color, 2)

        # Save the annotated frame to the output video
        video_writer.write(annotated_frame)
        frame_count += 1

    # Release video resources when done
    video_reader.release()
    video_writer.release()

if __name__ == "__main__":
    # Define input and output directories
    input_dir = "/content/drive/MyDrive/Colab Notebooks/soccer_prj/soccer_tracking/soccer_input"
    output_dir = "/content/drive/MyDrive/Colab Notebooks/soccer_prj/soccer_tracking/soccer_output"
    os.makedirs(output_dir, exist_ok=True)  # Create the output directory if it doesn't exist

    # Process all video files in the input directory
    video_files = [f for f in os.listdir(input_dir) if f.lower().endswith(('.mp4', '.avi', '.mov'))]
    for video_file in video_files:
        input_path = os.path.join(input_dir, video_file)
        output_path = os.path.join(output_dir, f"processed_{video_file}")
        print(f"Starting processing for video: {input_path}")
        track_humans(input_path, output_path)
        print(f"Finished processing for video: {input_path}")

Starting processing for video: /content/drive/MyDrive/Colab Notebooks/soccer_prj/soccer_tracking/soccer_input/for_tracking.mp4
Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11x.pt to 'yolo11x.pt'...


100%|██████████| 109M/109M [00:00<00:00, 348MB/s] 
[32m2025-05-17 03:49:47.741[0m | [1mINFO    [0m | [36mboxmot.utils.torch_utils[0m:[36mselect_device[0m:[36m52[0m - [1mYolo Tracking v12.0.0 🚀 Python-3.11.11 torch-2.2.2+cu121
CUDA:0 (Tesla T4, 15095MiB)[0m
Downloading...
From: https://drive.google.com/uc?id=1sSwXSUlj4_tHZequ_iZ8w_Jh0VaRQMqF
To: /content/osnet_x0_25_msmt17.pt
100%|██████████| 3.06M/3.06M [00:00<00:00, 189MB/s]
[32m2025-05-17 03:49:51.770[0m | [32m[1mSUCCESS [0m | [36mboxmot.appearance.reid_model_factory[0m:[36mload_pretrained_weights[0m:[36m183[0m - [32m[1mLoaded pretrained weights from osnet_x0_25_msmt17.pt[0m



0: 320x640 22 persons, 2 umbrellas, 86.8ms
Speed: 19.5ms preprocess, 86.8ms inference, 154.3ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 21 persons, 2 umbrellas, 52.6ms
Speed: 2.1ms preprocess, 52.6ms inference, 1.7ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 20 persons, 3 umbrellas, 52.6ms
Speed: 5.3ms preprocess, 52.6ms inference, 2.1ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 21 persons, 2 umbrellas, 54.1ms
Speed: 3.0ms preprocess, 54.1ms inference, 2.2ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 20 persons, 3 umbrellas, 52.6ms
Speed: 4.5ms preprocess, 52.6ms inference, 2.1ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 20 persons, 3 umbrellas, 52.5ms
Speed: 3.3ms preprocess, 52.5ms inference, 2.1ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 21 persons, 3 umbrellas, 59.0ms
Speed: 3.5ms preprocess, 59.0ms inference, 2.2ms postprocess per image at shape (1, 3, 320, 640)

0: 320x64