# CO1.4 Modeling - Part 4 (YOLO + DeepSORT)

In [None]:
!pip install deep-sort-realtime opencv-python ultralytics



In [None]:
import cv2
from ultralytics import YOLO
from deep_sort_realtime.deepsort_tracker import DeepSort
import os


In [None]:
# Detector: load the trained YOLOv11 weights through the Ultralytics wrapper.
WEIGHTS_PATH = "best.pt"
model = YOLO(WEIGHTS_PATH)

# Tracker: one DeepSORT instance shared by the cells below.
# NOTE(review): max_age is presumably the number of consecutive missed
# frames a track survives before it is dropped - confirm in the library docs.
TRACK_MAX_AGE = 30
tracker = DeepSort(max_age=TRACK_MAX_AGE)


In [None]:
# Annotate the input video: run YOLO on every frame, feed the detections to
# DeepSORT, draw a labelled box for each confirmed track and write the frame
# to a new MP4 file.
video_path = '/content/cat_cctv_fin.mp4'
output_path = '/content/cat_cctv_annotated_fin.mp4'

cap = cv2.VideoCapture(video_path)
out = None
try:
    # Fail fast on a bad path/codec instead of silently writing an empty file.
    if not cap.isOpened():
        raise OSError(f"Could not open input video: {video_path}")

    # Prepare video writer with the same frame size and frame rate as the input
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = cap.get(cv2.CAP_PROP_FPS)
    if not fps or fps <= 0:
        # The capture backend could not report a frame rate; fall back to a
        # sane default so the writer still produces a playable file.
        fps = 30.0
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
    if not out.isOpened():
        raise OSError(f"Could not open output video for writing: {output_path}")

    # Perform detection and tracking
    while True:
        ret, frame = cap.read()
        if not ret:
            break  # end of stream (or read failure)

        # verbose=False suppresses the per-frame inference log lines.
        results = model(frame, verbose=False)

        # Convert each YOLO box from corner format (x1, y1, x2, y2) to the
        # ([left, top, width, height], confidence, class) tuples that are
        # passed to tracker.update_tracks below.
        detections = []
        for result in results:
            for box in result.boxes:
                x1, y1, x2, y2 = box.xyxy[0].cpu().numpy()
                conf = box.conf[0].item()
                cls_id = int(box.cls[0].item())
                detections.append(([x1, y1, x2 - x1, y2 - y1], conf, cls_id))

        tracks = tracker.update_tracks(detections, frame=frame)
        for track in tracks:
            # Only confirmed tracks are drawn.
            if not track.is_confirmed():
                continue
            left, top, right, bottom = (int(v) for v in track.to_ltrb())
            cv2.rectangle(frame, (left, top), (right, bottom), (0, 255, 0), 2)
            cv2.putText(frame, "cat", (left, top - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)

        out.write(frame)
finally:
    # Always release the handles, even if detection/tracking raised, so the
    # output container is finalised and the input file is not left open.
    cap.release()
    if out is not None:
        out.release()

print("✅ Tracking complete. Output saved to:", output_path)



0: 640x384 1 cat, 145.4ms
Speed: 5.4ms preprocess, 145.4ms inference, 1.2ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 cat, 124.2ms
Speed: 4.2ms preprocess, 124.2ms inference, 0.9ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 cat, 117.1ms
Speed: 3.1ms preprocess, 117.1ms inference, 0.9ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 cat, 119.8ms
Speed: 3.4ms preprocess, 119.8ms inference, 1.2ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 cat, 115.6ms
Speed: 3.4ms preprocess, 115.6ms inference, 1.2ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 cat, 123.3ms
Speed: 3.2ms preprocess, 123.3ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 cat, 121.3ms
Speed: 2.6ms preprocess, 121.3ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 cat, 119.6ms
Speed: 2.7ms preprocess, 119.6ms inference, 1.2ms postprocess per image at shape (1, 3, 640, 384)


In [None]:
import csv

# Export per-frame tracking results (one row per confirmed track per frame)
# to a CSV file with columns: frame, track_id, x1, y1, x2, y2.
csv_output_path = '/content/cat_tracking_output.csv'

# Use a dedicated tracker for this pass. Reusing the notebook-level `tracker`
# would carry over tracks and ID counters from the annotation pass over the
# same video, so frame 0 would start with stale tracks and inflated IDs.
# NOTE(review): a fresh tracker presumably needs a few frames before it
# confirms a track, so the very first frames may have no rows - confirm.
csv_tracker = DeepSort(max_age=30)

cap = cv2.VideoCapture(video_path)
try:
    # Check the capture before opening the CSV so a bad video path does not
    # truncate an existing results file.
    if not cap.isOpened():
        raise OSError(f"Could not open input video: {video_path}")

    with open(csv_output_path, mode='w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(['frame', 'track_id', 'x1', 'y1', 'x2', 'y2'])

        frame_idx = 0  # zero-based index of the frame currently processed

        while True:
            ret, frame = cap.read()
            if not ret:
                break  # end of stream (or read failure)

            # verbose=False suppresses the per-frame inference log lines.
            results = model(frame, verbose=False)

            # Convert YOLO corner boxes (x1, y1, x2, y2) into the
            # ([left, top, width, height], confidence, class) tuples that
            # are passed to update_tracks below.
            detections = []
            for result in results:
                for box in result.boxes:
                    x1, y1, x2, y2 = box.xyxy[0].cpu().numpy()
                    conf = box.conf[0].item()
                    cls_id = int(box.cls[0].item())
                    detections.append(([x1, y1, x2 - x1, y2 - y1], conf, cls_id))

            tracks = csv_tracker.update_tracks(detections, frame=frame)
            for track in tracks:
                # Only confirmed tracks are written out.
                if not track.is_confirmed():
                    continue
                track_id = track.track_id
                ltrb = track.to_ltrb()
                writer.writerow([frame_idx, track_id, int(ltrb[0]), int(ltrb[1]), int(ltrb[2]), int(ltrb[3])])

            frame_idx += 1
finally:
    # Release the capture even if detection, tracking or writing raised.
    cap.release()

print("✅ Tracking data saved to CSV at:", csv_output_path)


0: 640x384 1 cat, 258.8ms
Speed: 13.9ms preprocess, 258.8ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 cat, 370.2ms
Speed: 5.2ms preprocess, 370.2ms inference, 1.1ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 cat, 118.9ms
Speed: 2.7ms preprocess, 118.9ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 cat, 121.5ms
Speed: 3.6ms preprocess, 121.5ms inference, 1.1ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 cat, 134.0ms
Speed: 3.0ms preprocess, 134.0ms inference, 1.1ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 cat, 128.7ms
Speed: 3.5ms preprocess, 128.7ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 cat, 122.1ms
Speed: 3.6ms preprocess, 122.1ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 cat, 120.7ms
Speed: 3.5ms preprocess, 120.7ms inference, 1.2ms postprocess per image at shape (1, 3, 640, 384)

In [None]:
import pandas as pd

# Read back the rows written by the CSV export cell.
df = pd.read_csv(csv_output_path)

# Number of CSV rows belonging to each track_id.
rows_per_track = df.groupby('track_id').size()

# Headline statistics: distinct frame indices present in the CSV, distinct
# track ids, and the mean number of rows per track.
total_frames = df['frame'].nunique()
total_tracks = df['track_id'].nunique()
avg_track_length = rows_per_track.mean()

# Preview of the first rows followed by the summary, one line per statistic.
print(df.head(10))
summary_lines = [
    f"📈 Total Frames: {total_frames}",
    f"🧍 Unique Tracks (Objects): {total_tracks}",
    f"📊 Average Track Length: {avg_track_length:.2f} frames",
]
print("\n".join(summary_lines))

   frame  track_id  x1   y1   x2    y2
0      0        19   8  466  644   951
1      1        19 -10  443  691  1000
2      2        19 -10  433  707  1024
3      3        19  -2  429  706  1034
4      4        19   8  429  698  1038
5      5        19  21  430  687  1038
6      6        19  37  441  673  1038
7      7        19  53  455  658  1037
8      8        19  69  464  649  1036
9      9        19  78  469  641  1035
📈 Total Frames: 560
🧍 Unique Tracks (Objects): 10
📊 Average Track Length: 122.30 frames
