In [1]:
!pip install deep-sort-realtime

Collecting deep-sort-realtime
  Downloading deep_sort_realtime-1.3.2-py3-none-any.whl.metadata (12 kB)
Downloading deep_sort_realtime-1.3.2-py3-none-any.whl (8.4 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 8.4/8.4 MB 31.7 MB/s eta 0:00:00
Installing collected packages: deep-sort-realtime
Successfully installed deep-sort-realtime-1.3.2

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.3.1[0m[39;49m -> [0m[32;49m25.3[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [3]:
from ultralytics import YOLO
from deep_sort_realtime.deepsort_tracker import DeepSort
import cv2, json, os
from tqdm import tqdm

In [5]:
# Source folder holding the trimmed .mp4 clips (relative to the notebook's working directory).
input_dir = "./../../data/temp/videos/trimmed"

# Destination folder for the per-video tracked-skeleton JSON files.
output_dir = "./../../data/temp/skeletons/skeletons_tracked"

# Create the destination (and any missing parents); a no-op when it already exists.
os.makedirs(name=output_dir, exist_ok=True)

In [None]:
# Pose estimator loaded from the "yolo11l-pose.pt" weights; shared by every video.
pose_model = YOLO("yolo11l-pose.pt")

# Module-level DeepSort instance.
# NOTE(review): the processing loop assigns a new DeepSort to `tracker` for each
# video, so this instance only matters if something else reads `tracker` first.
tracker = DeepSort(
    max_age=30,
    n_init=3,
    nms_max_overlap=1.0,
    max_cosine_distance=0.4,
)

In [None]:
# Run pose estimation + DeepSort tracking over every .mp4 in `input_dir` and write one
# "<stem>_tracked.json" per video into `output_dir`. Each JSON is a list of
# {"frame": int, "people": [{"track_id", "bbox": [x1, y1, x2, y2], "keypoints": [{"x", "y"}, ...]}]}.
# Videos whose output already exists are skipped; a failing video is reported and
# the loop moves on to the next one.

# Sorted so the processing order is the same on every run / filesystem.
video_files = sorted(v for v in os.listdir(input_dir) if v.endswith(".mp4"))
print(f"Found {len(video_files)} videos to process.")

failed_videos = []  # names of videos that raised during processing

for video_name in tqdm(video_files, desc="Tracking videos"):
    video_path = os.path.join(input_dir, video_name)
    # splitext only strips the extension; str.replace would also rewrite a
    # ".mp4" that happens to appear inside the file stem.
    video_stem, _ = os.path.splitext(video_name)
    out_json = os.path.join(output_dir, f"{video_stem}_tracked.json")

    if os.path.exists(out_json):
        print(f"[SKIPPED] Skipping {video_name}, already processed.")
        continue

    # Fresh tracker per video so track IDs and appearance state never carry over.
    tracker = DeepSort(
        max_age=30,
        n_init=3,
        nms_max_overlap=1.0,
        max_cosine_distance=0.4,
        embedder_gpu=True
    )

    skeleton_data = []

    try:
        results = pose_model(video_path, stream=True, device=0, half=True, verbose=False)
        for frame_idx, result in enumerate(results):
            has_boxes = result.boxes is not None
            bboxes = result.boxes.xyxy.cpu().numpy() if has_boxes else []
            confs = result.boxes.conf.cpu().numpy() if has_boxes else []
            kpts = result.keypoints.xy.cpu().numpy() if result.keypoints is not None else []

            # DeepSort takes (left, top, width, height) boxes. `det_indices` runs
            # parallel to `detections` and remembers which YOLO detection each
            # entry came from, so a track can be mapped back to its keypoints.
            detections = []
            det_indices = []
            for det_idx, (x1, y1, x2, y2) in enumerate(bboxes):
                width, height = x2 - x1, y2 - y1
                if width <= 0 or height <= 0:
                    # Degenerate boxes are dropped here so the two lists stay aligned
                    # even if the tracker filters zero-area detections itself.
                    continue
                detections.append(([x1, y1, width, height], confs[det_idx], None))
                det_indices.append(det_idx)

            # The tracker's built-in appearance embedder crops each detection out
            # of the frame, so the original image must be supplied (frame=None
            # leaves it nothing to embed). `others` attaches the detection index
            # to whichever track the detection gets matched to.
            tracks = tracker.update_tracks(
                detections, frame=result.orig_img, others=det_indices
            )

            # Store tracked skeletons
            people = []
            for track in tracks:
                # Only confirmed tracks that were matched to a detection in THIS
                # frame have a skeleton to report.
                if not track.is_confirmed() or track.time_since_update > 0:
                    continue
                det_idx = track.get_det_supplementary()
                if det_idx is None:
                    continue

                x1, y1, x2, y2 = bboxes[det_idx]
                joints = []
                if det_idx < len(kpts):
                    joints = [{"x": float(x), "y": float(y)} for (x, y) in kpts[det_idx]]
                people.append({
                    "track_id": int(track.track_id),
                    "bbox": [float(x1), float(y1), float(x2), float(y2)],
                    "keypoints": joints
                })

            skeleton_data.append({"frame": frame_idx, "people": people})

        # Save per-video JSON. Write to a temp file and rename so an interrupted
        # dump never leaves a partial file that later runs would skip as "done".
        tmp_json = out_json + ".tmp"
        with open(tmp_json, "w") as f:
            json.dump(skeleton_data, f, indent=2)
        os.replace(tmp_json, out_json)

        print(f"Saved tracked skeletons → {out_json}")

    except Exception as e:
        # Best-effort batch: report the failure, remember it, keep going.
        print(f"❌ Error processing {video_name}: {e}")
        failed_videos.append(video_name)
        continue

if failed_videos:
    print(f"Finished with errors: {len(failed_videos)} of {len(video_files)} videos failed: {failed_videos}")
else:
    print("All videos processed successfully!")