In [1]:
from ultralytics import YOLO
import os, json, cv2
from tqdm import tqdm
import torch
import matplotlib.pyplot as plt

In [2]:
# Run inference on the first CUDA GPU (index 0) when PyTorch can see one;
# otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 0
else:
    device = "cpu"
print(f"Using device: {device}")

Using device: cpu


In [3]:
# Input folder holding the trimmed clips and output folder for the per-video
# detection JSON files. Both paths are relative to the notebook's working directory.
VIDEO_DIR = "./../../data/temp/videos/trimmed"
OUTPUT_DIR = "./../../data/temp/objects"

# Create the output folder up front (no error if it already exists).
os.makedirs(OUTPUT_DIR, exist_ok=True)

# os.listdir returns entries in arbitrary order; sort them so every run walks
# the videos in the same, reproducible sequence.
videos = sorted(v for v in os.listdir(VIDEO_DIR) if v.endswith(".mp4"))
print(f"Found {len(videos)} videos for object detection.")

Found 1643 videos for object detection.


In [None]:
# Load the detector once; this single instance is reused for every video below.
model = YOLO(model="yolo11l")

In [None]:
def _frame_detections(result, names):
    """Convert one per-frame result into a list of JSON-serialisable dicts.

    Args:
        result: A single result yielded by the model; its ``boxes`` attribute
            is read for ``xyxy``, ``conf`` and ``cls``.
        names: Mapping from class index to class name (``model.names``).

    Returns:
        One dict per detected box with keys ``id`` (index of the box within
        the frame), ``class``, ``bbox`` (``[x1, y1, x2, y2]``) and ``conf``.
        Empty list when the result carries no boxes.
    """
    if result.boxes is None:
        return []

    # Move to host memory and convert to plain Python numbers so json can dump them.
    corners = result.boxes.xyxy.cpu().numpy().tolist()
    scores = result.boxes.conf.cpu().numpy().tolist()
    labels = result.boxes.cls.cpu().numpy().tolist()

    return [
        {
            "id": idx,
            "class": names[int(label)],
            "bbox": bbox,
            "conf": score,
        }
        for idx, (bbox, score, label) in enumerate(zip(corners, scores, labels))
    ]


# Half-precision (FP16) inference is a GPU feature; request it only when the
# selected device is not the CPU so CPU runs stay in full precision.
use_half = device != "cpu"

for video_name in tqdm(videos, desc="Detecting objects"):
    video_path = os.path.join(VIDEO_DIR, video_name)
    # splitext strips only the trailing extension, unlike str.replace which
    # would rewrite every ".mp4" occurring anywhere in the file name.
    stem = os.path.splitext(video_name)[0]
    out_json = os.path.join(OUTPUT_DIR, f"{stem}_objects.json")

    # stream=True yields results frame by frame instead of holding the whole
    # video's results in memory at once.
    results = model(video_path, stream=True, device=device, half=use_half, verbose=False)

    object_data = [
        {"frame": frame_idx, "objects": _frame_detections(r, model.names)}
        for frame_idx, r in enumerate(results)
    ]

    with open(out_json, "w", encoding="utf-8") as f:
        json.dump(object_data, f, indent=2)

    # tqdm.write prints the message without breaking the progress bar.
    tqdm.write(f"Saved detections → {out_json}")