# Setup

In [1]:
keyframes_dir = None
save_dir = None

In [2]:
import os
from collections import defaultdict

dir_path = os.getcwd()

if not keyframes_dir:
    if 'google.colab' in str(get_ipython()):
        # Update this path as necessary
        keyframes_dir = f'{dir_path}/Keyframes'
    elif 'kaggle' in str(get_ipython()):
        keyframes_dir = f'{dir_path}/Keyframes'
    else:
        parent_dir_path = os.path.dirname(dir_path)
        keyframes_dir = f'{parent_dir_path}/transnet/Keyframes'

if not save_dir:
    save_dir = './object_extraction'

In [3]:
! pip install ultralytics pillow tqdm



In [4]:
import os
import glob
import json
import torch
from typing import Dict, List
from ultralytics import YOLO
from tqdm.auto import tqdm

  from .autonotebook import tqdm as notebook_tqdm


# Parse data path

In [5]:
def parse_keyframe_info(keyframes_dir = '../transnet/Keyframes'):
    all_keyframe_paths = {}
    for part in sorted(os.listdir(keyframes_dir)):
        data_part = part.split('/')[-1]
        all_keyframe_paths[data_part] = {}
        
    for data_part in sorted(all_keyframe_paths.keys()):
        data_part_path = f'{keyframes_dir}/{data_part}'
        video_dirs = sorted(os.listdir(data_part_path))
        video_ids = [video_dir.split('_')[-1] for video_dir in video_dirs]
        for video_id, video_dir in zip(video_ids, video_dirs):
            keyframe_paths = sorted(
                glob.glob(f'{data_part_path}/{video_dir}/*.jpg'))
            all_keyframe_paths[data_part][video_id] = keyframe_paths
    
    return all_keyframe_paths

# Process Image

In [6]:
class ObjectDetector:
    def __init__(self, model_path: str):
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        print(f"Using device: {self.device}")
        self.model = YOLO(model_path).to(self.device)

    def detect(self, image_path: str) -> List[Dict]:
        results = self.model(image_path)
        return [
            {
                "label": self.model.names[int(box.cls[0])],
                "score": float(box.conf[0]),
                "box": box.xyxy[0].tolist()
            }
            for r in results
            for box in r.boxes
        ]


def count_objects_by_class(detected_objects: List[Dict]) -> Dict[str, int]:
    return {obj["label"]: sum(1 for o in detected_objects if o["label"] == obj["label"])
            for obj in detected_objects}


def save_results(save_dir: str, key: str, video_id: str, results: Dict[str, Dict]):
    for dir_type in ['object_detection', 'object_count']:
        os.makedirs(os.path.join(save_dir, dir_type,
                    key, video_id), exist_ok=True)

    for image_path, data in results.items():
        base_name = os.path.basename(image_path).replace('.jpg', '')
        for data_type, content in [('detection', 'detected_objects'), ('counts', 'class_counts')]:
            with open(os.path.join(save_dir, f'object_{data_type}', key, video_id, f'{base_name}_{data_type}.json'), 'w') as f:
                json.dump(data[content], f)


def process_video(detector: ObjectDetector, image_paths: List[str]) -> Dict[str, Dict]:
    video_results = {}
    for image_path in image_paths:
        detected_objects = detector.detect(image_path)
        if detected_objects:
            class_counts = count_objects_by_class(detected_objects)
            video_results[image_path] = {
                'detected_objects': detected_objects,
                'class_counts': class_counts
            }
    return video_results


def process_and_save_results(detector: ObjectDetector, all_keyframe_paths: Dict[str, Dict[str, List[str]]], save_dir: str):
    for key, videos in all_keyframe_paths.items():
        for video_id, image_paths in tqdm(videos.items(), desc=f"Processing {key}"):
            video_results = process_video(detector, image_paths)
            save_results(save_dir, key, video_id, video_results)

In [7]:
model_path = 'yolov8n.pt'

detector = ObjectDetector(model_path)
all_keyframe_paths = parse_keyframe_info(keyframes_dir)
process_and_save_results(detector, all_keyframe_paths, save_dir)

  return torch._C._cuda_getDeviceCount() > 0


Using device: cpu


Processing L01_extra:   0%|          | 0/1 [00:00<?, ?it/s]


image 1/1 /home/jiggle/personal/competition/hcm-ai/Pipeline_HCM_AI/notebooks/data_extraction/transnet/Keyframes/L01_extra/V001/000000.jpg: 384x640 (no detections), 723.7ms
Speed: 24.4ms preprocess, 723.7ms inference, 17.4ms postprocess per image at shape (1, 3, 384, 640)

image 1/1 /home/jiggle/personal/competition/hcm-ai/Pipeline_HCM_AI/notebooks/data_extraction/transnet/Keyframes/L01_extra/V001/000010.jpg: 384x640 2 trains, 233.4ms
Speed: 39.9ms preprocess, 233.4ms inference, 16.1ms postprocess per image at shape (1, 3, 384, 640)

image 1/1 /home/jiggle/personal/competition/hcm-ai/Pipeline_HCM_AI/notebooks/data_extraction/transnet/Keyframes/L01_extra/V001/000021.jpg: 384x640 1 person, 1 train, 1 traffic light, 100.3ms
Speed: 6.6ms preprocess, 100.3ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

image 1/1 /home/jiggle/personal/competition/hcm-ai/Pipeline_HCM_AI/notebooks/data_extraction/transnet/Keyframes/L01_extra/V001/000032.jpg: 384x640 (no detections), 91.5ms

Processing L01_extra:   0%|          | 0/1 [00:15<?, ?it/s]


KeyboardInterrupt: 