In [None]:
import numpy as np
import os
from pathlib import Path
from PIL import Image
import json
import matplotlib.pyplot as plt
from tqdm import tqdm
from torchvision import transforms as T
import torch
import random
from torch.utils.data import Dataset
from torchvision import datasets, transforms
from torch.utils.data import DataLoader


In [None]:
# ------------  CONFIG  -------------------
# Root directory of the JPEG export; every other path below is derived from it.
base_path = '/scratch/cv-course-group-5/data/dataset_jpg'

# Directory that image file names are resolved against.
src_root = Path(base_path) / 'dataset'

# Annotation JSON, stored directly inside the dataset directory.
anno_file = src_root / 'annotations.json'

In [None]:
# Load the annotation file.
# The raw bytes are handed to json.loads so that the JSON decoder picks the
# encoding (UTF-8/16/32) itself instead of relying on the locale's default
# text encoding, which is what Path.read_text() would use.
annos_dict = json.loads(anno_file.read_bytes())

# Top-level sections of the annotation file; a missing section becomes an empty list.
annos = annos_dict.get('annotations', [])
videos = annos_dict.get('videos', [])
images = annos_dict.get('images', [])

# Lookup tables keyed by the "id" field of each record.
video_id2name = {v["id"]: v["name"] for v in videos}  # video id -> video name
image_by_id = {img["id"]: img for img in images}      # image id -> full image record

In [None]:
# Keep only annotations whose bbox has a value above 400 at index 2 or index 3
# (presumably width and height in pixels -- TODO confirm the bbox layout).
filtered_annos = [
    ann
    for ann in annos
    if ann['bbox'][2] > 400 or ann['bbox'][3] > 400
]

# Lookup table: image id -> file name.
image_id_to_filename = {img["id"]: img["file_name"] for img in images}

# Attach the file name to every kept annotation.
# NOTE: the dicts are updated in place, and they are the same objects that
# live in `annos`, so those entries gain a 'filename' key as well.
# Annotations whose image id is not in the lookup get the placeholder "UNKNOWN".
for ann in filtered_annos:
    ann['filename'] = image_id_to_filename.get(ann['image_id'], "UNKNOWN")

In [None]:
# How many full images to show
N = 20

# Preview the source frame of the first N filtered annotations, one figure each.
for ann in filtered_annos[:N]:
    file_path = os.path.join(src_root, ann["filename"])

    # isfile rather than exists: a path that resolves to a directory is
    # reported as missing instead of crashing Image.open.
    if not os.path.isfile(file_path):
        print(f"Missing file: {file_path}")
        continue

    # Load the full image. The context manager closes the underlying file as
    # soon as the RGB copy has been created, so no handle is left open.
    with Image.open(file_path) as raw_img:
        img = raw_img.convert("RGB")

    # Show the full image on its own figure.
    fig, ax = plt.subplots()
    ax.imshow(img)
    ax.set_title(f"Frame {ann['time_step']} | Filename {ann['filename']}")
    ax.axis("off")
    plt.show()

    # Release the figure explicitly so figures do not accumulate across iterations.
    plt.close(fig)