In [1]:
from pathlib import Path
import json

In [2]:
DATA_DIR = Path("D:/.competitions/aic2025/dataset")

VIDEO_DIR = DATA_DIR / "video"
SCENE_DIR = DATA_DIR / "scenes"
MAP_KEYFRAME_DIR = DATA_DIR / "map-keyframes"
NEW_MAP_KEYFRAME_DIR = DATA_DIR / "new-map-keyframes"
OUTPUT_DIR = DATA_DIR / "keyframes"

In [3]:
video = {}

for video_path in VIDEO_DIR.glob("*.mp4"):
    video[video_path.stem] = str(video_path)

In [4]:
video_scenes = []

for video_dir in SCENE_DIR.iterdir():
    video_id = video_dir.stem
    scenes = []
    for scene in video_dir.glob("*.scenes.txt"):
        """
        The file looks like this:
            0 8
            9 53
            54 342
            343 410
            411 457
            458 509
            510 564
            565 611
            612 660
        """
        with open(scene, "r") as f:
            for line in f:
                start, end = map(int, line.split())
                mid = (start + end) // 2
                scenes.append((start, mid, end))
    video_scenes.append({
        "video_id": video_id,
        "scenes": scenes,
    })


In [6]:
video_scenes[0]

{'video_id': 'K01_V001',
 'scenes': [(0, 3, 6),
  (7, 17, 27),
  (28, 159, 290),
  (291, 316, 342),
  (343, 365, 388),
  (389, 407, 426),
  (427, 440, 453),
  (454, 461, 469),
  (470, 495, 520),
  (521, 539, 557),
  (558, 575, 593),
  (594, 614, 634),
  (635, 656, 678),
  (679, 693, 708),
  (709, 717, 726),
  (727, 752, 778),
  (779, 1330, 1881),
  (1882, 1941, 2000),
  (2001, 2073, 2145),
  (2146, 2214, 2282),
  (2283, 2353, 2423),
  (2424, 2504, 2584),
  (2585, 2709, 2834),
  (2835, 2844, 2854),
  (2855, 2879, 2903),
  (2904, 2931, 2958),
  (2959, 2987, 3016),
  (3017, 3048, 3079),
  (3080, 3102, 3124),
  (3125, 3154, 3184),
  (3185, 3216, 3248),
  (3249, 3279, 3310),
  (3311, 3548, 3786),
  (3787, 3827, 3867),
  (3868, 3893, 3918),
  (3919, 3946, 3973),
  (3974, 3993, 4013),
  (4014, 4037, 4060),
  (4061, 4085, 4109),
  (4110, 4130, 4151),
  (4152, 4193, 4235),
  (4236, 4256, 4277),
  (4278, 4303, 4328),
  (4329, 4338, 4348),
  (4349, 4369, 4390),
  (4391, 4420, 4450),
  (4451, 4467

In [17]:
import pandas as pd
import cv2
from typing import List, Tuple

def get_fps(video_id: str) -> float:
    video_path = VIDEO_DIR / f"{video_id}.mp4"
    cap = cv2.VideoCapture(str(video_path))
    fps = cap.get(cv2.CAP_PROP_FPS)
    cap.release()
    return fps

def create_keyframe_map(video, scenes: List[Tuple[int, int, int]]) -> pd.DataFrame:
    """
    Create a keyframe map from scenes.
            pts_time,fps,frame_idx
            0.0,30.0,0
            3.0,30.0,90
            8.7,30.0,261
            11.7333,30.0,351
            13.7,30.0,411
            17.7,30.0,531
            23.7,30.0,711
            28.6,30.0,858
            30.7,30.0,921
            37.7,30.0,1131
            42.7,30.0,1281
    """
    fps = get_fps(video)
    df_keyframes = pd.DataFrame(columns=["pts_time", "fps", "frame_idx"])
    for scene in scenes:
        for frame_idx in scene:
            frame_idx = int(frame_idx)

            pts_time = round(frame_idx / fps, 1)
            df_keyframes = pd.concat([df_keyframes, pd.DataFrame({
                "pts_time": [pts_time],
                "fps": [fps],
                "frame_idx": [frame_idx],
            })], ignore_index=True)

    df_keyframes = df_keyframes.drop_duplicates().reset_index(drop=True).sort_values(by="pts_time")
    df_keyframes["n"] = range(1, len(df_keyframes) + 1)

    return df_keyframes

def load_keyframes(video_id: str) -> pd.DataFrame:
    keyframe_path = MAP_KEYFRAME_DIR / f"{video_id}.csv"
    df_keyframes = pd.read_csv(keyframe_path)
    return df_keyframes.drop(columns=["n"])

def merge_keyframes(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
    df = pd.concat([df1, df2]).drop_duplicates().reset_index(drop=True).sort_values(by="pts_time")
    df["n"] = range(1, len(df) + 1)
    return df[["n", "pts_time", "fps", "frame_idx"]]

def crop_keyframe(cap: cv2.VideoCapture, frame_idx: int, output_path: Path) -> bool:
    if not output_path.exists():
        output_path.parent.mkdir(parents=True, exist_ok=True)

    cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)
    ret, frame = cap.read()
    if not ret:
        return False
    cv2.imwrite(str(output_path), frame)
    return True

def save_keyframes(video_id: str, df_keyframes: pd.DataFrame, output_path: Path) -> None:
    output_path.mkdir(parents=True, exist_ok=True)
    keyframe_path = output_path / f"{video_id}.csv"
    df_keyframes.to_csv(keyframe_path, index=False)

'''
    n,pts_time,fps,frame_idx
    1,0.0,30.0,0
    2,3.0,30.0,90
    3,8.7,30.0,261
    4,11.7333,30.0,351
    5,13.7,30.0,411
    6,17.7,30.0,531
    7,23.7,30.0,711
    8,28.6,30.0,858
    9,30.7,30.0,921
    10,37.7,30.0,1131
    11,42.7,30.0,1281
'''

'\n    n,pts_time,fps,frame_idx\n    1,0.0,30.0,0\n    2,3.0,30.0,90\n    3,8.7,30.0,261\n    4,11.7333,30.0,351\n    5,13.7,30.0,411\n    6,17.7,30.0,531\n    7,23.7,30.0,711\n    8,28.6,30.0,858\n    9,30.7,30.0,921\n    10,37.7,30.0,1131\n    11,42.7,30.0,1281\n'

In [None]:
df_extracted = create_keyframe_map(video_scenes[0]["video_id"], video_scenes[0]["scenes"])
cap = cv2.VideoCapture(str(VIDEO_DIR / f"{video_scenes[0]['video_id']}.mp4"))
crop_keyframe()

  df_keyframes = pd.concat([df_keyframes, pd.DataFrame({


In [15]:
df_extracted

Unnamed: 0,pts_time,fps,frame_idx,n
0,0.0,25.0,0,1
1,0.1,25.0,3,2
2,0.2,25.0,6,3
3,0.3,25.0,7,4
4,0.7,25.0,17,5
...,...,...,...,...
892,1040.6,25.0,26016,893
893,1046.4,25.0,26160,894
894,1046.5,25.0,26162,895
895,1048.2,25.0,26204,896


In [19]:
from tqdm import tqdm

errors = []
done = []

for video_scene in tqdm(video_scenes):
    video_id = video_scene["video_id"]
    scenes = video_scene["scenes"]
    df_extracted = create_keyframe_map(video_id, scenes)
    save_keyframes(video_id, df_extracted, NEW_MAP_KEYFRAME_DIR)

    cap = cv2.VideoCapture(str(VIDEO_DIR / f"{video_id}.mp4"))
    for _, row in df_extracted.iterrows():
        n = row["n"]
        frame_idx = row["frame_idx"]
        if not crop_keyframe(cap, frame_idx, OUTPUT_DIR / video_id / f"{n}.jpg"):
            errors.append(video_id)
            print(f"Error cropping keyframe {n} for video {video_id}")
    cap.release()

    if not errors:
        done.append(video_id)

    # clean up all the df created
    del df_extracted
    # remove video file
    (VIDEO_DIR / f"{video_id}.mp4").unlink(missing_ok=True)

  df_keyframes = pd.concat([df_keyframes, pd.DataFrame({
  0%|          | 0/605 [00:03<?, ?it/s]


KeyboardInterrupt: 

In [9]:
errors

[]

In [10]:
len(done)

873