In [None]:
from pathlib import Path
import json

In [None]:
# Directory layout for this notebook. Every path is relative to the current
# working directory. VIDEO_DIR is scanned for *.mp4 files and SCENE_DIR for
# per-video folders of *.scenes.txt files further down.
VIDEO_DIR = Path("video")
SCENE_DIR = Path("scenes")
MAP_KEYFRAME_DIR = Path("map-keyframes")
NEW_MAP_KEYFRAME_DIR = Path("new-map-keyframes")
OUTPUT_DIR = Path("keyframes")
VIDEO_CROPPED_DIR = Path("scenes-cropped")

In [None]:
# Zip archives to fetch. The download cell below iterates over this list and
# derives each local archive name from the last path segment of the URL.
video_urls = [
    "https://aic-data.ledo.io.vn/Videos_L21_a.zip",
    "https://aic-data.ledo.io.vn/Videos_L22_a.zip",
    "https://aic-data.ledo.io.vn/Videos_L23_a.zip",
    "https://aic-data.ledo.io.vn/Videos_L24_a.zip",
    "https://aic-data.ledo.io.vn/Videos_L25_a.zip",
    "https://aic-data.ledo.io.vn/Videos_L25_a1.zip",
    "https://aic-data.ledo.io.vn/Videos_L25_b.zip",
    "https://aic-data.ledo.io.vn/Videos_L26_a.zip",
    "https://aic-data.ledo.io.vn/Videos_L26_b.zip",
    "https://aic-data.ledo.io.vn/Videos_L26_c.zip",
    "https://aic-data.ledo.io.vn/Videos_L26_d.zip",
    "https://aic-data.ledo.io.vn/Videos_L26_e.zip",
    "https://aic-data.ledo.io.vn/Videos_L27_a.zip",
    "https://aic-data.ledo.io.vn/Videos_L28_a.zip",
    "https://aic-data.ledo.io.vn/Videos_L29_a.zip",
    "https://aic-data.ledo.io.vn/Videos_L30_a.zip"
]

'Videos_L21_a.zip'

In [None]:
import subprocess
from tqdm import tqdm

# Download each archive, extract it into the working directory, then delete
# the zip. Every archive is attempted; failures are collected and reported at
# the end instead of being silently ignored.
failed_downloads = []

for video_url in tqdm(video_urls):
    archive_name = video_url.split("/")[-1]
    try:
        # -O pins the output file name: without it wget saves to
        # "<name>.1" when a stale archive exists and the stale one is unzipped.
        subprocess.run(
            ["wget", "-O", archive_name, video_url],
            check=True,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
        # -o overwrites existing files without asking. The interactive
        # prompt would be invisible here (stdout is discarded) and would
        # block the cell forever on a re-run.
        subprocess.run(
            ["unzip", "-o", archive_name],
            check=True,
            stdin=subprocess.DEVNULL,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
    except subprocess.CalledProcessError as exc:
        failed_downloads.append((video_url, exc.cmd[0], exc.returncode))
    finally:
        # Remove the archive (or a partial download) whether or not it worked.
        Path(archive_name).unlink(missing_ok=True)

if failed_downloads:
    details = "\n".join(
        f"  {url}: {tool} exited with code {code}"
        for url, tool, code in failed_downloads
    )
    raise RuntimeError(f"{len(failed_downloads)} archive(s) failed:\n{details}")

In [None]:
# Map each video's file stem to its path (as a string) for every .mp4 that
# sits directly inside VIDEO_DIR.
videos = {mp4_file.stem: str(mp4_file) for mp4_file in VIDEO_DIR.glob("*.mp4")}

In [None]:
# One record per sub-directory of SCENE_DIR:
#     {"video_id": <directory stem>, "scenes": [(start, end), ...]}
video_scenes = []

# Sorted so the record order does not depend on file-system listing order.
for video_dir in sorted(SCENE_DIR.iterdir()):
    if not video_dir.is_dir():
        # Stray files next to the per-video folders carry no scene lists.
        continue

    video_id = video_dir.stem
    scenes = []
    for scene in sorted(video_dir.glob("*.scenes.txt")):
        # Each line holds two whitespace-separated integers, e.g.:
        #     0 8
        #     9 53
        #     54 342
        #     343 410
        with open(scene, "r") as f:
            for line in f:
                fields = line.split()
                if not fields:
                    # Blank lines (such as a trailing newline at the end of
                    # the file) would otherwise fail the two-value unpack.
                    continue
                start, end = map(int, fields)
                scenes.append((start, end))
    video_scenes.append({
        "video_id": video_id,
        "scenes": scenes,
    })


In [None]:
import cv2
import subprocess
import os

def get_fps(video_path) -> float:
    """Return the frame rate OpenCV reports for a video file.

    Args:
        video_path: Location of the video; converted with ``str`` before
            being handed to ``cv2.VideoCapture``.

    Returns:
        The value of ``cv2.CAP_PROP_FPS`` for the opened capture.
        NOTE(review): OpenCV is expected to report 0.0 when the file cannot
        be opened, so callers should validate the result before dividing by it.
    """
    cap = cv2.VideoCapture(str(video_path))
    try:
        return cap.get(cv2.CAP_PROP_FPS)
    finally:
        # Always free the capture handle, even if the property query raises.
        cap.release()

def crop_video(input_path: str, output_folder: str, scenes: list[tuple[int, int]]):
    """Cut a video into one re-encoded clip per scene using ffmpeg.

    Args:
        input_path: Source video file.
        output_folder: Directory that receives the clips; created if missing.
            Scene ``i`` is written to ``<output_folder>/<i>.mp4`` and replaces
            any file already there.
        scenes: ``(start_frame, end_frame)`` pairs; the end frame is inclusive.

    Raises:
        ValueError: If scenes were given but no positive frame rate could be
            read from ``input_path``.

    A failing ffmpeg run does not stop the remaining scenes; it is reported
    as a ``RuntimeWarning`` carrying the last line of ffmpeg's error output.
    """
    import warnings

    if not scenes:
        return

    fps = get_fps(input_path)
    if not fps > 0:
        # Frame indices cannot be converted to timestamps without a frame rate.
        raise ValueError(f"Could not determine a valid FPS for {input_path!r} (got {fps!r})")

    # ffmpeg does not create missing parent directories for its output file.
    os.makedirs(output_folder, exist_ok=True)

    for i, (start_f, end_f) in enumerate(scenes):
        start_time = start_f / fps
        # +1 because the end frame belongs to the scene.
        duration = (end_f - start_f + 1) / fps
        output_path = os.path.join(output_folder, f"{i}.mp4")

        cmd = [
            "ffmpeg","-hide_banner","-nostdin","-loglevel","error","-stats",
            "-y",                          # with -nostdin an existing output would otherwise abort the run
            "-threads","1",                # cap CPU usage
            "-ss", f"{start_time:.6f}",
            "-i", str(input_path),
            "-t", f"{duration:.6f}",
            "-map","0:v:0","-map","0:a?",
            "-c:v","libx264","-pix_fmt","yuv420p",
            "-preset","veryfast","-crf","23",
            "-c:a","aac",
            str(output_path)
        ]

        result = subprocess.run(
            cmd,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.PIPE,
            text=True,
            errors="replace",
        )
        if result.returncode != 0:
            stderr_lines = (result.stderr or "").strip().splitlines()
            reason = stderr_lines[-1] if stderr_lines else "no error output"
            warnings.warn(
                f"ffmpeg exited with code {result.returncode} for scene {i} "
                f"({start_f}-{end_f}) of {input_path}: {reason}",
                RuntimeWarning,
                stacklevel=2,
            )

: 