In [None]:
# !pip -q install ultralytics decord tqdm

In [1]:
import json, os, subprocess, pathlib, shlex, glob

In [2]:
# Mount Google Drive at /content/drive; the project paths used later in this
# notebook (ROOT and everything derived from it) live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
def ffprobe_json(path):
    """Probe a media file with ffprobe and return its metadata as a dict.

    Runs ``ffprobe -print_format json -show_format -show_streams`` on ``path``.
    ffprobe writes the JSON document to stdout and its log messages to stderr.
    The two streams are captured separately, so log text (which at ``-v info``
    may itself contain ``{``) can never be mistaken for, or appended to, the
    JSON payload.

    Args:
        path: Path of the media file to inspect.

    Returns:
        dict: The JSON object printed by ffprobe.

    Raises:
        subprocess.CalledProcessError: ffprobe exited with a non-zero status.
            The return code and ffprobe's log output are printed before the
            exception is re-raised.
        ValueError: ffprobe exited successfully but stdout did not contain a
            JSON object.
    """
    # Argument list instead of a shell string: the path is passed verbatim,
    # so no quoting is needed and no shell ever interprets it.
    cmd = [
        "ffprobe", "-v", "info",
        "-print_format", "json",
        "-show_format", "-show_streams",
        path,
    ]

    try:
        result = subprocess.run(
            cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE
        )
    except subprocess.CalledProcessError as e:
        print(f"Error running ffprobe: {e.returncode}")
        # With stderr piped separately, the diagnostics live in e.stderr.
        print(f"Output: {(e.stderr or b'').decode(errors='replace')}")
        raise

    try:
        meta = json.loads(result.stdout.decode())
    except ValueError as err:  # includes json.JSONDecodeError and UnicodeDecodeError
        raise ValueError("FFprobe did not return a valid JSON object.") from err

    if not isinstance(meta, dict):
        raise ValueError("FFprobe did not return a valid JSON object.")
    return meta

In [None]:
def make_proxy(master, proxy, target_fps=25):
    """Transcode ``master`` into a 720p, constant-frame-rate H.264 proxy.

    The proxy is scaled to a height of 720, resampled to ``target_fps``,
    encoded with libx264 (preset ``veryfast``, CRF 25), has its audio dropped
    (``-an``) and is written to ``proxy``, overwriting any existing file
    (``-y``). The parent directory of ``proxy`` is created if needed.

    Args:
        master: Path of the source video.
        proxy: Path of the proxy file to write.
        target_fps: Output frame rate. Defaults to 25, the rate this function
            originally hard-coded.

    Raises:
        subprocess.CalledProcessError: ffmpeg exited with a non-zero status.
            Its combined stdout/stderr is printed first, and an output file
            that this call created is removed so a broken proxy is not left
            behind.
    """
    out_dir = os.path.dirname(proxy)
    if out_dir:  # a bare filename has no directory part; os.makedirs("") would raise
        os.makedirs(out_dir, exist_ok=True)

    # Remember whether a usable file was already there: on failure only a file
    # produced by this call is cleaned up, never a pre-existing proxy.
    had_proxy = os.path.exists(proxy) and os.path.getsize(proxy) > 0

    # CFR, scale for fast scrubbing, dense keyframes for frame-accurate seeking in CVAT
    cmd = [
        "ffmpeg", "-y", "-v", "info", "-hide_banner", "-i", master,
        "-vf", f"scale=-2:720,fps={target_fps}",
        "-c:v", "libx264", "-preset", "veryfast", "-crf", "25",
        "-vsync", "cfr",
        "-g", "10", "-keyint_min", "10", "-sc_threshold", "0",
        "-pix_fmt", "yuv420p",
        "-movflags", "faststart",
        "-an", proxy
    ]

    print("Executing ffmpeg command:")
    print(" ".join(shlex.quote(arg) for arg in cmd))

    try:
        # stderr is merged into stdout and captured; the log is only shown
        # when ffmpeg fails (see the except branch).
        subprocess.run(cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    except subprocess.CalledProcessError as e:
        print(f"ERROR: ffmpeg exited with code {e.returncode}")
        print("---FFmpeg Output---")
        # errors="replace": log bytes are not guaranteed to be valid UTF-8.
        print((e.stdout or b"").decode(errors="replace"))
        print("---End of Output---")
        if not had_proxy and os.path.exists(proxy):
            os.remove(proxy)
        raise

    print("FFmpeg command completed successfully.")

In [None]:
# Project folders on the mounted Drive.
ROOT = "/content/drive/MyDrive/FIT3163,3164/SlowFast"
RAW = ROOT + "/01_raw"            # tree searched for master.mp4 files
PROX = ROOT + "/02_proxy_25fps"   # tree the proxies are written into

## **Convert every `master.mp4` under `01_raw/<channel>/<yt_id>/` into a proxy**

In [None]:
# Batch conversion: for every RAW/<channel>/<yt_id>/master.mp4, write
# PROX/<channel>/<yt_id>/proxy.mp4 and store its ffprobe metadata next to it
# as proxy_meta.json. Errors on one video are reported and the loop moves on.
failed_masters = []  # masters whose proxy or metadata step raised

# sorted(): glob returns matches in arbitrary order; a fixed order keeps the
# log comparable between runs.
for master in sorted(glob.glob(os.path.join(RAW, "*", "*", "master.mp4"))):
    print(f"Processing master file: {master}")
    yt_dir = os.path.dirname(master)
    channel = os.path.basename(os.path.dirname(yt_dir))
    yt_id = os.path.basename(yt_dir)
    out_dir = f"{PROX}/{channel}/{yt_id}"
    proxy = f"{out_dir}/proxy.mp4"

    # A zero-byte proxy (e.g. left by an interrupted run) counts as missing,
    # otherwise it would be skipped forever and never regenerated.
    proxy_ready = os.path.exists(proxy) and os.path.getsize(proxy) > 0

    if not proxy_ready:
        print("\nMaking proxy:", proxy)
        try:
            make_proxy(master, proxy)
            print("Proxy created.")
        except Exception as e:  # best-effort batch: keep going with the next video
            print(f"Error making proxy: {e}")
            failed_masters.append(master)
    else:
        print(f"Proxy already created for {proxy}.")

    if os.path.exists(proxy) and os.path.getsize(proxy) > 0:
        print(f"Running ffprobe: {proxy}")
        try:
            meta = ffprobe_json(proxy)
            with open(f"{out_dir}/proxy_meta.json", "w") as f:
                json.dump(meta, f, indent=2)
        except Exception as e:  # best-effort batch: keep going with the next video
            print(f"Error running ffprobe: {e}")
            if master not in failed_masters:
                failed_masters.append(master)
    else:
        print(f"Warning: Proxy file {proxy} was not created or is empty.")

# Failures are easy to miss in a long log, so repeat them at the end.
if failed_masters:
    print(f"\n{len(failed_masters)} master file(s) had errors:")
    for failed in failed_masters:
        print(f"  {failed}")

NameError: name 'RAW' is not defined

## **Convert specific files**

In [13]:
def make_proxy(input_file, output_file, target_fps):
    """Transcode ``input_file`` into a 720p, constant-frame-rate H.264 proxy.

    NOTE: this redefines ``make_proxy``. Once this cell has run it replaces
    the earlier two-argument ``make_proxy(master, proxy)`` in the kernel, and
    here ``target_fps`` is required.

    The proxy is scaled to a height of 720, resampled to ``target_fps``,
    encoded with libx264 (preset ``veryfast``, CRF 30), has its audio dropped
    (``-an``) and is written to ``output_file``, overwriting any existing file
    (``-y``). The parent directory of ``output_file`` is created if needed.

    Args:
        input_file: Path of the source video.
        output_file: Path of the proxy file to write.
        target_fps: Output frame rate, inserted into the ``fps=`` filter.

    Raises:
        subprocess.CalledProcessError: ffmpeg exited with a non-zero status.
            Its combined stdout/stderr is printed first, and an output file
            that this call created is removed so a broken proxy is not left
            behind.
    """
    out_dir = os.path.dirname(output_file)
    if out_dir:  # a bare filename has no directory part to create
        os.makedirs(out_dir, exist_ok=True)

    # On failure only a file produced by this call is cleaned up, never a
    # usable file that was already there.
    had_output = os.path.exists(output_file) and os.path.getsize(output_file) > 0

    cmd = [
        "ffmpeg", "-y", "-v", "warning", "-hide_banner", "-stats", "-i", input_file,
        "-vf", f"scale=-2:720,fps={target_fps}",
        "-c:v", "libx264", "-preset", "veryfast", "-crf", "30",
        "-vsync", "cfr",
        "-g", "10", "-keyint_min", "10", "-sc_threshold", "0",
        "-pix_fmt", "yuv420p",
        "-movflags", "faststart",
        "-an", output_file
    ]

    print("Executing ffmpeg command:")
    print(" ".join(shlex.quote(arg) for arg in cmd))

    try:
        # stderr is merged into stdout and captured; it is only shown on failure.
        subprocess.run(cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    except subprocess.CalledProcessError as e:
        print(f"ERROR: ffmpeg exited with code {e.returncode}")
        # errors="replace": log bytes are not guaranteed to be valid UTF-8.
        print((e.stdout or b"").decode(errors="replace"))
        if not had_output and os.path.exists(output_file):
            os.remove(output_file)
        raise

    print(f"FFmpeg command completed successfully for {input_file}.")

In [14]:
# DO NOT CHANGE
# Frame rate passed to make_proxy as target_fps in the "Convert specific files" cell.
# NOTE(review): the output filename in that cell hard-codes "30fps", so the two
# must stay in sync.
PROXY_FPS = 30

In [15]:
# Source videos and, position for position, the proxy file each one is written to.
input_files = ['/content/drive/MyDrive/FIT3163,3164/SlowFast/01_raw/bwf25_shi_vit/bwf25_shi_vit.mp4']
output_files_and_fps = ['/content/bwf25_shi_vit_30fps.mp4']

# Catch a mismatch before any encoding starts rather than part-way through.
if len(input_files) != len(output_files_and_fps):
    raise ValueError("input_files and output_files_and_fps must have the same length")

# zip pairs entries by position; list.index() would return the first match and
# so send a repeated input to the wrong output.
for src, dst in zip(input_files, output_files_and_fps):
    make_proxy(src, dst, PROXY_FPS)

Executing ffmpeg command:
FFmpeg command completed successfully for /content/drive/MyDrive/FIT3163,3164/SlowFast/01_raw/bwf25_shi_vit/bwf25_shi_vit.mp4.
