## **Generate player-centric clips from masters (dynamic crop with smoothing)**

Read each shot's frame-wise boxes (proxy space), map times to the master, and crop per-frame with a smoothed, slightly expanded ROI. This ensures the hitter stays centered even if they move.

Notes:
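- We crop in Python (OpenCV) because ffmpeg's crop filter fixes the crop size when the filter is configured; following a tracker box whose position and size change on every frame needs per-frame control.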
- We add EMA smoothing and expand the box (×1.6–1.8 recommended; the function default is 1.7, and this notebook's run sets `EXPAND_FACTOR = 1.4`) to include limb motion and absorb small tracker noise.
- Output: square 256x256 clips (a good size for SlowFast training speed); change `OUT_SIDE` if you need a different resolution.

In [1]:
!pip -q install decord

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.6/13.6 MB[0m [31m24.0 MB/s[0m eta [36m0:00:00[0m
[?25h

In [16]:
import os, csv, json, math, glob, uuid, tqdm
import numpy as np, cv2
from decord import VideoReader, cpu
import subprocess, shlex
from typing import Tuple, List, Dict, Optional

In [3]:
# Mount Google Drive so the /content/drive/... paths used below resolve.
# This import only works inside a Google Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
def infer_video_rel(task_name: str) -> str:
    """Infer the relative proxy-video path from a CVAT task name (optional helper).

    A task name that starts with ``<channel>__<yt_id>`` (two underscores
    between two underscore-free tokens) is mapped to
    ``<channel>/<yt_id>/proxy.mp4``. Anything after the second token is
    ignored. Names that do not start with that pattern are returned unchanged.

    Args:
        task_name: The CVAT task name.

    Returns:
        The inferred relative path, or ``task_name`` itself when it does not
        match the expected pattern.
    """
    # Imported locally because the notebook's import cell does not import
    # `re`; without this the function raised NameError on first use.
    import re

    m = re.match(r'([^_]+)__([^_]+)', task_name)
    if m:
        return f"{m.group(1)}/{m.group(2)}/proxy.mp4"
    return task_name

def master_from_video_rel(video_rel: str, raw_base_dir: str) -> str:
    """
    Build the path of the master video that belongs to ``video_rel``.

    Args:
        video_rel: Either a path such as "channel/yt_id/proxy.mp4" or a bare
            token such as "export_1".
        raw_base_dir: Root directory that holds the raw (master) videos.

    Returns:
        ``<raw_base_dir>/<channel>/<yt_id>/master.mp4`` when ``video_rel`` has
        at least three "/"-separated parts and ends in ".mp4"; otherwise
        ``<raw_base_dir>/<first part>/master.mp4``.
    """
    segments = video_rel.split("/")
    is_nested_mp4 = len(segments) >= 3 and segments[-1].endswith(".mp4")
    # Nested form keeps channel + yt_id; token form keeps only the first part.
    sub_dirs = segments[:2] if is_nested_mp4 else segments[:1]
    return os.path.join(raw_base_dir, *sub_dirs, "master.mp4")

def proxy_from_video_rel(video_rel: str, proxy_base_dir: str) -> str:
    """
    Build the path of the proxy.mp4 (the video used in CVAT) for ``video_rel``.

    Args:
        video_rel: Either a path such as "channel/yt_id/proxy.mp4" or a bare
            token such as "export_1".
        proxy_base_dir: Root directory that holds the proxy videos.

    Returns:
        ``<proxy_base_dir>/<channel>/<yt_id>/proxy.mp4`` for the nested form,
        otherwise ``<proxy_base_dir>/<first part>/proxy.mp4``.
    """
    pieces = video_rel.split("/")
    # Token form: fewer than three parts, or the last part is not an .mp4.
    if len(pieces) < 3 or not pieces[-1].endswith(".mp4"):
        return os.path.join(proxy_base_dir, pieces[0], "proxy.mp4")
    return os.path.join(proxy_base_dir, pieces[0], pieces[1], "proxy.mp4")

def get_master_fps(master_path: str) -> float:
    """Return the average frame rate of the master video, as reported by ffprobe.

    ffprobe prints ``avg_frame_rate`` either as a fraction ("30000/1001") or
    as a plain number; both forms are parsed. A fraction with a zero
    denominator yields the numerator.

    Args:
        master_path: Path to the master video file.

    Returns:
        The frame rate as a float. If ffprobe cannot be started, exits with an
        error, or prints something unparsable, a message is printed and the
        module-level ``PROXY_FPS`` is returned instead.
    """
    # Pass the arguments as a list so the path reaches ffprobe verbatim;
    # building a quoted string and re-splitting it broke on paths that
    # contain double quotes or backslashes.
    command = [
        "ffprobe", "-v", "error",
        "-select_streams", "v:0",
        "-show_entries", "stream=avg_frame_rate",
        "-of", "csv=p=0",
        master_path,
    ]
    try:
        out = subprocess.check_output(command, stderr=subprocess.STDOUT).decode().strip()
        if "/" in out:
            a, b = out.split("/")
            return float(a) / float(b) if float(b) != 0 else float(a)
        return float(out)
    except (OSError, subprocess.CalledProcessError, ValueError) as e:
        # OSError covers a missing ffprobe binary, which previously escaped
        # the fallback and crashed the caller.
        print(f"Error getting FPS for {master_path}: {e}")
        return PROXY_FPS # Default to proxy FPS

def get_proxy_size(proxy_path: str) -> Tuple[int, int]:
    """Return the proxy video's frame size as (width, height).

    The size is taken from the array shape of the first decoded frame.
    """
    reader = VideoReader(proxy_path, ctx=cpu(0))
    # Decoded frames are indexed rows first, so the shape starts (height, width).
    height, width = reader[0].asnumpy().shape[:2]
    return width, height

In [5]:
def write_mp4(frames: list, path: str, fps: int):
    """
    Write a list of RGB frames to an H.264 MP4 file.

    The frames are first written to a temporary XVID .avi with OpenCV, then
    re-encoded to libx264 / yuv420p with ffmpeg. The temporary file is always
    removed, including when encoding fails.

    Args:
        frames (list): NumPy arrays of shape (H, W, 3) in RGB order. All
            frames are written at the size of the first one. An empty list is
            a no-op.
        path (str): The output file path.
        fps (int): Frames per second for the output video.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    if not frames:
        return

    # Local import: the notebook's import cell does not import tempfile.
    import tempfile

    H, W = frames[0].shape[:2]

    # A unique temp file replaces the fixed "/content/tmp_out.avi", which only
    # existed on Colab and collided between concurrent or interrupted runs.
    fd, tmp_avi_path = tempfile.mkstemp(suffix=".avi")
    os.close(fd)  # OpenCV reopens the path itself

    try:
        vw = cv2.VideoWriter(tmp_avi_path, cv2.VideoWriter_fourcc(*"XVID"), fps, (W, H))
        try:
            for f in frames:
                # OpenCV writers expect BGR channel order.
                vw.write(cv2.cvtColor(f, cv2.COLOR_RGB2BGR))
        finally:
            vw.release()

        # Argument list (not a shell-style string) so paths with quotes or
        # backslashes are passed through unchanged.
        command = [
            "ffmpeg", "-y", "-i", tmp_avi_path,
            "-c:v", "libx264", "-preset", "veryfast", "-crf", "20",
            "-pix_fmt", "yuv420p", path,
        ]
        subprocess.run(command, check=True)
    finally:
        # Clean up even if ffmpeg failed; previously the .avi was left behind.
        try:
            os.remove(tmp_avi_path)
        except OSError:
            pass

In [6]:
def expand_square(x1: float, y1: float, x2: float, y2: float, W: int, H: int, factor: float = 1.7) -> tuple:
    """
    Grow a bounding box into a square region around the same center.

    The square's side is the box's longer side times ``factor``. Each edge is
    then clamped to the frame ([0, W-1] horizontally, [0, H-1] vertically) and
    truncated to an int, so the result can be non-square near the borders.

    Args:
        x1, y1, x2, y2: Original bounding box corners.
        W, H: Frame width and height.
        factor: Multiplier applied to the longer side of the box.

    Returns:
        A tuple of ints (nx1, ny1, nx2, ny2).
    """
    half_side = max(x2 - x1, y2 - y1) * factor / 2
    center_x = (x1 + x2) / 2
    center_y = (y1 + y2) / 2

    left = int(max(0, center_x - half_side))
    top = int(max(0, center_y - half_side))
    right = int(min(W - 1, center_x + half_side))
    bottom = int(min(H - 1, center_y + half_side))
    return left, top, right, bottom

def ema(prev: np.ndarray, cur: np.ndarray, alpha: float = 0.4) -> np.ndarray:
    """Blend ``cur`` into ``prev`` with an exponential moving average.

    Returns ``alpha * cur + (1 - alpha) * prev``; when there is no previous
    value yet (``prev is None``) the current value is returned as-is.
    """
    if prev is not None:
        return alpha * cur + (1 - alpha) * prev
    return cur

In [7]:
def parse_bboxes_json(bboxes_json: str) -> Dict[int, Tuple[float, float, float, float]]:
    """
    Decode the serialized bbox list ``[[frame, x1, y1, x2, y2], ...]``.

    Returns a dict mapping each frame number (int) to its box as a tuple of
    floats (x1, y1, x2, y2). If a frame appears more than once, the last
    entry wins.
    """
    boxes_by_frame = {}
    for frame, left, top, right, bottom in json.loads(bboxes_json):
        boxes_by_frame[int(frame)] = (float(left), float(top), float(right), float(bottom))
    return boxes_by_frame

def interp_bbox(bb_map: Dict[int, Tuple[float,float,float,float]], f: int) -> Tuple[float,float,float,float]:
    """
    Return the bbox at frame ``f``, linearly interpolating between known frames.

    If ``f`` is annotated, its box is returned directly. If ``f`` lies before
    the first or after the last annotated frame, the nearest annotated box is
    returned (no extrapolation). Otherwise each coordinate is interpolated
    between the closest annotated frames on either side.

    Args:
        bb_map: Mapping frame -> (x1, y1, x2, y2).
        f: Frame number to look up.

    Returns:
        The (x1, y1, x2, y2) box for frame ``f``.

    Raises:
        ValueError: If ``bb_map`` is empty (previously an opaque IndexError).
    """
    if not bb_map:
        raise ValueError("interp_bbox: bb_map is empty; there are no boxes to interpolate")
    if f in bb_map:
        return bb_map[f]

    # Nearest annotated frame on each side of f. A single pass per side
    # replaces sorting and filtering all keys on every call; since f itself is
    # not a key, strict comparisons are enough.
    p0 = max((k for k in bb_map if k < f), default=None)
    n0 = min((k for k in bb_map if k > f), default=None)
    if p0 is None:
        return bb_map[n0]   # f precedes every annotated frame
    if n0 is None:
        return bb_map[p0]   # f follows every annotated frame

    w = (f - p0) / float(n0 - p0)
    px1, py1, px2, py2 = bb_map[p0]
    nx1, ny1, nx2, ny2 = bb_map[n0]
    x1 = px1 * (1 - w) + nx1 * w
    y1 = py1 * (1 - w) + ny1 * w
    x2 = px2 * (1 - w) + nx2 * w
    y2 = py2 * (1 - w) + ny2 * w
    return (x1, y1, x2, y2)

def extract_master_segment(master_path: str, ss: float, to: float, force_cfr_30: bool = True) -> Tuple[str, VideoReader]:
    """
    Cut [ss, to] seconds out of the master into a temporary mp4 and open it.

    If force_cfr_30=True, the segment is re-timed to a constant frame rate of
    ``int(PROXY_FPS)`` (despite the parameter name, the rate comes from that
    module-level constant), so that proxy frame k of the shot corresponds to
    segment frame k. Otherwise the master's native frame rate is kept and the
    caller has to map frame indices itself.

    Args:
        master_path: Path to the master video.
        ss: Segment start in seconds.
        to: Segment end in seconds.
        force_cfr_30: Whether to normalize the segment to constant frame rate.

    Returns:
        (tmp_path, reader): the temporary file's path and a decord VideoReader
        opened on it. The caller is responsible for deleting ``tmp_path``.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    # Local import: the notebook's import cell does not import tempfile.
    import tempfile

    # System temp dir instead of the Colab-only "/content".
    tmp_path = os.path.join(tempfile.gettempdir(), f"seg_{uuid.uuid4().hex[:8]}.mp4")

    # Argument list (not a shell-style string) so the master path is passed
    # verbatim even if it contains quotes or backslashes.
    cmd = ["ffmpeg", "-y", "-ss", f"{ss:.3f}", "-to", f"{to:.3f}", "-i", master_path]
    if force_cfr_30:
        cmd += ["-vsync", "cfr", "-r", str(int(PROXY_FPS))]
    cmd += [
        "-c:v", "libx264", "-preset", "veryfast", "-crf", "18",
        "-pix_fmt", "yuv420p", "-an", tmp_path,
    ]

    try:
        subprocess.run(cmd, check=True)
        vrm = VideoReader(tmp_path, ctx=cpu(0))
    except Exception:
        # On failure the caller never learns tmp_path, so remove any partial
        # output here before re-raising.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
        raise
    return tmp_path, vrm

In [8]:
def process_single_shot(
    r: Dict[str, str],
    master_path: str,
    proxy_path: str,
    output_clips_dir: str,
    out_side: int,
    expand_factor: float,
    ema_alpha: float,
    fps_out: Optional[int],
    min_frames: int,
    overwrite: bool
):
    """Create one player-centric clip from a single row of the shots CSV.

    For every proxy frame of the shot, the (interpolated) bbox is expanded to
    a square, smoothed with an EMA, scaled from proxy to master resolution,
    cropped from the matching master frame and resized to
    ``out_side`` x ``out_side``. The frames are written to
    ``<output_clips_dir>/<shot_type>/<random id>.mp4``.

    Args:
        r: CSV row; must contain "start_f", "end_f", "shot_type" and
            "bboxes_json" (boxes are in proxy-frame pixel coordinates).
        master_path: Path to the master video.
        proxy_path: Path to the proxy video; only its frame size is read.
        output_clips_dir: Root directory for the produced clips.
        out_side: Side length in pixels of the square output frames.
        expand_factor: Bbox expansion factor passed to ``expand_square``.
        ema_alpha: Smoothing weight passed to ``ema``.
        fps_out: Output frame rate; ``None`` means ``int(PROXY_FPS)``.
        min_frames: Minimum number of cropped frames required to write a clip.
        overwrite: If False, an already existing output path is not rewritten.

    Returns:
        True if a clip was written, False if the shot was skipped (missing
        files, unusable boxes, ffmpeg failure, or too few frames).
    """
    start_f = int(r["start_f"])
    end_f = int(r["end_f"])
    shot_type = r["shot_type"]
    bboxes_json = r["bboxes_json"]

    if not os.path.exists(proxy_path):
        print(f"[WARN] Missing proxy: {proxy_path} (skip)")
        return False
    if not os.path.exists(master_path):
        print(f"[WARN] Missing master: {master_path} (skip)")
        return False

    # Parse the boxes before the expensive ffmpeg extraction so that a row
    # with unusable boxes is skipped cheaply instead of aborting the batch.
    try:
        bb_map = parse_bboxes_json(bboxes_json)
    except (ValueError, TypeError) as e:
        print(f"[WARN] Unreadable bboxes_json for frames {start_f}-{end_f}: {e} (skip)")
        return False
    if not bb_map:
        print(f"[WARN] No boxes for frames {start_f}-{end_f} (skip)")
        return False

    Wp, Hp = get_proxy_size(proxy_path)
    # Shot boundaries in seconds; end_f is inclusive, hence the +1.
    ss = start_f / PROXY_FPS
    to = (end_f + 1) / PROXY_FPS

    try:
        tmp_seg_path, vrm = extract_master_segment(master_path, ss, to, force_cfr_30=True)
    except subprocess.CalledProcessError as e:
        print(f"[WARN] ffmpeg extract failed for {master_path}: {e}")
        return False

    # The segment is already normalized to PROXY_FPS, so the master's native
    # frame rate is not needed (the former per-shot ffprobe call was unused).
    fps_write = int(PROXY_FPS) if fps_out is None else int(fps_out)

    frames_rgb: List[np.ndarray] = []
    try:
        n_seg = len(vrm)
        if n_seg == 0:
            print(f"[WARN] Empty segment for frames {start_f}-{end_f} of {master_path} (skip)")
            return False

        Hm, Wm = vrm[0].asnumpy().shape[:2]
        # Proxy -> master scale factors; constant for the whole shot.
        sx, sy = Wm / float(Wp), Hm / float(Hp)

        prev_vec: Optional[np.ndarray] = None
        for k in range(end_f - start_f + 1):
            pf = start_f + k
            x1, y1, x2, y2 = interp_bbox(bb_map, pf)
            ex1, ey1, ex2, ey2 = expand_square(x1, y1, x2, y2, Wp, Hp, factor=expand_factor)
            cur_vec = np.array([ex1, ey1, ex2, ey2], dtype=np.float32)
            sm_vec = ema(prev_vec, cur_vec, alpha=ema_alpha)
            prev_vec = sm_vec
            bx1, by1, bx2, by2 = sm_vec.astype(int).tolist()

            # CFR segment: proxy frame k maps to segment frame k. Clamp in
            # case the segment came out a frame or two short.
            local_mf = min(k, n_seg - 1)
            img = vrm[local_mf].asnumpy()

            mx1, my1 = int(bx1 * sx), int(by1 * sy)
            mx2, my2 = int(bx2 * sx), int(by2 * sy)
            mx1, my1 = max(0, mx1), max(0, my1)
            mx2, my2 = min(Wm - 1, mx2), min(Hm - 1, my2)
            if mx2 <= mx1 or my2 <= my1:
                continue  # degenerate crop; drop this frame

            crop = img[my1:my2, mx1:mx2]
            crop = cv2.resize(crop, (out_side, out_side), interpolation=cv2.INTER_AREA)
            frames_rgb.append(crop)
    finally:
        # Always drop the reader and delete the temporary segment, even when
        # decoding or cropping raised; previously the file leaked on error.
        del vrm
        try:
            os.remove(tmp_seg_path)
        except OSError:
            pass

    if len(frames_rgb) < min_frames:
        return False

    uid = uuid.uuid4().hex[:10]
    out_dir = os.path.join(output_clips_dir, shot_type)
    os.makedirs(out_dir, exist_ok=True)
    out_path = os.path.join(out_dir, f"{uid}.mp4")

    # NOTE(review): out_path is built from a fresh random id, so this check is
    # unlikely to ever trigger; re-running the notebook adds new clips rather
    # than replacing old ones. Confirm whether deterministic names are wanted.
    if (not overwrite) and os.path.exists(out_path):
        return False

    write_mp4(frames_rgb, out_path, fps=fps_write)
    return True

In [19]:
def create_clips_from_all_shots_csv(
    shots_csv_path: str,
    output_clips_dir: str,
    raw_base_dir: Optional[str] = None,     # None → module-level RAW_BASE
    proxy_base_dir: Optional[str] = None,   # None → module-level PROX_BASE
    out_side: int = 256,
    expand_factor: float = 1.7,
    ema_alpha: float = 0.4,
    fps_out: Optional[int] = None,   # None → int(PROXY_FPS); set to an int to override
    min_frames: int = 8,
    overwrite: bool = False
):
    """
    Create a clip for every row of a shots CSV, locating each row's videos
    through the standard directory layout.

    Each row's "video_rel" column is resolved to a proxy path under
    ``proxy_base_dir`` and a master path under ``raw_base_dir``; the row is
    then handed to ``process_single_shot``. A summary line with the number of
    created and skipped clips is printed at the end.

    Args:
        shots_csv_path: Path to the shots CSV (must have a "video_rel" column
            plus the columns ``process_single_shot`` reads).
        output_clips_dir: Root directory for the produced clips.
        raw_base_dir: Root of the master videos. ``None`` uses the module-level
            ``RAW_BASE`` as it is at call time.
        proxy_base_dir: Root of the proxy videos. ``None`` uses the
            module-level ``PROX_BASE`` as it is at call time.
        out_side, expand_factor, ema_alpha, fps_out, min_frames, overwrite:
            Forwarded unchanged to ``process_single_shot``.
    """
    # Resolve the defaults at call time. Binding RAW_BASE / PROX_BASE in the
    # signature required the config cell to run before this definition and
    # froze whatever values they had at that moment.
    if raw_base_dir is None:
        raw_base_dir = RAW_BASE
    if proxy_base_dir is None:
        proxy_base_dir = PROX_BASE

    made, skipped = 0, 0
    # newline="" is what the csv module expects of file objects, so quoted
    # fields containing line breaks are parsed correctly.
    with open(shots_csv_path, newline="") as f:
        rdr = csv.DictReader(f)
        # Progress bar, matching create_clips_for_single_video.
        for r in tqdm.tqdm(rdr):
            proxy_path  = proxy_from_video_rel(r["video_rel"], proxy_base_dir)
            master_path = master_from_video_rel(r["video_rel"], raw_base_dir)
            ok = process_single_shot(
                r=r,
                master_path=master_path,
                proxy_path=proxy_path,
                output_clips_dir=output_clips_dir,
                out_side=out_side,
                expand_factor=expand_factor,
                ema_alpha=ema_alpha,
                fps_out=fps_out,
                min_frames=min_frames,
                overwrite=overwrite
            )
            if ok:
                made += 1
            else:
                skipped += 1
    print(f"[DONE] Created {made} clips; skipped {skipped}. Output -> {output_clips_dir}")

def create_clips_for_single_video(
    shots_csv_path: str,
    master_path: str,
    proxy_path: str,
    output_clips_dir: str,
    out_side: int = 256,
    expand_factor: float = 1.7,
    ema_alpha: float = 0.4,
    fps_out: Optional[int] = None,   # None → int(PROXY_FPS), the segment's frame rate
    min_frames: int = 8,
    overwrite: bool = False
):
    """
    Create a clip for every row of a shots CSV, using one explicitly given
    master/proxy pair for all rows.

    Unlike the batch variant, no directory layout is assumed: every row is
    processed against ``master_path`` and ``proxy_path``. Progress is shown
    with tqdm, and a summary line with the created/skipped counts is printed
    at the end.
    """
    # Everything except the row itself is identical for each shot.
    shot_options = dict(
        master_path=master_path,
        proxy_path=proxy_path,
        output_clips_dir=output_clips_dir,
        out_side=out_side,
        expand_factor=expand_factor,
        ema_alpha=ema_alpha,
        fps_out=fps_out,
        min_frames=min_frames,
        overwrite=overwrite,
    )

    n_made = 0
    n_skipped = 0
    with open(shots_csv_path) as csv_file:
        for row in tqdm.tqdm(csv.DictReader(csv_file)):
            if process_single_shot(r=row, **shot_options):
                n_made += 1
            else:
                n_skipped += 1
    print(f"[DONE] Created {n_made} clips from a single video; skipped {n_skipped}. Output -> {output_clips_dir}")

In [14]:
# Project layout on Google Drive. All batch-mode paths derive from ROOT.
ROOT = "/content/drive/MyDrive/FIT3163,3164/SlowFast"
RAW_BASE = f"{ROOT}/01_raw"                 # master videos: <RAW_BASE>/.../master.mp4
# NOTE(review): this directory name says 25 fps, while PROXY_FPS below is 30.0
# and the single-video cell reads from ".../02_proxy_30fps". Confirm which
# proxy set the boxes in shots.csv were annotated on.
PROX_BASE = f"{ROOT}/02_proxy_25fps"        # proxy videos: <PROX_BASE>/.../proxy.mp4
SHOTS_CSV = f"{ROOT}/04_shots/shots.csv"    # input for the batch run
CLIPS_BASE = f"{ROOT}/05_clips/player"      # output root for the batch run
os.makedirs(CLIPS_BASE, exist_ok=True)

# Define label list and processing constants
# NOTE(review): LABELS is not referenced by the functions in this notebook;
# presumably it documents the expected "shot_type" values. Verify.
LABELS = [
    "smash", "jump_smash", "block",
    "drop", "clear", "lift", "drive",
    "straight_net", "cross_net", "serve",
    "push", "tap",
    "average_joe"
]
OUT_SIDE = 256          # side length (pixels) of the square output clips
EXPAND_FACTOR = 1.4     # bbox expansion factor; previously 1.7
EMA_ALPHA = 0.4         # EMA weight of the current frame's box
PROXY_FPS = 30.0        # frame rate used to convert proxy frame indices to seconds

## **Process ALL clips from a standard directory structure**

In [None]:
print("--- Process all clips from a standard directory structure ---")

# Batch run over SHOTS_CSV; clips are written under CLIPS_BASE/<shot_type>/.
# SHOTS_CSV must exist. The master/proxy roots are not passed here, so the
# function's defaults (RAW_BASE / PROX_BASE) are used.
# fps_out receives the float PROXY_FPS; it is converted with int() downstream.
create_clips_from_all_shots_csv(
        shots_csv_path=SHOTS_CSV,
        output_clips_dir=CLIPS_BASE,
        out_side=OUT_SIDE,
        expand_factor=EXPAND_FACTOR,
        ema_alpha=EMA_ALPHA,
        fps_out=PROXY_FPS,
        min_frames=8,
        overwrite=False
    )

## **Process a single shots.csv from specific video files**

In [20]:
print("\n--- Process clips from a single video ---")

# Define paths for a specific video you want to process
# These are spelled out in full rather than derived from ROOT. Note that the
# proxy comes from the "02_proxy_30fps" directory.
single_shots_csv = "/content/drive/MyDrive/FIT3163,3164/SlowFast/04_shots/phua_1.csv"
single_master_video = "/content/drive/MyDrive/FIT3163,3164/SlowFast/01_raw/sin_tty_2016/1/master.mp4"
single_proxy_video = "/content/drive/MyDrive/FIT3163,3164/SlowFast/02_proxy_30fps/sin_tty_2016/1/proxy.mp4"
single_output_dir = "/content/drive/MyDrive/FIT3163,3164/SlowFast/05_clips/sin_tty_2016_1"

# Every row of single_shots_csv is processed against this one master/proxy pair.
os.makedirs(single_output_dir, exist_ok=True)
create_clips_for_single_video(
        shots_csv_path=single_shots_csv,
        master_path=single_master_video,
        proxy_path=single_proxy_video,
        output_clips_dir=single_output_dir,
        out_side=OUT_SIDE,
        expand_factor=EXPAND_FACTOR,
        ema_alpha=EMA_ALPHA,
        fps_out=PROXY_FPS,
        min_frames=8,
        overwrite=True
    )


--- Process clips from a single video ---


72it [02:29,  2.07s/it]

[DONE] Created 72 clips from a single video; skipped 0. Output -> /content/drive/MyDrive/FIT3163,3164/SlowFast/05_clips/sin_tty_2016_1



