### Overlay timestamps on the video for consistent labeling

In [7]:
# Burn each frame's presentation timestamp (drawtext `pts:hms`) into the top-left
# corner, white text on a semi-transparent black box, so clip boundaries can be
# read straight off the picture. Video is re-encoded with libx264 at CRF 18 and
# the audio stream is copied unchanged into the *_with_pts.mp4 file.
!ffmpeg \
  -i /home/yogee/Desktop/boxing_move_predictor/data_raw/sparring_pov/slow_motion_sparring.mp4 \
  -vf "drawtext=fontfile=/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf:\
text='%{pts\:hms}':x=10:y=10:fontsize=24:fontcolor=white:box=1:boxcolor=0x00000099"\
  -c:v libx264 -crf 18 -c:a copy \
  /home/yogee/Desktop/boxing_move_predictor/data_raw/sparring_pov/slow_motion_sparring_with_pts.mp4


ffmpeg version 4.4.2-0ubuntu0.22.04.1+esm7 Copyright (c) 2000-2021 the FFmpeg developers
  built with gcc 11 (Ubuntu 11.4.0-1ubuntu1~22.04)
  configuration: --prefix=/usr --extra-version=0ubuntu0.22.04.1+esm7 --toolchain=hardened --libdir=/usr/lib/x86_64-linux-gnu --incdir=/usr/include/x86_64-linux-gnu --arch=amd64 --enable-gpl --disable-stripping --enable-gnutls --enable-ladspa --enable-libaom --enable-libass --enable-libbluray --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libcodec2 --enable-libdav1d --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libgme --enable-libgsm --enable-libjack --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-libpulse --enable-librabbitmq --enable-librubberband --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libspeex --enable-libsrt --enable-libssh --enable-libtheora --enable-libtwolame --enable-libvidstab --enable-libvorbis --enabl

### Split Video using timestamps

In [3]:
import os
import subprocess
import pandas as pd
from pathlib import Path
from shutil import which

def find_video(video_name, search_dir="."):
    """
    Locate "<video_name>.mp4" anywhere below search_dir, ignoring letter case.

    The tree is traversed with os.walk; the first file whose lower-cased name
    matches is returned as a joined path. None means nothing matched.
    """
    wanted = video_name.lower() + ".mp4"
    hits = (
        os.path.join(folder, filename)
        for folder, _subdirs, filenames in os.walk(search_dir)
        for filename in filenames
        if filename.lower() == wanted
    )
    return next(hits, None)

def ensure_ffmpeg_exists():
    """Check that ffmpeg and then ffprobe resolve on PATH; raise RuntimeError for the first one missing."""
    required_tools = (
        ("ffmpeg", "ffmpeg not found. Please install ffmpeg."),
        ("ffprobe", "ffprobe not found. Please install ffmpeg (includes ffprobe)."),
    )
    for executable, complaint in required_tools:
        if which(executable) is None:
            raise RuntimeError(complaint)

def timestamp_to_seconds(ts_str):
    """Turn an "HH:MM:SS" string (three integer fields) into a whole number of seconds."""
    hours, minutes, seconds = (int(field) for field in ts_str.split(":"))
    return (hours * 60 + minutes) * 60 + seconds

def validate_timestamp(ts_str):
    """
    Normalize a spreadsheet timestamp to a zero-padded "HH:MM:SS" string.

    Accepted inputs (surrounding whitespace is ignored):
      - "MM:SS"     -> "00:MM:SS"
      - "A:B:00"    -> "00:A:B" when both A and B are below 60. A value typed
                       as minutes:seconds is often stored by the spreadsheet
                       as hours:minutes:00, so a trailing ":00" is taken to
                       mean the value was shifted one field to the left.
      - "HH:MM:SS"  -> unchanged (zero-padded)

    Args:
        ts_str: the raw timestamp; non-string values are converted with str().

    Returns:
        The normalized "HH:MM:SS" string.

    Raises:
        ValueError: if a field is not an integer, a field is out of range,
            or the value does not have two or three ":"-separated fields.
    """
    text = str(ts_str).strip()

    try:
        fields = [int(part) for part in text.split(":")]
    except ValueError:
        # Re-raise with the same message as every other rejection below.
        raise ValueError(f"Invalid timestamp format: '{text}'") from None

    if len(fields) == 2:
        mm, ss = fields
        if 0 <= mm < 60 and 0 <= ss < 60:
            return f"00:{mm:02d}:{ss:02d}"
    elif len(fields) == 3:
        hh, mm, ss = fields
        # Shifted MM:SS value. The range check matters: without it
        # "00:99:00" would be emitted as the impossible "00:00:99".
        if ss == 0 and 0 <= hh < 60 and 0 <= mm < 60:
            return f"00:{hh:02d}:{mm:02d}"
        # Otherwise it must be a genuine HH:MM:SS value.
        if hh >= 0 and 0 <= mm < 60 and 0 <= ss < 60:
            return f"{hh:02d}:{mm:02d}:{ss:02d}"

    raise ValueError(f"Invalid timestamp format: '{text}'")

def ffmpeg_cut_segment(input_path, start_ts, end_ts, output_path):
    """
    Cut the span start_ts..end_ts out of input_path and write it to output_path.

    ffmpeg is asked to seek before opening the input (-ss ahead of -i), to stop
    after a fixed number of seconds (-t), and to re-encode video (libx264) and
    audio (aac) with the moov header moved to the front (+faststart).

    Raises:
        RuntimeError: if the span is not positive, ffmpeg exits non-zero, or
            the resulting file is smaller than 2000 bytes.
    """
    begin = timestamp_to_seconds(start_ts)
    finish = timestamp_to_seconds(end_ts)
    clip_seconds = finish - begin
    if not clip_seconds > 0:
        raise RuntimeError(f"Invalid segment length: {start_ts} → {end_ts}")

    # Option groups are concatenated in the order ffmpeg must see them.
    global_opts = ["-hide_banner", "-loglevel", "info"]
    input_opts = ["-ss", start_ts, "-i", input_path, "-t", str(clip_seconds)]
    video_opts = ["-c:v", "libx264", "-preset", "fast", "-crf", "23"]
    audio_opts = ["-c:a", "aac"]
    output_opts = ["-movflags", "+faststart", "-pix_fmt", "yuv420p", "-y"]
    command = [
        "ffmpeg",
        *global_opts,
        *input_opts,
        *video_opts,
        *audio_opts,
        *output_opts,
        output_path,
    ]

    print(f"→ Cutting '{input_path}' from {start_ts} for {clip_seconds}s → '{output_path}'")
    completed = subprocess.run(command, capture_output=True, text=True)
    if completed.returncode != 0:
        # Surface ffmpeg's own diagnostics before failing.
        print(completed.stderr)
        raise RuntimeError(f"ffmpeg failed with code {completed.returncode}")

    n_bytes = os.path.getsize(output_path)
    if n_bytes < 2000:
        raise RuntimeError(f"Output file too small ({n_bytes} bytes)")
    print(f"✔ Created: {output_path} ({n_bytes:,} bytes)")

def split_videos_from_sheet(sheet_path, search_dir, output_dir):
    """
    Cut one labelled clip per spreadsheet row into output_dir/<label>/.

    The sheet (.csv, otherwise read as Excel) must provide the columns
    "Video Name", "Start", "End" and "Label". For each row the source video
    is looked up under search_dir, the timestamps are normalized, and the
    segment is written to "<output_dir>/<label>/<video name>_<row number>.mp4".
    A row that fails is reported and skipped so that one bad row does not
    abort the whole batch.

    Args:
        sheet_path: path (str or os.PathLike) of the timestamp spreadsheet.
        search_dir: directory searched recursively for the source videos.
        output_dir: root directory for the per-label clip folders.

    Raises:
        RuntimeError: if ffmpeg/ffprobe are not available.
        ValueError: if a required column is missing from the sheet.
    """
    ensure_ffmpeg_exists()

    # Values pandas produces for an empty cell once converted to text.
    blank_markers = ("nan", "none")

    def _as_text(value):
        return str(value).strip()

    # Read Start/End as text so pandas does not reinterpret the timestamps.
    converters = {"Start": _as_text, "End": _as_text}
    # os.fspath lets callers pass a pathlib.Path as well as a plain string.
    if os.fspath(sheet_path).lower().endswith(".csv"):
        df = pd.read_csv(sheet_path, converters=converters)
    else:
        df = pd.read_excel(sheet_path, converters=converters)
    df.columns = df.columns.str.strip()

    required = {"Video Name", "Start", "End", "Label"}
    if not required.issubset(df.columns):
        raise ValueError(f"Missing columns: found {df.columns.tolist()}")

    os.makedirs(output_dir, exist_ok=True)
    success = 0

    for idx, row in df.iterrows():
        try:
            name = _as_text(row["Video Name"])
            if not name or name.lower() in blank_markers:
                continue

            # A blank Label cell would otherwise become a bogus "nan" class folder.
            label_text = _as_text(row["Label"])
            if not label_text or label_text.lower() in blank_markers:
                print(f"[!] Row {idx+1}: missing label (skipping)")
                continue
            label = label_text.lower().replace(" ", "_")

            # The converters above are keyed on the exact header text, so a
            # header with stray whitespace bypasses them; stringify again here.
            raw_start = _as_text(row["Start"])
            raw_end   = _as_text(row["End"])

            print(f"\n[DEBUG] Row {idx+1}: Raw Start='{raw_start}', End='{raw_end}'")
            start_ts = validate_timestamp(raw_start)
            end_ts   = validate_timestamp(raw_end)
            print(f"[DEBUG] Row {idx+1}: Normalized {start_ts} → {end_ts}")

            video_path = find_video(name, search_dir)
            if video_path is None:
                print(f"[!] Video not found for '{name}' (skipping)")
                continue

            label_dir = os.path.join(output_dir, label)
            os.makedirs(label_dir, exist_ok=True)
            out_file = os.path.join(label_dir, f"{name}_{idx+1:03}.mp4")

            ffmpeg_cut_segment(video_path, start_ts, end_ts, out_file)
            success += 1

        except Exception as e:
            # Deliberate best-effort: report the row and continue with the next.
            print(f"[X] Row {idx+1} failed: {e}")

    total = len(df)
    print(f"\nDone: {success}/{total} segments created.")

# Entry point for this cell: cut every clip listed in the timestamp spreadsheet.
# The sheet path, search directory and output directory are all relative paths,
# so they resolve against the current working directory.
if __name__ == "__main__":
    split_videos_from_sheet(
        sheet_path="time_stamps/Boxing_Videos_Timestamp.xlsx",
        search_dir=".",
        output_dir="dataset"
    )



[DEBUG] Row 1: Raw Start='00:25:00', End='00:29:00'
[DEBUG] Row 1: Normalized 00:00:25 → 00:00:29
→ Cutting './data_raw/sparring_pov/slow_motion_sparring.mp4' from 00:00:25 for 4s → 'dataset/jab/slow_motion_sparring_001.mp4'
✔ Created: dataset/jab/slow_motion_sparring_001.mp4 (813,180 bytes)

[DEBUG] Row 2: Raw Start='00:30:00', End='00:32:00'
[DEBUG] Row 2: Normalized 00:00:30 → 00:00:32
→ Cutting './data_raw/sparring_pov/slow_motion_sparring.mp4' from 00:00:30 for 2s → 'dataset/right_hook/slow_motion_sparring_002.mp4'
✔ Created: dataset/right_hook/slow_motion_sparring_002.mp4 (384,086 bytes)

[DEBUG] Row 3: Raw Start='01:55:00', End='01:57:00'
[DEBUG] Row 3: Normalized 00:01:55 → 00:01:57
→ Cutting './data_raw/sparring_pov/slow_motion_sparring.mp4' from 00:01:55 for 2s → 'dataset/jab/slow_motion_sparring_003.mp4'
✔ Created: dataset/jab/slow_motion_sparring_003.mp4 (456,665 bytes)

[DEBUG] Row 4: Raw Start='01:58:00', End='02:00:00'
[DEBUG] Row 4: Normalized 00:01:58 → 00:02:00
→ Cut

### Dataset with poses

In [20]:
import os
import cv2
import imageio
import numpy as np
import pandas as pd
from ultralytics import YOLO

# ─────────────────────────────────────────────────────────────────────────────
# CONFIGURATION (edit only these three lines)
# ─────────────────────────────────────────────────────────────────────────────

POSE_MODEL_PATH = "models/yolo11n-pose.pt"   # pose-model weights file, passed to YOLO() in main()
DATASET_ROOT    = "dataset"                  # input root; clips are read from its "<label>/*.mp4" subfolders
OUTPUT_ROOT     = "dataset_with_poses"       # output root for the per-clip .npy files and labels.csv

# ─────────────────────────────────────────────────────────────────────────────
# SCRIPT BEGINS HERE – no other edits needed
# ─────────────────────────────────────────────────────────────────────────────

def find_all_clips(root_dir):
    """
    Walk the one-level "<label>/<clip>.mp4" layout under root_dir.

    Each immediate sub-directory of root_dir is treated as a label. For every
    file in it whose name ends in ".mp4" (any letter case) a tuple
    (label, full_video_path, clip_name) is yielded, where clip_name is the
    file name without its extension. Entries come in os.listdir order.
    """
    for class_name in os.listdir(root_dir):
        class_folder = os.path.join(root_dir, class_name)
        if os.path.isdir(class_folder):
            for entry in os.listdir(class_folder):
                if not entry.lower().endswith(".mp4"):
                    continue
                stem, _extension = os.path.splitext(entry)
                yield class_name, os.path.join(class_folder, entry), stem

def read_frames(path):
    """
    Decode every frame of a video into a list of RGB numpy arrays.

    imageio (ffmpeg plugin) is tried first. If it raises or yields no frames,
    OpenCV's VideoCapture is used instead and its BGR frames are converted to
    RGB.

    Args:
        path: location of the video file.

    Returns:
        A non-empty list of frames.

    Raises:
        RuntimeError: if neither backend can open the file (the imageio error,
            when there was one, is included in the message and chained as the
            cause), or if OpenCV opens the file but delivers no frames.
    """
    imageio_error = None

    # First attempt: imageio + ffmpeg.
    try:
        reader = imageio.get_reader(path, "ffmpeg")
        try:
            frames = [frame for frame in reader]
        finally:
            # Release the decoder even if iteration fails part-way.
            reader.close()
        if frames:
            return frames
        # No frames decoded: fall through to OpenCV.
    except Exception as exc:
        # Keep the reason instead of discarding it; it is usually the real
        # root cause when the OpenCV fallback fails as well.
        imageio_error = exc

    # Fallback: OpenCV VideoCapture.
    cap = cv2.VideoCapture(path)
    try:
        if not cap.isOpened():
            detail = "" if imageio_error is None else f" (imageio error: {imageio_error})"
            raise RuntimeError(
                "Cannot open video with imageio or OpenCV" + detail
            ) from imageio_error

        frames = []
        while True:
            ret, frame_bgr = cap.read()
            if not ret:
                break
            frames.append(cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB))
    finally:
        cap.release()

    if not frames:
        raise RuntimeError("Video opened but no frames read")
    return frames

def extract_pose_sequence(model, video_path):
    """
    Run the pose model on every frame of video_path and collect keypoints.

    Args:
        model: a loaded Ultralytics pose model; it is called once per frame.
        video_path: path of the clip to process.

    Returns:
        A float32 array of shape (num_frames, num_keypoints * 2) holding the
        x/y pixel coordinates of the first detected person in each frame.
        Frames with no detection are zero-filled. None if there were no frames.

    Raises:
        RuntimeError: propagated from read_frames when the clip cannot be read.
    """
    frames = read_frames(video_path)

    # Pose model configs describe the keypoint layout as
    # kpt_shape = [num_keypoints, dims]; "nc" is the number of object classes
    # and must not be used here.
    model_cfg = getattr(getattr(model, "model", None), "yaml", None)
    kpt_shape = model_cfg.get("kpt_shape") if isinstance(model_cfg, dict) else None
    # NOTE(review): 17 is the COCO layout of the stock yolo11n-pose weights;
    # it is only a fallback for when the config does not expose kpt_shape.
    num_kpts = int(kpt_shape[0]) if kpt_shape else 17
    blank = np.zeros(num_kpts * 2, dtype=np.float32)

    kp_list = []
    for frame in frames:
        # read_frames yields RGB, while Ultralytics documents numpy input as
        # BGR (OpenCV convention), so flip the channel axis before inference.
        if getattr(frame, "ndim", 0) == 3 and frame.shape[-1] == 3:
            frame = np.ascontiguousarray(frame[..., ::-1])

        # verbose=False keeps the per-frame log lines out of the cell output.
        results = model(frame, verbose=False)

        arr = blank
        if len(results) > 0 and results[0].keypoints is not None:
            # .xy is the (persons, keypoints, 2) coordinate tensor; the
            # Keypoints wrapper itself is not an ndarray.
            xy = results[0].keypoints.xy
            if hasattr(xy, "cpu"):
                xy = xy.cpu().numpy()
            xy = np.asarray(xy, dtype=np.float32)
            if xy.ndim == 3 and xy.shape[0] > 0:
                arr = xy[0].reshape(-1)  # first person, x/y only
        kp_list.append(arr)

    return np.stack(kp_list, axis=0) if kp_list else None

def main():
    """
    Convert every clip under DATASET_ROOT into a pose-sequence .npy file.

    Loads the pose model from POSE_MODEL_PATH, processes each
    "<label>/<clip>.mp4" found under DATASET_ROOT, saves the resulting array
    as "OUTPUT_ROOT/<label>/<clip>.npy", and finally writes
    "OUTPUT_ROOT/labels.csv" with the columns npy_path and label. Clips that
    cannot be read are reported and skipped.
    """
    os.makedirs(OUTPUT_ROOT, exist_ok=True)
    model = YOLO(POSE_MODEL_PATH)
    rows = []

    for label, video_path, clip_name in find_all_clips(DATASET_ROOT):
        print(f"Processing {label}/{clip_name}.mp4 …")
        try:
            seq = extract_pose_sequence(model, video_path)
        except Exception as e:
            # Best-effort batch: one unreadable clip must not stop the run.
            print(f"  ↳ skip (cannot read): {e}")
            continue

        if seq is None:
            print("  ↳ no frames: skipping")
            continue

        out_dir = os.path.join(OUTPUT_ROOT, label)
        os.makedirs(out_dir, exist_ok=True)
        npy_path = os.path.join(out_dir, f"{clip_name}.npy")
        np.save(npy_path, seq)

        rows.append({"npy_path": npy_path, "label": label})
        print(f"  ↳ saved {clip_name}.npy  (shape = {seq.shape})")

    if not rows:
        print("Warning: no clips were processed; labels.csv contains only the header")

    # Explicit columns keep a valid header row even when every clip was
    # skipped; a frame built from an empty list has no columns at all.
    df = pd.DataFrame(rows, columns=["npy_path", "label"])
    df.to_csv(os.path.join(OUTPUT_ROOT, "labels.csv"), index=False)
    print("Done. labels.csv written to", OUTPUT_ROOT)

# Entry point for this cell: run the pose extraction with the configuration
# constants (POSE_MODEL_PATH, DATASET_ROOT, OUTPUT_ROOT) defined above.
if __name__ == "__main__":
    main()


Processing block/slow_motion_sparring_009.mp4 …
  ↳ skip (cannot read): Cannot open video with imageio or OpenCV
Processing right_hook/slow_motion_sparring_019.mp4 …
  ↳ skip (cannot read): Cannot open video with imageio or OpenCV
Processing right_hook/slow_motion_sparring_001.mp4 …
  ↳ skip (cannot read): Cannot open video with imageio or OpenCV
Processing jab/slow_motion_sparring_030.mp4 …
  ↳ skip (cannot read): Cannot open video with imageio or OpenCV
Processing jab/slow_motion_sparring_003.mp4 …
  ↳ skip (cannot read): Cannot open video with imageio or OpenCV
Processing jab/slow_motion_sparring_000.mp4 …
  ↳ skip (cannot read): Cannot open video with imageio or OpenCV
Processing jab/slow_motion_sparring_018.mp4 …
  ↳ skip (cannot read): Cannot open video with imageio or OpenCV
Processing jab/slow_motion_sparring_012.mp4 …
  ↳ skip (cannot read): Cannot open video with imageio or OpenCV
Processing jab/slow_motion_sparring_006.mp4 …
  ↳ skip (cannot read): Cannot open video with ima