Goal: detect face/pose/hands; crop tight ROI; write to videos_roi/; build manifest_nslt2000_roi.csv.

In [3]:
### smoke test for installations:
import cv2, mediapipe as mp
print("OpenCV:", cv2.__version__)
print("MediaPipe:", mp.__version__)
# quick hands init
with mp.solutions.hands.Hands() as h:
    pass
print("OK")


2025-11-11 11:13:53.247728: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-11 11:13:53.345948: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1762881233.384805 2032798 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1762881233.395916 2032798 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1762881233.482898 2032798 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking 

OpenCV: 4.12.0
MediaPipe: 0.10.14
OK


I0000 00:00:1762881235.196755 2032798 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1762881235.293395 4150743 gl_context.cc:357] GL version: 3.2 (OpenGL ES 3.2 NVIDIA 575.64.03), renderer: NVIDIA GeForce RTX 5080/PCIe/SSE2
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
W0000 00:00:1762881235.301417 4150725 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1762881235.312131 4150728 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


In [4]:
# === Cell A — Setup & Paths ===
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"        # silence TF C++ logs
os.environ["ABSL_LOG_SEVERITY"] = "fatal"       # kill absl spam
# If you hit any GL driver quirks, you can force CPU path:
# os.environ["MEDIAPIPE_DISABLE_GPU"] = "1"
from pathlib import Path
import os, json, math, shutil, subprocess
import pandas as pd
import numpy as np

root = Path("..").resolve()
data_dir = root / "data" / "wlasl_preprocessed"

MANIFEST_IN  = data_dir / "manifest_nslt2000_merged.csv"   # <- use merged
MANIFEST_OUT = data_dir / "manifest_nslt2000_roi.csv"
ROI_DIR      = data_dir / "videos_roi"                     # cropped videos out
TMP_DIR      = root / "runs" / "roi_tmp"                   # temp (safe to delete)

ROI_DIR.mkdir(parents=True, exist_ok=True)
TMP_DIR.mkdir(parents=True, exist_ok=True)

df = pd.read_csv(MANIFEST_IN)
print("Manifest:", MANIFEST_IN)
print("Splits | train=%d val=%d test=%d | classes=%d" %
      ((df['split']=='train').sum(), (df['split']=='val').sum(), (df['split']=='test').sum(), df['label'].nunique()))

# detect NVENC availability for optional faster encode (not required)
def has_nvenc():
    try:
        out = subprocess.check_output(["ffmpeg","-hide_banner","-encoders"], stderr=subprocess.STDOUT).decode()
        return "h264_nvenc" in out
    except Exception:
        return False

USE_NVENC = has_nvenc()
print("NVENC available:", USE_NVENC)


Manifest: /home/falasoul/notebooks/USD/AAI-590/Capstone/AAI-590-G3-ASL/data/wlasl_preprocessed/manifest_nslt2000_merged.csv
Splits | train=8313 val=2253 test=1414 | classes=2000
NVENC available: True


### Cell B — MediaPipe Init + ROI helpers

In [5]:
# === Cell B — MediaPipe Init + ROI helpers ===
import cv2
import mediapipe as mp

mp_hands = mp.solutions.hands
mp_pose  = mp.solutions.pose
mp_drawing = mp.solutions.drawing_utils

# Expand a bbox by scale factor and clamp to frame
def expand_and_clip(xyxy, scale, W, H):
    x1, y1, x2, y2 = xyxy
    cx, cy = (x1+x2)/2.0, (y1+y2)/2.0
    w, h   = (x2-x1), (y2-y1)
    w2, h2 = w*scale/2.0, h*scale/2.0
    nx1, ny1 = max(0, int(cx - w2)), max(0, int(cy - h2))
    nx2, ny2 = min(W-1, int(cx + w2)), min(H-1, int(cy + h2))
    return nx1, ny1, nx2, ny2

# Exponential moving average smoother for boxes
class BoxSmoother:
    def __init__(self, alpha=0.6):
        self.alpha = alpha
        self.state = None
    def __call__(self, box):
        if self.state is None:
            self.state = np.array(box, dtype=np.float32)
        else:
            self.state = self.alpha*np.array(box, dtype=np.float32) + (1-self.alpha)*self.state
        return tuple(self.state.astype(int))

def hands_pose_roi(frame_bgr, hands_model, pose_model, last_box=None):
    """Return ROI around hands/upper-body; fall back to center crop if not found."""
    H, W = frame_bgr.shape[:2]
    frame_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)

    # Hands
    hres = hands_model.process(frame_rgb)
    pts = []
    if hres.multi_hand_landmarks:
        for lm in hres.multi_hand_landmarks:
            for p in lm.landmark:
                pts.append((int(p.x * W), int(p.y * H)))

    # Pose (upper body keypoints)
    pres = pose_model.process(frame_rgb)
    if pres.pose_landmarks:
        for idx in [mp_pose.PoseLandmark.LEFT_SHOULDER,
                    mp_pose.PoseLandmark.RIGHT_SHOULDER,
                    mp_pose.PoseLandmark.NOSE,
                    mp_pose.PoseLandmark.LEFT_ELBOW,
                    mp_pose.PoseLandmark.RIGHT_ELBOW]:
            p = pres.pose_landmarks.landmark[idx]
            pts.append((int(p.x * W), int(p.y * H)))

    if pts:
        xs = [p[0] for p in pts]; ys = [p[1] for p in pts]
        x1, y1, x2, y2 = max(0, min(xs)), max(0, min(ys)), min(W-1, max(xs)), min(H-1, max(ys))
        # pad a bit vertically to include upper torso
        y1 = max(0, int(y1 - 0.1*(y2-y1)))
        box = (x1, y1, x2, y2)
    else:
        # fallback: center box
        size = int(0.7 * min(W, H))
        cx, cy = W//2, H//2
        x1 = max(0, cx - size//2); y1 = max(0, cy - size//2)
        x2 = min(W-1, cx + size//2); y2 = min(H-1, cy + size//2)
        box = (x1, y1, x2, y2)

    # expand and smooth
    box = expand_and_clip(box, scale=1.4, W=W, H=H)
    if last_box is not None:
        box = BoxSmoother(alpha=0.6)(box)
    return box


#### Cell C — Single-file crop & write

In [6]:
# === Cell C — Single-file ROI crop & write ===
def read_video_meta(path):
    cap = cv2.VideoCapture(path)
    if not cap.isOpened(): return None
    fps = cap.get(cv2.CAP_PROP_FPS) or 30.0
    W   = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    H   = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    N   = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    cap.release()
    return {"fps": fps, "W": W, "H": H, "N": N}

def crop_and_save(in_path, out_path, size=(224,224)):
    meta = read_video_meta(in_path)
    if meta is None: return False, "open_failed"
    cap = cv2.VideoCapture(in_path)
    # Writer: try H.264; fallback to mp4v if needed
    fourcc = cv2.VideoWriter_fourcc(*("avc1"))
    writer = cv2.VideoWriter(str(out_path), fourcc, meta["fps"], size)
    if not writer.isOpened():
        fourcc = cv2.VideoWriter_fourcc(*("mp4v"))
        writer = cv2.VideoWriter(str(out_path), fourcc, meta["fps"], size)
        if not writer.isOpened():
            cap.release()
            return False, "writer_failed"

    smoother = BoxSmoother(alpha=0.6)
    with mp_hands.Hands(static_image_mode=False, max_num_hands=2, min_detection_confidence=0.5) as hands, \
         mp_pose.Pose(static_image_mode=False, model_complexity=1, enable_segmentation=False,
                      min_detection_confidence=0.5) as pose:
        last = None
        ok_frames = 0
        while True:
            ret, frame = cap.read()
            if not ret: break
            box = hands_pose_roi(frame, hands, pose, last)
            last = box
            x1,y1,x2,y2 = box
            crop = frame[y1:y2, x1:x2]
            if crop.size == 0:
                # fallback to center crop if something went wrong
                H, W = frame.shape[:2]
                s = min(H, W)
                y0 = (H - s)//2; x0 = (W - s)//2
                crop = frame[y0:y0+s, x0:x0+s]
            crop = cv2.resize(crop, size, interpolation=cv2.INTER_AREA)
            writer.write(crop)
            ok_frames += 1

    writer.release()
    cap.release()
    return (ok_frames > 0), "ok" if ok_frames > 0 else "no_frames"


### Cell D — Batch process (multiprocessing) + new manifest

Notes & tips

This picks a single, stable ROI per clip (compute once from a handful of frames) → consistent crop + faster than per-frame landmarks.

We read with OpenCV and rely on the global re-encode you already did; that keeps decoding stable.

If you want even more speed, you can change _iter_frames_cv to decord; the rest stays the same.

You can smoke-test with df.head(100) before the full run.

Once this finishes, you can point your Cell E visual QA at manifest_nslt2000_roi.csv (as you already do) and proceed to the next notebook (feature export / training).

In [1]:
# Run this in a small cell once
import os, numpy as np, cv2
os.environ.setdefault("MEDIAPIPE_DISABLE_GPU", "1")

import mediapipe as mp
hands = mp.solutions.hands.Hands(static_image_mode=False, max_num_hands=2)
pose  = mp.solutions.pose.Pose(static_image_mode=False, model_complexity=0)

# Trigger a first inference to force model download/extract
dummy = np.zeros((256,256,3), dtype=np.uint8)
hands.process(cv2.cvtColor(dummy, cv2.COLOR_BGR2RGB))
pose.process(cv2.cvtColor(dummy, cv2.COLOR_BGR2RGB))

hands.close(); pose.close()
print("✅ MediaPipe models primed.")


2025-11-11 12:30:37.796801: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-11 12:30:37.902059: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1762885837.941332 4192412 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1762885837.953128 4192412 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1762885838.038842 4192412 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking 

✅ MediaPipe models primed.


I0000 00:00:1762885840.150798 4192412 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1762885840.226356 4192509 gl_context.cc:357] GL version: 3.2 (OpenGL ES 3.2 NVIDIA 575.64.03), renderer: NVIDIA GeForce RTX 5080/PCIe/SSE2
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
I0000 00:00:1762885840.235066 4192412 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
W0000 00:00:1762885840.240547 4192485 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1762885840.254783 4192497 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
I0000 00:00:1762885840.263676 4192534 gl_context.cc:357] GL version: 3.2 (OpenGL ES 3.2 NVIDIA 575.64.03), renderer: NVIDIA GeForce RTX 5080/PCIe/SSE2
W0000 00:00:1762885840.308723 4192521 inference_feedback_man

In [3]:
# === Cell D (robust) — ROI extraction with spawn + CPU MediaPipe ===
import os, sys, math, traceback, json
from pathlib import Path
import numpy as np
import pandas as pd
from tqdm.auto import tqdm
import cv2
import multiprocessing as mp
from concurrent.futures import ThreadPoolExecutor, as_completed
import pathlib
os.environ["MEDIAPIPE_CACHE_DIR"] = str(pathlib.Path("..")/"runs"/"mp_cache")

# ---- harden environment (disable GPU in mediapipe/tflite & quiet TF) ----
os.environ.setdefault("MEDIAPIPE_DISABLE_GPU", "1")
os.environ.setdefault("TF_CPP_MIN_LOG_LEVEL", "2")

# Force 'spawn' to avoid forking GL contexts
if mp.get_start_method(allow_none=True) != "spawn":
    mp.set_start_method("spawn", force=True)

root    = Path("..").resolve()
data_dir= root / "data" / "wlasl_preprocessed"
man_in  = data_dir / "manifest_nslt2000_merged.csv"
out_dir = data_dir / "videos_roi"
out_dir.mkdir(parents=True, exist_ok=True)

OUT_SIZE   = 224
FPS_TARGET = 30
MARGIN     = 0.15
USE_NVENC  = True
CRF_OR_CQ  = 23
# keep worker count conservative for mediapipe
NUM_WORKERS = min(4, max(2, (os.cpu_count() or 4)//2))

df = pd.read_csv(man_in)
assert {"video_id","path","gloss","label","split"}.issubset(df.columns)
def open_ffmpeg_writer(out_path, w, h, fps=30, use_nvenc=True, crf=23, preset="p4"):
    """
    Return (proc, stdin) where proc is a Popen and stdin is a pipe you can write raw RGB frames to.
    Frames must be contiguous np.uint8 arrays shaped (h, w, 3) in RGB order.
    """
    pix_fmt = "rgb24"  # we'll send RGB, FFmpeg will convert to yuv420p
    size_arg = f"{w}x{h}"

    def build_cmd(nvenc):
        if nvenc:
            # H.264 NVENC path (fast)
            vcodec = ["-c:v", "h264_nvenc", "-cq", str(crf), "-preset", preset]
        else:
            # CPU x264 path (portable)
            vcodec = ["-c:v", "libx264", "-crf", str(crf), "-preset", "medium", "-pix_fmt", "yuv420p"]
        cmd = [
            "ffmpeg", "-y",
            "-f", "rawvideo", "-vcodec", "rawvideo",
            "-pix_fmt", pix_fmt, "-s", size_arg, "-r", str(fps),
            "-i", "-",             # stdin
            *vcodec,
            "-movflags", "+faststart",
            "-an",                 # no audio
            str(out_path)
        ]
        return cmd

    # try NVENC then fallback
    for try_nvenc in (use_nvenc, False):
        cmd = build_cmd(try_nvenc)
        try:
            proc = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
            return proc, proc.stdin
        except Exception as e:
            if try_nvenc:
                print("[FFmpeg] NVENC path failed; falling back to libx264:", e, file=sys.stderr)
            else:
                raise

# ====== keep your open_ffmpeg_writer / write_frame / close_ffmpeg_writer here ======
# (no changes needed)

# ---------- ROI helpers (init inside worker) ----------
def _load_mediapipe():
    import mediapipe as mp
    mp_hands = mp.solutions.hands
    mp_pose  = mp.solutions.pose
    hands = mp_hands.Hands(
        static_image_mode=False, max_num_hands=2,
        min_detection_confidence=0.5, min_tracking_confidence=0.5
    )
    pose = mp_pose.Pose(
        static_image_mode=False, model_complexity=0,
        enable_segmentation=False,
        min_detection_confidence=0.5, min_tracking_confidence=0.5
    )
    return hands, pose

def _bbox_from_landmarks(landmarks, w, h):
    xs, ys = [], []
    for x,y in landmarks:
        xs.append(0 if x<0 else (w-1 if x>w-1 else x))
        ys.append(0 if y<0 else (h-1 if y>h-1 else y))
    if not xs: return None
    x1,x2 = int(min(xs)), int(max(xs))
    y1,y2 = int(min(ys)), int(max(ys))
    if x2<=x1 or y2<=y1: return None
    return [x1,y1,x2,y2]

def _expand_square(b, w, h, margin=MARGIN):
    x1,y1,x2,y2 = b
    cx, cy = (x1+x2)/2.0, (y1+y2)/2.0
    side = int(max(x2-x1, y2-y1) * (1.0+margin))
    half = side//2
    nx1, ny1 = max(0, int(cx-half)), max(0, int(cy-half))
    nx2, ny2 = min(w, nx1+side), min(h, ny1+side)
    side = min(nx2-nx1, ny2-ny1)
    return [nx1, ny1, nx1+side, ny1+side]

def _sampled_indices(n, k=8):
    if n<=0: return []
    if k>=n: return list(range(n))
    step = n/float(k)
    return [int(i*step) for i in range(k)]

# Add near the top of your ROI cell (before _fixed_roi_for_clip)
import threading
_tls = threading.local()

def _get_mp():
    # one Hands/Pose per thread; created once on first use
    if not hasattr(_tls, "hands"):
        import mediapipe as mp
        _tls.hands = mp.solutions.hands.Hands(
            static_image_mode=False, max_num_hands=2,
            min_detection_confidence=0.5, min_tracking_confidence=0.5
        )
        _tls.pose = mp.solutions.pose.Pose(
            static_image_mode=False, model_complexity=0,
            enable_segmentation=False,
            min_detection_confidence=0.5, min_tracking_confidence=0.5
        )
    return _tls.hands, _tls.pose


def _fixed_roi_for_clip(frames_bgr):
    try:
        hands, pose = _get_mp()
    except Exception:
        hands = pose = None
    H, W = frames_bgr[0].shape[:2]
    boxes = []
    for idx in _sampled_indices(len(frames_bgr), k=min(8, len(frames_bgr))):
        f = frames_bgr[idx]
        rgb = cv2.cvtColor(f, cv2.COLOR_BGR2RGB) if hands else None
        lm = []
        if hands:
            rh = hands.process(rgb)
            if rh.multi_hand_landmarks:
                for hl in rh.multi_hand_landmarks:
                    for p in hl.landmark:
                        lm.append((p.x*W, p.y*H))
        if pose:
            rp = pose.process(rgb)
            if rp.pose_landmarks:
                for p in rp.pose_landmarks.landmark:
                    y = p.y*H
                    if y < 0.75*H:
                        lm.append((p.x*W, y))
        if lm:
            b = _bbox_from_landmarks(lm, W, H)
            if b: boxes.append(b)
    if not boxes:
        side = min(W,H); cx=(W-side)//2; cy=(H-side)//2
        return [cx,cy,cx+side,cy+side]
    mean_box = np.array(boxes).mean(axis=0).astype(int).tolist()
    return _expand_square(mean_box, W, H, margin=MARGIN)

def _iter_frames_cv(path, fps_target=FPS_TARGET):
    cap = cv2.VideoCapture(path)
    if not cap.isOpened(): return []
    in_fps = cap.get(cv2.CAP_PROP_FPS) or fps_target
    stride = max(1, int(round(in_fps / fps_target)))
    frames = []; i = 0
    while True:
        ok, f = cap.read()
        if not ok: break
        if (i % stride) == 0:
            frames.append(f)
        i += 1
    cap.release()
    return frames

def _process_one(row):
    vid   = int(row["video_id"])
    src   = row["path"]
    gloss = row["gloss"]
    label = int(row["label"])
    split = row["split"]
    out_path = out_dir / f"{str(vid).zfill(5)}.mp4"
    if out_path.exists():
        return {"video_id": vid, "path": str(out_path), "gloss": gloss, "label": label, "split": split, "ok": True, "reason": "exists"}
    try:
        frames = _iter_frames_cv(src, fps_target=FPS_TARGET)
        if not frames:
            return {"video_id": vid, "path": str(out_path), "gloss": gloss, "label": label, "split": split, "ok": False, "reason": "decode_failed"}
        H, W = frames[0].shape[:2]
        x1,y1,x2,y2 = _fixed_roi_for_clip(frames)
        proc, pipe = open_ffmpeg_writer(out_path, OUT_SIZE, OUT_SIZE, fps=FPS_TARGET, use_nvenc=USE_NVENC, crf=CRF_OR_CQ)
        for f in frames:
            crop = f[y1:y2, x1:x2]
            if crop.size == 0:
                side = min(W,H); cx=(W-side)//2; cy=(H-side)//2
                crop = f[cy:cy+side, cx:cx+side]
            out = cv2.resize(crop, (OUT_SIZE, OUT_SIZE), interpolation=cv2.INTER_AREA)
            if not out.flags['C_CONTIGUOUS']:
                out = np.ascontiguousarray(out)
            write_frame(pipe, out)
        close_ffmpeg_writer(proc, pipe)
        return {"video_id": vid, "path": str(out_path), "gloss": gloss, "label": label, "split": split, "ok": True, "reason": "ok"}
    except Exception as e:
        return {"video_id": vid, "path": str(out_path), "gloss": gloss, "label": label, "split": split, "ok": False, "reason": f"err:{e}"}

records = df[["video_id","path","gloss","label","split"]].to_dict("records")
print(f"[ROI] Threads | workers={NUM_WORKERS} | records={len(records)}")

results = []
with ThreadPoolExecutor(max_workers=NUM_WORKERS) as ex:
    for res in tqdm(ex.map(_process_one, records, chunksize=4), total=len(records), desc="ROI"):
        results.append(res)

res_df = pd.DataFrame(results).sort_values("video_id")
ok_df  = res_df[res_df["ok"]==True].copy()
bad_df = res_df[res_df["ok"]==False].copy()

roi_manifest = data_dir / "manifest_nslt2000_roi.csv"
ok_df[["video_id","path","gloss","label","split"]].to_csv(roi_manifest, index=False)

fail_csv = root / "runs" / "nslt2000_roi_failures.csv"
bad_df.to_csv(fail_csv, index=False)

print(f"[ROI] Done. Wrote {len(ok_df)} clips  | failures: {len(bad_df)}")
print(f"Saved manifest: {roi_manifest}")
print(f"Failures logged: {fail_csv}")
if len(bad_df):
    print("Top failure reasons:", bad_df['reason'].value_counts().head(5).to_dict())

[ROI] Threads | workers=4 | records=11980


ROI:   0%|          | 0/11980 [00:00<?, ?it/s]

I0000 00:00:1762885948.862228 4192563 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1762885948.905372 4192655 gl_context.cc:357] GL version: 3.2 (OpenGL ES 3.2 NVIDIA 575.64.03), renderer: NVIDIA GeForce RTX 5080/PCIe/SSE2
I0000 00:00:1762885948.909062 4192563 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
W0000 00:00:1762885948.912063 4192633 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1762885948.922359 4192635 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
I0000 00:00:1762885948.936028 4192680 gl_context.cc:357] GL version: 3.2 (OpenGL ES 3.2 NVIDIA 575.64.03), renderer: NVIDIA GeForce RTX 5080/PCIe/SSE2
I0000 00:00:1762885948.945175 4192561 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 0

[ROI] Done. Wrote 355 clips  | failures: 11625
Saved manifest: /home/falasoul/notebooks/USD/AAI-590/Capstone/AAI-590-G3-ASL/data/wlasl_preprocessed/manifest_nslt2000_roi.csv
Failures logged: /home/falasoul/notebooks/USD/AAI-590/Capstone/AAI-590-G3-ASL/runs/nslt2000_roi_failures.csv
Top failure reasons: {"err:name 'subprocess' is not defined": 11625}


### Cell E — Quick visual QA (before vs after)

In [None]:
# === Cell E — Visual QA: show before/after frames for a few samples (robust) ===
import random, os
import numpy as np
import matplotlib.pyplot as plt
import cv2

# Optional: reproducibility for the picks
random.seed(42)

# --- robust single-frame reader: try OpenCV, then Decord mid-frame ---
def read_one_frame_robust(path):
    # 1) Try OpenCV (BGR)
    cap = cv2.VideoCapture(path)
    ok, frame = cap.read()
    cap.release()
    if ok and frame is not None:
        return frame  # BGR uint8

    # 2) Fallback: Decord mid-frame (RGB)
    try:
        import decord
        decord.bridge.set_bridge('torch')  # safe even if torch not used
        vr = decord.VideoReader(path)
        if len(vr) == 0:
            return None
        mid = len(vr) // 2
        fr = vr[mid].asnumpy()  # HxWxC RGB
        # convert to BGR to keep downstream consistent
        return fr[..., ::-1].copy()
    except Exception:
        return None

# pick samples
S = 6
N = min(S, len(df_roi))
idxs = random.sample(range(len(df_roi)), k=N)

plt.figure(figsize=(10, 2*N))
for i, ridx in enumerate(idxs, 1):
    row = df_roi.iloc[ridx]
    vid = str(row["video_id"]).zfill(5)
    p_out = row["path"]

    # find matching raw path from merged raw manifest
    _raw = df.loc[df["video_id"] == row["video_id"], "path"]
    p_in = _raw.values[0] if len(_raw) else None

    f_in  = read_one_frame_robust(p_in)  if p_in  and os.path.exists(p_in)  else None
    f_out = read_one_frame_robust(p_out) if p_out and os.path.exists(p_out) else None

    # left: RAW
    plt.subplot(N, 2, 2*i-1); plt.axis("off")
    plt.title(f"RAW {vid}\n{os.path.basename(p_in) if p_in else 'NA'}", fontsize=9)
    if f_in is not None:
        plt.imshow(cv2.cvtColor(f_in, cv2.COLOR_BGR2RGB))
    else:
        plt.imshow(np.zeros((224,224,3), dtype=np.uint8))

    # right: ROI
    plt.subplot(N, 2, 2*i); plt.axis("off")
    plt.title(f"ROI {vid}\n{os.path.basename(p_out) if p_out else 'NA'}", fontsize=9)
    if f_out is not None:
        plt.imshow(cv2.cvtColor(f_out, cv2.COLOR_BGR2RGB))
    else:
        plt.imshow(np.zeros((224,224,3), dtype=np.uint8))

plt.tight_layout(); plt.show()
