### Cell A — Imports & Config

In [1]:
# === Cell A — Imports, reproducibility, load balanced ROI manifest ===
import os, random
from pathlib import Path

import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader

import cv2
from tqdm.auto import tqdm

# Reproducibility: seed every RNG this notebook touches (Python, NumPy, torch CPU + CUDA).
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)
# NOTE: cuDNN autotuning is enabled and deterministic mode is disabled, so GPU
# runs favour speed and may not be bit-for-bit repeatable despite the fixed
# seeds above. Flip both flags if exact repeatability matters more than speed.
torch.backends.cudnn.deterministic = False
torch.backends.cudnn.benchmark = True

# Keep CPU threads tame
torch.set_num_threads(1)

# Data lives one directory above the notebook's working directory.
root = Path("..").resolve()
data_dir = root / "data" / "wlasl_preprocessed"

# Use the specific balanced ROI manifest you showed
man_path = data_dir / "manifest_nslt2000_roi_top104_balanced_clean.csv"
# Raise explicitly instead of using `assert`: assertions are stripped when
# Python runs with -O, which would defer the failure to a less clear
# read_csv error further down.
if not man_path.exists():
    raise FileNotFoundError(f"Manifest not found: {man_path}")

df = pd.read_csv(man_path)
print("Loaded:", man_path)
print(f"Samples: {len(df)} | classes={df['gloss'].nunique()}")
print("Columns:", df.columns.tolist())

# Sanity check: label_new is expected to be a contiguous 0..C-1 re-indexing.
print("label_new min/max:", df["label_new"].min(), df["label_new"].max())
print("label_new nunique:", df["label_new"].nunique())

# Last expression of the cell: the notebook renders the first rows.
df.head()


Loaded: /home/falasoul/notebooks/USD/AAI-590/Capstone/AAI-590-G3-ASL/data/wlasl_preprocessed/manifest_nslt2000_roi_top104_balanced_clean.csv
Samples: 1159 | classes=104
Columns: ['video_id', 'path', 'gloss', 'label', 'split', 'exists', 'label_new']
label_new min/max: 0 103
label_new nunique: 104


Unnamed: 0,video_id,path,gloss,label,split,exists,label_new
0,639,/home/falasoul/notebooks/USD/AAI-590/Capstone/...,accident,8,train,True,0
1,624,/home/falasoul/notebooks/USD/AAI-590/Capstone/...,accident,8,train,True,0
2,632,/home/falasoul/notebooks/USD/AAI-590/Capstone/...,accident,8,train,True,0
3,623,/home/falasoul/notebooks/USD/AAI-590/Capstone/...,accident,8,train,True,0
4,65009,/home/falasoul/notebooks/USD/AAI-590/Capstone/...,accident,8,train,True,0


### Cell B — WLASLDataset (using ROI + label_new)

In [2]:
# === Cell B — WLASLDataset (ROI, label_new) with safe video read ===
import torch, numpy as np, cv2, decord
from torch.utils.data import Dataset
# Select decord's 'torch' bridge. Presumably this makes VideoReader.get_batch()
# return torch tensors; the clip loader in this cell calls .float() and
# .permute() directly on that result.
decord.bridge.set_bridge('torch')

def _resize_112(frame_tchw: torch.Tensor) -> torch.Tensor:
    """Resize every frame of a [T, C, H, W] clip to 112x112.

    The clip is copied to CPU and viewed channels-last as a NumPy array so
    that ``cv2.resize`` (area interpolation) can handle it one frame at a
    time. The result is a float32 CPU tensor laid out as [T, C, 112, 112].
    """
    n_frames, n_channels, _, _ = frame_tchw.shape
    frames_hwc = frame_tchw.permute(0, 2, 3, 1).cpu().numpy()

    # Pre-allocate the output so a zero-length clip still yields a valid array.
    resized = np.empty((n_frames, 112, 112, n_channels), dtype=np.float32)
    for idx, frame in enumerate(frames_hwc):
        resized[idx] = cv2.resize(frame, (112, 112), interpolation=cv2.INTER_AREA)

    # Back to channels-first for the rest of the pipeline.
    return torch.from_numpy(resized).permute(0, 3, 1, 2)

def _normalize(frame_tchw, mean=(0.45,)*3, std=(0.225,)*3):
    mean = torch.tensor(mean, dtype=frame_tchw.dtype, device=frame_tchw.device)[None, :, None, None]
    std  = torch.tensor(std,  dtype=frame_tchw.dtype, device=frame_tchw.device)[None, :, None, None]
    return (frame_tchw - mean) / std

def uniform_temporal_indices(n_total, clip_len, stride):
    """Pick ``clip_len`` frame indices spaced ``stride`` apart from a video.

    * No frames (``n_total <= 0``): every index is 0.
    * Video long enough for the full strided window: the window is centred.
    * Video too short: sampling starts at frame 0 and indices that would run
      past the end are clamped to the last frame.
    """
    if n_total <= 0:
        return [0] * clip_len

    steps = range(clip_len)
    # Number of source frames spanned by a full strided window.
    span = (clip_len - 1) * stride + 1

    if n_total < span:
        last_frame = n_total - 1
        return [min(step * stride, last_frame) for step in steps]

    offset = (n_total - span) // 2
    return [offset + step * stride for step in steps]

class WLASLDataset(Dataset):
    """Video-clip dataset driven by a manifest DataFrame.

    Each item is ``(clip, label, path)`` where ``clip`` is a normalised
    float tensor shaped [T, C, 112, 112], ``label`` is the integer in the
    ``label_new`` column and ``path`` is the value of the ``path`` column.
    """

    def __init__(self, df, clip_len=32, stride=2, train=False):
        # Re-index so positional access in __getitem__ lines up with 0..N-1.
        self.df = df.reset_index(drop=True)
        self.clip_len = clip_len
        self.stride = stride
        # Stored for callers; not consulted by the methods below.
        self.train = train

    def __len__(self):
        return len(self.df)

    def _safe_load_clip(self, path: str) -> torch.Tensor:
        """Decode, resize and normalise one clip; never raises.

        On any failure a warning is printed and an all-zero clip of shape
        [clip_len, 3, 112, 112] is returned so that training can continue.
        """
        try:
            reader = decord.VideoReader(path)
            frame_count = len(reader)
            if frame_count <= 0:
                raise RuntimeError("no frames")

            picks = uniform_temporal_indices(frame_count, self.clip_len, self.stride)
            frames_thwc = reader.get_batch(picks)                  # [T,H,W,C]
            clip = (frames_thwc.float() / 255.0).permute(0, 3, 1, 2)  # [T,C,H,W]
            return _normalize(_resize_112(clip))
        except Exception as err:
            # Deliberate best-effort: report the problem but keep the epoch running.
            print(f"[WARN] Failed to read video {path}: {err} â€” using zero clip.")
            return torch.zeros((self.clip_len, 3, 112, 112), dtype=torch.float32)

    def __getitem__(self, i):
        record = self.df.iloc[i]
        path = record["path"]
        # label_new is the contiguous 0..C-1 class index.
        label = int(record["label_new"])
        return self._safe_load_clip(path), label, path


#### Cell C — Split DataFrames & DataLoaders (single worker)

In [3]:
# === Cell C — Splits + DataLoaders (no multiprocessing) ===

# Partition the manifest by its "split" column; each part gets a fresh 0..N-1 index.
train_df, val_df, test_df = (
    df[df["split"] == split_name].reset_index(drop=True)
    for split_name in ("train", "val", "test")
)

print("Split sizes:", len(train_df), "train |", len(val_df), "val |", len(test_df), "test")

clip_len, stride = 32, 2
batch_size = 4   # small to be safe on GPU

train_ds = WLASLDataset(train_df, clip_len=clip_len, stride=stride, train=True)
val_ds, test_ds = (
    WLASLDataset(part, clip_len=clip_len, stride=stride, train=False)
    for part in (val_df, test_df)
)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device:", device)


def _make_loader(dataset, shuffle):
    """Build a DataLoader that loads in the main process (num_workers=0).

    Pinned host memory is requested only when the target device is CUDA.
    """
    return DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=shuffle,
        num_workers=0,
        pin_memory=(device.type == "cuda"),
    )


# Only the training loader shuffles; val/test keep manifest order.
train_loader = _make_loader(train_ds, shuffle=True)
val_loader = _make_loader(val_ds, shuffle=False)
test_loader = _make_loader(test_ds, shuffle=False)

# Pull one batch as a smoke test of shapes and label range.
x_dbg, y_dbg, _ = next(iter(train_loader))
print("Sample batch shape:", x_dbg.shape, "| labels range:", y_dbg.min().item(), "->", y_dbg.max().item())


Split sizes: 831 train | 192 val | 136 test
Device: cuda
Sample batch shape: torch.Size([4, 32, 3, 112, 112]) | labels range: 60 -> 88


#### Cell D — Model: R3D-18 baseline

In [4]:
# === Cell D — R3D-18 baseline model ===
from torchvision.models.video import r3d_18

# Size the classifier head from the manifest: one output per distinct label_new value.
num_classes = df["label_new"].nunique()
print("num_classes:", num_classes)

def build_model(num_classes):
    """Return an R3D-18 video network with a ``num_classes``-way output layer.

    The backbone is created without pretrained weights and its final fully
    connected layer is swapped for a fresh ``nn.Linear`` of the right width.
    """
    net = r3d_18(weights=None)   # we can swap to pretrained later if needed
    net.fc = nn.Linear(net.fc.in_features, num_classes)
    return net

# Instantiate the network and move its parameters to the selected device.
model = build_model(num_classes).to(device)
print("Model on:", device)


num_classes: 104
Model on: cuda


#### Cell E — Optimizer, Scaler, Loss

In [5]:
# === Cell E — Optimizer, scaler, loss ===
from torch.amp import GradScaler

# Training hyper-parameters.
epochs = 20
lr     = 3e-4   # AdamW learning rate
wd     = 1e-2   # AdamW weight decay
amp_on = True  # you can set to False if you want to debug more easily

opt = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=wd)
# Gradient scaler for mixed precision; constructed with enabled=amp_on so the
# same training code path is used whether or not AMP is switched on.
scaler = GradScaler(enabled=amp_on)

criterion = nn.CrossEntropyLoss()  # we can plug class-weights later if we want
# Start below any achievable accuracy so the first evaluated epoch always counts as "best".
best_val_acc = -1.0


#### Cell F — run_epoch (with correct [B,C,T,H,W] permute)

In [6]:
# === Cell F — Metrics + epoch loop ===

def top1_acc(logits, y):
    """Return the fraction of rows whose highest-scoring class equals ``y``.

    ``logits`` is [B, num_classes]; ``y`` is [B] class indices. The result is
    a Python float.
    """
    predicted = logits.argmax(dim=1)
    hits = predicted.eq(y).float()
    return hits.mean().item()

def run_epoch(loader, train=True):
    """Run one pass over ``loader`` and return ``(mean_loss, mean_top1_acc)``.

    Uses the notebook-level ``model``, ``opt``, ``scaler``, ``criterion``,
    ``device`` and ``amp_on``.

    Args:
        loader: iterable of ``(x, y, _)`` batches, with ``x`` laid out as
            [B, T, C, H, W] and ``y`` holding class indices.
        train: if true, the model is put in train mode and the optimizer is
            stepped after every batch. If false, the model is put in eval
            mode and the forward pass runs with autograd disabled.

    Returns:
        Sample-weighted mean loss and mean top-1 accuracy as Python floats,
        or ``(nan, nan)`` when the loader yields no samples.
    """
    if train:
        model.train()
    else:
        model.eval()

    total_loss = 0.0
    total_acc  = 0.0
    total_n    = 0

    opt.zero_grad(set_to_none=True)

    for x, y, _ in loader:
        # x: [B, T, C, H, W]  --> [B, C, T, H, W] for r3d_18
        x = x.permute(0, 2, 1, 3, 4).contiguous()
        x = x.to(device, non_blocking=True)
        y = y.to(device, non_blocking=True)

        # Track gradients only while training: during evaluation there is no
        # backward pass, so building an autograd graph just wastes memory.
        with torch.set_grad_enabled(bool(train)), \
                torch.amp.autocast(device_type=device.type, enabled=amp_on):
            logits = model(x)
            loss = criterion(logits, y)

        if train:
            scaler.scale(loss).backward()
            scaler.step(opt)
            scaler.update()
            opt.zero_grad(set_to_none=True)

        with torch.no_grad():
            acc = top1_acc(logits, y)
            bs  = x.size(0)
            # Weight by batch size so a short final batch is averaged correctly.
            total_loss += loss.item() * bs
            total_acc  += acc * bs
            total_n    += bs

    if total_n == 0:
        # Empty loader (e.g. an empty split): report "no data" instead of
        # failing with ZeroDivisionError.
        return float("nan"), float("nan")

    return total_loss / total_n, total_acc / total_n


#### Cell G — Training Loop

In [7]:
# === Cell G — Training Loop ===

# One training pass followed by one validation pass per epoch.
for epoch in range(1, epochs + 1):
    tr_loss, tr_acc = run_epoch(train_loader, train=True)
    va_loss, va_acc = run_epoch(val_loader,   train=False)

    print(f"Epoch {epoch:02d}/{epochs} | "
          f"train loss {tr_loss:.4f} acc {tr_acc:.3f} | "
          f"val loss {va_loss:.4f} acc {va_acc:.3f}")

    # Checkpoint only on a strict improvement in validation accuracy; the
    # file is overwritten each time, so it always holds the best weights so far.
    if va_acc > best_val_acc:
        best_val_acc = va_acc
        torch.save(model.state_dict(), "best_r3d18_roi_top104.pt")
        print(f"  âžœ New best val acc={best_val_acc:.3f} (model saved)")


Epoch 01/20 | train loss 4.9853 acc 0.008 | val loss 5.0089 acc 0.005
  âžœ New best val acc=0.005 (model saved)
Epoch 02/20 | train loss 4.7458 acc 0.006 | val loss 4.7605 acc 0.005
Epoch 03/20 | train loss 4.7239 acc 0.012 | val loss 4.9171 acc 0.005
Epoch 04/20 | train loss 4.7265 acc 0.012 | val loss 4.7809 acc 0.010
  âžœ New best val acc=0.010 (model saved)
Epoch 05/20 | train loss 4.6681 acc 0.013 | val loss 5.0054 acc 0.010
Epoch 06/20 | train loss 4.6594 acc 0.017 | val loss 5.3299 acc 0.010
Epoch 07/20 | train loss 4.6417 acc 0.022 | val loss 4.7199 acc 0.000
Epoch 08/20 | train loss 4.6183 acc 0.016 | val loss 4.8251 acc 0.005
Epoch 09/20 | train loss 4.5848 acc 0.013 | val loss 4.7419 acc 0.005
Epoch 10/20 | train loss 4.5676 acc 0.016 | val loss 5.2373 acc 0.021
  âžœ New best val acc=0.021 (model saved)
Epoch 11/20 | train loss 4.5531 acc 0.028 | val loss 5.2299 acc 0.016
Epoch 12/20 | train loss 4.5194 acc 0.032 | val loss 4.7472 acc 0.021
Epoch 13/20 | train loss 4.4968