###  Cell A â€” Imports, config, run tag

In [1]:
# === Cell A â€” Imports, CONFIG, run tag ===
from pathlib import Path
import sys, yaml, json
from datetime import datetime

# --- project paths ---
root = Path("..").resolve()
if str(root) not in sys.path:
    sys.path.insert(0, str(root))
if str(root / "src") not in sys.path:
    sys.path.insert(0, str(root / "src"))

# --- load global YAML config ---
cfg_path = root / "configs" / "wlasl100.yaml"
CFG = yaml.safe_load(open(cfg_path, "r"))

# === ROI manifest for top-104 balanced classes ===
# CHANGE THIS if your cleaned manifest has a different name
DATA_MANIFEST = root / "data" / "wlasl_preprocessed" / "manifest_nslt2000_roi_top104_balanced_clean.csv"

assert DATA_MANIFEST.exists(), f"Missing manifest: {DATA_MANIFEST}"

# --- base CONFIG (you can tweak) ---
CONFIG = {
    # data
    "clip_len":        32,        # number of frames per clip
    "frame_stride":    2,
    "batch_size":      8,
    "num_workers":     4,
    "use_weighted_sampler": True, # balance classes via sampler

    # model
    "backbone":        "r3d18_k400",  # ["c3dlite_gn", "r3d18_k400", "r2plus1d_k400"]
    "dropout":         0.2,
    "label_smoothing": 0.0,       # 0.0..0.2
    "normalize":       "kinetics",# ["kinetics","none"]
    "amp":             True,      # mixed precision
    "compile":         True,      # torch.compile if available
    "seed":            CFG["wlasl"]["split_seed"],

    # optimization
    "epochs":          25,
    "lr":              1e-4,      # lower LR for pretrained
    "weight_decay":    1e-5,
    "grad_accum":      1,
    "warmup_epochs":   2,
    "cosine_eta_min":  1e-5,

    # staged fine-tuning (optional, wired in for r3d18_k400)
    "freeze_backbone": True,      # stage 1: train head only
    "unfreeze_at_epoch": 5,       # stage 2: unfreeze later
    "unfreeze_scope":  "layer4",  # deepest block

    # resume / checkpoints
    "resume":          "",        # e.g. "checkpoints/best_roi_r3d18.pt"
    "save_every_epoch": False,
}

# derived paths
CKPT_DIR = root / CFG["paths"]["checkpoints_dir"]
LOG_DIR  = root / CFG["paths"]["logs_dir"]
CKPT_DIR.mkdir(parents=True, exist_ok=True)
LOG_DIR.mkdir(parents=True, exist_ok=True)

# record this run config (nice for reproducibility)
stamp = datetime.utcnow().strftime("%Y%m%dT%H%M%SZ")
run_tag = f"roi_top104_{CONFIG['backbone']}_T{CONFIG['clip_len']}_B{CONFIG['batch_size']}_{stamp}"
with open(LOG_DIR / f"{run_tag}.config.json", "w") as f:
    json.dump(CONFIG, f, indent=2)

print("Run tag:", run_tag)
print("Manifest:", DATA_MANIFEST)


Run tag: roi_top104_r3d18_k400_T32_B8_20251117T041330Z
Manifest: /home/falasoul/notebooks/USD/AAI-590/Capstone/AAI-590-G3-ASL/data/wlasl_preprocessed/manifest_nslt2000_roi_top104_balanced_clean.csv


### Cell B â€” Dataset, Kinetics normalization, DataLoader

In [2]:
# === Cell B â€” Dataset, normalization, loaders ===
import torch, torch.nn.functional as F, numpy as np, pandas as pd
from torch.utils.data import DataLoader, WeightedRandomSampler

from src.utils.seed import seed_everything
from src.data.wlasl_ds import WLASLDataset
import src.data.wlasl_ds as wds_mod

# ---------- reproducibility ----------
seed_everything(CONFIG["seed"])

# ---------- Kinetics-style normalization ----------
def kinetics_normalize(x):
    # x: [T,C,H,W], float32 in [0,1]
    mean = torch.tensor((0.432, 0.394, 0.376), dtype=x.dtype, device=x.device)[None,:,None,None]
    std  = torch.tensor((0.228, 0.221, 0.223), dtype=x.dtype, device=x.device)[None,:,None,None]
    return (x - mean) / std

if CONFIG["normalize"] == "kinetics":
    wds_mod._normalize = kinetics_normalize
elif CONFIG["normalize"] == "none":
    wds_mod._normalize = lambda x: x

# ---------- load ROI manifest ----------
m = pd.read_csv(DATA_MANIFEST)
assert {"path","label_new","split"}.issubset(m.columns), m.columns

# Use label_new as our contiguous label
m = m.copy()
m["label"] = m["label_new"].astype(int)
num_classes = m["label"].nunique()
print("num_classes:", num_classes)

clip_len = CONFIG["clip_len"]
stride   = CONFIG["frame_stride"]
bs       = CONFIG["batch_size"]
nw       = CONFIG["num_workers"]

train_df = m[m.split == "train"].reset_index(drop=True)
val_df   = m[m.split == "val"].reset_index(drop=True)
test_df  = m[m.split == "test"].reset_index(drop=True)

train_ds = WLASLDataset(train_df, clip_len=clip_len, stride=stride, train=True)
val_ds   = WLASLDataset(val_df,   clip_len=clip_len, stride=stride, train=False)
test_ds  = WLASLDataset(test_df,  clip_len=clip_len, stride=stride, train=False)

if CONFIG["use_weighted_sampler"]:
    counts  = train_df["label"].value_counts().to_dict()
    weights = train_df["label"].map(lambda y: 1.0 / counts[y]).values.astype(np.float32)
    sampler = WeightedRandomSampler(weights, num_samples=len(weights), replacement=True)
    train_loader = DataLoader(train_ds, batch_size=bs, sampler=sampler,
                              num_workers=nw, pin_memory=True)
else:
    train_loader = DataLoader(train_ds, batch_size=bs, shuffle=True,
                              num_workers=nw, pin_memory=True)

val_loader  = DataLoader(val_ds,  batch_size=bs, shuffle=False,
                         num_workers=nw, pin_memory=True)
test_loader = DataLoader(test_ds, batch_size=bs, shuffle=False,
                         num_workers=nw, pin_memory=True)

print(f"Splits | train={len(train_ds)} val={len(val_ds)} test={len(test_ds)} | classes={num_classes}")


num_classes: 104
Splits | train=831 val=192 test=136 | classes=104


#### Cell C â€” Models (C3Dlite and R3D18 K400, extendable to R(2+1)D later)

In [3]:
# === Cell C â€” Model factory (C3Dlite GN, R3D18 K400, extensible) ===
import torch.nn as nn
from torchvision.models.video import r3d_18, R3D_18_Weights

# Small 3D GN CNN (from your old code)
class C3DliteGN(nn.Module):
    def __init__(self, num_classes=100, drop=0.5):
        super().__init__()
        def gn(c): return nn.GroupNorm(num_groups=8, num_channels=c)
        def block(cin, cout, pool_t=2):
            return nn.Sequential(
                nn.Conv3d(cin, cout, 3, padding=1, bias=False),
                gn(cout), nn.ReLU(inplace=True),
                nn.MaxPool3d(kernel_size=(pool_t,2,2), stride=(pool_t,2,2)),
            )
        self.stem = nn.Sequential(
            nn.Conv3d(3, 32, 3, padding=1, bias=False),
            gn(32), nn.ReLU(inplace=True),
        )
        self.b1 = block(32,  64)
        self.b2 = block(64, 128)
        self.b3 = block(128, 256)
        self.b4 = block(256, 256)
        self.head = nn.Sequential(
            nn.AdaptiveAvgPool3d(1),
            nn.Flatten(),
            nn.Dropout(drop),
            nn.Linear(256, num_classes),
        )

    def forward(self, x):             # x [B,T,C,H,W]
        x = x.permute(0,2,1,3,4).contiguous()   # [B,C,T,H,W]
        x = self.stem(x)
        x = self.b1(x); x = self.b2(x); x = self.b3(x); x = self.b4(x)
        return self.head(x)

class R3D18WithPermute(nn.Module):
    """Wrap r3d_18 to accept [B, T, C, H, W] and permute internally."""
    def __init__(self, num_classes, pretrained=True, dropout=0.2):
        super().__init__()
        weights = R3D_18_Weights.KINETICS400_V1 if pretrained else None
        self.backbone = r3d_18(weights=weights)
        in_feats = self.backbone.fc.in_features
        self.backbone.fc = nn.Linear(in_feats, num_classes)

    def forward(self, x):  # x: [B, T, C, H, W]
        x = x.permute(0, 2, 1, 3, 4).contiguous()  # [B,C,T,H,W]
        return self.backbone(x)

def make_model(backbone, num_classes, drop):
    if backbone == "r3d18_k400":
        return R3D18WithPermute(num_classes=num_classes, pretrained=True, dropout=drop)
    elif backbone == "c3dlite_gn":
        return C3DliteGN(num_classes=num_classes, drop=drop)
    # extend here later for r2plus1d_k400, mc3_k400, etc.
    raise ValueError(f"Unknown backbone: {backbone}")

# build model
import torch
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
torch.set_float32_matmul_precision('high')

model = make_model(CONFIG["backbone"], num_classes=num_classes, drop=CONFIG["dropout"]).to(device)

if CONFIG["compile"]:
    try:
        model = torch.compile(model)
        print("torch.compile: ON")
    except Exception as e:
        print("torch.compile skipped:", e)

print("Backbone:", CONFIG["backbone"])
print("Device:", device)


torch.compile: ON
Backbone: r3d18_k400
Device: cuda


#### Cell D â€” Optimizer, scheduler, scaler, optional staged unfreezing

In [4]:
# === Cell D â€” Optimizer, scheduler, GradScaler, staged unfreezing ===
from torch.optim import AdamW
from torch.optim.lr_scheduler import CosineAnnealingLR
from torch.amp import GradScaler

epochs   = int(CONFIG["epochs"])
lr       = float(CONFIG["lr"])
wd       = float(CONFIG["weight_decay"])
amp_on   = bool(CONFIG["amp"])
gs       = int(CONFIG["grad_accum"])
warmup   = int(CONFIG["warmup_epochs"])
eta_min  = float(CONFIG["cosine_eta_min"])

opt   = AdamW(model.parameters(), lr=lr, weight_decay=wd)
sched = CosineAnnealingLR(opt, T_max=max(1, epochs - warmup), eta_min=eta_min)
scaler = GradScaler(device.type if device.type == "cuda" else "cpu", enabled=amp_on)

best_val = -1.0
start_epoch = 0

# optional resume
resume = CONFIG.get("resume") or ""
if resume:
    from src.utils.checkpoints import load_checkpoint
    rp = root / resume
    if rp.exists():
        start_epoch, best_val = load_checkpoint(str(rp), model, opt, scaler)
        print(f"Resumed from {rp} @ epoch {start_epoch} best={best_val:.3f}")

def maybe_freeze_backbone(epoch):
    """Stage-1 freeze backbone, then unfreeze scope (e.g. layer4) after unfreeze_at_epoch."""
    if CONFIG["backbone"] != "r3d18_k400":
        return

    freeze = CONFIG.get("freeze_backbone", False)
    unfreeze_at = CONFIG.get("unfreeze_at_epoch", 0)
    scope = CONFIG.get("unfreeze_scope", "layer4")

    if not freeze:
        return

    if epoch == 0:
        # freeze all backbone parameters initially
        for name, p in model.named_parameters():
            if "backbone" in name:
                p.requires_grad = False
        # but keep final FC trainable
        for name, p in model.named_parameters():
            if "backbone.fc" in name:
                p.requires_grad = True
        print("[FT] Backbone frozen except final fc.")

    if epoch == unfreeze_at:
        # unfreeze selected scope
        for name, p in model.named_parameters():
            if f"backbone.{scope}" in name:
                p.requires_grad = True
        print(f"[FT] Unfroze backbone scope: {scope}")


#### Cell E â€” Epoch runner + Training loop + Checkpoints

In [5]:
# === Cell E â€” Train / eval loops, checkpointing ===
from src.utils.checkpoints import save_checkpoint

def top1_acc(logits, y):
    with torch.no_grad():
        return (logits.argmax(1) == y).float().mean().item()

def run_epoch(loader, train=True, epoch=0):
    model.train() if train else model.eval()
    tot_loss = tot_acc = tot_n = 0.0
    opt.zero_grad(set_to_none=True)

    for step, (x, y, _) in enumerate(loader):
        x = x.to(device, non_blocking=True)
        y = y.to(device, non_blocking=True)

        with torch.amp.autocast(device_type=device.type, enabled=amp_on):
            logits = model(x)
            loss = F.cross_entropy(
                logits, y,
                label_smoothing=CONFIG["label_smoothing"]
            ) / gs

        if train:
            scaler.scale(loss).backward()
            if (step + 1) % gs == 0:
                scaler.step(opt)
                scaler.update()
                opt.zero_grad(set_to_none=True)

        with torch.no_grad():
            bs = x.size(0)
            tot_loss += (loss.item() * gs) * bs
            tot_acc  += top1_acc(logits, y) * bs
            tot_n    += bs

    if train:
        if epoch < warmup:
            # linear warmup
            warm_lr = lr * float(epoch + 1) / max(1, warmup)
            for g in opt.param_groups:
                g["lr"] = warm_lr
        else:
            sched.step()

    return tot_loss / max(1, tot_n), tot_acc / max(1, tot_n)

# ----- MAIN TRAINING LOOP -----
for epoch in range(start_epoch, epochs):
    maybe_freeze_backbone(epoch)

    tr_loss, tr_acc = run_epoch(train_loader, train=True,  epoch=epoch)
    va_loss, va_acc = run_epoch(val_loader,   train=False, epoch=epoch)

    print(f"Epoch {epoch+1:03d}/{epochs} | "
          f"train loss {tr_loss:.4f} acc {tr_acc:.3f} | "
          f"val loss {va_loss:.4f} acc {va_acc:.3f}")

    state = {
        "epoch": epoch,
        "model_state": model.state_dict(),
        "optim_state": opt.state_dict(),
        "scaler_state": scaler.state_dict(),
        "best_metric": best_val,
    }
    # always save last
    save_checkpoint(state, is_best=False, ckpt_dir=str(CKPT_DIR), filename=f"{run_tag}_last.pt")

    if CONFIG["save_every_epoch"]:
        save_checkpoint(state, is_best=False, ckpt_dir=str(CKPT_DIR),
                        filename=f"{run_tag}_epoch_{epoch:04d}.pt")

    if va_acc > best_val:
        best_val = va_acc
        save_checkpoint(state, is_best=True, ckpt_dir=str(CKPT_DIR), filename=f"{run_tag}_best.pt")


[FT] Backbone frozen except final fc.
Epoch 001/25 | train loss 4.7646 acc 0.007 | val loss 4.7375 acc 0.005
Epoch 002/25 | train loss 4.6808 acc 0.012 | val loss 4.6974 acc 0.016
Epoch 003/25 | train loss 4.6265 acc 0.016 | val loss 4.6832 acc 0.005
Epoch 004/25 | train loss 4.5736 acc 0.035 | val loss 4.6754 acc 0.010
Epoch 005/25 | train loss 4.5254 acc 0.030 | val loss 4.6655 acc 0.016
[FT] Unfroze backbone scope: layer4
Epoch 006/25 | train loss 4.4755 acc 0.048 | val loss 4.6502 acc 0.010
Epoch 007/25 | train loss 4.4548 acc 0.047 | val loss 4.6603 acc 0.005
Epoch 008/25 | train loss 4.4253 acc 0.053 | val loss 4.6430 acc 0.021
Epoch 009/25 | train loss 4.3962 acc 0.066 | val loss 4.6476 acc 0.021
Epoch 010/25 | train loss 4.3879 acc 0.082 | val loss 4.6300 acc 0.026
Epoch 011/25 | train loss 4.3482 acc 0.108 | val loss 4.6252 acc 0.031
Epoch 012/25 | train loss 4.3257 acc 0.100 | val loss 4.6111 acc 0.036
Epoch 013/25 | train loss 4.2766 acc 0.123 | val loss 4.5978 acc 0.026
Epo

#### Cell F â€” run_epoch (with correct [B,C,T,H,W] permute)

In [6]:
# === Cell F â€” Metrics + epoch runner (for CNN+BiGRU) ===

def top1_acc(logits, y):
    return (logits.argmax(1) == y).float().mean().item()

def run_epoch(loader, train=True):
    if train:
        model.train()
    else:
        model.eval()

    total_loss = 0.0
    total_acc  = 0.0
    total_n    = 0

    opt.zero_grad(set_to_none=True)

    for x, y, _ in loader:
        x = x.to(device, non_blocking=True)  # [B,T,C,H,W]
        y = y.to(device, non_blocking=True)

        # ðŸ”½ New: permute for 3D CNN
        x = x.permute(0, 2, 1, 3, 4).contiguous()  # [B,C,T,H,W]

        with torch.amp.autocast(device_type=device.type, enabled=amp_on):
            logits = model(x)
            loss = criterion(logits, y)

        if train:
            scaler.scale(loss).backward()
            scaler.unscale_(opt)
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            scaler.step(opt)
            scaler.update()
            opt.zero_grad(set_to_none=True)

        with torch.no_grad():
            acc = top1_acc(logits, y)
            bs  = x.size(0)
            total_loss += loss.item() * bs
            total_acc  += acc * bs
            total_n    += bs

    return total_loss / total_n, total_acc / total_n



#### Cell G â€” Training Loop

In [8]:
# === Cell G â€” Training Loop ===
ckpt_dir = root / "checkpoints"
ckpt_dir.mkdir(exist_ok=True)

best_val_acc = -1.0

for epoch in range(1, epochs + 1):
    tr_loss, tr_acc = run_epoch(train_loader, train=True)
    va_loss, va_acc = run_epoch(val_loader,   train=False)

    print(f"Epoch {epoch:02d}/{epochs} | "
          f"train loss {tr_loss:.4f} acc {tr_acc:.3f} | "
          f"val loss {va_loss:.4f} acc {va_acc:.3f}")

    if va_acc > best_val_acc:
        best_val_acc = va_acc
        save_path = ckpt_dir / "best_r2plus1d_k400_top104.pt"
        torch.save(model.state_dict(), save_path)
        print(f"  âžœ New best val acc={best_val_acc:.3f} ")


Epoch 01/20 | train loss 1.3226 acc 0.936 | val loss 3.2730 acc 0.406
  âžœ New best val acc=0.406 
Epoch 02/20 | train loss 1.1859 acc 0.966 | val loss 3.3076 acc 0.396
Epoch 03/20 | train loss 1.1111 acc 0.969 | val loss 3.2866 acc 0.417
  âžœ New best val acc=0.417 
Epoch 04/20 | train loss 1.0726 acc 0.971 | val loss 3.2002 acc 0.396
Epoch 05/20 | train loss 1.0412 acc 0.976 | val loss 3.3046 acc 0.380
Epoch 06/20 | train loss 1.0268 acc 0.977 | val loss 3.3609 acc 0.401
Epoch 07/20 | train loss 1.0143 acc 0.978 | val loss 3.4485 acc 0.375
Epoch 08/20 | train loss 1.0180 acc 0.980 | val loss 3.5573 acc 0.396
Epoch 09/20 | train loss 0.9648 acc 0.986 | val loss 3.5597 acc 0.411
Epoch 10/20 | train loss 0.9828 acc 0.982 | val loss 3.3082 acc 0.438
  âžœ New best val acc=0.438 
Epoch 11/20 | train loss 0.9577 acc 0.984 | val loss 3.4350 acc 0.422
Epoch 12/20 | train loss 0.9466 acc 0.989 | val loss 3.4187 acc 0.391
Epoch 13/20 | train loss 0.9667 acc 0.982 | val loss 3.5652 acc 0.391


In [9]:
ckpt_dir = root / "checkpoints"
best_path = ckpt_dir / "best_r2plus1d_k400_top104.pt"

model.load_state_dict(torch.load(best_path, map_location=device))
model.to(device)

test_loss, test_acc = run_epoch(test_loader, train=False)
print(f"TEST â€” loss {test_loss:.4f} | acc {test_acc:.3f}")


TEST â€” loss 3.2564 | acc 0.449
