### Cell A â€” Imports & Config

In [1]:
# === Cell A â€” Imports, reproducibility, load balanced ROI manifest ===
import os, random
from pathlib import Path

import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader

import cv2
from tqdm.auto import tqdm

# Reproducibility
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)
torch.backends.cudnn.deterministic = False
torch.backends.cudnn.benchmark = True

# Keep CPU threads tame
torch.set_num_threads(1)

root = Path("..").resolve()
data_dir = root / "data" / "wlasl_preprocessed"

# Use the specific balanced ROI manifest you showed
man_path = data_dir / "manifest_nslt2000_roi_top104_balanced_clean.csv"
assert man_path.exists(), f"Manifest not found: {man_path}"

df = pd.read_csv(man_path)
print("Loaded:", man_path)
print(f"Samples: {len(df)} | classes={df['gloss'].nunique()}")
print("Columns:", df.columns.tolist())

print("label_new min/max:", df["label_new"].min(), df["label_new"].max())
print("label_new nunique:", df["label_new"].nunique())

df.head()


Loaded: /home/falasoul/notebooks/USD/AAI-590/Capstone/AAI-590-G3-ASL/data/wlasl_preprocessed/manifest_nslt2000_roi_top104_balanced_clean.csv
Samples: 1159 | classes=104
Columns: ['video_id', 'path', 'gloss', 'label', 'split', 'exists', 'label_new']
label_new min/max: 0 103
label_new nunique: 104


Unnamed: 0,video_id,path,gloss,label,split,exists,label_new
0,639,/home/falasoul/notebooks/USD/AAI-590/Capstone/...,accident,8,train,True,0
1,624,/home/falasoul/notebooks/USD/AAI-590/Capstone/...,accident,8,train,True,0
2,632,/home/falasoul/notebooks/USD/AAI-590/Capstone/...,accident,8,train,True,0
3,623,/home/falasoul/notebooks/USD/AAI-590/Capstone/...,accident,8,train,True,0
4,65009,/home/falasoul/notebooks/USD/AAI-590/Capstone/...,accident,8,train,True,0


### Cell B â€” WLASLDataset (using ROI + label_new) + WLASLDataset with on-the-fly augmentations

In [2]:
# === Cell B â€” WLASLDataset (ROI, label_new) with safe loading + on-the-fly augmentation ===
import torch, numpy as np, cv2, decord, random
from torch.utils.data import Dataset
decord.bridge.set_bridge('torch')


def _resize_112(frame_tchw: torch.Tensor) -> torch.Tensor:
    T, C, H, W = frame_tchw.shape
    arr = frame_tchw.permute(0, 2, 3, 1).cpu().numpy()
    out = np.empty((T, 112, 112, C), dtype=np.float32)
    for t in range(T):
        out[t] = cv2.resize(arr[t], (112, 112), interpolation=cv2.INTER_AREA)
    return torch.from_numpy(out).permute(0, 3, 1, 2)


def _normalize(frame_tchw, mean=(0.45,)*3, std=(0.225,)*3):
    mean = torch.tensor(mean, dtype=frame_tchw.dtype, device=frame_tchw.device)[None, :, None, None]
    std  = torch.tensor(std,  dtype=frame_tchw.dtype, device=frame_tchw.device)[None, :, None, None]
    return (frame_tchw - mean) / std


def uniform_temporal_indices(n_total, clip_len, stride):
    if n_total <= 0:
        return [0] * clip_len
    wanted = (clip_len - 1) * stride + 1
    if n_total >= wanted:
        start = (n_total - wanted) // 2
        return [start + i * stride for i in range(clip_len)]
    idxs = [min(i * stride, n_total - 1) for i in range(clip_len)]
    return idxs


class WLASLDataset(Dataset):
    def __init__(self, df, clip_len=32, stride=2, train=False):
        self.df = df.reset_index(drop=True)
        self.clip_len = clip_len
        self.stride = stride
        self.train = train

    def __len__(self):
        return len(self.df)

    # --------- augmentation helpers (on-the-fly, train only) ---------
    def _augment(self, frames: torch.Tensor) -> torch.Tensor:
        """
        frames: [T, C, H, W], values in [0,1] (float32).
        Only applied when self.train == True.
        """
        if not self.train:
            return frames

        T, C, H, W = frames.shape

        # 1) Random horizontal flip
        if random.random() < 0.5:
            frames = torch.flip(frames, dims=[3])  # flip width

        # 2) Random Gaussian blur
        if random.random() < 0.3:
            k = random.choice([3, 5])
            fr_np = frames.permute(0, 2, 3, 1).cpu().numpy()  # [T,H,W,C]
            for t in range(T):
                fr_np[t] = cv2.GaussianBlur(fr_np[t], (k, k), 0)
            frames = torch.from_numpy(fr_np).permute(0, 3, 1, 2)

        # 3) Random brightness / contrast
        if random.random() < 0.3:
            alpha = 1.0 + 0.4 * (random.random() - 0.5)   # contrast ~ [0.8, 1.2]
            beta  = 0.1 * (random.random() - 0.5)         # brightness ~ [-0.05, 0.05]
            frames = frames * alpha + beta
            frames = frames.clamp(0.0, 1.0)

        # 4) Random cutout mask (simulate occlusion)
        if random.random() < 0.3:
            mask_size = random.randint(16, 40)
            y0 = random.randint(0, max(0, H - mask_size))
            x0 = random.randint(0, max(0, W - mask_size))
            frames[:, :, y0:y0+mask_size, x0:x0+mask_size] = 0.0

        return frames

    def _safe_load_clip(self, path: str) -> torch.Tensor:
        """
        Try to read a clip with decord. If anything fails, return a dummy zero clip.
        Returned shape: [T, C, H, W], float32 in [0,1].
        """
        try:
            vr = decord.VideoReader(path)
            n = len(vr)
            if n <= 0:
                raise RuntimeError("no frames")

            idxs = uniform_temporal_indices(n, self.clip_len, self.stride)
            batch = vr.get_batch(idxs)        # [T,H,W,C]
            x = batch.float() / 255.0         # [0,1]
            x = x.permute(0, 3, 1, 2)         # [T,C,H,W]
            x = _resize_112(x)                # [T,3,112,112]
            x = self._augment(x)              # on-the-fly aug (train only)
            x = _normalize(x)                 # final normalization
            return x
        except Exception as e:
            print(f"[WARN] Failed to read video {path}: {e} â€” using zero clip.")
            x = torch.zeros(self.clip_len, 3, 112, 112, dtype=torch.float32)
            x = _normalize(x)
            return x

    def __getitem__(self, i):
        row = self.df.iloc[i]
        path = row["path"]
        label = int(row["label_new"])   # contiguous 0..C-1

        x = self._safe_load_clip(path)
        return x, label, path


#### Cell C â€” Split DataFrames & DataLoaders (single worker)

In [3]:
# === Cell C â€” Splits + DataLoaders (no multiprocessing) ===

train_df = df[df["split"] == "train"].reset_index(drop=True)
val_df   = df[df["split"] == "val"].reset_index(drop=True)
test_df  = df[df["split"] == "test"].reset_index(drop=True)

print("Split sizes:", len(train_df), "train |", len(val_df), "val |", len(test_df), "test")

clip_len = 32
stride   = 2
batch_size = 4   # small to be safe on GPU

train_ds = WLASLDataset(train_df, clip_len=clip_len, stride=stride, train=True)
val_ds   = WLASLDataset(val_df,   clip_len=clip_len, stride=stride, train=False)
test_ds  = WLASLDataset(test_df,  clip_len=clip_len, stride=stride, train=False)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device:", device)

train_loader = DataLoader(
    train_ds,
    batch_size=batch_size,
    shuffle=True,
    num_workers=0,              # ðŸ”’ NO worker processes
    pin_memory=(device.type == "cuda"),
)

val_loader = DataLoader(
    val_ds,
    batch_size=batch_size,
    shuffle=False,
    num_workers=0,
    pin_memory=(device.type == "cuda"),
)

test_loader = DataLoader(
    test_ds,
    batch_size=batch_size,
    shuffle=False,
    num_workers=0,
    pin_memory=(device.type == "cuda"),
)

x_dbg, y_dbg, _ = next(iter(train_loader))
print("Sample batch shape:", x_dbg.shape, "| labels range:", y_dbg.min().item(), "->", y_dbg.max().item())


Split sizes: 831 train | 192 val | 136 test
Device: cuda
Sample batch shape: torch.Size([4, 32, 3, 112, 112]) | labels range: 60 -> 88


#### Cell D â€” Model: R3D-18 baseline

In [4]:
# === Cell D â€” CNN + BiGRU model with PRETRAINED ResNet-18 ===

from torchvision.models import resnet18

class CnnBiGRUClassifier(nn.Module):
    def __init__(
        self,
        num_classes: int,
        rnn_hidden: int = 256,
        rnn_layers: int = 1,
        dropout: float = 0.3,
        use_pretrained: bool = True,
    ):
        super().__init__()

        # 2D CNN backbone (ResNet-18)
        # use_pretrained=True -> ImageNet weights, False -> from scratch
        if use_pretrained:
            try:
                # Newer torchvision API
                from torchvision.models import ResNet18_Weights
                weights = ResNet18_Weights.IMAGENET1K_V1
                base = resnet18(weights=weights)
            except Exception:
                # Fallback for older versions
                base = resnet18(pretrained=True)
        else:
            base = resnet18(weights=None)

        # Take everything except the final FC and global pool
        self.cnn = nn.Sequential(*list(base.children())[:-2])  # conv -> layer4
        self.pool = nn.AdaptiveAvgPool2d((1, 1))

        self.cnn_out_dim = base.fc.in_features  # 512 for ResNet-18

        # BiGRU over time
        self.rnn = nn.GRU(
            input_size=self.cnn_out_dim,
            hidden_size=rnn_hidden,
            num_layers=rnn_layers,
            batch_first=True,
            bidirectional=True,
        )

        self.dropout = nn.Dropout(dropout)
        self.fc = nn.Linear(rnn_hidden * 2, num_classes)  # *2 for bidirectional

    def forward(self, x):
        """
        x: [B, T, C, H, W]
        """
        B, T, C, H, W = x.shape

        # Merge batch and time to process frames with 2D CNN
        x = x.view(B * T, C, H, W)              # [B*T, C, H, W]

        feats = self.cnn(x)                     # [B*T, C', h, w]
        feats = self.pool(feats)                # [B*T, C', 1, 1]
        feats = feats.view(B, T, self.cnn_out_dim)  # [B, T, F]

        # BiGRU over temporal dimension
        rnn_out, _ = self.rnn(feats)           # [B, T, 2*hidden]

        # Use last time step
        last = rnn_out[:, -1, :]               # [B, 2*hidden]

        out = self.dropout(last)
        logits = self.fc(out)                  # [B, num_classes]
        return logits

# Build PRETRAINED model
num_classes = df["label_new"].nunique()
print("num_classes:", num_classes)

model = CnnBiGRUClassifier(
    num_classes=num_classes,
    rnn_hidden=256,
    rnn_layers=1,
    dropout=0.3,
    use_pretrained=True,    # ðŸ”´ this is the key difference
).to(device)

print("Model on:", device)


num_classes: 104
Model on: cuda


#### Cell E â€” Optimizer, Scaler, Loss

In [5]:
# === Cell E â€” Optimizer, scaler, loss (pretrained) ===
from torch.amp import GradScaler

epochs = 20
lr     = 1e-4      # ðŸ”½ slightly lower than 3e-4 used for scratch
wd     = 1e-2
amp_on = True

opt = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=wd)
scaler = GradScaler(enabled=amp_on)

criterion = nn.CrossEntropyLoss(label_smoothing=0.1)  # can keep or remove smoothing
best_val_acc = -1.0


#### Cell F â€” run_epoch (with correct [B,C,T,H,W] permute)

In [6]:
# === Cell F â€” Metrics + epoch runner (for CNN+BiGRU) ===

def top1_acc(logits, y):
    return (logits.argmax(1) == y).float().mean().item()

def run_epoch(loader, train=True):
    if train:
        model.train()
    else:
        model.eval()

    total_loss = 0.0
    total_acc  = 0.0
    total_n    = 0

    opt.zero_grad(set_to_none=True)

    for x, y, _ in loader:
        # x is already [B, T, C, H, W] from WLASLDataset
        x = x.to(device, non_blocking=True)
        y = y.to(device, non_blocking=True)

        with torch.amp.autocast(device_type=device.type, enabled=amp_on):
            logits = model(x)
            loss = criterion(logits, y)

        if train:
            scaler.scale(loss).backward()
            scaler.step(opt)
            scaler.update()
            opt.zero_grad(set_to_none=True)

        with torch.no_grad():
            acc = top1_acc(logits, y)
            bs  = x.size(0)
            total_loss += loss.item() * bs
            total_acc  += acc * bs
            total_n    += bs

    return total_loss / total_n, total_acc / total_n


#### Cell G â€” Training Loop

In [None]:
# === Cell G â€” Training Loop ===
from pathlib import Path
import json

root = Path("..").resolve()
ckpt_dir = root / "checkpoints"
report_dir = root / "reports"
ckpt_dir.mkdir(parents=True, exist_ok=True)
report_dir.mkdir(parents=True, exist_ok=True)

# Notebook prefix (sanitized stem)
nb_prefix = Path("06_train_baseline_CNN_BiGRU_aug-RestNet18.ipynb").stem.replace(' ', '_')

best_val_acc = -1.0
history = {"train_loss": [], "train_acc": [], "val_loss": [], "val_acc": []}

best_ckpt_path = ckpt_dir / f"{nb_prefix}_best_cnn_bigru_pretrained_top104.pt"
history_path = report_dir / f"{nb_prefix}_train_history.json"

for epoch in range(1, epochs + 1):
    tr_loss, tr_acc = run_epoch(train_loader, train=True)
    va_loss, va_acc = run_epoch(val_loader,   train=False)

    print(f"Epoch {epoch:02d}/{epochs} | "
          f"train loss {tr_loss:.4f} acc {tr_acc:.3f} | "
          f"val loss {va_loss:.4f} acc {va_acc:.3f}")

    # record history
    history['train_loss'].append(float(tr_loss))
    history['train_acc'].append(float(tr_acc))
    history['val_loss'].append(float(va_loss))
    history['val_acc'].append(float(va_acc))

    # persist history after each epoch (safe)
    with open(history_path, 'w') as fh:
        json.dump(history, fh, indent=2)

    if va_acc > best_val_acc:
        best_val_acc = va_acc
        torch.save(model.state_dict(), str(best_ckpt_path))
        print(f"  âžœ New best val acc={best_val_acc:.3f} (model saved: {best_ckpt_path})")

# final save of history
with open(history_path, 'w') as fh:
    json.dump(history, fh, indent=2)

print('Training finished. History saved to', history_path)
print('Best checkpoint saved to', best_ckpt_path)


Epoch 01/20 | train loss 4.7299 acc 0.010 | val loss 4.6258 acc 0.036
  âžœ New best val acc=0.036 (model saved)
Epoch 02/20 | train loss 4.5225 acc 0.048 | val loss 4.5716 acc 0.062
  âžœ New best val acc=0.062 (model saved)
Epoch 03/20 | train loss 4.3202 acc 0.084 | val loss 4.5735 acc 0.042
Epoch 04/20 | train loss 4.1067 acc 0.138 | val loss 4.4875 acc 0.062
Epoch 05/20 | train loss 3.8339 acc 0.202 | val loss 4.5178 acc 0.068
  âžœ New best val acc=0.068 (model saved)
Epoch 06/20 | train loss 3.5696 acc 0.264 | val loss 4.5277 acc 0.073
  âžœ New best val acc=0.073 (model saved)
Epoch 07/20 | train loss 3.2973 acc 0.333 | val loss 4.5534 acc 0.089
  âžœ New best val acc=0.089 (model saved)
Epoch 08/20 | train loss 2.9894 acc 0.443 | val loss 4.5451 acc 0.109
  âžœ New best val acc=0.109 (model saved)
Epoch 09/20 | train loss 2.6626 acc 0.526 | val loss 4.5590 acc 0.120
  âžœ New best val acc=0.120 (model saved)
Epoch 10/20 | train loss 2.3752 acc 0.619 | val loss 4.6643 acc 0.104

In [None]:
# === Cell H â€” Evaluation & Reporting ===
# This cell loads the prefixed best checkpoint and the training history, runs inference on test_loader,
# computes present-label-safe metrics (confusion matrix, per-class accuracy, classification report),
# and saves artifacts into `reports/` and confirms checkpoint location in `checkpoints/`.

import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix, classification_report
from collections import Counter

# Paths (must match those used in training cell)
root = Path("..").resolve()
ckpt_dir = root / "checkpoints"
report_dir = root / "reports"
nb_prefix = Path("06_train_baseline_CNN_BiGRU_aug-RestNet18.ipynb").stem.replace(' ', '_')
best_ckpt_path = ckpt_dir / f"{nb_prefix}_best_cnn_bigru_pretrained_top104.pt"
history_path = report_dir / f"{nb_prefix}_train_history.json"

assert best_ckpt_path.exists(), f"Best checkpoint not found: {best_ckpt_path}"
assert history_path.exists(), f"Training history not found: {history_path}"

# Load history and plot train/val curves
with open(history_path, 'r') as fh:
    history = json.load(fh)

plt.figure(figsize=(8, 4))
plt.plot(history['train_loss'], label='train_loss')
plt.plot(history['val_loss'], label='val_loss')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend()
plt.title(f"{nb_prefix} â€” Loss curves")
loss_png = report_dir / f"{nb_prefix}_train_val_loss.png"
plt.savefig(loss_png, bbox_inches='tight')
plt.close()

plt.figure(figsize=(8, 4))
plt.plot(history['train_acc'], label='train_acc')
plt.plot(history['val_acc'], label='val_acc')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.legend()
plt.title(f"{nb_prefix} â€” Accuracy curves")
acc_png = report_dir / f"{nb_prefix}_train_val_acc.png"
plt.savefig(acc_png, bbox_inches='tight')
plt.close()

print('Saved train/val plots to:', loss_png, acc_png)

# Load model
model.load_state_dict(torch.load(str(best_ckpt_path), map_location=device))
model.to(device)
model.eval()

# Run inference on test set and collect predictions
y_true = []
y_pred = []
paths = []

with torch.no_grad():
    for x, y, p in tqdm(test_loader, desc='Inference'):
        x = x.to(device)
        logits = model(x)
        preds = logits.argmax(1).cpu().numpy()
        y_cpu = y.cpu().numpy()
        y_true.extend(y_cpu.tolist())
        y_pred.extend(preds.tolist())
        paths.extend(list(p))

y_true = np.array(y_true, dtype=int)
y_pred = np.array(y_pred, dtype=int)

# compute present labels to avoid sklearn target_names mismatch
present_labels = np.union1d(np.unique(y_true), np.unique(y_pred)).astype(int)
labels_list = present_labels.tolist()

# Map label -> gloss (human-readable) using manifest df
label_to_name = {int(r['label_new']): str(r['gloss']) for _, r in df[['label_new', 'gloss']].iterrows()}
names_list = [label_to_name.get(int(lbl), str(int(lbl))) for lbl in labels_list]

# Confusion matrix (raw and normalized by true support)
cm = confusion_matrix(y_true, y_pred, labels=labels_list)
cm_norm = cm.astype(float) / (cm.sum(axis=1, keepdims=True) + 1e-12)

# Plot normalized confusion matrix
plt.figure(figsize=(10, 8))
sns.heatmap(cm_norm, xticklabels=names_list, yticklabels=names_list, cmap='viridis', vmin=0.0, vmax=1.0)
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title(f"{nb_prefix} â€” Normalized Confusion Matrix (labels present: {len(labels_list)})")
plt.xticks(rotation=90)
plt.yticks(rotation=0)
cm_png = report_dir / f"{nb_prefix}_confusion_matrix_norm.png"
plt.savefig(cm_png, bbox_inches='tight')
plt.close()

# Per-class accuracy & support
support = cm.sum(axis=1)
per_class_acc = np.diag(cm) / (support + 1e-12)
per_class_df = pd.DataFrame({
    'label': labels_list,
    'name': names_list,
    'support': support.tolist(),
    'accuracy': per_class_acc.tolist()
})
per_class_csv = report_dir / f"{nb_prefix}_per_class_accuracy.csv"
per_class_df.to_csv(per_class_csv, index=False)

# Classification report (scikit-learn)
clf_report = classification_report(y_true, y_pred, labels=labels_list, target_names=names_list, zero_division=0, output_dict=True)
clf_txt = classification_report(y_true, y_pred, labels=labels_list, target_names=names_list, zero_division=0)

# Save reports
test_report_json = report_dir / f"{nb_prefix}_test_report.json"
with open(test_report_json, 'w') as fh:
    json.dump({
        'model_class': model.__class__.__name__,
        'num_parameters': sum(p.numel() for p in model.parameters()),
        'history_path': str(history_path),
        'best_checkpoint': str(best_ckpt_path),
        'train_samples': len(train_ds),
        'val_samples': len(val_ds),
        'test_samples': len(test_ds),
        'labels_present': labels_list,
        'label_names_present': names_list,
        'classification_report': clf_report,
        'hyperparameters': {
            'epochs': epochs,
            'lr': lr,
            'wd': wd,
            'amp_on': amp_on,
            'batch_size': batch_size,
            'clip_len': clip_len,
            'stride': stride
        }
    }, fh, indent=2)

test_report_txt = report_dir / f"{nb_prefix}_test_report.txt"
with open(test_report_txt, 'w') as fh:
    fh.write(f"Model: {model.__class__.__name__}\n")
    fh.write(f"Num parameters: {sum(p.numel() for p in model.parameters())}\n")
    fh.write(f"Train/Val/Test sizes: {len(train_ds)}/{len(val_ds)}/{len(test_ds)}\n")
    fh.write(f"Labels present ({len(labels_list)}): {labels_list}\n")
    fh.write('\nClassification report:\n')
    fh.write(clf_txt)

print('Saved artifacts:')
print(' -', cm_png)
print(' -', per_class_csv)
print(' -', test_report_json)
print(' -', test_report_txt)
print('\nSummary accuracy (macro, micro):')

# Simple metrics
macro_acc = np.mean(list(d['recall'] for d in clf_report.values() if isinstance(d, dict)))
micro_acc = (y_true == y_pred).mean()
print('Micro acc:', float(micro_acc))
print('Macro acc:', float(macro_acc))

# Also save a small predictions CSV for debugging / inspection
preds_csv = report_dir / f"{nb_prefix}_predictions.csv"
pd.DataFrame({'path': paths, 'y_true': y_true.tolist(), 'y_pred': y_pred.tolist()}).to_csv(preds_csv, index=False)
print('Saved predictions CSV to', preds_csv)


TEST â€” loss 5.0669 | acc 0.074
