(compact baseline, torchvision-free)

This is a small 3D CNN baseline (C3D-lite). It is not as strong as an official R(2+1)D or 3D-ResNet model, but it trains quickly and avoids extra library constraints. A stronger backbone can be swapped in later while reusing the same dataloaders.

### Cell 1 — Root, config, utils

In [7]:
import sys, json, yaml, math, time
from pathlib import Path

# Repository root = parent of the current working directory.
root = Path("..").resolve()

# Extend sys.path BEFORE importing anything from `src`; on a fresh kernel the
# package cannot be resolved until the root is on the path. The membership
# check keeps re-runs of this cell from piling up duplicate entries.
if str(root) not in sys.path:
    sys.path.append(str(root))               # so "import src" works
if str(root / "src") not in sys.path:
    sys.path.append(str(root / "src"))       # optional; parent append is enough
print("PYTHONPATH added:", root)

# Project imports (only resolvable once the path is set up above).
# NOTE(review): a later cell defines a class that is also named WLASLDataset
# and therefore shadows this import once that cell has run.
from src.data.wlasl_ds import WLASLDataset
from src.utils.seed import seed_everything
from src.utils.checkpoints import save_checkpoint, load_checkpoint

# Load the experiment config; the context manager closes the file handle.
with open(root / "configs" / "wlasl100.yaml") as cfg_file:
    CFG = yaml.safe_load(cfg_file)

# Output directories come from the config and are created on demand.
CKPT_DIR = root / CFG["paths"]["checkpoints_dir"]
LOG_DIR  = root / CFG["paths"]["logs_dir"]
CKPT_DIR.mkdir(parents=True, exist_ok=True)
LOG_DIR.mkdir(parents=True, exist_ok=True)

# Seed from the config; left as the last expression so the cell displays the
# value returned by seed_everything.
seed_everything(CFG["wlasl"]["split_seed"])


PYTHONPATH added: /home/falasoul/notebooks/USD/AAI-590/Capstone/AAI-590-G3-ASL


42

In [8]:
# Sanity check: the project helpers import cleanly from this kernel.
from src.utils.seed import seed_everything
from src.utils.checkpoints import save_checkpoint, load_checkpoint
# Re-seed from the config value (the same key used when CFG was loaded) rather
# than a literal, so this cell cannot drift from the configured seed.
print("imports OK; seed:", seed_everything(CFG["wlasl"]["split_seed"]))


imports OK; seed: 42


In [9]:
import random

import cv2
import decord
import numpy as np
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader

# Select decord's "torch" bridge. The dataset's __getitem__ below calls
# `.float()` and `.permute()` directly on the result of `VideoReader.get_batch`,
# i.e. it treats that result as a torch tensor.
decord.bridge.set_bridge("torch")

def _resize_112(frame_tchw: torch.Tensor) -> torch.Tensor:
    # frame_tchw: [T,C,H,W] float32 [0,1]
    T,C,H,W = frame_tchw.shape
    # Use OpenCV for speed; convert to NHWC
    arr = frame_tchw.permute(0,2,3,1).cpu().numpy()  # T,H,W,C
    out = np.empty((T,112,112,C), dtype=np.float32)
    for t in range(T):
        out[t] = cv2.resize(arr[t], (112,112), interpolation=cv2.INTER_AREA)
    out = torch.from_numpy(out).permute(0,3,1,2)  # T,C,112,112
    return out

def _normalize(frame_tchw: torch.Tensor, mean=(0.45,0.45,0.45), std=(0.225,0.225,0.225)) -> torch.Tensor:
    # per-channel normalization
    mean = torch.tensor(mean, dtype=frame_tchw.dtype, device=frame_tchw.device)[None,:,None,None]
    std  = torch.tensor(std,  dtype=frame_tchw.dtype, device=frame_tchw.device)[None,:,None,None]
    return (frame_tchw - mean) / std

def uniform_temporal_indices(n_total, clip_len, stride):
    """Pick ``clip_len`` frame indices spaced ``stride`` apart.

    * No frames (``n_total <= 0``): every index is 0.
    * Video long enough for the full window: the window is centered in the video.
    * Video too short: sampling starts at frame 0 and every index that would
      run past the end is clamped to the last frame (the last frame repeats).

    Returns:
        A list of ``clip_len`` integer frame indices.
    """
    if n_total <= 0:
        return [0] * clip_len

    # Offsets of the sampled frames relative to the first one.
    offsets = [k * stride for k in range(clip_len)]
    # Number of source frames spanned by one full window.
    span = (clip_len - 1) * stride + 1

    if n_total < span:
        last_frame = n_total - 1
        return [min(offset, last_frame) for offset in offsets]

    # Enough frames: center the window for consistent coverage.
    first = (n_total - span) // 2
    return [first + offset for offset in offsets]

class WLASLDataset(Dataset):
    """Map-style dataset that decodes one fixed-length clip per manifest row.

    Each item is ``(clip, label, path)``: ``clip`` is the decoded frames scaled
    to [0, 1], reordered to [T, C, H, W], then passed through ``_resize_112``
    and ``_normalize``; ``label`` is a Python int; ``path`` is the row's
    ``path`` value unchanged.

    Args:
        df: Manifest rows; the ``path`` and ``label`` columns are read.
        clip_len: Number of frames sampled per clip.
        stride: Step, in source frames, between consecutive sampled frames.
        train: Stored on the instance. NOTE(review): nothing in this class
            reads it yet, so train and eval items are produced identically.
    """

    def __init__(self, df: pd.DataFrame, clip_len=32, stride=2, train=False):
        # Re-index so positional lookups in __getitem__ run 0..len-1.
        self.df = df.reset_index(drop=True)
        self.clip_len = clip_len
        self.stride = stride
        self.train = train

    def __len__(self):
        return len(self.df)

    def __getitem__(self, i):
        """Decode, resize and normalize the clip for row ``i``.

        Raises:
            RuntimeError: If the video reports no frames.
        """
        row = self.df.iloc[i]
        path = row["path"]
        label = int(row["label"])
        vr = decord.VideoReader(path)
        n = len(vr)
        if n <= 0:
            # Fail with the offending file name instead of an opaque decoder
            # error from requesting frame 0 of an empty video.
            raise RuntimeError(f"No decodable frames in video: {path}")

        idxs = uniform_temporal_indices(n, self.clip_len, self.stride)
        batch = vr.get_batch(idxs)  # [T,H,W,C] uint8
        # Drop the reader as soon as the frames are fetched so the decoder is
        # not kept alive for the rest of the call.
        del vr
        if not isinstance(batch, torch.Tensor):
            # The decord torch bridge was not active where this ran (e.g. the
            # class was imported without the notebook's set_bridge call);
            # convert decord's native array explicitly.
            batch = torch.from_numpy(batch.asnumpy())

        # to float [0,1], TCHW
        x = batch.float() / 255.0
        x = x.permute(0, 3, 1, 2)
        # spatial resize 112x112
        x = _resize_112(x)
        # normalize
        x = _normalize(x)
        return x, label, path


### Cell 2 — Load manifest & build DataLoaders

In [10]:
import pandas as pd
import torch
from torch.utils.data import DataLoader, WeightedRandomSampler
from collections import Counter

# === Load dataset manifest (created in 02_preprocess_segments.ipynb) ===
MANIFEST = root / "data" / "metadata" / "wlasl100_manifest.csv"
m = pd.read_csv(MANIFEST)
print("Loaded manifest:", MANIFEST)
print("Total samples:", len(m))
# .to_dict() yields plain Python ints, so the printout has no np.int64 wrappers.
print("Splits:", m["split"].value_counts().to_dict())

# === Split subsets ===
train_df = m[m["split"] == "train"].copy()
val_df   = m[m["split"] == "val"].copy()
test_df  = m[m["split"] == "test"].copy()
# The sampler below cannot draw from an empty split; fail here with a clear message.
assert len(train_df) > 0, "train split is empty; check the 'split' column of the manifest"

# === Read config values ===
clip_len   = CFG["model"]["clip_len"]
frame_step = CFG["model"]["frame_stride"]
bs         = CFG["train"]["batch_size"]
nw         = CFG["train"]["num_workers"]

# === Dataset class ===
# WLASLDataset must already be in scope here: it is defined in an earlier cell
# of this notebook (and also importable from src/data/wlasl_ds.py).

# === Create train/val/test datasets ===
train_ds = WLASLDataset(train_df, clip_len=clip_len, stride=frame_step, train=True)
val_ds   = WLASLDataset(val_df,   clip_len=clip_len, stride=frame_step, train=False)
test_ds  = WLASLDataset(test_df,  clip_len=clip_len, stride=frame_step, train=False)

# === Handle class imbalance via WeightedRandomSampler ===
# Each training sample is weighted by 1 / (number of training samples in its
# class), so every class gets the same total sampling weight.
counts = train_df["label"].value_counts().to_dict()
weights = (1.0 / train_df["label"].map(counts)).to_numpy()
sampler = WeightedRandomSampler(
    torch.tensor(weights, dtype=torch.double),
    num_samples=len(train_df),
    replacement=True
)

# === Build DataLoaders ===
# Settings shared by all three loaders. Pinned host memory only helps when
# batches are copied to a CUDA device, so enable it only when one is available.
loader_kwargs = dict(
    batch_size=bs,
    num_workers=nw,
    pin_memory=torch.cuda.is_available(),
)
train_loader = DataLoader(train_ds, sampler=sampler, **loader_kwargs)
val_loader   = DataLoader(val_ds,  shuffle=False, **loader_kwargs)
test_loader  = DataLoader(test_ds, shuffle=False, **loader_kwargs)

# === Confirm stats ===
num_classes = m["label"].nunique()
print(f"Classes: {num_classes}")
print(f"Train batches: {len(train_loader)} | Val batches: {len(val_loader)} | Test batches: {len(test_loader)}")


Loaded manifest: /home/falasoul/notebooks/USD/AAI-590/Capstone/AAI-590-G3-ASL/data/metadata/wlasl100_manifest.csv
Total samples: 752
Splits: {'train': np.int64(547), 'val': np.int64(124), 'test': np.int64(81)}
Classes: 100
Train batches: 69 | Val batches: 16 | Test batches: 11
