# sinGes-mini: Project Working Notebook

- Activate virtual env before running: `pyenv activate miniGes` (see Roadmap.md).
- Run cells top-to-bottom after configuring paths in `config/config.yaml`.


## Project Overview & Methodology (for Final-Year Submission)

- **Objective**: Recognize 30 ISL signs from video and generate grammatically correct sentences.
- **Dataset**: INCLUDE ISL videos; organized by category/label folders.
- **Pipeline**:
  - Preprocess videos (resize, optional MediaPipe landmarks).
  - Train sign recognition model (`r3d18_bilstm_attn`).
  - Fine-tune language model to form sentences.
  - Inference: Predict word(s) from video, generate a sentence.
- **Environment**: GPU-enabled PyTorch; dependencies listed in requirements.
- **Reproducibility**: Config-driven (`config/*.yaml`), fixed seed, logs saved under `logs/`.
- **Model Artifacts**: Checkpoints under `models/checkpoints/` and copied to `models/final_submission/` for handover.
- **Evaluation**: Accuracy, Top-5, confusion matrix, classification report saved in logs.

In [2]:
# Setup: add project root to sys.path and verify environment
from pathlib import Path
import sys, importlib

# In notebooks, __file__ is not defined. Use CWD (notebooks/) -> parent is project root.
NOTEBOOK_DIR = Path.cwd()
PROJECT_ROOT = NOTEBOOK_DIR if NOTEBOOK_DIR.name != 'notebooks' else NOTEBOOK_DIR.parents[0]
if str(PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(PROJECT_ROOT))
print('Project root:', PROJECT_ROOT)

# Quick dependency check (prints versions if installed)
def _check(pkg):
    try:
        m = importlib.import_module(pkg)
        v = getattr(m, '__version__', 'n/a')
        print(f'{pkg}:', v)
    except Exception as e:
        print(f'{pkg}: NOT FOUND ->', e)
for p in ['torch','torchvision','opencv-python','mediapipe','transformers','numpy','pyyaml']:
    # opencv-python's import name is cv2
    name = 'cv2' if p=='opencv-python' else p
    _check(name)


Project root: /home/aditya/workhub/python/project/sinGes(mini)
torch: 2.8.0+cu128
torchvision: 0.23.0+cu128
cv2: 4.11.0
mediapipe: 0.10.21
transformers: 4.56.1
numpy: 1.26.4
pyyaml: NOT FOUND -> No module named 'pyyaml'


In [3]:
# Optional: install missing dependencies and show GPU info
# If you see errors like 'No module named pyyaml' or tqdm/ipywidgets warnings, uncomment and run installs below.
# !pip install pyyaml ipywidgets tqdm
# from google.colab import output; output.enable_custom_widget_manager()  # if in Colab

import torch, platform, sys, subprocess
print('Python:', sys.version.split()[0])
print('Platform:', platform.platform())
print('CUDA available:', torch.cuda.is_available())
if torch.cuda.is_available():
    try:
        print('GPU:', torch.cuda.get_device_name(0))
        print('GPU capability:', torch.cuda.get_device_capability(0))
    except Exception as e:
        print('GPU query failed:', e)
# Try nvidia-smi if present
try:
    out = subprocess.run(['nvidia-smi','-L'], capture_output=True, text=True)
    if out.returncode == 0:
        print(out.stdout.strip())
except FileNotFoundError:
    pass

Python: 3.11.9
Platform: Linux-6.17.7-arch1-1-x86_64-with-glibc2.42
CUDA available: True
GPU: NVIDIA GeForce RTX 3050 Laptop GPU
GPU capability: (8, 6)
GPU 0: NVIDIA GeForce RTX 3050 Laptop GPU (UUID: GPU-9ec88d1f-ff8a-9fc5-4f43-e91e7c1997be)


In [4]:
# Load configs and utilities (use absolute paths)
from src.utils import load_config, load_model_config, setup_logging, seed_everything, resolve_path
CONFIG_PATH = PROJECT_ROOT / 'config' / 'config.yaml'
MODEL_CONFIG_PATH = PROJECT_ROOT / 'config' / 'model_config.yaml'
print('Config path:', CONFIG_PATH)
print('Model config path:', MODEL_CONFIG_PATH)
config = load_config(str(CONFIG_PATH))
model_cfg = load_model_config(str(MODEL_CONFIG_PATH))
setup_logging(config['paths'].get('logs_dir','logs'))
seed_everything(config['project']['seed'])
config

Config path: /home/aditya/workhub/python/project/sinGes(mini)/config/config.yaml
Model config path: /home/aditya/workhub/python/project/sinGes(mini)/config/model_config.yaml


{'project': {'name': 'sinGes-mini',
  'description': 'Indian Sign Language recognition and sentence generation pipeline',
  'seed': 42},
 'paths': {'dataset_root': 'dataset',
  'raw_data_dir': 'data/raw',
  'processed_data_dir': 'data/processed',
  'splits_dir': 'data/train_test_split',
  'mediapipe_output_dir': 'data/processed/mediapipe',
  'recognition_checkpoint_dir': 'models/checkpoints/sign_recognition',
  'transformer_checkpoint_dir': 'models/checkpoints/transformer',
  'logs_dir': 'logs',
  'reports_dir': 'reports'},
 'preprocessing': {'frame_rate': 25,
  'frame_resize': [112, 112],
  'max_frames_per_clip': 32,
  'clip_stride': 2,
  'use_mediapipe': True,
  'store_landmarks': True,
  'normalize_mean': [0.485, 0.456, 0.406],
  'normalize_std': [0.229, 0.224, 0.225],
  'augmentations': {'rotation': [-10, 10],
   'scale': [0.9, 1.1],
   'brightness': [0.8, 1.2],
   'contrast': [0.85, 1.15],
   'horizontal_flip': True,
   'horizontal_flip_prob': 0.4,
   'translate_percent': [0.0, 0.

In [5]:
# Normalize config paths to absolute (relative to PROJECT_ROOT)
from pathlib import Path
abs_paths = {}
for k, v in config.get('paths', {}).items():
    p = Path(v)
    abs_paths[k] = str((PROJECT_ROOT / p).resolve()) if not p.is_absolute() else str(p.resolve())
config['paths'] = abs_paths
print('Absolute paths applied:')
for k, v in config['paths'].items():
    print(f'- {k}: {v}')

Absolute paths applied:
- dataset_root: /home/aditya/workhub/python/project/sinGes(mini)/dataset
- raw_data_dir: /home/aditya/workhub/python/project/sinGes(mini)/data/raw
- processed_data_dir: /home/aditya/workhub/python/project/sinGes(mini)/data/processed
- splits_dir: /home/aditya/workhub/python/project/sinGes(mini)/data/train_test_split
- mediapipe_output_dir: /home/aditya/workhub/python/project/sinGes(mini)/data/processed/mediapipe
- recognition_checkpoint_dir: /home/aditya/workhub/python/project/sinGes(mini)/models/checkpoints/sign_recognition
- transformer_checkpoint_dir: /home/aditya/workhub/python/project/sinGes(mini)/models/checkpoints/transformer
- logs_dir: /home/aditya/workhub/python/project/sinGes(mini)/logs
- reports_dir: /home/aditya/workhub/python/project/sinGes(mini)/reports


## Dataset discovery

In [6]:
# Tokenization + attention masks + causal mask demo
from transformers import AutoTokenizer
import torch

# Use pretrained tokenizer; set PAD if missing (e.g., GPT-2)
model_name = 'gpt2'
tokenizer = AutoTokenizer.from_pretrained(model_name)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

samples = [
    "I happy today",
    "she beautiful",
]
max_len = 12
enc = tokenizer(
    samples,
    padding='max_length',
    truncation=True,
    max_length=max_len,
    return_tensors='pt'
)
input_ids = enc['input_ids']
attn_mask = enc['attention_mask']  # 1 = keep, 0 = pad
print('input_ids shape:', tuple(input_ids.shape))
print('attention_mask shape:', tuple(attn_mask.shape))
print('First sample input_ids:', input_ids[0].tolist())
print('First sample attention_mask:', attn_mask[0].tolist())

# Causal (look-ahead) mask for decoder-only models (applied internally during generation)
causal_mask = torch.tril(torch.ones((max_len, max_len), dtype=torch.float32))
print('Causal mask (top-left 8x8):\n', causal_mask[:8, :8])

input_ids shape: (2, 12)
attention_mask shape: (2, 12)
First sample input_ids: [40, 3772, 1909, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256]
First sample attention_mask: [1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0]
Causal mask (top-left 8x8):
 tensor([[1., 0., 0., 0., 0., 0., 0., 0.],
        [1., 1., 0., 0., 0., 0., 0., 0.],
        [1., 1., 1., 0., 0., 0., 0., 0.],
        [1., 1., 1., 1., 0., 0., 0., 0.],
        [1., 1., 1., 1., 1., 0., 0., 0.],
        [1., 1., 1., 1., 1., 1., 0., 0.],
        [1., 1., 1., 1., 1., 1., 1., 0.],
        [1., 1., 1., 1., 1., 1., 1., 1.]])


## Masking demonstration (Transformer)

This section demonstrates input padding and attention masks used for language models. We show:
- Tokenization with `padding=max_length` and `truncation`
- `attention_mask` where 1=keep token, 0=pad
- A causal (look-ahead) mask used inside decoder-only models (triangular matrix)

In [7]:
# Dataset / DB details: paths, classes, counts (robust to relative paths)
from pathlib import Path
from collections import Counter
from src.utils import discover_class_labels, strip_label_prefix
from src.train_recognition import discover_videos

# Use absolute path if available; otherwise join with PROJECT_ROOT
cfg_ds = config['paths']['dataset_root']
DATASET_ROOT = Path(cfg_ds)
if not DATASET_ROOT.is_absolute():
    DATASET_ROOT = (PROJECT_ROOT / DATASET_ROOT).resolve()
print('Dataset root:', DATASET_ROOT)

try:
    labels = discover_class_labels(DATASET_ROOT)
    labels_clean = [strip_label_prefix(x) for x in labels]
    print('Total classes:', len(labels))

    class_to_paths = discover_videos(Path(DATASET_ROOT))
    counts = {k: len(v) for k, v in class_to_paths.items()}
    print('Total videos:', sum(counts.values()))

    preview = list(sorted(((strip_label_prefix(k), v) for k, v in counts.items()), key=lambda x: x[0]))[:10]
    preview
except FileNotFoundError as e:
    print('Dataset not found. Please set config.paths.dataset_root to an existing folder.')
    print('Current value:', DATASET_ROOT)
    print('Tip: Place your dataset under', PROJECT_ROOT / 'dataset', 'or update config and rerun Cell 4 (absolute paths).')

Dataset root: /home/aditya/workhub/python/project/sinGes(mini)/dataset
Total classes: 46
Total videos: 884


## Optional: Preprocess raw videos (frames + landmarks)

In [None]:
# This step scans dataset_root for videos and writes .npz to data/raw and mediapipe outputs if enabled.
# Ensure mediapipe is installed, or set use_mediapipe=false in config.
from src.data_preprocessing import PreprocessingConfig, DataPreprocessor
pre_cfg = PreprocessingConfig.from_dict(config)
pre = DataPreprocessor(pre_cfg)
pre.process_dataset()
print('Preprocessing complete.')


2025-11-07 01:30:04,056 | INFO | src.data_preprocessing | Starting dataset preprocessing from /home/aditya/workhub/python/project/sinGes(mini)/dataset
I0000 00:00:1762459205.913428   43003 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1762459205.923655   43180 gl_context.cc:369] GL version: 3.2 (OpenGL ES 3.2 Mesa 25.2.6-arch1.1), renderer: Mesa Intel(R) UHD Graphics (TGL GT1)
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
W0000 00:00:1762459206.041101   43168 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1762459206.082395   43165 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1762459206.309802   43170 landmark_projection_calculator.cc:186] Using NORM_RECT without IMAGE_DIMENSIONS is only supported for the square ROI. Provide

KeyboardInterrupt: 



## Model summary (architecture and parameters)

Builds the recognition model from `model_config.yaml` on CPU and reports the total number of trainable parameters for documentation.

In [None]:
# Image masking demo using a database image (no video required)
import cv2, numpy as np
from pathlib import Path
import matplotlib.pyplot as plt

# Try to find an image in dataset first, else fallback to reports/figures, else generate synthetic
search_roots = [PROJECT_ROOT / 'dataset', PROJECT_ROOT / 'reports' / 'figures', PROJECT_ROOT / 'tmp']
img_exts = ['*.png','*.jpg','*.jpeg','*.bmp']
img_path = None
for root in search_roots:
    if not root.exists():
        continue
    for ext in img_exts:
        matches = [p for p in root.rglob(ext) if 'kaarthik' not in p.name.lower()]
        if matches:
            img_path = matches[0]
            break
    if img_path is not None:
        break

if img_path is None:
    # create a synthetic image (gradient + rectangle)
    H, W = 256, 384
    yy, xx = np.mgrid[0:H, 0:W]
    base = ((xx / W) * 255).astype(np.uint8)
    img = np.stack([base, np.flipud(base), base//2], axis=-1)
else:
    bgr = cv2.imread(str(img_path))
    if bgr is None:
        raise RuntimeError(f'Failed to load image: {img_path}')
    img = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
    H, W = img.shape[:2]

print('Using image:', img_path if img_path else 'synthetic')

# Spatial mask: semi-transparent center overlay (not full black)
mask_ratio = 0.4
mh, mw = int(H * mask_ratio), int(W * mask_ratio)
y0, x0 = (H - mh)//2, (W - mw)//2
alpha = 0.6
spatial = img.copy().astype(np.float32)
spatial[y0:y0+mh, x0:x0+mw, :] = (alpha * 0 + (1 - alpha) * spatial[y0:y0+mh, x0:x0+mw, :])
spatial = spatial.clip(0,255).astype(np.uint8)

# Random block mask: place K random rectangles
rng = np.random.default_rng(42)
K = 5
block = img.copy().astype(np.float32)
for _ in range(K):
    rh = int(H * rng.uniform(0.05, 0.15))
    rw = int(W * rng.uniform(0.05, 0.15))
    ry = int(rng.integers(0, max(1, H - rh)))
    rx = int(rng.integers(0, max(1, W - rw)))
    block[ry:ry+rh, rx:rx+rw, :] = (alpha * 0 + (1 - alpha) * block[ry:ry+rh, rx:rx+rw, :])
block = block.clip(0,255).astype(np.uint8)

# Show
fig, axes = plt.subplots(1, 3, figsize=(12,4))
axes[0].imshow(img); axes[0].set_title('Original'); axes[0].axis('off')
axes[1].imshow(spatial); axes[1].set_title('Center masked (overlay)'); axes[1].axis('off')
axes[2].imshow(block); axes[2].set_title('Random block masks'); axes[2].axis('off')
plt.tight_layout(); plt.show()

In [None]:
# Model summary on CPU (no GPU required)
from src.train_recognition import build_sign_recognition_model
from copy import deepcopy
import torch

model_cfg = deepcopy(model_cfg['sign_recognition']) if isinstance(model_cfg, dict) and 'sign_recognition' in model_cfg else deepcopy(load_model_config(str(MODEL_CONFIG_PATH))['sign_recognition'])
# Ensure classifier.num_classes is set
model_cfg.setdefault('classifier', {})
model_cfg['classifier']['num_classes'] = int(model_cfg['classifier'].get('num_classes', 30))

model = build_sign_recognition_model(model_cfg)
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
all_params = sum(p.numel() for p in model.parameters())
print('Architecture:', model_cfg.get('architecture', 'unknown'))
print('Trainable parameters (M):', round(trainable_params/1e6, 3))
print('Total parameters (M):', round(all_params/1e6, 3))
model.__class__.__name__

In [None]:
# Video masking demo: temporal and spatial masking on a sample video
import cv2, numpy as np, math
from pathlib import Path
import matplotlib.pyplot as plt

# Pick a sample video
video_path = next((p for p in (PROJECT_ROOT / 'tmp' / 'web_uploads').glob('*.mp4')), None)
assert video_path is not None, f'Place a sample .mp4 in {PROJECT_ROOT / "tmp" / "web_uploads"} to run this cell.'

# Load frames (downsample for speed)
cap = cv2.VideoCapture(str(video_path))
frames = []
success, frame = cap.read()
while success and len(frames) < 32:
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    frames.append(frame)
    success, frame = cap.read()
cap.release()
if not frames:
    raise RuntimeError('No frames decoded from the sample video')

T = len(frames)
H, W = frames[0].shape[:2]
print('Loaded clip:', video_path.name, '| frames:', T, '| size:', (H, W))

# Temporal mask: keep every k-th frame, zero others
k = max(2, T // 8)
temporal_mask = np.zeros((T,), dtype=np.uint8)
temporal_mask[::k] = 1
masked_temporal = [f if temporal_mask[i] else np.zeros_like(f) for i, f in enumerate(frames)]

# Spatial mask: center rectangle mask applied to each frame
mask_ratio = 0.4
mh, mw = int(H * mask_ratio), int(W * mask_ratio)
y0, x0 = (H - mh)//2, (W - mw)//2
masked_spatial = []
for f in frames:
    m = f.copy()
    m[y0:y0+mh, x0:x0+mw, :] = 0  # black rectangle center mask
    masked_spatial.append(m)

# Visualize a few frames (original, temporal-masked, spatial-masked)
cols = 3
rows = min(4, T)
fig, axes = plt.subplots(rows, cols, figsize=(cols*4, rows*3))
for r in range(rows):
    idx = min(r * (T // rows + 1), T-1)
    for c in range(cols):
        ax = axes[r, c] if rows > 1 else axes[c]
        if c == 0:
            ax.imshow(frames[idx])
            ax.set_title(f'Original t={idx}')
        elif c == 1:
            ax.imshow(masked_temporal[idx])
            ax.set_title('Temporal masked')
        else:
            ax.imshow(masked_spatial[idx])
            ax.set_title('Spatial masked')
        ax.axis('off')
plt.tight_layout()
plt.show()

print('Temporal mask (1=keep, 0=zero) first 16:', temporal_mask[:16].tolist())

## Training logs viewer

Loads `logs/analysis/latest_training_summary.json` and plots epoch-wise metrics for documentation.

In [None]:
# Plot training metrics from latest summary
import json
from pathlib import Path
import matplotlib.pyplot as plt

summary_path = PROJECT_ROOT / 'logs' / 'analysis' / 'latest_training_summary.json'
if not summary_path.exists():
    print('No training summary found at', summary_path)
else:
    data = json.loads(summary_path.read_text())
    epochs = data.get('epochs', [])
    if not epochs:
        print('No epoch records found in summary:', summary_path)
    else:
        xs = [e['epoch'] for e in epochs]
        keys = [k for k in ['train_loss','train_acc','val_loss','val_acc','train_top5','val_top5'] if k in epochs[0]]

        fig, axes = plt.subplots(len(keys), 1, figsize=(7, 3*len(keys)))
        if len(keys) == 1:
            axes = [axes]
        for ax, k in zip(axes, keys):
            ys = [float(e.get(k)) if e.get(k) is not None else None for e in epochs]
            ax.plot(xs, ys, marker='o')
            ax.set_title(k)
            ax.set_xlabel('epoch')
            ax.grid(True, alpha=0.3)
        plt.tight_layout()
        plt.show()
        print('Latest archive:', data.get('latest_archive'))

## Evaluation metrics and visualizations

This section loads the latest archived training run and visualizes:
- Confusion matrix heatmap with class labels
- Per-class precision, recall, and F1 derived from the confusion matrix
- Class support (sample count per class)

In [None]:
# Load latest archived run with confusion matrix and class names
import json
from pathlib import Path

runs_dir = PROJECT_ROOT / 'logs' / 'sign_recognition_runs'
latest_run = None
if runs_dir.exists():
    candidates = sorted(runs_dir.glob('*.json'), key=lambda p: p.stat().st_mtime, reverse=True)
    latest_run = candidates[0] if candidates else None

if latest_run is None:
    print('No archived run found in', runs_dir)
    archived = None
else:
    print('Latest archived run:', latest_run)
    archived = json.loads(latest_run.read_text())
archived

In [None]:
# Confusion matrix heatmap (with readable labels)
import numpy as np
import matplotlib.pyplot as plt

from src.utils import strip_label_prefix

if not archived:
    print('No archived run loaded. Execute the cell above to load latest run.')
else:
    cm = np.array(archived.get('confusion_matrix', []), dtype=float)
    class_names = archived.get('class_names', [])
    if cm.size == 0 or not class_names:
        print('Confusion matrix or class names missing in archived run.')
    else:
        clean_names = [strip_label_prefix(c) for c in class_names]
        try:
            import seaborn as sns
            fig_w = max(6, len(clean_names) * 0.35)
            fig_h = max(5, len(clean_names) * 0.35)
            plt.figure(figsize=(fig_w, fig_h))
            ax = sns.heatmap(cm, annot=False, cmap='Blues', xticklabels=clean_names, yticklabels=clean_names)
            ax.set_xlabel('Predicted')
            ax.set_ylabel('True')
            ax.set_title('Confusion Matrix')
            ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha='right', fontsize=8)
            ax.set_yticklabels(ax.get_yticklabels(), rotation=0, fontsize=8)
            plt.tight_layout()
            plt.show()
        except Exception as e:
            print('seaborn not available, using matplotlib imshow')
            fig_w = max(6, len(clean_names) * 0.35)
            fig_h = max(5, len(clean_names) * 0.35)
            plt.figure(figsize=(fig_w, fig_h))
            plt.imshow(cm, cmap='Blues')
            plt.title('Confusion Matrix')
            plt.xlabel('Predicted')
            plt.ylabel('True')
            plt.xticks(range(len(clean_names)), clean_names, rotation=45, ha='right', fontsize=8)
            plt.yticks(range(len(clean_names)), clean_names, rotation=0, fontsize=8)
            plt.colorbar()
            plt.tight_layout()
            plt.show()

In [None]:
# Per-class metrics (precision, recall, F1) and support
import numpy as np
import pandas as pd

if not archived:
    print('No archived run loaded.')
else:
    cm = np.array(archived.get('confusion_matrix', []), dtype=float)
    class_names = archived.get('class_names', [])
    if cm.size == 0 or not class_names:
        print('Confusion matrix or class names missing in archived run.')
    else:
        # Support per class (true instances)
        support = cm.sum(axis=1)
        # True positives per class
        tp = np.diag(cm)
        # Predicted positives per class
        pred_pos = cm.sum(axis=0)
        # Metrics
        precision = np.divide(tp, pred_pos, out=np.zeros_like(tp), where=pred_pos!=0)
        recall = np.divide(tp, support, out=np.zeros_like(tp), where=support!=0)
        f1 = np.divide(2*precision*recall, precision+recall, out=np.zeros_like(tp), where=(precision+recall)!=0)

        df = pd.DataFrame({
            'class': class_names,
            'support': support.astype(int),
            'precision': np.round(precision, 4),
            'recall': np.round(recall, 4),
            'f1': np.round(f1, 4),
        }).sort_values('class')
        df

## Train sign recognition model

This step trains the model using the configuration files. After training, a copy of the trained weights is saved to a submission folder for your final-year project handover.

In [None]:
# Requires CUDA per training script. Adjust config/training if needed.
from src.train_recognition import train as train_sign_recognition
from pathlib import Path
import shutil, datetime, os

# Ensure relative paths in training code resolve against PROJECT_ROOT
_old_cwd = os.getcwd()
os.chdir(str(PROJECT_ROOT))
try:
    results = train_sign_recognition()
finally:
    os.chdir(_old_cwd)

print('Training complete. Final accuracy:', results.get('accuracy'))

# Save/copy trained model to a submission directory for final-year project
SUBMISSION_DIR = PROJECT_ROOT / 'models' / 'final_submission'
SUBMISSION_DIR.mkdir(parents=True, exist_ok=True)
ckpt_path = Path(results.get('checkpoint_path', ''))
if ckpt_path.exists():
    ts = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
    arch = results.get('architecture', 'model')
    dest = SUBMISSION_DIR / f'{arch}_sign_recognition_{ts}.pt'
    shutil.copy2(ckpt_path, dest)
    print('Copied trained checkpoint to:', dest)
else:
    print('Warning: checkpoint not found in results; skipping copy.')

results

## Inference demo

In [None]:
from pathlib import Path
from src.inference import SignRecognizer
import os

# Prefer submission checkpoint if available, otherwise take latest from checkpoints dir
SUBMISSION_DIR = PROJECT_ROOT / 'models' / 'final_submission'
ckpt_dir = resolve_path(config['paths']['recognition_checkpoint_dir'])

candidates = []
if SUBMISSION_DIR.exists():
    candidates.extend(sorted(SUBMISSION_DIR.glob('*.pt'), key=lambda p: p.stat().st_mtime, reverse=True))
if not candidates:
    candidates.extend(sorted(Path(ckpt_dir).glob('*.pt'), key=lambda p: p.stat().st_mtime, reverse=True))

assert candidates, f'No checkpoint found in {SUBMISSION_DIR} or {ckpt_dir}. Run training first.'
ckpt = candidates[0]
print('Using checkpoint:', ckpt)

# Ensure dataset_root relative path resolves against PROJECT_ROOT when SignRecognizer loads config
_old_cwd = os.getcwd()
os.chdir(str(PROJECT_ROOT))
try:
    rec = SignRecognizer(
        checkpoint_path=ckpt,
        config_path=str(CONFIG_PATH),
        model_config_path=str(MODEL_CONFIG_PATH)
    )
finally:
    os.chdir(_old_cwd)

# Provide a sample video path to test (skip any filename containing 'kaarthik')
sample_video = next((p for p in (PROJECT_ROOT / 'tmp' / 'web_uploads').glob('*.mp4') if 'kaarthik' not in p.name.lower()), None)
if sample_video is None:
    print('No sample .mp4 found in tmp/web_uploads (excluding names containing kaarthik). Skipping prediction.')
else:
    preds = rec.predict(sample_video, top_k=5)
    [(p.display_label, round(p.score,4)) for p in preds]