# sinGes-mini: Project Working Notebook

- Activate the virtual environment before running: `pyenv activate miniGes` (see Roadmap.md).
- Run the cells top to bottom after configuring the paths in `config/config.yaml`.


In [None]:
# Setup: add project root to sys.path and verify environment
from pathlib import Path
import sys, importlib

# A notebook has no __file__, so the working directory is the only anchor.
# When the kernel was started inside notebooks/, the project root is one level up;
# otherwise the working directory itself is taken as the root.
NOTEBOOK_DIR = Path.cwd()
if NOTEBOOK_DIR.name == 'notebooks':
    PROJECT_ROOT = NOTEBOOK_DIR.parent
else:
    PROJECT_ROOT = NOTEBOOK_DIR

# Put the root first on sys.path (once) so `src.*` imports resolve.
root_str = str(PROJECT_ROOT)
if root_str not in sys.path:
    sys.path.insert(0, root_str)
print('Project root:', PROJECT_ROOT)

# Quick dependency check (prints versions if installed)
def _check(pkg):
    """Print the version of the importable module *pkg*, or why it failed to import.

    *pkg* must be the import name (e.g. ``cv2``), not the pip distribution name.
    Modules without a ``__version__`` attribute are reported as ``n/a``.
    Any exception raised while importing or reporting is caught and printed,
    so one broken dependency never aborts the check. Returns ``None``.
    """
    try:
        version = getattr(importlib.import_module(pkg), '__version__', 'n/a')
        print(f'{pkg}:', version)
    except Exception as err:
        print(f'{pkg}: NOT FOUND ->', err)
# Packages are listed by pip distribution name, but _check needs the import name.
# Two of them differ: opencv-python is imported as cv2 and pyyaml as yaml.
# Without the pyyaml entry an installed PyYAML is always reported as NOT FOUND.
_IMPORT_NAMES = {'opencv-python': 'cv2', 'pyyaml': 'yaml'}
for p in ['torch','torchvision','opencv-python','mediapipe','transformers','numpy','pyyaml']:
    # Fall back to the distribution name when it is also the import name.
    name = _IMPORT_NAMES.get(p, p)
    _check(name)


In [None]:
# Load the project and model configuration, then set up logging and seeding.
from src.utils import (
    load_config,
    load_model_config,
    resolve_path,
    seed_everything,
    setup_logging,
)

config = load_config('config/config.yaml')
model_cfg = load_model_config('config/model_config.yaml')

# The log directory falls back to 'logs' when config['paths'] has no 'logs_dir'.
log_dir = config['paths'].get('logs_dir', 'logs')
setup_logging(log_dir)

seed = config['project']['seed']
seed_everything(seed)

# Bare expression last so the notebook shows the loaded config as cell output.
config


## Dataset discovery

In [None]:
from src.utils import discover_class_labels, strip_label_prefix

# Resolve the dataset location from the config loaded earlier and list its class labels.
dataset_root = resolve_path(config['paths']['dataset_root'])
labels = discover_class_labels(dataset_root)

# Display-friendly variants of the labels, in the same order as `labels`.
display_labels = list(map(strip_label_prefix, labels))
print('Labels discovered:', len(labels))

# Preview the first ten display labels as the cell output.
display_labels[:10]


## Optional: Preprocess raw videos (frames + landmarks)

In [None]:
# Optional preprocessing pass; relies on `config` loaded in an earlier cell.
# This step scans dataset_root for videos and writes .npz to data/raw and mediapipe outputs if enabled.
# Ensure mediapipe is installed, or set use_mediapipe=false in config.
from src.data_preprocessing import PreprocessingConfig, DataPreprocessor
# Build the preprocessing settings from the full project config dict.
pre_cfg = PreprocessingConfig.from_dict(config)
pre = DataPreprocessor(pre_cfg)
# Runs the whole preprocessing job; presumably slow on large datasets — TODO confirm.
pre.process_dataset()
print('Preprocessing complete.')


## Train sign recognition model

In [None]:
# Requires CUDA per training script. Adjust config/training if needed.
# NOTE(review): train() is called with no arguments, so it presumably loads its own
# configuration rather than the `config` dict held by this notebook — confirm.
from src.train_recognition import train as train_sign_recognition
results = train_sign_recognition()
# Bare expression last so the notebook shows the returned results as cell output.
results


## Inference demo

In [None]:
from pathlib import Path
from src.inference import SignRecognizer
ckpt_dir = resolve_path(config['paths']['recognition_checkpoint_dir'])
# Use the most recently modified .pt checkpoint; older ones in the directory are ignored.
candidates = sorted(ckpt_dir.glob('*.pt'), key=lambda p: p.stat().st_mtime, reverse=True)
# Raise explicitly instead of using assert, which is stripped when Python runs with -O.
if not candidates:
    raise FileNotFoundError(f'No checkpoint found in {ckpt_dir}. Run training first.')
ckpt = candidates[0]
print('Using checkpoint:', ckpt)
rec = SignRecognizer(checkpoint_path=ckpt, config_path='config/config.yaml', model_config_path='config/model_config.yaml')
# Sample video to test with. glob() yields files in no guaranteed order,
# so take the smallest path to make the demo pick the same file on every run.
sample_video = min(Path('tmp/web_uploads').glob('*.mp4'), default=None)
if sample_video is None:
    raise FileNotFoundError('Place a sample .mp4 in tmp/web_uploads to run this cell.')
preds = rec.predict(sample_video, top_k=5)
# Show (label, score) pairs for the top predictions, scores rounded to 4 decimals.
[(p.display_label, round(p.score,4)) for p in preds]
