# Colab Quickstart (5-10 min)

This notebook is an executable entry point to the repository.

It installs the package (from Git on Colab, otherwise as an editable install of the local checkout), locates or downloads the configs needed for a tiny **fixture** dataset (no NinaPro download), and runs the pipeline
`prepare -> splits -> traineval -> report`, followed by `size` for quick sizing estimates.

Outputs are written to `runs/colab_quickstart/` so your working tree stays clean.

> This is a tutorial run (`--profile colab_quickstart`), not a benchmark for reporting results.


In [None]:
import tempfile
import subprocess
import sys
from pathlib import Path
import shutil

In [None]:
def sh(cmd):
    """Echo a command, then run it and raise if it exits non-zero.

    Args:
        cmd: Sequence of command arguments; each one is rendered with
            ``str`` for the echoed line.

    Raises:
        subprocess.CalledProcessError: If the command returns a non-zero
            exit status.
    """
    rendered = " ".join(str(part) for part in cmd)
    print("+", rendered)
    subprocess.check_call(cmd)


# True when running inside Google Colab (its module is preloaded there).
IN_COLAB = "google.colab" in sys.modules
GIT_URL = "git+https://github.com/geronimobergk/semg-protocol-sensitivity.git"
LOCAL_SRC = Path("..").resolve()  # notebooks/ -> repo root

# Fast path: skip installation when the package already imports.
try:
    import tinyml_semg_classifier  # noqa: F401

    print("Package already available — skipping install")
except Exception:
    # Any import failure (missing or broken install) falls through to an
    # install. Prefer `uv` when it is on PATH, otherwise use this
    # interpreter's pip.
    if shutil.which("uv"):
        installer = ["uv", "pip", "install"]
    else:
        installer = [sys.executable, "-m", "pip", "install"]
    # Colab has no checkout, so install from Git; locally use an editable
    # install of the repository root.
    target = [GIT_URL] if IN_COLAB else ["-e", str(LOCAL_SRC)]
    sh(installer + target)


In [None]:
REL_CFG = "configs/experiments/protocol_sensitivity_semg_cnn.yml"
REL_PROFILE = "configs/profiles/colab_quickstart.yaml"
BASE_URL = "https://raw.githubusercontent.com/geronimobergk/semg-protocol-sensitivity/fix/colab-notebook/"

# Prefer the config files from a local checkout (notebooks/ -> repo root).
cfg = Path("..") / REL_CFG
profile = Path("..") / REL_PROFILE

if cfg.exists() and profile.exists():
    BASE_CFG, PROFILE = cfg.resolve(), profile.resolve()
else:
    # No checkout available: download both files into a scratch directory.
    tmp = Path(tempfile.mkdtemp())
    BASE_CFG = tmp / Path(REL_CFG).name
    PROFILE = tmp / Path(REL_PROFILE).name
    for rel_path, dest in ((REL_CFG, BASE_CFG), (REL_PROFILE, PROFILE)):
        # --fail makes curl exit non-zero on HTTP errors (e.g. 404) instead
        # of saving the error page as if it were the config file.
        subprocess.check_call(
            ["curl", "--fail", "-L", "-o", str(dest), BASE_URL + rel_path]
        )

print("Using config:", BASE_CFG)
print("Using profile:", PROFILE)

In [None]:
# Sanity check: report interpreter and torch versions plus CUDA availability.
import torch

py_version = sys.version.split()[0]
print(
    f"python: {py_version} "
    f"| torch: {torch.__version__} "
    f"| cuda: {torch.cuda.is_available()}"
)

## Configure outputs (via overrides)

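The base experiment config and profile were resolved in the setup cell above (taken from the local checkout, or downloaded from GitHub when no checkout is present). For a tutorial run, we redirect **all** outputs under `runs/colab_quickstart/`.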

We also generate a tiny fixture file locally so the profile does not depend on repo files.


In [None]:
from pathlib import Path

import numpy as np

from tinyml_semg_classifier.utils.io import read_yaml


# Every tutorial output lives under <cwd>/runs/colab_quickstart.
OUT_ROOT = Path.cwd().joinpath("runs", "colab_quickstart").resolve()
OUT_ROOT.mkdir(parents=True, exist_ok=True)

# Location of the synthetic fixture archive used by this notebook.
fixture_path = OUT_ROOT.joinpath("fixture_tiny.npz")


def fixture_needs_regen(path: Path) -> bool:
    """Return True when the fixture archive at *path* must be (re)generated.

    The fixture is considered stale when any of the following holds:

    - the file does not exist or cannot be loaded,
    - a required array is missing,
    - ``X`` or any metadata column has a different number of rows than the
      first metadata column,
    - two rows share the same metadata tuple.

    Args:
        path: Location of the ``.npz`` fixture file.

    Returns:
        True if the fixture should be regenerated, False if it looks valid.
    """
    if not path.exists():
        return True

    metadata_keys = (
        "subject_id",
        "rep_id",
        "gesture_id",
        "exercise_id",
        "sample_start",
        "sample_end",
    )
    try:
        with np.load(path) as data:
            if any(name not in data for name in ("X", *metadata_keys)):
                return True
            columns = [np.asarray(data[name]) for name in metadata_keys]
            num_rows = columns[0].shape[0]
            # X holds one window per metadata row; a mismatch means the
            # archive is truncated or inconsistent.
            if np.asarray(data["X"]).shape[0] != num_rows:
                return True
            if any(col.shape[0] != num_rows for col in columns):
                return True
            # Each metadata tuple must identify exactly one window.
            unique_rows = set(zip(*(col.tolist() for col in columns)))
            return len(unique_rows) != num_rows
    except Exception:
        # Deliberate best effort: an unreadable or malformed file is simply
        # regenerated.
        return True


if fixture_needs_regen(fixture_path):
    # Fixed seed so the synthetic signal is identical on every run.
    rng = np.random.default_rng(0)
    subjects = [1, 2]
    reps = [1, 2, 3]
    gestures = [1, 2]
    windows_per_combo = 5
    num_electrodes = 2
    num_samples = 8

    # One (subject, rep, gesture) record per window, repeated
    # windows_per_combo times for every combination.
    records = []
    for subject in subjects:
        for rep in reps:
            for gesture in gestures:
                records.extend([(subject, rep, gesture)] * windows_per_combo)
    num_windows = len(records)

    signal_shape = (num_windows, num_electrodes, num_samples)
    X = rng.standard_normal(size=signal_shape).astype("float32")

    # Transpose the records into one int32 column per metadata field.
    subject_id, rep_id, gesture_id = (
        np.array(column, dtype="int32") for column in zip(*records)
    )
    exercise_id = np.full(num_windows, 1, dtype="int32")

    # Windows are laid out back to back; sample_end is the last index
    # inside each window.
    sample_start = num_samples * np.arange(num_windows, dtype="int32")
    sample_end = sample_start + (num_samples - 1)

    arrays = {
        "X": X,
        "subject_id": subject_id,
        "rep_id": rep_id,
        "gesture_id": gesture_id,
        "exercise_id": exercise_id,
        "sample_start": sample_start,
        "sample_end": sample_end,
    }
    np.savez(fixture_path, **arrays)

    print("Wrote fixture:", fixture_path)


# Write a YAML overrides file that redirects every output root (and the
# fixture location) under OUT_ROOT, then load it back as a mapping.
overrides_path = OUT_ROOT / "overrides_colab_quickstart.yaml"
overrides_path.write_text(
    """experiment:
  artifacts_root: "{artifacts_root}"
  runs_root: "{runs_root}"
  reports_root: "{reports_root}"
dataset:
  fixture_path: "{fixture_path}"
""".format(
        # as_posix() emits forward slashes: inside a YAML double-quoted
        # scalar a backslash starts an escape sequence, so native Windows
        # paths would be corrupted or rejected. On POSIX the text is
        # unchanged.
        artifacts_root=(OUT_ROOT / "artifacts").as_posix(),
        runs_root=(OUT_ROOT / "runs").as_posix(),
        reports_root=(OUT_ROOT / "reports").as_posix(),
        fixture_path=fixture_path.as_posix(),
    ),
    encoding="utf-8",
)

# Fall back to an empty mapping if the loader returns a falsy value.
overrides = read_yaml(overrides_path) or {}

print("Overrides written to:", overrides_path)
print("Outputs root:", OUT_ROOT)


## Run the tiny end-to-end pipeline

This executes:

- `prepare` (fixture preprocessing)
- `splits` (pooled rep-disjoint + LOSO)
- `traineval` (tiny CNN, capped steps)
- `report` (aggregated tables)

Profile: `colab_quickstart`.


In [None]:
from tinyml_semg_classifier.cli import run_pipeline
from tinyml_semg_classifier.config import load_config


# Build the config from the base file, the profile, and the overrides
# mapping, then hand it to the pipeline entry point.
cfg = load_config(
    str(BASE_CFG),
    profile=PROFILE,
    overrides=[overrides],
)
run_pipeline(cfg)


## Inspect outputs

We print the generated protocol tables and one example `metrics.json` to confirm the pipeline produced results end-to-end.

In [None]:
import json
from pathlib import Path

# Show the aggregated report tables.
reports_root = OUT_ROOT.joinpath("reports")
tables_path = reports_root.joinpath("protocol_tables.md")

print(f"protocol_tables.md -> {tables_path}")
print(tables_path.read_text(encoding="utf-8"))

# Collect every metrics.json under the runs directory and preview the first.
metrics_paths = sorted(OUT_ROOT.joinpath("runs").rglob("metrics.json"))
print(f"metrics.json files: {len(metrics_paths)}")
if len(metrics_paths) > 0:
    sample = metrics_paths[0]
    print(f"Example run -> {sample}")
    with sample.open(encoding="utf-8") as fh:
        payload = json.load(fh)
    # Truncate so a large payload does not flood the cell output.
    preview = json.dumps(payload, indent=2)
    print(preview[:2000])

## Sizing: `size`

`size` benchmarks a few steps, probes concurrency, and estimates wall-time plus resources.

We keep this tiny and CPU-only for Colab speed.


In [None]:
import json

from tinyml_semg_classifier.cli import size
from tinyml_semg_classifier.config import load_config


# Rebuild the config from the same base file, profile, and overrides.
cfg = load_config(
    str(BASE_CFG),
    profile=PROFILE,
    overrides=[overrides],
)

# Small, CPU-only benchmark settings.
bench_options = {
    "warmup_steps": 1,
    "bench_train_steps": 5,
    "bench_val_steps": 5,
    "device": "cpu",
    "max_k": 1,
    "max_gpus": 1,
    "alpha": 1.0,
}
size(cfg, **bench_options)

# Read the sizing report back from the artifacts directory
# (presumably written by size() above; verify against the CLI).
sizing_path = OUT_ROOT.joinpath("artifacts", "sizing", "sizing.json")
with sizing_path.open(encoding="utf-8") as fh:
    sizing = json.load(fh)

print(f"sizing.json -> {sizing_path}")
print(json.dumps(sizing, indent=2)[:2000])

# Optional sections: printed only when present and non-empty.
recommendation = sizing.get("recommendation") or {}
walltime_by_gpus = sizing.get("walltime_by_gpus") or []
for label, value in (
    ("Recommendation:", recommendation),
    ("Walltime by GPUs:", walltime_by_gpus),
):
    if value:
        print(label, value)


## Next steps

- Switch `PROFILE` to `smoke` or `dev_mini` for a larger fixture run.
- Run without a profile (or with `dry_run`) for real NinaPro data.
- Use `configs/experiments/protocol_sensitivity_semg_cnn.yml` to change protocols, models, or seeds.
