# Stack — Diffusion Policy Training on Colab

Train the diffusion policy (ResNet18 + ConditionalUnet1D) on real demonstration data.

**Setup:** `Runtime > Change runtime type > T4 GPU` (or A100 with Colab Pro)

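**Data:** 17 sessions on Google Drive (BU account) under `MyDrive/stack_sessions/`, stored as `session_*` folders (or `.zip` archives, which are extracted automatically), already COLMAP-processed with poses.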

In [None]:
# Confirm a CUDA device is attached before doing anything expensive
import torch

assert torch.cuda.is_available(), "No GPU! Change runtime type."

# Query device 0 once and report its name and total memory (bytes -> GB)
gpu_name = torch.cuda.get_device_name(0)
gpu_props = torch.cuda.get_device_properties(0)
print(f"GPU: {gpu_name}")
print(f"VRAM: {gpu_props.total_memory / 1e9:.1f} GB")

In [None]:
# Clone repo and install as package
# Try a fresh clone into /content/stack; if that command fails (for example
# because the directory is already there from an earlier run of this cell),
# fall back to `git pull` inside the existing checkout. The clone's stderr is
# discarded, so the reason for a failed clone is not shown.
!git clone https://github.com/corneliusgruss/stack.git /content/stack 2>/dev/null || (cd /content/stack && git pull)
# Switch the notebook's working directory to the repo root; the training,
# evaluation and copy cells use repo-relative paths (configs/, data/raw, outputs/).
%cd /content/stack
# Editable install of the repo itself (quiet output).
!pip install -q -e .
# wandb is installed separately; the training cell passes --wandb.
!pip install -q wandb

## Mount Drive & Link Data

In [None]:
# Mount Google Drive, expose the recorded sessions under the repo's data/raw
# directory via symlinks, and print which sessions are complete enough to
# train on.
import json
import os
import subprocess
from pathlib import Path

from google.colab import drive

drive.mount('/content/drive')

# Symlink Drive sessions into the repo's data directory
drive_sessions = Path('/content/drive/MyDrive/stack_sessions')
local_data = Path('/content/stack/data/raw')
local_data.mkdir(parents=True, exist_ok=True)

# Fail early with an actionable message instead of a bare traceback from
# iterdir() further down when the folder is absent (e.g. wrong account).
if not drive_sessions.is_dir():
    raise FileNotFoundError(
        f"Session folder not found: {drive_sessions}. "
        "Check that Drive is mounted with the account that holds the sessions."
    )

# Unzip any zipped sessions
# NOTE(review): assumes each archive unpacks to a top-level folder named after
# the zip (z.stem); otherwise it is re-extracted on every run — confirm.
for z in sorted(drive_sessions.glob('*.zip')):
    session_dir = drive_sessions / z.stem
    if not session_dir.exists():
        print(f"Extracting {z.name}...")
        subprocess.run(['unzip', '-q', '-o', str(z), '-d', str(drive_sessions)], check=True)

# Symlink each session into data/raw/
sessions = sorted(
    d for d in drive_sessions.iterdir()
    if d.is_dir() and d.name.startswith('session_')
)
for s in sessions:
    link = local_data / s.name
    # exists() follows symlinks, so a dangling link left over from an earlier
    # run reports False and os.symlink would then raise FileExistsError.
    # Remove stale links before re-creating them.
    if link.is_symlink() and not link.exists():
        link.unlink()
    if not link.exists():
        os.symlink(s, link)

# Verify
valid = 0
for s in sorted(local_data.iterdir()):
    if not s.is_dir() or not s.name.startswith('session_'):
        continue
    has_poses = (s / 'poses.json').exists()
    has_rgb = (s / 'rgb').exists()

    # Missing or unparseable metadata is treated as "no metadata" so one bad
    # file is reported as MISSING DATA rather than aborting the whole report.
    meta = {}
    meta_path = s / 'metadata.json'
    if meta_path.exists():
        try:
            with open(meta_path, encoding='utf-8') as f:
                meta = json.load(f)
        except json.JSONDecodeError:
            meta = {}

    processed = meta.get('slamProcessed', False)
    n_frames = meta.get('rgbFrameCount', '?')
    # A session is trainable only with poses, RGB frames, and the processed flag.
    status = 'ready' if (has_poses and has_rgb and processed) else 'MISSING DATA'
    print(f"  {s.name}: {n_frames} frames — {status}")
    if status == 'ready':
        valid += 1

print(f"\n{valid} sessions ready for training")

## Train

In [None]:
# Train on real data
# ~12M params, 17 sessions, 100 epochs
# T4: ~15-30 min depending on dataset size
# Paths are relative to the current working directory (the repo root after the
# setup cell's %cd). data/raw is where the data-linking cell places the session
# symlinks; outputs/real_v1 is the directory the evaluate/copy/download cells
# read from.
# NOTE(review): --wandb presumably turns on Weights & Biases logging and may
# prompt for an API key — confirm against stack.scripts.train.
!python -m stack.scripts.train \
    --config configs/default.yaml \
    --data-dir data/raw \
    --output-dir outputs/real_v1 \
    --device cuda \
    --wandb

## Evaluate & Download

In [None]:
# Evaluate best checkpoint
# Loads checkpoint_best.pt from the training cell's output directory and is
# pointed at the same data/raw sessions that were passed to training.
# NOTE(review): no separate held-out data directory is given here — confirm how
# stack.scripts.eval selects its evaluation samples.
!python -m stack.scripts.eval \
    --checkpoint outputs/real_v1/checkpoint_best.pt \
    --data-dir data/raw

In [None]:
# Copy checkpoint to Drive for persistence
# Done in Python rather than with `!cp` so that a missing file or an unmounted
# Drive raises an error instead of being followed by a misleading success
# message. Source paths are relative to the current working directory, exactly
# as they were for the shell command.
import shutil
from pathlib import Path

output_dir = Path('outputs/real_v1')
drive_dir = Path('/content/drive/MyDrive/stack_sessions')

for artifact in ('checkpoint_best.pt', 'normalizer.pt'):
    # shutil.copy raises FileNotFoundError if the source or the destination
    # directory does not exist, stopping the cell before the print below.
    shutil.copy(output_dir / artifact, drive_dir / artifact)

print("Checkpoint saved to Drive")

In [None]:
# Alternatively, download the best checkpoint directly instead of (or in
# addition to) keeping it on Drive
from google.colab import files

best_checkpoint = 'outputs/real_v1/checkpoint_best.pt'
files.download(best_checkpoint)