# AML - Step 2 & Step 3 Runner (Colab)

This notebook runs Step 2 (feature sanity check) and Step 3 (evaluation) using `scripts/run.py`.

## 1. Setup Environment

In [None]:
# Clone repository (or upload your code)
!git clone --recursive https://github.com/sapeirone/aml-2025-mistake-detection.git code || echo "Repo already exists or using uploaded code"

# Change to code directory
import os
os.chdir('code')
print(f"Current directory: {os.getcwd()}")

In [None]:
# Install dependencies
!pip install -q torcheval
!pip install -q -r requirements-cpu.txt 2>/dev/null || pip install -q -r requirements.txt

## 2. Mount Google Drive (Optional)

If your features and checkpoints are in Google Drive, mount it here.

In [None]:
# Mount Google Drive so that files stored there are reachable under /content/drive.
from google.colab import drive

drive.mount(mountpoint='/content/drive')

## 3. Load Data & Checkpoints

Uncomment the option that matches your setup in each cell below, and update the placeholder paths so they point to your features and checkpoints.

In [None]:
# Features: choose ONE of the options below.
# Options 1 and 2 are templates - uncomment the lines and replace the
# placeholder path / URL before running.

# Option 1: From Google Drive
# (reads from /content/drive, so Google Drive must be mounted first)
# !mkdir -p data
# !unzip "/content/drive/MyDrive/path/to/features.zip" -d data/

# Option 2: From URL (if available)
# !wget -q <features_url> -O features.zip && unzip -q features.zip -d data/

# Option 3: Already in repo/data directory
# Nothing to download. The listing below is a quick check of what is in data/;
# if `ls` fails (e.g. the directory is missing) its error output is suppressed
# and the reminder message is printed instead.
print("Features should be in: data/video/omnivore/ and data/video/slowfast/")
!ls -la data/ 2>/dev/null || echo "Data directory not found - please add your features"

In [None]:
# Checkpoints: choose ONE of the options below.
# Options 1 and 2 are templates - uncomment the lines and replace the
# placeholder path / URL before running.

# Option 1: From Google Drive
# (reads from /content/drive, so Google Drive must be mounted first)
# !mkdir -p checkpoints
# !unzip "/content/drive/MyDrive/path/to/error_recognition_best.zip" -d checkpoints/

# Option 2: From URL
# Download from: https://utdallas.app.box.com/s/uz3s1alrzucz03sleify8kazhuc1ksl3
# !wget -q <checkpoint_url> -O checkpoints.zip && unzip -q checkpoints.zip -d checkpoints/

# Option 3: Already in repo
# Nothing to download. The listing below is a quick check of what is in
# checkpoints/; if `ls` fails (e.g. the directory is missing) its error output
# is suppressed and the reminder message is printed instead.
print("Checkpoints should be in: checkpoints/error_recognition_best/")
!ls -la checkpoints/ 2>/dev/null || echo "Checkpoints directory not found - please add your checkpoints"

## 4. Step 2: Feature Sanity Check

In [None]:
# Run Step 2 (feature sanity check) with default path (data/).
# The script path is relative, so the working directory must be the repository root.
!python scripts/run.py step2

In [None]:
# Or specify custom features root: uncomment the line below and replace
# /path/to/features with the directory that holds your features.
# !python scripts/run.py step2 --features_root /path/to/features

## 5. Step 3: Evaluation Reproduction

Run evaluations for different backbones, variants, and splits. Update checkpoint paths with actual epoch numbers.

In [None]:
# Omnivore - MLP - Step split
!python scripts/run.py step3 --split step --backbone omnivore --variant MLP \
  --ckpt checkpoints/error_recognition_best/MLP/omnivore/error_recognition_MLP_omnivore_step_epoch_43.pt \
  --threshold 0.6

In [None]:
# Omnivore - MLP - Recordings split
!python scripts/run.py step3 --split recordings --backbone omnivore --variant MLP \
  --ckpt checkpoints/error_recognition_best/MLP/omnivore/error_recognition_MLP_omnivore_recordings_epoch_XX.pt \
  --threshold 0.4

In [None]:
# Omnivore - Transformer - Step split
!python scripts/run.py step3 --split step --backbone omnivore --variant Transformer \
  --ckpt checkpoints/error_recognition_best/Transformer/omnivore/error_recognition_Transformer_omnivore_step_epoch_XX.pt \
  --threshold 0.6

In [None]:
# Omnivore - Transformer - Recordings split
!python scripts/run.py step3 --split recordings --backbone omnivore --variant Transformer \
  --ckpt checkpoints/error_recognition_best/Transformer/omnivore/error_recognition_Transformer_omnivore_recordings_epoch_XX.pt \
  --threshold 0.4

In [None]:
# SlowFast - MLP - Step split
!python scripts/run.py step3 --split step --backbone slowfast --variant MLP \
  --ckpt checkpoints/error_recognition_best/MLP/slowfast/error_recognition_MLP_slowfast_step_epoch_XX.pt \
  --threshold 0.6

In [None]:
# SlowFast - MLP - Recordings split
!python scripts/run.py step3 --split recordings --backbone slowfast --variant MLP \
  --ckpt checkpoints/error_recognition_best/MLP/slowfast/error_recognition_MLP_slowfast_recordings_epoch_XX.pt \
  --threshold 0.4

In [None]:
# SlowFast - Transformer - Step split
!python scripts/run.py step3 --split step --backbone slowfast --variant Transformer \
  --ckpt checkpoints/error_recognition_best/Transformer/slowfast/error_recognition_Transformer_slowfast_step_epoch_XX.pt \
  --threshold 0.6

In [None]:
# SlowFast - Transformer - Recordings split
!python scripts/run.py step3 --split recordings --backbone slowfast --variant Transformer \
  --ckpt checkpoints/error_recognition_best/Transformer/slowfast/error_recognition_Transformer_slowfast_recordings_epoch_XX.pt \
  --threshold 0.4