## 1. Configuration


In [None]:
# ---------------------------------------------------------------------------
# User-specific settings -- edit these before running the notebook
# ---------------------------------------------------------------------------
REPO_BRANCH = "main"  # or your branch name
REPO_URL = "https://github.com/your-username/AML_mistake_detection.git"  # UPDATE THIS!
HF_DATASET_REPO = "your-username/captaincook4d-features"  # UPDATE THIS!

# ---------------------------------------------------------------------------
# Experiment grid: one model is trained per (backbone, variant) pair
# ---------------------------------------------------------------------------
VARIANTS = ["MLP", "Transformer", "RNN"]
BACKBONES = ["egovlp", "perceptionencoder"]

# Summarise the grid so the reader can confirm the run size up front.
print(
    f"Will train {len(BACKBONES)} backbones × {len(VARIANTS)} variants = {len(BACKBONES) * len(VARIANTS)} models",
    f"Backbones: {BACKBONES}",
    f"Variants: {VARIANTS}",
    sep="\n",
)


## 2. Clone Repository and Install Dependencies


In [None]:
%%bash -s "$REPO_URL" "$REPO_BRANCH"
# Clone the project into /content, or bring an existing checkout up to date.
#   $1 = repository URL   (REPO_URL from the configuration cell)
#   $2 = branch to use    (REPO_BRANCH from the configuration cell)
# Abort on the first failing command so a broken clone/update is not silently ignored.
set -e
cd /content
if [ ! -d "AML_mistake_detection" ]; then
    git clone --branch "$2" "$1" AML_mistake_detection
else
    cd AML_mistake_detection
    # A bare `git pull` only updates whichever branch happens to be checked out;
    # explicitly switch to the requested branch so REPO_BRANCH is honoured on re-runs.
    git fetch origin "$2"
    git checkout "$2"
    git pull origin "$2"
fi


In [None]:
import os
# The requirements file below is given as a relative path, so switch into the
# cloned repository before installing.
os.chdir('/content/AML_mistake_detection')

# %pip (rather than !pip) targets the environment of the running kernel.
%pip install -q -r requirements.txt
# Packages imported directly by later cells of this notebook.
%pip install -q wandb huggingface_hub

print("✓ Dependencies installed")


## 3. Download Features from HuggingFace


In [None]:
from huggingface_hub import hf_hub_download, login, list_repo_files
from google.colab import userdata
from pathlib import Path

# Login to HuggingFace with the token stored in Colab's secret manager
hf_token = userdata.get('HF_TOKEN')
login(token=hf_token)

# Local root under which the per-backbone feature folders are created
features_base = Path('/content/AML_mistake_detection/data/features')

# The repo listing does not depend on the backbone, so fetch it once
# instead of issuing one listing request per loop iteration.
repo_files = list_repo_files(HF_DATASET_REPO, repo_type="dataset")

for backbone in BACKBONES:
    backbone_dir = features_base / backbone
    backbone_dir.mkdir(parents=True, exist_ok=True)

    print(f"Downloading {backbone} features...")

    # Keep only the .npy files stored under "<backbone>/" in the dataset repo
    files = [f for f in repo_files
             if f.startswith(f"{backbone}/") and f.endswith('.npy')]

    print(f"Found {len(files)} feature files")

    if not files:
        # Do not report success when nothing matched; surface the problem instead.
        print(f"⚠ No .npy files found under '{backbone}/' in {HF_DATASET_REPO}; skipping")
        continue

    # Download each file that is not already present locally
    for file_path in files:
        # hf_hub_download(local_dir=...) mirrors the repo-relative path under
        # local_dir, so the existence check must use that same full relative
        # path (not just the basename), otherwise files in sub-folders are
        # never detected as already downloaded.
        local_path = features_base / file_path

        if not local_path.exists():
            # NOTE(review): `local_dir_use_symlinks` was dropped here: it is
            # deprecated in recent huggingface_hub releases, and the package is
            # installed unpinned above -- confirm against the installed version.
            hf_hub_download(
                repo_id=HF_DATASET_REPO,
                repo_type="dataset",
                filename=file_path,
                local_dir=features_base,
            )

    print(f"✓ Downloaded {backbone} features to {backbone_dir}")

print("\n✓ All features downloaded")


## 4. Setup WandB


In [None]:
import wandb

# Authenticate with Weights & Biases using the API key kept in Colab's secrets
wandb_key = userdata.get("WANDB_API_KEY")
wandb.login(key=wandb_key)

print("✓ WandB configured")


## 5. Train Models


In [None]:
import os
import subprocess
import sys

# Training configuration (SPLIT and THRESHOLD are reused by the evaluation cell)
SPLIT = "recordings"
THRESHOLD = 0.6
EPOCHS = 100
BATCH_SIZE = 128


def _stream_subprocess(cmd):
    """Run ``cmd`` and echo its output to the cell as it is produced.

    stderr is merged into stdout so warnings and tracebacks appear in order
    with the regular log. Capturing the output instead would hide all progress
    until the process exits and would drop one of the two streams.

    Args:
        cmd: Argument list passed to ``subprocess.Popen``.

    Returns:
        The exit code of the child process (0 on success).
    """
    # Ask a Python child not to block-buffer its stdout when writing to a pipe,
    # so lines show up while training is still running.
    env = {**os.environ, "PYTHONUNBUFFERED": "1"}
    with subprocess.Popen(
        cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
        errors="replace",  # never abort the stream on an undecodable byte
        bufsize=1,         # line-buffered on our side of the pipe
        env=env,
    ) as proc:
        for line in proc.stdout:
            print(line, end="")
    # Popen's context manager waits for the process, so returncode is set here.
    return proc.returncode


# Maps "<variant>_<backbone>" -> "SUCCESS" / "FAILED"
training_results = {}

for backbone in BACKBONES:
    for variant in VARIANTS:
        model_name = f"{variant}_{backbone}"
        print(f"\n{'='*60}")
        print(f"Training: {model_name}")
        print(f"{'='*60}")

        # The RNN variant uses its own script; every other variant uses train_er.py
        if variant == "RNN":
            train_script = "scripts/train_rnn_baseline.py"
        else:
            train_script = "train_er.py"

        # Build training command (run with the kernel's own interpreter)
        cmd = [
            sys.executable, train_script,
            "--backbone", backbone,
            "--variant", variant,
            "--split", SPLIT,
            "--threshold", str(THRESHOLD),
            "--epochs", str(EPOCHS),
            "--batch_size", str(BATCH_SIZE),
            "--use_wandb"
        ]

        # A failing run is recorded and the loop moves on to the next model,
        # so one bad configuration does not abort the whole grid.
        returncode = _stream_subprocess(cmd)
        if returncode == 0:
            training_results[model_name] = "SUCCESS"
            print(f"✓ {model_name} training completed")
        else:
            print(f"✗ {model_name} training failed (exit code {returncode})")
            training_results[model_name] = "FAILED"

print(f"\n{'='*60}")
print("Training Summary:")
print(f"{'='*60}")
for model, status in training_results.items():
    status_icon = "✓" if status == "SUCCESS" else "✗"
    print(f"{status_icon} {model}: {status}")


## 6. Evaluate Models


In [None]:
# Maps "<variant>_<backbone>" -> "SUCCESS" / "FAILED"
evaluation_results = {}

for backbone in BACKBONES:
    for variant in VARIANTS:
        model_name = f"{variant}_{backbone}"
        print(f"\n{'='*60}")
        print(f"Evaluating: {model_name}")
        print(f"{'='*60}")

        # Build evaluation command (same split/threshold as used for training)
        cmd = [
            sys.executable, "core/evaluate.py",
            "--backbone", backbone,
            "--variant", variant,
            "--split", SPLIT,
            "--threshold", str(THRESHOLD)
        ]

        try:
            # Merge stderr into stdout so nothing the script emits is lost:
            # capturing the two streams separately showed only stdout on
            # success and only stderr on failure, hiding warnings in the
            # first case and the log leading up to the error in the second.
            result = subprocess.run(
                cmd,
                check=True,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                text=True,
            )
            print(result.stdout)
            evaluation_results[model_name] = "SUCCESS"
            print(f"✓ {model_name} evaluation completed")
        except subprocess.CalledProcessError as e:
            # e.stdout holds the combined stdout+stderr of the failed run.
            # The failure is recorded and the loop continues with the next model.
            print(f"✗ {model_name} evaluation failed:")
            print(e.stdout)
            evaluation_results[model_name] = "FAILED"

print(f"\n{'='*60}")
print("Evaluation Summary:")
print(f"{'='*60}")
for model, status in evaluation_results.items():
    status_icon = "✓" if status == "SUCCESS" else "✗"
    print(f"{status_icon} {model}: {status}")


## 7. Generate Comparison Report


In [None]:
# Analysis pipeline: each entry is (progress message, script to execute).
# The scripts run in this order with the kernel's interpreter; check=True
# stops the cell at the first script that exits with a non-zero status.
analysis_steps = [
    ("Extracting metrics...", "analysis/extract_metrics.py"),
    ("\nGenerating comparison tables...", "analysis/comparison_tables.py"),
    ("\nGenerating visualizations...", "analysis/comparison_visualizations.py"),
]

for banner, script in analysis_steps:
    print(banner)
    cmd = [sys.executable, script]
    subprocess.run(cmd, check=True)

print("\n✓ All analysis complete!")
print("Check analysis/outputs/ for results")
