<a href="https://colab.research.google.com/github/aexomir/AML_mistake_detection/blob/feat%2Frnn/notebooks/rnn_baseline_colab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# RNN Baseline Training for CaptainCook4D SupervisedER

This notebook trains the V_RNN (RNN/LSTM) baseline for mistake detection and compares it against V1 (MLP) and V2 (Transformer) baselines.

## What this notebook does:
1. **Setup**: Clone repository and install dependencies
2. **Load Data**: Load features, annotations, and optionally checkpoints from Google Drive
3. **Train**: Train the RNN baseline model
4. **Evaluate**: Evaluate the trained model
5. **Compare**: Compare results against V1 (MLP) and V2 (Transformer) baselines

## Prerequisites:
You need to have:
- Pre-extracted features (Omnivore and SlowFast) in `.npz` format or zip files
- Annotation files (should be in the repository or uploaded separately)
- (Optional) Pre-trained checkpoints for comparison

## Quick Start:
1. Configure the repository settings and the Google Drive data/checkpoint paths in the configuration cells below
2. Run all cells sequentially


In [None]:
# ============================================
# REPOSITORY SETTINGS
# ============================================
# Source of the project code. Keep the GitHub URL to clone (recommended), or
# set REPO_URL to "" to switch to manual-upload mode.
REPO_URL = "https://github.com/aexomir/AML_mistake_detection.git"

# Branch to check out after cloning; "" keeps the repository's default branch.
REPO_BRANCH = "feat/rnn"

# Local directory (relative to the working directory) that holds the code.
REPO_DIR = "code"

# Echo the effective settings so they are visible in the cell output.
for _label, _value in (
    ("Repository URL", REPO_URL or "Manual upload mode"),
    ("Repository branch", REPO_BRANCH or "default"),
    ("Repository directory", REPO_DIR),
):
    print(f"{_label}: {_value}")



In [None]:
import os
import shutil

# Remove existing directory if it exists
if os.path.exists(REPO_DIR):
    print(f"Removing existing {REPO_DIR} directory...")
    shutil.rmtree(REPO_DIR)

# Clone repository
if REPO_URL:
    print(f"Cloning repository from {REPO_URL}...")
    clone_cmd = f"git clone {REPO_URL} {REPO_DIR}"
    result = os.system(clone_cmd)

    if result != 0:
        print(f"⚠ Clone failed. Please check the URL or upload files manually.")
        os.makedirs(REPO_DIR, exist_ok=True)
    else:
        print("✓ Repository cloned successfully")

        # Checkout specific branch if specified
        if REPO_BRANCH:
            print(f"Checking out branch: {REPO_BRANCH}")
            os.chdir(REPO_DIR)
            os.system(f"git checkout {REPO_BRANCH}")
            os.chdir('..')
            print(f"✓ Switched to branch: {REPO_BRANCH}")
else:
    print("Manual upload mode: Creating directory...")
    os.makedirs(REPO_DIR, exist_ok=True)

# Change to repository directory
if os.path.exists(REPO_DIR):
    os.chdir(REPO_DIR)
    print(f"\n✓ Changed to directory: {os.getcwd()}")
    print(f"\nRepository contents:")
    !ls -la
else:
    print(f"✗ Error: {REPO_DIR} directory not found!")


In [None]:
# Sanity-check that the working directory looks like the repository root.
import os

print(f"Current working directory: {os.getcwd()}")
print(f"\nChecking repository structure...")

# Files/directories the rest of the notebook relies on (relative to the repo root).
required_items = [
    'scripts/train_rnn_baseline.py',
    'core/evaluate.py',
    'dataloader',
    'base.py',
    'constants.py'
]

# Report every item, collecting the absent ones for the summary below.
missing = []
for item in required_items:
    present = os.path.exists(item)
    print(f"✓ Found: {item}" if present else f"✗ Missing: {item}")
    if not present:
        missing.append(item)

if not missing:
    print(f"\n✓ Repository structure looks good!")
else:
    print(f"\n⚠ Warning: Some required files/directories are missing!")
    print(f"Please ensure all files are present before proceeding.")


In [None]:
# Install dependencies
# Colab comes with PyTorch pre-installed, so we'll work with that
# Remove PyTorch version constraints to avoid conflicts
# NOTE: this cell uses `os` without importing it, so it relies on an earlier
# cell having run `import os`.
if os.path.exists('requirements.txt'):
    # Edit requirements.txt in place, deleting lines that start with
    # `torch==` / `torchvision==`; sed errors are silenced so the cell continues.
    !sed -i '/^torch==/d' requirements.txt 2>/dev/null || true
    !sed -i '/^torchvision==/d' requirements.txt 2>/dev/null || true

# Install torcheval (required for evaluation metrics)
!pip install -q torcheval

# Install all remaining dependencies from requirements.txt
# (requirements-cpu.txt is only used when requirements.txt is absent)
if os.path.exists('requirements.txt'):
    !pip install -q -r requirements.txt
elif os.path.exists('requirements-cpu.txt'):
    !pip install -q -r requirements-cpu.txt

# Install additional dependencies for RNN baseline
!pip install -q wandb loguru

# NOTE(review): this message is printed unconditionally; the pip exit codes
# above are not checked, so scroll up to confirm there were no install errors.
print("✓ All dependencies installed successfully")

# Verify PyTorch installation
import torch
print(f"\nPyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"CUDA device: {torch.cuda.get_device_name(0)}")


In [None]:
# ============================================
# DATA LOCATIONS
# ============================================
# True  -> read features/annotations/checkpoints from the Google Drive paths below.
# False -> direct-upload mode: upload the files yourself via the file browser.
USE_GOOGLE_DRIVE = True

# Google Drive locations (edit to match your own Drive layout).
# The feature and checkpoint entries may point at a .zip file or a directory.
OMNIVORE_DRIVE_PATH = "/content/drive/MyDrive/AML_mistake_detection/omnivore.zip"
SLOWFAST_DRIVE_PATH = "/content/drive/MyDrive/AML_mistake_detection/slowfast.zip"
CHECKPOINTS_DRIVE_PATH = "/content/drive/MyDrive/AML_mistake_detection/error_recognition_best.zip"
# Optional when the annotations already ship with the repository.
ANNOTATIONS_DRIVE_PATH = "/content/drive/MyDrive/AML_mistake_detection/annotations"

# Echo the effective configuration.
print("Data paths configured:")
for _label, _value in (
    ("Use Google Drive", USE_GOOGLE_DRIVE),
    ("Omnivore", OMNIVORE_DRIVE_PATH),
    ("SlowFast", SLOWFAST_DRIVE_PATH),
    ("Checkpoints", CHECKPOINTS_DRIVE_PATH),
    ("Annotations", ANNOTATIONS_DRIVE_PATH),
):
    print(f"  {_label}: {_value}")


In [None]:
# Mount Google Drive at /content/drive unless direct-upload mode was selected.
if not USE_GOOGLE_DRIVE:
    print("⚠ Google Drive not mounted. Please upload files directly using the file browser.")
else:
    # Imported lazily: google.colab is only needed when Drive is actually used.
    from google.colab import drive
    drive.mount('/content/drive')
    print("✓ Google Drive mounted")


In [None]:
# Create the local directory layout (relative to the current working directory)
# that the loading cells below copy features, checkpoints and annotations into.
import os

for _directory in (
    'data/video/omnivore',
    'data/video/slowfast',
    'checkpoints',
    'annotations/annotation_json',
    'annotations/data_splits',
    'er_annotations',
):
    # exist_ok keeps the cell safe to re-run.
    os.makedirs(_directory, exist_ok=True)

print("✓ Directory structure created")


In [None]:
# Load features from Google Drive or direct upload
import os
import shutil
import subprocess
import glob

def load_features(source_path, dest_path, feature_name):
    """Copy every ``.npz`` feature file from a zip archive or a directory into ``dest_path``.

    The source is searched recursively and the files are copied flat into
    ``dest_path`` (sub-directory structure is not preserved, so files sharing
    a basename overwrite one another).

    Args:
        source_path: Path to a zip archive (any regular file is treated as one)
            or to a directory containing ``.npz`` files.
        dest_path: Directory that receives the files; created if missing.
        feature_name: Human-readable label, used only in progress messages.

    Returns:
        True if at least one ``.npz`` file was copied; False if the source does
        not exist, holds no ``.npz`` files, or the archive cannot be extracted.
    """
    # Local imports keep this function self-contained within the cell.
    import tempfile
    import zipfile

    if not os.path.exists(source_path):
        print(f"⚠ {feature_name}: Source path not found: {source_path}")
        return False

    print(f"Loading {feature_name} features from: {source_path}")

    def _copy_npz_files(search_root):
        """Copy all .npz files found under search_root; return how many were copied."""
        npz_files = glob.glob(os.path.join(search_root, '**/*.npz'), recursive=True)
        if npz_files:
            # Without an existing directory, copy2 would write one *file* named dest_path.
            os.makedirs(dest_path, exist_ok=True)
            for npz_file in npz_files:
                shutil.copy2(npz_file, dest_path)
        return len(npz_files)

    if os.path.isfile(source_path):
        print(f"  Detected zip file, extracting...")
        try:
            # The temporary directory is removed on every exit path, so a failed
            # run can never leave stale files that confuse the next one.
            with tempfile.TemporaryDirectory(prefix='features_') as workdir:
                # Copy the archive to local disk first, then extract the local copy.
                local_zip = shutil.copy(source_path, os.path.join(workdir, 'archive.zip'))
                extracted_dir = os.path.join(workdir, 'extracted')
                with zipfile.ZipFile(local_zip) as archive:
                    archive.extractall(extracted_dir)
                copied = _copy_npz_files(extracted_dir)
        except Exception as e:
            # Deliberately best-effort: report the problem and let the notebook continue.
            print(f"  ✗ Error extracting {feature_name} zip: {e}")
            return False

        if copied:
            print(f"  ✓ Extracted and copied {copied} .npz files")
            return True
        print(f"  ⚠ No .npz files found in extracted zip")
        return False

    # Otherwise the source is a directory.
    print(f"  Detected directory, copying .npz files...")
    copied = _copy_npz_files(source_path)
    if copied:
        print(f"  ✓ Copied {copied} .npz files")
        return True
    print(f"  ⚠ No .npz files found in {source_path}")
    return False

# Pull both feature sets from Drive, or explain the manual route.
if not USE_GOOGLE_DRIVE:
    print("⚠ Please upload features manually:")
    print("  1. Use the file browser to upload .npz files or zip files")
    print("  2. Extract/copy them to data/video/omnivore/ and data/video/slowfast/")
else:
    load_features(OMNIVORE_DRIVE_PATH, 'data/video/omnivore', 'Omnivore')
    load_features(SLOWFAST_DRIVE_PATH, 'data/video/slowfast', 'SlowFast')

# Count the .npz files that actually landed in each feature directory
# (0 when the directory does not exist).
if os.path.exists('data/video/omnivore'):
    omnivore_count = sum(name.endswith('.npz') for name in os.listdir('data/video/omnivore'))
else:
    omnivore_count = 0

if os.path.exists('data/video/slowfast'):
    slowfast_count = sum(name.endswith('.npz') for name in os.listdir('data/video/slowfast'))
else:
    slowfast_count = 0

print(f"\nFeature file counts:")
print(f"  Omnivore: {omnivore_count} .npz files")
print(f"  SlowFast: {slowfast_count} .npz files")


In [None]:
# Load annotations (if not already in repository)
import os
import shutil

if not (USE_GOOGLE_DRIVE and os.path.exists(ANNOTATIONS_DRIVE_PATH)):
    print("⚠ Annotations not found in Drive. Checking repository...")
else:
    print(f"Loading annotations from: {ANNOTATIONS_DRIVE_PATH}")

    # (sub-directory on Drive, local destination directory); processed in this order.
    for drive_subdir, local_dir in (
        ('annotation_json', 'annotations/annotation_json'),
        ('data_splits', 'annotations/data_splits'),
        ('er_annotations', 'er_annotations'),
    ):
        drive_dir = os.path.join(ANNOTATIONS_DRIVE_PATH, drive_subdir)
        if not os.path.exists(drive_dir):
            continue
        # Only regular files directly inside the sub-directory are copied.
        for file in os.listdir(drive_dir):
            src = os.path.join(drive_dir, file)
            if os.path.isfile(src):
                dst = os.path.join(local_dir, file)
                shutil.copy2(src, dst)
                print(f"  ✓ Copied {file}")

# Whatever the source (Drive or repository), these files must exist.
print("\nVerifying annotation files...")
required_files = [
    'annotations/annotation_json/step_annotations.json',
    'annotations/annotation_json/error_annotations.json',
    'er_annotations/recordings_combined_splits.json'
]

missing = []
for file in required_files:
    present = os.path.exists(file)
    print(f"✓ Found: {file}" if present else f"✗ Missing: {file}")
    if not present:
        missing.append(file)

if not missing:
    print("\n✓ All required annotation files are present!")
else:
    print(f"\n⚠ Warning: {len(missing)} required annotation file(s) are missing!")
    print("Please ensure these files are available before running training.")


In [None]:
# Load checkpoints (optional - for comparison with existing baselines)
import os
import shutil
import subprocess
import tempfile
import zipfile

# Checkpoints are only pulled from Drive; in direct-upload mode this step is skipped.
checkpoint_path = CHECKPOINTS_DRIVE_PATH if USE_GOOGLE_DRIVE else None
checkpoints_dest = 'checkpoints/error_recognition_best'

if checkpoint_path and os.path.exists(checkpoint_path):
    print(f"Loading checkpoints from: {checkpoint_path}")

    if os.path.isfile(checkpoint_path):
        # Any regular file is treated as a zip archive; an unreadable one is
        # reported below instead of aborting the notebook (this step is optional).
        print("Detected zip file, extracting...")
        try:
            # The temporary directory is cleaned up on every exit path, so a
            # failed extraction never leaves files that break the next run.
            with tempfile.TemporaryDirectory(prefix='checkpoints_') as workdir:
                # Copy the archive to local disk first, then extract the local copy.
                local_zip = shutil.copy(checkpoint_path, os.path.join(workdir, 'checkpoints.zip'))
                extracted_base = os.path.join(workdir, 'extracted')
                with zipfile.ZipFile(local_zip) as archive:
                    archive.extractall(extracted_base)

                # Locate the checkpoint root: an `error_recognition_best` directory,
                # or a directory that directly contains MLP/ or Transformer/.
                # os.walk is top-down, so the shallowest match wins; fall back to
                # the archive root when nothing matches.
                extracted_path = extracted_base
                for root, dirs, files in os.walk(extracted_base):
                    if 'error_recognition_best' in dirs:
                        extracted_path = os.path.join(root, 'error_recognition_best')
                        break
                    if 'MLP' in dirs or 'Transformer' in dirs:
                        extracted_path = root
                        break

                print(f"Copying from: {extracted_path}")
                shutil.copytree(extracted_path, checkpoints_dest, dirs_exist_ok=True)
            print("✓ Checkpoints extracted")
        except Exception as e:
            print(f"✗ Error extracting checkpoints: {e}")
    else:
        # It's a directory
        print("Detected directory, copying...")
        if os.path.basename(checkpoint_path) == 'error_recognition_best':
            shutil.copytree(checkpoint_path, checkpoints_dest, dirs_exist_ok=True)
        else:
            # Copy the directory's *contents* so the local layout is always
            # checkpoints/error_recognition_best/<...>.
            os.makedirs(checkpoints_dest, exist_ok=True)
            for item in os.listdir(checkpoint_path):
                src = os.path.join(checkpoint_path, item)
                dst = os.path.join(checkpoints_dest, item)
                if os.path.isdir(src):
                    shutil.copytree(src, dst, dirs_exist_ok=True)
                else:
                    shutil.copy2(src, dst)
        print("✓ Checkpoints copied")
else:
    print("⚠ Checkpoints not found. This is optional - you can still train the RNN baseline.")
    print("   If you want to compare with existing baselines, download checkpoints from:")
    print("   https://utdallas.app.box.com/s/uz3s1alrzucz03sleify8kazhuc1ksl3")

# Verify checkpoints: count every .pt file below the local checkpoint directory.
if os.path.exists(checkpoints_dest):
    pt_files = []
    for root, dirs, files in os.walk(checkpoints_dest):
        pt_files.extend([os.path.join(root, f) for f in files if f.endswith('.pt')])
    print(f"\n✓ Found {len(pt_files)} checkpoint files")
else:
    print("\n⚠ Checkpoints directory not found (this is optional)")


## 2.5. Configure Checkpoint Saving to Google Drive


In [None]:
# ============================================
# CHECKPOINT SYNC SETTINGS (GOOGLE DRIVE)
# ============================================
# Trained checkpoints are copied to Google Drive after each training run so
# they are not lost when the Colab session ends.

# Drive directory that receives one sub-directory per backbone.
GDRIVE_CHECKPOINT_PATH = "/content/drive/MyDrive/AML_mistake_detection/error_recognition_best/RNN"
# Set to False to keep checkpoints local only.
ENABLE_CHECKPOINT_SYNC = True

print("Checkpoint Configuration:")
for _label, _value in (
    ("Enable Google Drive Sync", ENABLE_CHECKPOINT_SYNC),
    ("Google Drive Path", GDRIVE_CHECKPOINT_PATH),
    ("Local Checkpoint Path", "checkpoints/error_recognition/RNN/"),
):
    print(f"  {_label}: {_value}")
print("\nCheckpoints will be synced after training completes.")


In [None]:
# Helper function to sync checkpoints to Google Drive
import os
import shutil
from tqdm import tqdm

def sync_checkpoints_to_gdrive(local_path, gdrive_path, backbone_name):
    """Copy the ``.pt`` checkpoint files from ``local_path`` to Google Drive.

    Files are written to ``<gdrive_path>/<backbone_name>/`` and each copy is
    verified by comparing source and destination file sizes.

    Args:
        local_path: Local directory holding the checkpoint files.
        gdrive_path: Base directory on Google Drive.
        backbone_name: Sub-directory name appended to ``gdrive_path``.

    Returns:
        True if at least one file was copied and verified. False if syncing is
        disabled (``ENABLE_CHECKPOINT_SYNC``), Drive is not mounted, the local
        directory is missing or holds no ``.pt`` files, the Drive directory
        cannot be created, or no file could be synced.
    """
    rule = '=' * 60
    print("\n" + rule)
    print("Starting Checkpoint Sync to Google Drive")
    print(rule)

    # --- Preconditions: bail out early with an explanation -----------------
    if not ENABLE_CHECKPOINT_SYNC:
        print("⚠ Checkpoint sync is DISABLED")
        print("  Set ENABLE_CHECKPOINT_SYNC = True to enable")
        return False

    if not os.path.exists("/content/drive/MyDrive"):
        print("✗ ERROR: Google Drive not mounted!")
        print("  Please run the Google Drive mount cell first")
        return False

    if not os.path.exists(local_path):
        print("✗ ERROR: Local checkpoint path not found")
        print(f"  Path: {local_path}")
        print("  This usually means training failed or checkpoints weren't saved")
        return False

    # --- Inventory of the local directory -----------------------------------
    entries = os.listdir(local_path)
    pt_names = [name for name in entries if name.endswith('.pt')]

    print(f"\nLocal checkpoint directory: {local_path}")
    print(f"  Total files: {len(entries)}")
    print(f"  Checkpoint files (.pt): {len(pt_names)}")

    if not pt_names:
        print(f"✗ ERROR: No checkpoint files (.pt) found in {local_path}")
        # Only the first five entries are shown to keep the output short.
        print(f"  Files present: {entries[:5]}")
        return False

    # --- Destination directory ----------------------------------------------
    target_dir = os.path.join(gdrive_path, backbone_name)
    try:
        os.makedirs(target_dir, exist_ok=True)
        print(f"\nGoogle Drive path: {target_dir}")

        # Double-check that the directory is really there after makedirs.
        if not os.path.exists(target_dir):
            print("✗ ERROR: Failed to create Google Drive directory")
            return False
        print("  ✓ Directory verified")
    except Exception as e:
        print("✗ ERROR: Cannot create Google Drive directory")
        print(f"  Error: {e}")
        return False

    # --- Copy + verify each checkpoint ---------------------------------------
    print(f"\nSyncing {len(pt_names)} file(s)...")
    synced, failed = [], []

    for name in tqdm(pt_names, desc="Syncing"):
        try:
            src = os.path.join(local_path, name)
            dst = os.path.join(target_dir, name)
            shutil.copy2(src, dst)

            if not os.path.exists(dst):
                print(f"\n⚠ File not found after copy: {name}")
                failed.append(name)
                continue

            # A size mismatch indicates a truncated or incomplete copy.
            src_size = os.path.getsize(src)
            dst_size = os.path.getsize(dst)
            if src_size != dst_size:
                print(f"\n⚠ Size mismatch for {name}: {src_size} vs {dst_size}")
                failed.append(name)
                continue

            synced.append(name)
        except Exception as e:
            # One bad file must not abort the remaining copies.
            print(f"\n✗ Error syncing {name}: {e}")
            failed.append(name)

    # --- Summary --------------------------------------------------------------
    print("\n" + rule)
    print("Sync Complete!")
    print(rule)
    print(f"✓ Successfully synced: {len(synced)} file(s)")
    if failed:
        print(f"✗ Failed to sync: {len(failed)} file(s)")
        print(f"  Failed files: {failed}")
    print(f"\nLocation: {target_dir}")
    print(rule + "\n")

    return bool(synced)

print("✓ Checkpoint sync helper function loaded")


## 2.6. Verify Google Drive and Create Checkpoint Directories


In [None]:
# Pre-training checks: Google Drive must be mounted and writable, but only
# when checkpoints are going to be synced to it.
import os

print("="*60)
print("Pre-Training Verification")
print("="*60)

gdrive_root = "/content/drive/MyDrive"
drive_mounted = os.path.exists(gdrive_root)

if drive_mounted:
    print(f"✓ Google Drive is mounted at {gdrive_root}")

if not ENABLE_CHECKPOINT_SYNC:
    # Drive is only needed as the sync target, so training without it is
    # allowed (e.g. direct-upload mode) - just make the consequence explicit.
    print("\n⚠ Checkpoint sync is disabled")
    if not drive_mounted:
        print("  Google Drive is not mounted; checkpoints will only be kept in this Colab session.")
elif not drive_mounted:
    print("✗ ERROR: Google Drive is not mounted!")
    print("  Please run the 'Mount Google Drive' cell (Section 2) first.")
    raise RuntimeError("Google Drive not mounted")
else:
    print(f"\n✓ Checkpoint sync is enabled")
    print(f"Creating directories on Google Drive...")

    # One sub-directory per backbone, matching what the sync helper writes to.
    for backbone in ["omnivore", "slowfast"]:
        backbone_path = os.path.join(GDRIVE_CHECKPOINT_PATH, backbone)
        os.makedirs(backbone_path, exist_ok=True)

        # Verify directory was created
        if os.path.exists(backbone_path):
            print(f"  ✓ Created/verified: {backbone_path}")
        else:
            print(f"  ✗ Failed to create: {backbone_path}")
            raise RuntimeError(f"Cannot create Google Drive directory: {backbone_path}")

    # Fail now rather than after training if the Drive path is not writable.
    test_file = os.path.join(GDRIVE_CHECKPOINT_PATH, "test_write.tmp")
    try:
        with open(test_file, 'w') as f:
            f.write("test")
        os.remove(test_file)
        print(f"\n✓ Write permissions verified for Google Drive path")
    except Exception as e:
        print(f"\n✗ ERROR: Cannot write to Google Drive path!")
        print(f"  Error: {e}")
        # Chain the original error so the real cause stays in the traceback.
        raise RuntimeError(f"No write permissions to {GDRIVE_CHECKPOINT_PATH}") from e

print("\n" + "="*60)
print("✓ All pre-training checks passed!")
print("="*60)


## 3. Train RNN Baseline with Omnivore Features


In [None]:
# Log in to Weights & Biases through the wandb CLI before training starts.
# NOTE(review): this may prompt for an API key interactively - confirm it does
# not stall an unattended "Run all".
!wandb login

In [None]:
# Train RNN baseline with Omnivore features
# Default hyperparameters: hidden_size=256, num_layers=2, bidirectional=True, rnn_type=LSTM
#
# The training script runs as a child process and its output is streamed line
# by line, so progress is visible while training is still running.
import subprocess
import sys
import os

# Ensure we're in the repository root directory
repo_root = os.getcwd()
if not os.path.exists("scripts/train_rnn_baseline.py"):
    print(f"⚠ Error: scripts/train_rnn_baseline.py not found in {repo_root}")
    print("Please make sure you're in the repository root directory.")
else:
    print(f"Running from directory: {repo_root}")

    cmd = [
        sys.executable, "scripts/train_rnn_baseline.py",
        "--variant", "RNN",
        "--backbone", "omnivore",
        "--split", "recordings",
        "--batch_size", "4",
        "--num_epochs", "20",
        "--lr", "1e-3",
        "--weight_decay", "1e-3",
        "--rnn_hidden_size", "256",
        "--rnn_num_layers", "2",
        "--rnn_dropout", "0.2",
        "--rnn_bidirectional", "True",
        "--rnn_type", "LSTM",
        # "--segment_features_directory", "data/"
    ]

    print("\nRunning command:")
    print(" ".join(cmd))
    print("\n" + "="*60 + "\n")

    # Put the repo root first on PYTHONPATH so the script can import project
    # modules; skip an unset/empty PYTHONPATH to avoid a dangling separator.
    env = os.environ.copy()
    env['PYTHONPATH'] = os.pathsep.join(
        entry for entry in (repo_root, env.get('PYTHONPATH')) if entry
    )
    # Python block-buffers stdout when it is a pipe; disable that so the loop
    # below receives lines as they are produced.
    env['PYTHONUNBUFFERED'] = '1'

    # stderr is merged into stdout, so errors appear inline in the streamed log.
    with subprocess.Popen(
        cmd,
        cwd=repo_root,
        env=env,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
        bufsize=1,
    ) as process:
        for line in process.stdout:
            print(line, end="")

    # Leaving the `with` block waits for the process, so returncode is set.
    result = subprocess.CompletedProcess(cmd, process.returncode)

    if result.returncode != 0:
        print(f"\n⚠ Training failed with exit code {result.returncode}")
        print("See the log above for the error output.")
    else:
        print("\n✓ Training completed successfully!")

        # Sync checkpoints to Google Drive
        local_ckpt_path = "checkpoints/error_recognition/RNN/omnivore"
        sync_checkpoints_to_gdrive(local_ckpt_path, GDRIVE_CHECKPOINT_PATH, "omnivore")


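### Patching `core/models/blocks.py` to fix `pack_padded_sequence` error

**Note:** Run this cell *before* the training cell in Section 3 when training on a GPU runtime. The patch passes `lengths.cpu()` to `pack_padded_sequence`; without it, the first training run can fail because the `lengths` tensor is not on the CPU.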

In [None]:
import os

# In-place source patch: hand `lengths` to pack_padded_sequence as a CPU tensor.
file_path = "core/models/blocks.py"

original_line = "x = nn.utils.rnn.pack_padded_sequence(x, lengths, batch_first=True, enforce_sorted=False)"
replacement_line = "x = nn.utils.rnn.pack_padded_sequence(x, lengths.cpu(), batch_first=True, enforce_sorted=False)"

if os.path.exists(file_path):
    with open(file_path, 'r') as f:
        content = f.read()

    if original_line in content:
        new_content = content.replace(original_line, replacement_line)
        with open(file_path, 'w') as f:
            f.write(new_content)
        print(f"Patched {file_path}: 'lengths' argument for pack_padded_sequence moved to CPU.")
    elif replacement_line in content:
        # Re-running the cell is safe: report the no-op instead of a possible failure.
        print(f"{file_path} is already patched. Nothing to do.")
    else:
        # Neither form is present, so the upstream source has changed and the
        # patch no longer applies.
        print(f"Original line not found in {file_path}. Patching failed - inspect the file manually.")
else:
    print(f"Error: {file_path} not found.")


## 4. Train RNN Baseline with SlowFast Features


In [None]:
# Train RNN baseline with SlowFast features
#
# The training script runs as a child process and its output is streamed line
# by line, so progress is visible while training is still running.
import subprocess
import sys
import os

# Ensure we're in the repository root directory
repo_root = os.getcwd()
if not os.path.exists("scripts/train_rnn_baseline.py"):
    print(f"⚠ Error: scripts/train_rnn_baseline.py not found in {repo_root}")
    print("Please make sure you're in the repository root directory.")
else:
    print(f"Running from directory: {repo_root}")

    cmd = [
        sys.executable, "scripts/train_rnn_baseline.py",
        "--variant", "RNN",
        "--backbone", "slowfast",
        "--split", "recordings",
        "--batch_size", "4",
        "--num_epochs", "20",
        "--lr", "1e-3",
        "--weight_decay", "1e-3",
        "--rnn_hidden_size", "256",
        "--rnn_num_layers", "2",
        "--rnn_dropout", "0.2",
        "--rnn_bidirectional", "True",
        "--rnn_type", "LSTM",
        # "--segment_features_directory", "data/"
    ]

    print("\nRunning command:")
    print(" ".join(cmd))
    print("\n" + "="*60 + "\n")

    # Put the repo root first on PYTHONPATH so the script can import project
    # modules; skip an unset/empty PYTHONPATH to avoid a dangling separator.
    env = os.environ.copy()
    env['PYTHONPATH'] = os.pathsep.join(
        entry for entry in (repo_root, env.get('PYTHONPATH')) if entry
    )
    # Python block-buffers stdout when it is a pipe; disable that so the loop
    # below receives lines as they are produced.
    env['PYTHONUNBUFFERED'] = '1'

    # stderr is merged into stdout, so errors appear inline in the streamed log.
    with subprocess.Popen(
        cmd,
        cwd=repo_root,
        env=env,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
        bufsize=1,
    ) as process:
        for line in process.stdout:
            print(line, end="")

    # Leaving the `with` block waits for the process, so returncode is set.
    result = subprocess.CompletedProcess(cmd, process.returncode)

    if result.returncode != 0:
        print(f"\n⚠ Training failed with exit code {result.returncode}")
        print("See the log above for the error output.")
    else:
        print("\n✓ Training completed successfully!")

        # Sync checkpoints to Google Drive
        local_ckpt_path = "checkpoints/error_recognition/RNN/slowfast"
        sync_checkpoints_to_gdrive(local_ckpt_path, GDRIVE_CHECKPOINT_PATH, "slowfast")


## 5. Evaluate Trained Model


In [None]:
# Evaluate both trained models (Omnivore and SlowFast)
import subprocess
import sys
import os

def find_best_checkpoint(backbone):
    """Return the path of the checkpoint to evaluate for ``backbone``, or None.

    Two directories are searched, Google Drive first and then the local
    training output:

        <GDRIVE_CHECKPOINT_PATH>/<backbone>
        checkpoints/error_recognition/RNN/<backbone>

    The first directory containing any ``.pt`` file wins. An empty directory
    is skipped, so a Drive directory that exists but holds no checkpoints does
    not hide the local ones. Within the chosen directory a ``*_best.pt`` file
    is preferred (first by name if there are several); otherwise the most
    recently modified ``.pt`` file is used.

    Args:
        backbone: Backbone sub-directory name, e.g. "omnivore" or "slowfast".

    Returns:
        Path to the selected checkpoint file, or None if neither directory
        exists or none of them contains a ``.pt`` file.
    """
    candidate_dirs = [
        os.path.join(GDRIVE_CHECKPOINT_PATH, backbone),
        f"checkpoints/error_recognition/RNN/{backbone}",
    ]
    existing_dirs = [d for d in candidate_dirs if os.path.isdir(d)]

    if not existing_dirs:
        print(f"⚠ No checkpoint directory found for {backbone}")
        return None

    for checkpoint_dir in existing_dirs:
        # Sorted so the selection does not depend on os.listdir() ordering.
        pt_names = sorted(f for f in os.listdir(checkpoint_dir) if f.endswith('.pt'))
        if not pt_names:
            continue  # empty directory - try the next location

        best_names = [f for f in pt_names if f.endswith('_best.pt')]
        if best_names:
            return os.path.join(checkpoint_dir, best_names[0])

        # No explicit "best" file: take the newest one. Modification time is
        # used because names do not sort chronologically (epoch_9 > epoch_10);
        # the path itself breaks ties deterministically.
        pt_paths = [os.path.join(checkpoint_dir, f) for f in pt_names]
        return max(pt_paths, key=lambda path: (os.path.getmtime(path), path))

    return None

def evaluate_model(backbone, threshold=0.6):
    """Run ``core.evaluate`` in a subprocess for the RNN model of one backbone.

    Args:
        backbone: Backbone name passed to ``--backbone`` and used to locate
            the checkpoint via ``find_best_checkpoint``.
        threshold: Value forwarded to ``--threshold``.

    Returns:
        True if the evaluation process exited with code 0; False if no
        checkpoint was found or the process failed.
    """
    divider = '=' * 60
    print("\n" + divider)
    print(f"Evaluating RNN + {backbone.upper()}")
    print(divider)

    repo_root = os.getcwd()
    checkpoint_path = find_best_checkpoint(backbone)

    # Guard clauses: nothing to evaluate without a usable checkpoint file.
    if checkpoint_path is None:
        print(f"⚠ No checkpoint found for {backbone}. Skipping evaluation.")
        return False

    print(f"Using checkpoint: {checkpoint_path}")

    if not os.path.exists(checkpoint_path):
        print(f"⚠ Checkpoint file not found: {checkpoint_path}")
        return False

    cmd = [sys.executable, "-m", "core.evaluate"]
    cmd += ["--variant", "RNN"]
    cmd += ["--backbone", backbone]
    cmd += ["--split", "recordings"]
    cmd += ["--ckpt", checkpoint_path]
    cmd += ["--threshold", str(threshold)]

    print("\nRunning command:")
    print(" ".join(cmd))
    print()

    # The repo root goes first on PYTHONPATH so `core` and its siblings import.
    child_env = os.environ.copy()
    child_env['PYTHONPATH'] = f"{repo_root}{os.pathsep}{child_env.get('PYTHONPATH', '')}"

    completed = subprocess.run(cmd, cwd=repo_root, env=child_env, check=False)

    succeeded = completed.returncode == 0
    if succeeded:
        print(f"\n✓ Evaluation completed successfully for {backbone}!")
    else:
        print(f"\n⚠ Evaluation failed for {backbone} with exit code {completed.returncode}")
    return succeeded

# Run the evaluation for each backbone in turn (Omnivore first, then SlowFast).
print("Starting evaluation of trained RNN models...")
print("Threshold: 0.6")
print()

omnivore_success, slowfast_success = (
    evaluate_model(backbone_name, threshold=0.6)
    for backbone_name in ("omnivore", "slowfast")
)

# One-line verdict per backbone.
summary_rule = '=' * 60
print("\n" + summary_rule)
print("Evaluation Summary")
print(summary_rule)
for summary_label, succeeded in (
    ("RNN + Omnivore", omnivore_success),
    ("RNN + SlowFast", slowfast_success),
):
    print(f"{summary_label}: {'✓ Success' if succeeded else '✗ Failed'}")
print(summary_rule)

# The comparison section needs at least one successful evaluation.
if omnivore_success or slowfast_success:
    print("\nResults saved to: results/error_recognition/combined_results/")
    print("Proceed to the next section for comparison with other baselines.")


## 6. Compare Results: RNN vs MLP vs Transformer

This section compares the trained RNN baseline against existing MLP (V1) and Transformer (V2) baselines across all backbone combinations (Omnivore and SlowFast).

**What this section does:**
- Loads results from `results/error_recognition/combined_results/`
- Displays comparison table with key metrics (Precision, Recall, F1, Accuracy, AUC)
- Generates visualizations comparing all model combinations
- Identifies the best performing model configuration

**Note:** Ensure evaluation has been run for all models before running this comparison.


In [None]:
# Comprehensive results comparison: RNN vs MLP vs Transformer
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import os

# Configuration
threshold = 0.6
results_file = f"results/error_recognition/combined_results/step_True_substep_True_threshold_{threshold}.csv"

# Columns the comparison cannot work without: 'Split' drives the row filter,
# 'Variant' and 'Backbone' identify each model in the table and in the plots.
REQUIRED_COLUMNS = ['Split', 'Variant', 'Backbone']


def _variant_color(label):
    """Return the bar colour for a "<variant>+<backbone>" label.

    Labels containing 'RNN' are blue, labels containing 'MLP' are orange and
    everything else is green.
    """
    if 'RNN' in label:
        return '#1f77b4'
    if 'MLP' in label:
        return '#ff7f0e'
    return '#2ca02c'


def _plot_metric_grid(results, metrics, labels, output_path='model_comparison.png'):
    """Draw one bar chart per metric on a 2x2 grid, save it and show it.

    Args:
        results: DataFrame holding one row per model and one column per metric.
        metrics: Names of the metric columns to plot (at most four).
        labels: One x-axis label per row of ``results``, in row order.
        output_path: Where the PNG is written.
    """
    fig, axes = plt.subplots(2, 2, figsize=(16, 12))
    fig.suptitle('Model Comparison: RNN vs MLP vs Transformer', fontsize=16, fontweight='bold')
    axes_flat = axes.ravel()
    colors = [_variant_color(label) for label in labels]
    positions = range(len(labels))

    for ax, metric in zip(axes_flat, metrics):
        values = results[metric].tolist()

        bars = ax.bar(positions, values, color=colors, alpha=0.8, edgecolor='black')
        ax.set_xticks(positions)
        ax.set_xticklabels(labels, rotation=45, ha='right')
        ax.set_ylabel(metric, fontsize=12)
        ax.set_title(metric, fontsize=13, fontweight='bold')
        ax.grid(axis='y', alpha=0.3, linestyle='--')
        ax.set_ylim([0, 1.0])

        # Add value labels on bars
        for bar, val in zip(bars, values):
            if pd.isna(val):
                # A missing metric has no bar height to anchor a label to.
                continue
            ax.text(bar.get_x() + bar.get_width()/2., bar.get_height() + 0.01,
                    f'{val:.3f}', ha='center', va='bottom', fontsize=9)

    # Hide panels that received no metric instead of leaving empty axes.
    for unused_ax in axes_flat[len(metrics):]:
        unused_ax.set_visible(False)

    fig.tight_layout()
    fig.savefig(output_path, dpi=150, bbox_inches='tight')
    print(f"✓ Saved comparison plot to: {output_path}")
    plt.show()
    plt.close(fig)


def _plot_f1_by_backbone(results, output_path='f1_comparison_by_backbone.png'):
    """Draw a grouped bar chart of 'Step F1' per backbone, one bar per variant.

    A (variant, backbone) pair with no row in ``results`` is drawn as a
    zero-height bar without a value label. When a pair has several rows the
    first one is used.

    Args:
        results: DataFrame with 'Variant', 'Backbone' and 'Step F1' columns.
        output_path: Where the PNG is written.
    """
    fig, ax = plt.subplots(figsize=(14, 8))

    backbones_unique = sorted(results['Backbone'].unique())
    variants_unique = sorted(results['Variant'].unique())

    x = np.arange(len(backbones_unique))
    # Keep the original 0.25 width for up to three variants, but shrink it for
    # more so that neighbouring backbone groups never overlap.
    width = min(0.25, 0.8 / max(len(variants_unique), 1))

    for i, variant in enumerate(variants_unique):
        variant_data = []
        for backbone in backbones_unique:
            matching = results[(results['Variant'] == variant) &
                               (results['Backbone'] == backbone)]
            variant_data.append(matching['Step F1'].iloc[0] if len(matching) > 0 else 0)

        # Centre the group of bars on each backbone tick.
        offset = width * (i - len(variants_unique) / 2 + 0.5)
        bars = ax.bar(x + offset, variant_data, width, label=variant, alpha=0.8, edgecolor='black')

        # Add value labels
        for bar in bars:
            height = bar.get_height()
            if height > 0:
                ax.text(bar.get_x() + bar.get_width()/2., height + 0.01,
                        f'{height:.3f}', ha='center', va='bottom', fontsize=9)

    ax.set_xlabel('Backbone', fontsize=13, fontweight='bold')
    ax.set_ylabel('Step F1 Score', fontsize=13, fontweight='bold')
    ax.set_title('F1 Score Comparison by Backbone and Variant', fontsize=14, fontweight='bold')
    ax.set_xticks(x)
    ax.set_xticklabels(backbones_unique)
    ax.legend(title='Variant', fontsize=11)
    ax.grid(axis='y', alpha=0.3, linestyle='--')
    ax.set_ylim([0, 1.0])

    fig.tight_layout()
    fig.savefig(output_path, dpi=150, bbox_inches='tight')
    print(f"✓ Saved F1 comparison plot to: {output_path}")
    plt.show()
    plt.close(fig)


print(f"{'='*80}")
print("Comprehensive Model Comparison")
print(f"{'='*80}")
print(f"Results file: {results_file}")
print()

if not os.path.exists(results_file):
    print(f"⚠ Results file not found: {results_file}")
    print("Please run evaluation first.")
else:
    # Load results
    df = pd.read_csv(results_file)

    # Fail with a readable message rather than a KeyError on an unexpected CSV.
    missing_columns = [col for col in REQUIRED_COLUMNS if col not in df.columns]

    if missing_columns:
        print(f"⚠ Results file is missing required column(s): {', '.join(missing_columns)}")
    else:
        # Filter for recordings split
        df_filtered = df[df['Split'] == 'recordings'].copy()

        if len(df_filtered) == 0:
            print("⚠ No results found for 'recordings' split")
        else:
            # Select relevant columns
            columns_to_show = ['Variant', 'Backbone', 'Step Precision', 'Step Recall',
                               'Step F1', 'Step Accuracy', 'Step AUC']

            # Metric columns are optional: keep only the ones the CSV provides
            existing_columns = [col for col in columns_to_show if col in df_filtered.columns]

            print("="*80)
            print(f"Model Performance Comparison (Recordings Split, Threshold={threshold})")
            print("="*80)
            print()

            # Display full comparison table
            comparison_df = df_filtered[existing_columns].copy()

            # Sort by F1 score (descending)
            if 'Step F1' in comparison_df.columns:
                comparison_df = comparison_df.sort_values('Step F1', ascending=False)

            # Format numeric columns
            numeric_columns = comparison_df.select_dtypes(include=[np.number]).columns
            for col in numeric_columns:
                comparison_df[col] = comparison_df[col].round(4)

            print(comparison_df.to_string(index=False))
            print()

            # Highlight best model
            if 'Step F1' in comparison_df.columns:
                # Ignore rows without an F1 value; skip the section if none remain.
                f1_scores = comparison_df['Step F1'].dropna()
                if not f1_scores.empty:
                    best_model = comparison_df.loc[f1_scores.idxmax()]
                    print("="*80)
                    print("Best Model (by F1 Score):")
                    print("="*80)
                    print(f"Variant: {best_model['Variant']}")
                    print(f"Backbone: {best_model['Backbone']}")
                    print(f"F1 Score: {best_model['Step F1']:.4f}")
                    print()

            # Create visualizations
            print("="*80)
            print("Generating Visualizations...")
            print("="*80)

            # Prepare data for plotting
            variants = comparison_df['Variant'].tolist()
            backbones = comparison_df['Backbone'].tolist()
            labels = [f"{v}+{b}" for v, b in zip(variants, backbones)]

            metrics = ['Step Precision', 'Step Recall', 'Step F1', 'Step AUC']
            metrics = [m for m in metrics if m in comparison_df.columns]

            if len(metrics) > 0:
                # One panel per available metric
                _plot_metric_grid(comparison_df, metrics, labels)

                # Grouped bar chart by backbone (needs the F1 column)
                if 'Step F1' in comparison_df.columns:
                    _plot_f1_by_backbone(comparison_df)
                else:
                    print("⚠ 'Step F1' column not found; skipping F1 comparison by backbone")

                print()
                print("="*80)
                print("Analysis Complete!")
                print("="*80)
            else:
                print("⚠ No metrics found for visualization")

print()
print("Note: Make sure you've run training and evaluation for all models before comparison.")
