# Experiment 3: Internal Masking vs Occlusion Training

**Goal:** Test whether internal channel masking during training can improve robustness to occlusions, as an alternative to training on occluded images.

**Sessions (6 per model):**
- S1: Clean train (baseline)
- S2: Occluded train (standard practice)
- S3: Clean train + mask backbone_early
- S4: Clean train + mask backbone_late
- S5: Clean train + mask neck
- S6: Clean train + mask head

**Evaluation:**
- All models are tested on both `test_clean` and `test_occluded` (40% occlusion)
- Metrics are computed with our custom evaluation system (P/R/F1, per-class, counting)

## 0. Configuration

**EDIT THIS CELL to switch between smoke test and full run:**

In [None]:
# ============================================================
# EXPERIMENT 3 CONFIGURATION - EDIT THIS CELL ONLY
# ============================================================
import time

# EPOCHS: 1 runs a smoke test, 50 runs the full experiment
EPOCHS = 1  # <-- CHANGE THIS: 1 = smoke test, 50 = full run

# Models to train (list both for the full experiment, one for a quick test)
MODELS = ["yolov8m", "rtdetr-l"]

# Sessions to train (all 6 for the full experiment, fewer for a quick test)
SESSIONS_TO_RUN = [
    "S1_clean_train",
    "S2_occ_train",
    "S3_mask_backbone_early",
    "S4_mask_backbone_late",
    "S5_mask_neck",
    "S6_mask_head",
]

# Masking parameters (shared by every masking session)
P_APPLY = 0.5      # Probability of applying masking per batch
P_CHANNELS = 0.2   # Fraction of channels to zero when masking

# Training parameters
IMGSZ = 640
BATCH = -1  # Auto batch size
PATIENCE = 10  # Early stopping patience
SEED = 42  # Fixed seed for reproducibility

# Occlusion level used for the occluded test set
OCCLUSION_LEVEL = "level_040"  # 40% occlusion

# ============================================================
# GOOGLE DRIVE - RUN_ID for resume capability
# ============================================================
# IMPORTANT: Smoke test and full run should use DIFFERENT RUN_IDs!
#
# Option 1: Automatic (recommended)
#   - A smoke test always uses the fixed ID "E3_SMOKE_TEST" (easy to delete/ignore)
#   - A full run gets a fresh timestamp-based ID every time this cell executes
#
# Option 2: Manual resume
#   - Set RUN_ID to a specific string to continue a previous run
#
_is_smoke_test = EPOCHS <= 1
RUN_ID = "E3_SMOKE_TEST" if _is_smoke_test else time.strftime("E3_%Y%m%d_%H%M%S")

# To RESUME a specific run, uncomment and set:
# RUN_ID = "E3_20240115_143022"  # <-- paste your RUN_ID here

# ============================================================
_run_label = '(SMOKE TEST)' if _is_smoke_test else '(FULL RUN)'
for _line in (
    "Configuration:",
    f"  EPOCHS: {EPOCHS} {_run_label}",
    f"  MODELS: {MODELS}",
    f"  SESSIONS: {len(SESSIONS_TO_RUN)} sessions",
    f"  Masking: p_apply={P_APPLY}, p_channels={P_CHANNELS}",
    f"  RUN_ID: {RUN_ID}",
    "",
):
    print(_line)

if _is_smoke_test:
    for _line in (
        "NOTE: Smoke test uses RUN_ID='E3_SMOKE_TEST'",
        "      This will NOT interfere with your full 50-epoch run.",
        "      Full run will get a unique timestamp-based RUN_ID.",
    ):
        print(_line)

## 1. Setup & Mount Google Drive (FIRST!)

**IMPORTANT:** Drive is mounted at the very beginning so you can approve permissions and then leave the computer running overnight.

In [None]:
# Detect the runtime, mount Google Drive FIRST when in Colab, and move into the
# repository root so that the relative paths used by later cells resolve.
import sys
import os
from pathlib import Path

# True when the google.colab package has already been loaded into this interpreter
IN_COLAB = 'google.colab' in sys.modules

if IN_COLAB:
    print("Running in Google Colab")
    
    # ============================================================
    # MOUNT GOOGLE DRIVE FIRST - This will prompt for permission
    # ============================================================
    from google.colab import drive
    drive.mount('/content/drive')
    print("Google Drive mounted successfully!")
    
    # Clone repo if not already cloned.
    # The checkout directory (Deep_Learning_Gil_Alon) is deliberately named
    # differently from the GitHub repository; the existence check and the
    # chdir below both use the local directory name.
    if not os.path.exists('/content/Deep_Learning_Gil_Alon'):
        !git clone https://github.com/gil-attar/Deep_Learning_Project_Gil_Alon.git Deep_Learning_Gil_Alon
    os.chdir('/content/Deep_Learning_Gil_Alon')
else:
    print("Running locally - Drive backup not needed")
    # When the working directory path contains 'Experiment_3', step two levels up.
    # NOTE(review): presumably this lands on the repo root when the notebook is
    # launched from <repo>/experiments/Experiment_3 - confirm for other layouts.
    if 'Experiment_3' in os.getcwd():
        os.chdir('../..')

print(f"Working directory: {os.getcwd()}")

## 1.1 Setup Drive Folder Structure (Same as Experiment 1)

In [None]:
# In Colab: create the per-run output folder on Drive, record the RUN_ID there,
# and replace <repo>/runs/exp3 with a symlink to it. Locally: nothing is linked.
from pathlib import Path

PROJECT_NAME = "Deep_Learning_Project_Gil_Alon"

if IN_COLAB:
    # Setup Drive paths (same structure as Experiment 1):
    #   MyDrive/Colab_Outputs/<PROJECT_NAME>/<RUN_ID>/E3_runs
    DRIVE_ROOT = Path("/content/drive/MyDrive/Colab_Outputs") / PROJECT_NAME / RUN_ID
    PERSIST_E3_RUNS = DRIVE_ROOT / "E3_runs"  # Training outputs go here
    
    PERSIST_E3_RUNS.mkdir(parents=True, exist_ok=True)
    
    # ============================================================
    # SAVE RUN_ID TO DRIVE (so you can find it later to resume)
    # ============================================================
    run_id_file = DRIVE_ROOT / "RUN_ID.txt"
    run_id_file.write_text(f"{RUN_ID}\n\nTo resume after disconnect:\n1. Set RUN_ID = \"{RUN_ID}\" in config cell\n2. Re-run all cells\n")
    
    # Also save to a "latest" file for easy lookup.
    # NOTE(review): this file is shared by all runs and is rewritten on every
    # execution, so a smoke test replaces the pointer to the last full run.
    latest_file = Path("/content/drive/MyDrive/Colab_Outputs") / PROJECT_NAME / "LATEST_E3_RUN_ID.txt"
    latest_file.write_text(f"{RUN_ID}")
    
    print(f"Drive root: {DRIVE_ROOT}")
    print(f"E3 runs will be saved to: {PERSIST_E3_RUNS}")
    print(f"")
    print(f"========================================")
    print(f"RUN_ID saved to: {run_id_file}")
    print(f"Latest run ID: {latest_file}")
    print(f"========================================")
    
    # Symlink local runs/ to Drive (so training writes directly to Drive)
    REPO = Path.cwd()
    E3_RUNS_IN_REPO = REPO / "runs" / "exp3"
    
    # Remove whatever currently sits at runs/exp3, then symlink to Drive.
    # CAUTION: if runs/exp3 is a real directory (not a link), its contents are
    # deleted here without being copied to Drive first.
    !rm -rf "{E3_RUNS_IN_REPO}"
    E3_RUNS_IN_REPO.parent.mkdir(parents=True, exist_ok=True)
    !ln -s "{PERSIST_E3_RUNS}" "{E3_RUNS_IN_REPO}"
    
    print(f"\nSymlink created: {E3_RUNS_IN_REPO} -> {PERSIST_E3_RUNS}")
else:
    # Later cells print PERSIST_E3_RUNS only when IN_COLAB is true
    PERSIST_E3_RUNS = None
    print("Running locally - no Drive symlink needed")

In [None]:
# Install dependencies (-q keeps pip's output quiet).
# NOTE(review): no versions are pinned, so the installed packages may differ
# between runs made at different times.
!pip install -q ultralytics roboflow pyyaml pillow numpy matplotlib pandas tqdm

In [None]:
# Report the PyTorch build and, when CUDA is usable, the name of device 0
import torch

print(f"PyTorch version: {torch.__version__}")
_cuda_ready = torch.cuda.is_available()
print(f"CUDA available: {_cuda_ready}")
if _cuda_ready:
    print(f"GPU: {torch.cuda.get_device_name(0)}")

In [None]:
# Imports
from pathlib import Path
import json
import shutil
import yaml
import pandas as pd
from datetime import datetime

from ultralytics import YOLO, RTDETR

# Import our experiment modules
sys.path.insert(0, str(Path.cwd()))
from experiments.Experiment_3.mask_presets import (
    get_mask_prefixes, get_session_config, SESSIONS
)
from experiments.Experiment_3.channel_masking import MaskingManager

print("All imports successful!")

## 2. Download Dataset

In [None]:
# Download dataset if not exists
if not Path("data/raw/train/images").exists():
    os.environ["ROBOFLOW_API_KEY"] = "zEF9icmDY2oTcPkaDcQY"
    !python scripts/download_dataset.py --output_dir data/raw
else:
    print("Dataset already exists")

# Verify
print(f"Train images: {len(list(Path('data/raw/train/images').glob('*')))}")
print(f"Val images: {len(list(Path('data/raw/valid/images').glob('*')))}")
print(f"Test images: {len(list(Path('data/raw/test/images').glob('*')))}")

## 3. Build Evaluation Indices & Generate Occluded Data

**IMPORTANT:** All sessions use the SAME dataset splits and SAME occluded test images (fixed seed=42).

In [None]:
# Build evaluation indices (same for ALL sessions).
# The builder script is only invoked when test_index.json is absent, so an
# existing index is reused as-is on later executions of this cell.
if not Path("data/processed/evaluation/test_index.json").exists():
    !python scripts/build_evaluation_indices.py \
        --dataset_root data/raw \
        --output_dir data/processed/evaluation
else:
    print("Evaluation indices already exist")

# Verify: load the index and report the counts stored in its 'metadata' section.
# test_index stays defined at notebook scope after this cell.
with open("data/processed/evaluation/test_index.json") as f:
    test_index = json.load(f)
print(f"Test set: {test_index['metadata']['num_images']} images, {test_index['metadata']['total_objects']} objects")

In [None]:
# Generate occluded TRAINING data for S2 (seed=42 for reproducibility).
# Generation is skipped when the level_040 output folder already exists.
occluded_train_dir = Path("data/occluded_train_040/level_040")

if not occluded_train_dir.exists():
    print("Generating occluded training data (40% occlusion, seed=42)...")
    # The script's --test_index option is given the TRAIN index here.
    # NOTE(review): this assumes train_index.json was written by
    # build_evaluation_indices.py; the earlier cell only checks for
    # test_index.json before deciding whether to run that script.
    # NOTE(review): --levels and --seed are literals, not the SEED constant
    # from the config cell - keep them in sync by hand.
    !python scripts/generate_synthetic_occlusions.py \
        --test_index data/processed/evaluation/train_index.json \
        --images_dir data/raw/train/images \
        --labels_dir data/raw/train/labels \
        --output_dir data/occluded_train_040 \
        --levels 0.4 \
        --seed 42
else:
    print(f"Occluded training data already exists at {occluded_train_dir}")

In [None]:
# Generate occluded TEST data (seed=42 - SAME for all sessions!).
# The expected output folder is derived from OCCLUSION_LEVEL; generation is
# skipped when that folder already exists.
occluded_test_dir = Path(f"data/synthetic_occlusion/{OCCLUSION_LEVEL}")

if not occluded_test_dir.exists():
    print(f"Generating occluded test data ({OCCLUSION_LEVEL}, seed=42)...")
    # NOTE(review): --levels is the literal 0.4 while the folder checked above
    # follows OCCLUSION_LEVEL. If OCCLUSION_LEVEL is changed from "level_040",
    # this command presumably still produces only the 0.4 level - confirm and
    # update the literal together with the config value.
    !python scripts/generate_synthetic_occlusions.py \
        --test_index data/processed/evaluation/test_index.json \
        --images_dir data/raw/test/images \
        --labels_dir data/raw/test/labels \
        --output_dir data/synthetic_occlusion \
        --levels 0.4 \
        --seed 42
else:
    print(f"Occluded test data already exists at {occluded_test_dir}")

# Recap of the data locations shared by every session
print(f"\nAll sessions will use:")
print(f"  - Same train images: data/raw/train/")
print(f"  - Same occluded train (S2 only): {occluded_train_dir}")
print(f"  - Same test images: data/raw/test/")
print(f"  - Same occluded test: {occluded_test_dir}")

## 4. Create Data YAML Files

In [None]:
# Write the two dataset descriptors used for training:
#   data_clean.yaml      -> clean training images (S1, S3-S6)
#   data_occ_train.yaml  -> occluded training images (S2 only)
# Both share the validation/test images and the class list of the raw dataset.


def _write_data_yaml(config, destination):
    """Dump *config* as block-style YAML to *destination* and report it."""
    with open(destination, 'w') as handle:
        yaml.dump(config, handle, default_flow_style=False)
    print(f"Created {destination}")


# Class names are taken from the dataset's original data.yaml
with open('data/raw/data.yaml', 'r') as f:
    original_config = yaml.safe_load(f)

# Clean training: every split is relative to data/raw
clean_config = dict(
    path=str(Path('data/raw').resolve()),
    train='train/images',
    val='valid/images',
    test='test/images',
    names=original_config['names'],
    nc=len(original_config['names']),
)

# Occluded training: rooted at data/ so that the occluded train images and
# the raw val/test images can be addressed from a single base path
occ_train_config = dict(
    path=str(Path('data').resolve()),
    train='occluded_train_040/level_040/images',
    val='raw/valid/images',
    test='raw/test/images',
    names=original_config['names'],
    nc=len(original_config['names']),
)

Path('data/processed').mkdir(parents=True, exist_ok=True)
_write_data_yaml(clean_config, 'data/processed/data_clean.yaml')
_write_data_yaml(occ_train_config, 'data/processed/data_occ_train.yaml')

## 5. Training Functions

In [None]:
def get_model(model_name: str):
    """Instantiate the detector wrapper for ``<model_name>.pt``.

    The wrapper class is picked by a case-insensitive substring match on the
    name: 'yolo' is checked first, then 'rtdetr'.

    Raises:
        ValueError: if the name contains neither substring.
    """
    lowered = model_name.lower()
    checkpoint = f"{model_name}.pt"

    if 'yolo' in lowered:
        return YOLO(checkpoint)
    if 'rtdetr' in lowered:
        return RTDETR(checkpoint)

    raise ValueError(f"Unknown model: {model_name}")


def get_model_type(model_name: str) -> str:
    """Return the model family key ('yolo' or 'rtdetr') used by the mask presets.

    Matching is a case-insensitive substring test; 'yolo' takes precedence
    when both substrings occur in the name.

    Raises:
        ValueError: if the name contains neither substring.
    """
    lowered = model_name.lower()
    for family in ('yolo', 'rtdetr'):
        if family in lowered:
            return family
    raise ValueError(f"Unknown model: {model_name}")


def is_session_complete(output_dir: Path, model_name: str, session_name: str) -> bool:
    """Tell whether ``<output_dir>/<model>__<session>`` holds a finished run.

    A run counts as finished only when both its ``DONE`` marker and its
    ``weights/best.pt`` file exist.
    """
    session_dir = output_dir / f"{model_name}__{session_name}"
    if not (session_dir / "DONE").exists():
        return False
    return (session_dir / "weights" / "best.pt").exists()


def train_session(
    model_name: str,
    session_name: str,
    epochs: int,
    output_dir: Path,
    p_apply: float = 0.5,
    p_channels: float = 0.2
) -> dict:
    """
    Train one (model, session) combination and record the outcome on disk.

    A session whose run directory already holds a DONE marker and
    ``weights/best.pt`` is skipped. Any other session is trained starting from
    the ``<model_name>.pt`` checkpoint; no ``resume`` is requested, so a run
    that was interrupted part-way starts over.

    Markers written into ``<output_dir>/<model_name>__<session_name>``:
        DONE    - created as the very last step of a successful run
        FAILED  - contains the error text of the most recent failed attempt

    Args:
        model_name: Model identifier, e.g. "yolov8m" or "rtdetr-l".
        session_name: Key understood by ``get_session_config``.
        epochs: Number of training epochs.
        output_dir: Directory that receives one sub-directory per run.
        p_apply: Masking probability handed to ``MaskingManager``.
        p_channels: Channel fraction handed to ``MaskingManager``.

    Returns:
        Dictionary with training results and paths:
        ``{"status": "skipped", "run_dir"}``,
        ``{"status": "success", "run_dir", "weights_path"}`` or
        ``{"status": "failed", "error", "run_dir"}``.
    """
    session_config = get_session_config(session_name)
    run_name = f"{model_name}__{session_name}"
    run_dir = output_dir / run_name

    print(f"\n{'='*60}")
    print(f"TRAINING: {run_name}")
    print(f"{'='*60}")
    print(f"Description: {session_config['description']}")
    print(f"Epochs: {epochs}")

    # Check if already completed
    if is_session_complete(output_dir, model_name, session_name):
        print(f"Session already completed. Skipping.")
        return {"status": "skipped", "run_dir": str(run_dir)}

    # A FAILED marker left by an earlier attempt must not outlive this one:
    # without this, a successful retry would end up with both DONE and FAILED.
    (run_dir / "FAILED").unlink(missing_ok=True)

    # Select data.yaml based on session
    if session_config['train_data'] == 'occluded':
        data_yaml = 'data/processed/data_occ_train.yaml'
    else:
        data_yaml = 'data/processed/data_clean.yaml'

    print(f"Data: {data_yaml}")

    # Load model
    model = get_model(model_name)

    # Setup masking if needed
    masking_manager = None
    if session_config['mask_location'] is not None:
        mask_location = session_config['mask_location']
        model_type = get_model_type(model_name)
        layer_prefixes = get_mask_prefixes(model_type, mask_location)

        print(f"Masking: {mask_location} -> layers {layer_prefixes}")
        print(f"Masking params: p_apply={p_apply}, p_channels={p_channels}")

        # NOTE(review): the hooks are attached to `model.model` BEFORE
        # `model.train()` runs. Ultralytics' trainer may construct its own
        # network instance for training - verify (e.g. via the saved
        # masking_summary.json) that masking is actually applied during
        # training and not only registered on the pre-training module.
        masking_manager = MaskingManager(model.model, p_apply, p_channels)
        num_hooks = masking_manager.add_masking_to_layers(layer_prefixes)
        print(f"Added {num_hooks} masking hooks")
    else:
        print("Masking: None")

    # Train
    try:
        model.train(
            data=data_yaml,
            epochs=epochs,
            imgsz=IMGSZ,
            batch=BATCH,
            patience=PATIENCE,
            save=True,
            project=str(output_dir),
            name=run_name,
            exist_ok=True,
            pretrained=True,
            optimizer='auto',
            verbose=True,
            seed=SEED
        )

        run_dir.mkdir(parents=True, exist_ok=True)

        # Save masking summary if used. This happens BEFORE the DONE marker so
        # that a failure while writing the summary cannot leave a run that is
        # flagged as both DONE and FAILED.
        if masking_manager is not None:
            with open(run_dir / "masking_summary.json", 'w') as f:
                json.dump(masking_manager.get_summary(), f, indent=2)

        # Mark as done - deliberately the final write of a successful run
        (run_dir / "DONE").touch()

        print(f"\nTraining complete: {run_name}")
        print(f"Saved to: {run_dir}")

        return {
            "status": "success",
            "run_dir": str(run_dir),
            "weights_path": str(run_dir / "weights" / "best.pt")
        }

    except Exception as e:
        # Best-effort by design: one failing session is recorded and reported
        # to the caller instead of aborting the remaining sessions.
        print(f"\nTraining FAILED: {run_name}")
        print(f"Error: {e}")

        run_dir.mkdir(parents=True, exist_ok=True)
        (run_dir / "FAILED").write_text(str(e))

        return {"status": "failed", "error": str(e), "run_dir": str(run_dir)}

    finally:
        # Always detach the hooks, whatever the outcome of training
        if masking_manager is not None:
            masking_manager.remove_all_hooks()

print("Training functions defined!")

## 6. Run All Training Sessions

In [None]:
# Train every (model, session) combination.
# Output goes directly to Drive via symlink (crash-safe!)

output_dir = Path("runs/exp3")
output_dir.mkdir(parents=True, exist_ok=True)

print(f"Output directory: {output_dir}")
if IN_COLAB:
    print(f"(Symlinked to Drive: {PERSIST_E3_RUNS})")

# All combinations, iterating sessions within each model
run_plan = [(m, s) for m in MODELS for s in SESSIONS_TO_RUN]

# Report what an earlier execution already finished
print("\nChecking for previously completed sessions...")
for model_name, session_name in run_plan:
    if is_session_complete(output_dir, model_name, session_name):
        print(f"  Found: {model_name}__{session_name}")

# Run training, tagging each result with the combination it belongs to
training_results = []
total_sessions = len(run_plan)

for current, (model_name, session_name) in enumerate(run_plan, start=1):
    print(f"\n[{current}/{total_sessions}] {model_name} - {session_name}")

    result = train_session(
        model_name=model_name,
        session_name=session_name,
        epochs=EPOCHS,
        output_dir=output_dir,
        p_apply=P_APPLY,
        p_channels=P_CHANNELS
    )
    result.update(model=model_name, session=session_name)
    training_results.append(result)

# Summary: one line per session; any status other than success/skipped is FAIL
status_labels = {'success': 'OK', 'skipped': 'SKIP'}

print("\n" + "="*60)
print("TRAINING SUMMARY")
print("="*60)
for r in training_results:
    status_icon = status_labels.get(r['status'], 'FAIL')
    print(f"[{status_icon}] {r['model']}__{r['session']}")

# Save results
results_file = output_dir / "training_results.json"
with open(results_file, 'w') as f:
    json.dump(training_results, f, indent=2)
print(f"\nResults saved to {results_file}")

## 7. Evaluation Functions

In [None]:
# Import evaluation modules
from evaluation.io import load_ground_truth, load_class_names
from evaluation.metrics import (
    eval_detection_prf_at_iou,
    eval_per_class_metrics_and_confusions,
    eval_counting_quality
)
from evaluation.plots import plot_all_metrics
from tqdm import tqdm


def generate_predictions(
    model_path: str,
    test_images_dir: str,
    test_index: dict,
    *,
    conf: float = 0.01,
    imgsz: int = 640
) -> list:
    """Run a trained model over the indexed test images and collect detections.

    Args:
        model_path: Path to the weights file. A path containing 'rtdetr'
            (case-insensitive) is loaded with ``RTDETR``, anything else
            with ``YOLO``.
        test_images_dir: Directory holding the image files.
        test_index: Index dict; each entry of ``test_index['images']`` must
            provide ``image_filename`` and ``image_id``.
        conf: Confidence floor passed to ``predict``. The default is kept low
            so that callers can apply their own thresholds afterwards.
        imgsz: Inference image size passed to ``predict``.

    Returns:
        One ``{"image_id", "detections"}`` dict per image that was found on
        disk. Each detection has ``class_id``, ``class_name``, ``confidence``,
        ``bbox`` (xyxy list) and ``bbox_format``. Indexed images whose file is
        missing are skipped and reported in a single warning at the end.
    """
    model_cls = RTDETR if 'rtdetr' in model_path.lower() else YOLO
    model = model_cls(model_path)

    images_root = Path(test_images_dir)
    predictions = []
    missing = []  # filenames listed in the index but absent from images_root

    for img_data in tqdm(test_index['images'], desc="Inference"):
        image_path = images_root / img_data['image_filename']

        if not image_path.exists():
            missing.append(img_data['image_filename'])
            continue

        result = model.predict(
            source=str(image_path),
            conf=conf,
            imgsz=imgsz,
            verbose=False
        )[0]

        # Convert each tensor once instead of calling .item() per box
        boxes = result.boxes
        detections = [
            {
                "class_id": int(cls_id),
                "class_name": result.names[int(cls_id)],
                "confidence": float(score),
                "bbox": xyxy,
                "bbox_format": "xyxy"
            }
            for cls_id, score, xyxy in zip(
                boxes.cls.tolist(), boxes.conf.tolist(), boxes.xyxy.tolist()
            )
        ]

        predictions.append({
            "image_id": img_data['image_id'],
            "detections": detections
        })

    # Skipped images produce no prediction entry, which can distort the
    # metrics computed downstream - make that visible instead of silent.
    if missing:
        print(
            f"WARNING: {len(missing)} indexed image(s) not found in "
            f"{images_root} and skipped (first: {missing[0]})"
        )

    return predictions


def is_eval_complete(output_dir: Path, model_name: str, session_name: str, test_type: str) -> bool:
    """Tell whether ``metrics.json`` exists for this model/session/test-set evaluation."""
    eval_name = f"{model_name}__{session_name}__test_{test_type}"
    metrics_file = output_dir / "evaluations" / eval_name / "metrics.json"
    return metrics_file.exists()


def evaluate_session(
    model_name: str,
    session_name: str,
    weights_path: str,
    test_type: str,
    output_dir: Path
) -> dict:
    """Run inference and the custom metric suite for one model on one test set.

    Everything is written to
    ``<output_dir>/evaluations/<model>__<session>__test_<test_type>``:
    ``predictions.json``, the plots, and finally ``metrics.json`` (whose
    presence is what ``is_eval_complete`` checks).

    The operating confidence threshold is the sweep value with the highest F1
    on the very test set being evaluated; per-class and counting metrics are
    computed at that threshold.

    Args:
        model_name: Model identifier used in names and in the metrics record.
        session_name: Session identifier used in names and in the metrics record.
        weights_path: Weights file handed to ``generate_predictions``.
        test_type: 'clean' selects the raw test images; any other value selects
            the synthetic-occlusion images for ``OCCLUSION_LEVEL``.
        output_dir: Root directory of the experiment outputs.

    Returns:
        The metrics dict that was saved to ``metrics.json``.
    """
    index_path = "data/processed/evaluation/test_index.json"

    eval_name = f"{model_name}__{session_name}__test_{test_type}"
    eval_dir = output_dir / "evaluations" / eval_name
    eval_dir.mkdir(parents=True, exist_ok=True)

    print(f"\nEvaluating: {eval_name}")

    # Pick the image folder for the requested test set
    test_images_dir = (
        "data/raw/test/images"
        if test_type == 'clean'
        else f"data/synthetic_occlusion/{OCCLUSION_LEVEL}/images"
    )

    # The same index file drives inference, ground truth and class names
    with open(index_path) as f:
        test_index = json.load(f)

    predictions = generate_predictions(weights_path, test_images_dir, test_index)

    # Keep the raw predictions next to the metrics (useful for debugging)
    with open(eval_dir / "predictions.json", 'w') as f:
        json.dump({"predictions": predictions}, f)

    gts = load_ground_truth(index_path)
    class_names = load_class_names(index_path)

    # P/R/F1 at IoU 0.5 for each confidence threshold of the sweep
    sweep = eval_detection_prf_at_iou(
        predictions, gts,
        iou_threshold=0.5,
        conf_thresholds=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]
    )

    # Operating point = threshold with the highest F1 (first one wins ties)
    best_thr = max(sweep, key=lambda thr: sweep[thr]['f1'])
    best_metrics = sweep[best_thr]
    operating_conf = float(best_thr)

    per_class = eval_per_class_metrics_and_confusions(
        predictions, gts,
        conf_threshold=operating_conf,
        class_names=class_names
    )

    counting = eval_counting_quality(
        predictions, gts,
        conf_threshold=operating_conf,
        class_names=class_names
    )

    # Plots are written into eval_dir
    plot_all_metrics(
        threshold_sweep=sweep,
        per_class_results=per_class['per_class'],
        confusion_data=per_class,
        counting_results=counting,
        output_dir=str(eval_dir),
        run_name=eval_name
    )

    # metrics.json is written last; it doubles as the completion marker
    metrics = {
        "run_name": eval_name,
        "model": model_name,
        "session": session_name,
        "test_type": test_type,
        "best_threshold": operating_conf,
        **{key: best_metrics[key] for key in ('precision', 'recall', 'f1', 'tp', 'fp', 'fn')},
        "count_mae_matched": counting['matched_only']['global_mae'],
        "count_mae_all": counting['all_predictions']['global_mae']
    }

    with open(eval_dir / "metrics.json", 'w') as f:
        json.dump(metrics, f, indent=2)

    print(f"  Best F1: {best_metrics['f1']:.4f} @ conf={best_thr}")

    return metrics

print("Evaluation functions defined!")

## 8. Run All Evaluations

In [None]:
# Evaluate every finished session on the clean and on the occluded test set.
# Evaluations that already have a metrics.json are loaded instead of recomputed.
all_metrics = []

for model_name in MODELS:
    for session_name in SESSIONS_TO_RUN:
        run_name = f"{model_name}__{session_name}"

        # Only sessions with a DONE marker and best.pt are evaluated
        if not is_session_complete(output_dir, model_name, session_name):
            print(f"SKIP (no training): {run_name}")
            continue

        weights_path = output_dir / run_name / "weights" / "best.pt"
        if not weights_path.exists():
            print(f"WARNING: Weights not found for {run_name}")
            continue

        for test_type in ('clean', 'occluded'):
            if not is_eval_complete(output_dir, model_name, session_name, test_type):
                metrics = evaluate_session(
                    model_name, session_name, str(weights_path), test_type, output_dir
                )
            else:
                print(f"SKIP (already done): {run_name}__test_{test_type}")
                # Reuse the metrics stored by the earlier evaluation
                cached_file = (
                    output_dir / "evaluations"
                    / f"{run_name}__test_{test_type}" / "metrics.json"
                )
                with open(cached_file) as f:
                    metrics = json.load(f)

            all_metrics.append(metrics)

# Save all metrics
with open(output_dir / "all_metrics.json", 'w') as f:
    json.dump(all_metrics, f, indent=2)

print(f"\nAll evaluations complete! Results in {output_dir}")

## 9. Generate Summary Table & Plots

In [None]:
# Build the summary table: one row per (model, session) with the clean and the
# occluded scores side by side.
df = pd.DataFrame(all_metrics)

if len(df) == 0:
    print("No metrics to summarize")
else:
    key_cols = ['model', 'session']
    score_cols = ['f1', 'precision', 'recall']

    def _scores_for(test_type, suffix):
        """Rows of one test set with the score columns renamed to *_<suffix>."""
        subset = df.loc[df['test_type'] == test_type, key_cols + score_cols].copy()
        return subset.rename(columns={
            'f1': f'F1_{suffix}',
            'precision': f'P_{suffix}',
            'recall': f'R_{suffix}',
        })

    summary_clean = _scores_for('clean', 'clean')
    summary_occ = _scores_for('occluded', 'occ')

    # Inner join: only sessions evaluated on BOTH test sets appear in the table
    summary = summary_clean.merge(summary_occ, on=key_cols)

    print("\n" + "="*80)
    print("EXPERIMENT 3 RESULTS SUMMARY")
    print("="*80)
    print(summary.to_string(index=False))

    # Save CSV
    csv_path = output_dir / "summary_metrics.csv"
    summary.to_csv(csv_path, index=False)
    print(f"\nSaved to {csv_path}")

In [None]:
# Grouped bar charts comparing the sessions, one bar group per session and one
# bar per model.
import matplotlib.pyplot as plt
import numpy as np

if len(df) > 0:
    def plot_comparison(df, metric_col, test_type, title, output_path):
        """Draw, save and show a grouped bar chart of *metric_col* for one test set.

        A (model, session) pair without a row in *df* is drawn as a zero bar.
        """
        subset = df[df['test_type'] == test_type]
        sessions = subset['session'].unique()
        models = subset['model'].unique()

        positions = np.arange(len(sessions))
        bar_width = 0.35

        fig, ax = plt.subplots(figsize=(14, 6))

        for idx, model in enumerate(models):
            rows_for_model = subset[subset['model'] == model]

            heights = []
            for s in sessions:
                match = rows_for_model[rows_for_model['session'] == s]
                heights.append(match[metric_col].values[0] if len(match) > 0 else 0)

            # Centre the group of bars on each session's tick
            shift = bar_width * (idx - len(models)/2 + 0.5)
            bars = ax.bar(positions + shift, heights, bar_width, label=model)

            # Value label just above each bar
            for bar, val in zip(bars, heights):
                ax.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.01,
                        f'{val:.3f}', ha='center', va='bottom', fontsize=8)

        ax.set_title(title)
        ax.set_xlabel('Session')
        ax.set_ylabel(metric_col.upper())
        ax.set_xticks(positions)
        # Break the session names at underscores so that the tick labels fit
        ax.set_xticklabels([s.replace('_', '\n') for s in sessions], fontsize=9)
        ax.set_ylim(0, 1.0)
        ax.grid(axis='y', alpha=0.3)
        ax.legend()

        plt.tight_layout()
        plt.savefig(output_path, dpi=150, bbox_inches='tight')
        plt.show()
        print(f"Saved: {output_path}")

    plots_dir = output_dir / "plots"
    plots_dir.mkdir(exist_ok=True)

    plot_comparison(
        df, 'f1', 'clean',
        'Experiment 3: F1 Score on Clean Test Set',
        plots_dir / "comparison_f1_clean.png",
    )
    plot_comparison(
        df, 'f1', 'occluded',
        f'Experiment 3: F1 Score on Occluded Test Set ({OCCLUSION_LEVEL})',
        plots_dir / "comparison_f1_occluded.png",
    )
else:
    print("No data to plot")

## 10. Final Summary

In [None]:
# Closing recap: configuration used, output location and the key files
banner = "=" * 80

print("\n" + banner)
print("EXPERIMENT 3 COMPLETE!")
print(banner)

print("\nConfiguration:")
for line in (
    f"  Epochs: {EPOCHS}",
    f"  Models: {MODELS}",
    f"  Sessions: {SESSIONS_TO_RUN}",
    f"  Masking: p_apply={P_APPLY}, p_channels={P_CHANNELS}",
    f"  Seed: {SEED}",
):
    print(line)

print(f"\nOutputs saved to: {output_dir}")
if IN_COLAB:
    print(f"  (Backed up to Drive: {PERSIST_E3_RUNS})")

print("\nKey files:")
for entry in (
    "training_results.json",
    "all_metrics.json",
    "summary_metrics.csv",
    "<model>__<session>/weights/best.pt (trained weights)",
    "<model>__<session>/DONE (completion marker)",
):
    print(f"  - {entry}")

print("\n" + banner)