# UMTA Experiment 1: Architecture Search (Kaggle Notebook - Multi-GPU)

This notebook runs **Experiment 1** with **multi-GPU parallel execution**.

## Multi-GPU Execution Strategy
- **Automatically detects and uses all available GPUs** (e.g., 2x T4)
- **Splits architectures across GPUs** for parallel training
- **Example:** With 2 GPUs and 4 architectures:
  - GPU 0: SimpleCNN, ResUNet
  - GPU 1: UNet, AttentionUNet
- **Up to ~2x faster** than sequential execution when 2 GPUs are available

**Repo:** https://github.com/hanjidani/UMTA_Final_Experiments


In [None]:
# Cell 1: Install Dependencies
# Install required libraries for CLIP and training

!pip install -q ftfy regex tqdm
!pip install -q git+https://github.com/openai/CLIP.git


In [None]:
# Cell 2: Clone Repository

import os

# Remove existing repo to ensure we get the latest code
if os.path.exists("UMTA_Final_Experiments"):
    !rm -rf UMTA_Final_Experiments

# Clone the repository
!git clone https://github.com/hanjidani/UMTA_Final_Experiments.git

# Move into the directory
%cd UMTA_Final_Experiments

print("‚úÖ Repository cloned and ready")




In [None]:
# Cell 3: Unify Kaggle Dataset Structure
# This cell handles Kaggle's split dataset folders (train.X1, train.X2, etc.)
# and merges them into a single unified structure using symbolic links

import os
import yaml

# Define paths
input_root = "/kaggle/input/imagenet100"
unified_root = "/tmp/imagenet100"  # Unified dataset location

print(f"üõ†Ô∏è Fixing Kaggle dataset structure from {input_root}...")

# Create destination folders
os.makedirs(f"{unified_root}/train", exist_ok=True)
os.makedirs(f"{unified_root}/val", exist_ok=True)

# Merge 'train.X*' folders into a single 'train' folder using symlinks
# This makes Python think it's one big folder without actually copying files
!cp -rs {input_root}/train.X*/* {unified_root}/train/ 2>/dev/null || echo "No train.X* folders found, checking for single train folder..."
!cp -rs {input_root}/val.X*/* {unified_root}/val/ 2>/dev/null || echo "No val.X* folders found, checking for single val folder..."

# Fallback: If no split folders, try direct copy/symlink
if not os.listdir(f"{unified_root}/train"):
    !cp -rs {input_root}/train/* {unified_root}/train/ 2>/dev/null || true
if not os.listdir(f"{unified_root}/val"):
    !cp -rs {input_root}/val/* {unified_root}/val/ 2>/dev/null || true

print("‚úÖ Dataset unified successfully!")

# Update config file to point to unified location
config_path = 'exp1_architecture/config.yaml'

with open(config_path, 'r') as f:
    config = yaml.safe_load(f)

# Point config to the new unified location
if 'data' not in config:
    config['data'] = {}
config['data']['path'] = unified_root

with open(config_path, 'w') as f:
    yaml.dump(config, f, default_flow_style=False)

print(f"‚úÖ Config updated to read from: {unified_root}")


In [None]:
# Update config file to point to unified location
# This cell relies on `yaml` and `unified_root` from the dataset-unification
# cell, so that cell must have been run first. Re-running this cell is
# harmless: it rewrites the same value.
config_path = 'exp1_architecture/config.yaml'

with open(config_path, 'r') as f:
    # An empty YAML file parses to None; fall back to an empty mapping
    config = yaml.safe_load(f) or {}

# Point config to the new unified location (create/replace the 'data'
# section if it is missing or not a mapping)
if not isinstance(config.get('data'), dict):
    config['data'] = {}
config['data']['path'] = unified_root

with open(config_path, 'w') as f:
    yaml.dump(config, f, default_flow_style=False)

print(f"✅ Config updated to read from: {unified_root}")


In [None]:
# Cell 4: Select Pair Index (Optional) + Check GPU Availability
# Defines PAIR_INDEX (read by the run cell) and reports which GPUs PyTorch
# can see. Nothing is trained here.

import torch

# ==========================================
#        PAIR SELECTION (OPTIONAL)
# ==========================================
# Set this to None to run ALL pairs from config
# Or set to 0-9 to run a specific pair across all architectures
#
# Pairs:
# 0: Fish -> Shark       (Easy)
# 1: Finch -> Bunting    (Easy)
# 2: Plant -> Frog       (Med-Easy)
# 3: Dog -> Cat          (Med-Easy)
# 4: Truck -> Car        (Medium)
# 5: Snake -> Lizard     (Medium)
# 6: Bird -> Object      (Med-Hard)
# 7: Bird -> Artifact    (Med-Hard)
# 8: Fish -> Paper       (Hard)
# 9: Fish -> Traffic Light (Hard)
# ==========================================

PAIR_INDEX = None  # <--- Set to None for all pairs, or 0-9 for specific pair

NUM_PAIRS = 10  # number of pairs listed in the table above

# Fail fast on a typo instead of discovering it after training has started.
# bool is rejected explicitly because it is a subclass of int.
if PAIR_INDEX is not None:
    is_valid_index = (
        isinstance(PAIR_INDEX, int)
        and not isinstance(PAIR_INDEX, bool)
        and 0 <= PAIR_INDEX < NUM_PAIRS
    )
    if not is_valid_index:
        raise ValueError(
            f"PAIR_INDEX must be None or an integer in 0-{NUM_PAIRS - 1}, got {PAIR_INDEX!r}"
        )

# Check GPU availability
num_gpus = torch.cuda.device_count() if torch.cuda.is_available() else 0

print("🔍 GPU Detection:")
print(f"   Available GPUs: {num_gpus}")

if num_gpus > 0:
    for i in range(num_gpus):
        gpu_name = torch.cuda.get_device_name(i)
        # total_memory is reported in bytes; show it in GB (1e9 bytes)
        gpu_memory = torch.cuda.get_device_properties(i).total_memory / 1e9
        print(f"   GPU {i}: {gpu_name} ({gpu_memory:.1f} GB)")

    if num_gpus >= 2:
        print(f"\n✅ Multi-GPU mode enabled: {num_gpus} GPUs will train architectures in parallel")
    else:
        print(f"\n⚠️  Only {num_gpus} GPU available. Will use single-GPU mode.")
else:
    print("\n⚠️  No GPUs detected. Will use CPU (very slow).")

if PAIR_INDEX is not None:
    print(f"\n📌 Running Pair Index: {PAIR_INDEX}")
else:
    print("\n📌 Running: All pairs from config")


In [None]:
# Cell 5: Run Experiment (Multi-GPU Mode)
# Execute the training script with multi-GPU parallel execution
# The script automatically splits architectures across available GPUs
# If PAIR_INDEX is set, runs that specific pair; otherwise runs all pairs

if PAIR_INDEX is not None:
    print(f"üöÄ Starting Experiment 1: Pair {PAIR_INDEX} with Multi-GPU Parallel Execution...")
    print("=" * 60)
    !python exp1_architecture/run.py --multi_gpu --pair_index {PAIR_INDEX}
else:
    print("üöÄ Starting Experiment 1: All Pairs with Multi-GPU Parallel Execution...")
    print("=" * 60)
    !python exp1_architecture/run.py --multi_gpu


In [None]:
# Cell 6: Download Results
# Create download links for the results CSV and, when present, the summary
# CSV and best-architecture JSON of the latest run.
# NOTE(review): the links are built from paths relative to the current
# working directory (the cloned repo); confirm they resolve on Kaggle.

from IPython.display import FileLink, display
import os
from pathlib import Path

RESULTS_BASE = Path("exp1_architecture/results")

# (file name, heading printed above its link) for files that may be absent
OPTIONAL_DOWNLOADS = (
    ("summary.csv", "\n📈 Download Summary:"),
    ("best_architecture.json", "\n🏆 Download Best Architecture:"),
)


def find_latest_results_dir(results_base):
    """Pick the run directory whose results should be offered for download.

    Sub-directories of ``results_base`` are sorted by name in descending
    order (this assumes the timestamped names sort chronologically).
    Directories whose name contains "multi_gpu" take precedence over all
    others.

    Returns:
        The chosen ``Path``, or ``None`` when there are no sub-directories.
    """
    run_dirs = sorted((d for d in results_base.iterdir() if d.is_dir()), reverse=True)
    multi_gpu_dirs = [d for d in run_dirs if "multi_gpu" in d.name]
    candidates = multi_gpu_dirs or run_dirs
    return candidates[0] if candidates else None


def show_result_links(results_base):
    """Print the run status and display download links for its result files.

    Prints a warning and returns early when the results folder, a run
    directory, or ``results.csv`` is missing.
    """
    if not results_base.exists():
        print("⚠️ Results directory not found. Check the training logs above for errors.")
        return

    latest_dir = find_latest_results_dir(results_base)
    if latest_dir is None:
        print("⚠️ No results directories found")
        return

    result_file = latest_dir / "results.csv"
    if not result_file.exists():
        print(f"⚠️ Results directory found but results.csv not found in {latest_dir}")
        return

    print("✅ Experiment Complete!")
    print(f"Results saved in: {latest_dir}")
    print("\n📊 Download Results:")
    display(FileLink(str(result_file)))

    for file_name, heading in OPTIONAL_DOWNLOADS:
        optional_file = latest_dir / file_name
        if optional_file.exists():
            print(heading)
            display(FileLink(str(optional_file)))


show_result_links(RESULTS_BASE)
