# 🧪 UNSW-NB15 Manuscript Run (Draft Mode)

**Fast 18-Experiment Pipeline (Seed 42 Only)**

This notebook runs the "fast" grid for immediate manuscript generation:
- **2 Tasks**: Binary, Multiclass
- **3 Models**: Logistic Regression, Random Forest, XGBoost
- **3 Strategies**: S0 (None), S1 (Class Weight), S2a (RandomOverSampler)
- **1 Seed**: 42 (Reproducible)

**Total**: 2 × 3 × 3 × 1 = **18 experiments**
**Est. Time**: ~45-60 mins

---

In [1]:
# ==========================================
# 1. MOUNT GOOGLE DRIVE
# ==========================================
# Attach Google Drive at /content/drive. Later cells create the run's results
# folder underneath DRIVE_BASE_DIR.
import os

from google.colab import drive

print("üìÅ Mounting Google Drive for Results...")
drive.mount("/content/drive")

# Root folder on Drive under which each run's results directory is created.
DRIVE_BASE_DIR = "/content/drive/MyDrive/UNSW_Archive"

üìÅ Mounting Google Drive for Results...


KeyboardInterrupt: 

In [None]:
# ==========================================
# 2. CONFIGURATION
# ==========================================
# @title ‚öôÔ∏è **Experiment Settings**
# User-editable settings for the whole notebook. The trailing "# @param"
# comments are Colab form annotations and must stay on the same line as the
# value they describe.

# Git repository that the setup cell clones into PROJECT_DIR.
REPO_URL = "https://github.com/StartDust/ML_PAPER_REVIEW.git"  # @param {type:"string"}
# Branch passed to `git clone -b` by the setup cell.
BRANCH = "main"  # @param {type:"string"}
# YAML experiment config to load, given relative to PROJECT_DIR.
CONFIG_FILE = "configs/fast.yaml" # @param ["configs/main.yaml", "configs/fast.yaml"]
# When True, the cleanup cell deletes the existing `metrics` directories under
# `results/` and `results_fast/` before the run starts.
FORCE_FRESH_RUN = True  # @param {type:"boolean"}
# Minimum number of seconds between intermediate rsync copies to Drive.
SYNC_INTERVAL_SECONDS = 60  # @param {type:"integer"}

# Local directory that holds the cloned repository; the setup cell makes it
# the working directory.
PROJECT_DIR = "/content/ml_project"

In [None]:
# ==========================================
# 3. SETUP REPOSITORY
# ==========================================
import os

print("\nüì• Setting up code from GitHub...")

if os.path.exists(PROJECT_DIR):
    print(f"   Repository exists, pulling latest...")
    !cd {PROJECT_DIR} && git pull
else:
    print(f"   Cloning from {REPO_URL}...")
    !git clone -b {BRANCH} {REPO_URL} {PROJECT_DIR}

os.chdir(PROJECT_DIR)
print(f"   üìÇ Working directory: {os.getcwd()}")

In [None]:
# ==========================================
# 4. INSTALL DEPENDENCIES
# ==========================================
print("\nüì¶ Installing Dependencies...")
!pip install -q -r requirements.txt
print("   ‚úÖ Dependencies installed.")

In [None]:
# ==========================================
# 5. PREPARE CONFIGURATION
# ==========================================
# Load the selected YAML config, force a sequential pipeline (n_jobs = 1),
# report the size of the experiment grid, and write the adjusted config to
# `configs/colab_optimized.yaml`, which the run cell passes to main.py.
import yaml
import shutil
from pathlib import Path

print("\n‚öôÔ∏è  Optimizing Configuration for Colab...")

config_path = Path(PROJECT_DIR) / CONFIG_FILE
temp_config_path = Path(PROJECT_DIR) / "configs" / "colab_optimized.yaml"

with open(config_path, "r", encoding="utf-8") as f:
    # An empty YAML document loads as None; treat it as an empty mapping.
    config = yaml.safe_load(f) or {}

# Guarantee an `experiments` mapping exists so the override below cannot raise
# KeyError/TypeError when the section is absent or empty.
if config.get('experiments') is None:
    config['experiments'] = {}
experiments_cfg = config['experiments']

original_jobs = experiments_cfg.get('n_jobs', 'Unknown')
print(f"   ‚ÑπÔ∏è  Loading config: {CONFIG_FILE}")

# Force sequential pipeline (prevents fork bomb on Colab)
experiments_cfg['n_jobs'] = 1
print(f"      n_jobs: {original_jobs} -> 1")

# Calculate total experiments; the defaults mirror the fast grid.
n_seeds = experiments_cfg.get('n_seeds', 1)
tasks = experiments_cfg.get('tasks', ['binary', 'multi'])
models = experiments_cfg.get('models', ['lr', 'rf', 'xgb'])
strategies = experiments_cfg.get('strategies', ['s0', 's1', 's2a'])

TOTAL_EXPERIMENTS = len(tasks) * len(models) * len(strategies) * n_seeds

print("   üìä Experiment Grid:")
print(f"      - Tasks:      {tasks}")
print(f"      - Models:     {models}")
print(f"      - Strategies: {strategies}")
# Report the configured seed count as-is; the previous hard-coded "(1 seed)"
# suffix was wrong for any config with n_seeds != 1.
print(f"      - Seeds:      {n_seeds}")
print(f"      - Total:      {TOTAL_EXPERIMENTS} experiments (0-{TOTAL_EXPERIMENTS - 1})")

with open(temp_config_path, "w", encoding="utf-8") as f:
    yaml.dump(config, f, default_flow_style=False)

print("   ‚úÖ Optimized config saved.")

In [None]:
# ==========================================
# 6. SETUP RESULTS DIRECTORY & CLEANUP
# ==========================================
# Create a timestamped results folder under DRIVE_BASE_DIR and, when
# FORCE_FRESH_RUN is set, delete the local `metrics` directories left by
# earlier runs.
from datetime import datetime

TIMESTAMP = datetime.now().strftime("%Y%m%d_%H%M%S")
RESULTS_DIR = f"{DRIVE_BASE_DIR}/run_draft_{TIMESTAMP}"
os.makedirs(RESULTS_DIR, exist_ok=True)
print(f"‚úÖ Results will be saved to: {RESULTS_DIR}")

if FORCE_FRESH_RUN:
    print("\nüßπ Cleaning previous results for fresh run...")
    # Check both result dirs to be safe
    for d in ["results", "results_fast"]:
        metrics_dir = Path(PROJECT_DIR) / d / "metrics"
        if not metrics_dir.exists():
            continue
        try:
            shutil.rmtree(metrics_dir)
        except OSError as err:
            # Cleanup stays best-effort, but a failure is reported so stale
            # metrics are not silently mixed into the "fresh" run.
            print(f"   Could not clear {d}/metrics: {err}")
        else:
            print(f"   ‚úÖ Cleared {d}/metrics.")
else:
    print("\nüîÑ Incremental mode: Keeping existing results.")

In [None]:
# ==========================================
# 7. RUN EXPERIMENTS (With Sync)
# ==========================================
import sys
import subprocess
import time

print("\n" + "=" * 70)
print(f"üöÄ STARTING MANUSCRIPT GRID (0-{TOTAL_EXPERIMENTS-1})")
print("=" * 70)
print(f"   Config:        {temp_config_path}")
print(f"   Sync Interval: {SYNC_INTERVAL_SECONDS}s")
print(f"   Drive Target:  {RESULTS_DIR}")
print("=" * 70 + "\n")

LOCAL_RESULTS = str(Path(PROJECT_DIR) / config['results_dir'])
print(f"   ‚ÑπÔ∏è  Local Output: {LOCAL_RESULTS}")

# Run main.py in subprocess
proc = subprocess.Popen(
    [sys.executable, "main.py", "--config", str(temp_config_path)],
    stdout=subprocess.PIPE,
    stderr=subprocess.STDOUT,
    text=True,
    bufsize=1
)

last_sync_time = time.time()
exit_code = 0

try:
    while True:
        return_code = proc.poll()
        
        if proc.stdout:
            line = proc.stdout.readline()
            if line:
                print(line, end='')
        
        current_time = time.time()
        if current_time - last_sync_time >= SYNC_INTERVAL_SECONDS:
            # Sync including logs and CSVs
            !rsync -avq --include='*/' --include='*.json' --include='*.png' --include='*.csv' --include='*.log' --exclude='*' {LOCAL_RESULTS}/ "{RESULTS_DIR}/"
            print(f"üíæ Synced artifacts to Drive...")
            last_sync_time = current_time
        
        if return_code is not None:
            remaining = proc.stdout.read() if proc.stdout else ""
            if remaining:
                print(remaining)
            exit_code = return_code
            break
        
        time.sleep(0.1)

except KeyboardInterrupt:
    print("\n‚ö†Ô∏è  Interrupted by user. Saving progress...")
    proc.terminate()
    proc.wait()
    exit_code = 130

if exit_code == 0:
    print("\n‚úÖ MANUSCRIPT RUN COMPLETED SUCCESSFULLY!")
else:
    print(f"\n‚ö†Ô∏è  Run completed with exit code {exit_code}")

In [None]:
# ==========================================
# 8. FINAL SYNC
# ==========================================
print("\nüì§ Performing final full sync to Drive...")
!rsync -av {LOCAL_RESULTS}/ "{RESULTS_DIR}/"

if os.path.exists(temp_config_path):
    os.remove(temp_config_path)
    print(f"üóëÔ∏è  Cleaned up temp config.")

print("\n" + "=" * 70)
print("üìä EXECUTION SUMMARY")
print("=" * 70)
print(f"   Results Saved To:  {RESULTS_DIR}")
print(f"   Completed at:      {datetime.now().isoformat()}")
print("=" * 70)

---

## 📁 View Results

Check your results in Google Drive:
```
My Drive/UNSW_Archive/run_draft_YYYYMMDD_HHMMSS/
├── metrics/
├── tables/
├── learning_curves/
├── logs/
└── ...
```