# RL Swarm Coordinator Node (Google Colab)

This notebook runs a **coordinator node** that:
- Manages round/stage progression
- Coordinates with worker nodes via Google Drive
- Shares rollouts via Google Drive (no P2P networking)
- Participates in training
- No blockchain, Docker, or peer identity required

**Before running:**
1. Mount your Google Drive
2. Set experiment configuration below
3. Configure rollout sharing frequency and retention
4. Run all cells in order

**For worker nodes:** Use `colab_worker.ipynb` with the **same EXPERIMENT_NAME**

## 1. Configuration

In [None]:
# Experiment Configuration
# NOTE(review): the experiment name says "0.6b" while MODEL_NAME below is a
# 0.5B model - confirm the name is intentional before sharing it with workers.
EXPERIMENT_NAME = 'qwen_0.6b_seed42'  # Must be same across all nodes
NODE_ROLE = 'coordinator'  # DO NOT CHANGE
NODE_ID = 'coordinator_0'  # Unique ID for this node

# Model Configuration
MODEL_NAME = 'Gensyn/Qwen2.5-0.5B-Instruct'  # HuggingFace model
SEED = 42

# Training Configuration
MAX_ROUNDS = 1000
NUM_GENERATIONS = 2
NUM_TRANSPLANT_TREES = 2

# Coordinator Configuration
ADVANCEMENT_STRATEGY = 'hybrid'  # 'time_based', 'completion_based', or 'hybrid'
ROUND_DURATION_MINUTES = 10
MIN_SUBMISSION_PERCENT = 0.5
MAX_ROUND_DURATION_MINUTES = 20

# Rollout Sharing Configuration
ROLLOUT_PUBLISH_FREQUENCY = 'stage'  # 'generation', 'stage', or 'round'
ROLLOUT_CLEANUP_ENABLED = False      # Set to True to enable cleanup
ROLLOUT_KEEP_LAST_N_ROUNDS = 10      # Only used if cleanup enabled
ROLLOUT_ARCHIVE_OLD = False          # Archive instead of delete

# Optional: HuggingFace Token (for pushing trained models)
# Avoid saving or sharing the notebook with a real token filled in.
HUGGINGFACE_TOKEN = None  # Set to your token or keep None

# Optional: Wandb Configuration
WANDB_API_KEY = None  # Set to your Wandb API key or keep None
WANDB_PROJECT = 'rl-swarm-colab'

# Echo the key settings so a misconfigured node is easy to spot in the output.
print(f"✓ Experiment: {EXPERIMENT_NAME}")
print(f"✓ Node Role: {NODE_ROLE}")
print(f"✓ Node ID: {NODE_ID}")
print(f"✓ Model: {MODEL_NAME}")
print(f"✓ Rollout frequency: {ROLLOUT_PUBLISH_FREQUENCY}")
print(f"✓ Cleanup enabled: {ROLLOUT_CLEANUP_ENABLED}")

## 2. Mount Google Drive

In [None]:
from google.colab import drive
import os

# Mount Google Drive at /content/drive
drive.mount('/content/drive')

# Base directory for all RL Swarm data on the drive; created if it is missing
GDRIVE_BASE_PATH = '/content/drive/MyDrive/rl-swarm'
os.makedirs(GDRIVE_BASE_PATH, exist_ok=True)

print(f"✓ Google Drive mounted at: {GDRIVE_BASE_PATH}")

## 3. System Setup & Dependencies

In [None]:
# Check GPU availability
import torch

# Report the first CUDA device (name and total memory), or warn when none is found.
if torch.cuda.is_available():
    print(f"✓ GPU available: {torch.cuda.get_device_name(0)}")
    # total_memory is reported in bytes; dividing by 1e9 gives decimal gigabytes
    print(f"  Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")
else:
    print("⚠ No GPU detected - training will be slow")
    print("  Consider: Runtime > Change runtime type > GPU")

In [None]:
# Clone repository
import os
if not os.path.exists('/content/rl-swarm'):
    !git clone https://github.com/Elrashid/rl-swarm.git /content/rl-swarm
    print("âœ“ Repository cloned")
else:
    print("âœ“ Repository already exists")

%cd /content/rl-swarm

# Install dependencies
print("Installing dependencies (this may take 3-5 minutes)...")
!pip install -q -r requirements.txt
!pip install -q gensyn-genrl==0.1.9

print("âœ“ Dependencies installed")

In [None]:
# Log in to Weights & Biases only when an API key was set in the configuration cell.
if WANDB_API_KEY:
    # Imported lazily so wandb is only loaded when logging is actually enabled
    import wandb
    wandb.login(key=WANDB_API_KEY)
    print("✓ Wandb configured")
else:
    print("ℹ Wandb disabled (WANDB_API_KEY not set)")

## 6. Set Environment Variables

In [None]:
import os

# Export the notebook configuration as environment variables.
# NOTE: the next cell sets these same variables again, together with the
# rollout settings, so running both is redundant but harmless.
os.environ['GDRIVE_PATH'] = GDRIVE_BASE_PATH
os.environ['EXPERIMENT_NAME'] = EXPERIMENT_NAME
os.environ['NODE_ROLE'] = NODE_ROLE
os.environ['NODE_ID'] = NODE_ID
os.environ['MODEL_NAME'] = MODEL_NAME
os.environ['SEED'] = str(SEED)  # environment values must be strings

# Optional credentials are exported only when they were provided
if HUGGINGFACE_TOKEN:
    os.environ['HUGGINGFACE_ACCESS_TOKEN'] = HUGGINGFACE_TOKEN

if WANDB_API_KEY:
    os.environ['WANDB_API_KEY'] = WANDB_API_KEY
    os.environ['WANDB_PROJECT'] = WANDB_PROJECT

print("✓ Environment variables set")

## 7. Initialize Experiment

In [None]:
import os
import uuid

# Export the notebook configuration as environment variables.
os.environ['GDRIVE_PATH'] = GDRIVE_BASE_PATH
os.environ['EXPERIMENT_NAME'] = EXPERIMENT_NAME
os.environ['NODE_ROLE'] = NODE_ROLE
# Fall back to a random coordinator ID when NODE_ID is empty.
# NOTE(review): a generated ID changes on every run, and later cells still use
# the NODE_ID Python variable rather than this value - confirm that is intended.
os.environ['NODE_ID'] = NODE_ID or f"coord_{uuid.uuid4().hex[:8]}"
os.environ['MODEL_NAME'] = MODEL_NAME
os.environ['SEED'] = str(SEED)  # environment values must be strings

# Rollout configuration (booleans and ints are stringified for the environment)
os.environ['ROLLOUT_PUBLISH_FREQUENCY'] = ROLLOUT_PUBLISH_FREQUENCY
os.environ['ROLLOUT_CLEANUP_ENABLED'] = str(ROLLOUT_CLEANUP_ENABLED)
os.environ['ROLLOUT_KEEP_LAST_N_ROUNDS'] = str(ROLLOUT_KEEP_LAST_N_ROUNDS)
os.environ['ROLLOUT_ARCHIVE_OLD'] = str(ROLLOUT_ARCHIVE_OLD)

# Optional credentials are exported only when they were provided
if HUGGINGFACE_TOKEN:
    os.environ['HUGGINGFACE_ACCESS_TOKEN'] = HUGGINGFACE_TOKEN

if WANDB_API_KEY:
    os.environ['WANDB_API_KEY'] = WANDB_API_KEY
    os.environ['WANDB_PROJECT'] = WANDB_PROJECT

print("✓ Environment variables set")
print(f"  Node ID: {os.environ['NODE_ID']}")
print(f"  Rollout frequency: {ROLLOUT_PUBLISH_FREQUENCY}")

## 8. Start Training & Coordination

**The training cell in this section runs until it is interrupted or the maximum number of rounds (`MAX_ROUNDS`) is reached.**

The coordinator will:
- Advance rounds based on configured strategy
- Coordinate with worker nodes via Google Drive
- Train the model and log metrics
- Save checkpoints every 10 rounds

**Monitor logs below. Press the stop button to shut down gracefully.**

In [None]:
from rgym_exp.utils.experiment_manager import init_experiment

# Initialize experiment structure in Google Drive.
# Keys are dotted config paths; values come from the configuration cell.
config_overrides = {
    'training.max_round': MAX_ROUNDS,
    'training.num_generations': NUM_GENERATIONS,
    'training.num_transplant_trees': NUM_TRANSPLANT_TREES,
    'training.seed': SEED,
    'coordinator_manager.advancement_strategy': ADVANCEMENT_STRATEGY,
    'coordinator_manager.round_duration_minutes': ROUND_DURATION_MINUTES,
    'coordinator_manager.min_submission_percent': MIN_SUBMISSION_PERCENT,
    'coordinator_manager.max_round_duration_minutes': MAX_ROUND_DURATION_MINUTES,
    'communication.rollout_publish_frequency': ROLLOUT_PUBLISH_FREQUENCY,
    'communication.rollout_retention.cleanup_enabled': ROLLOUT_CLEANUP_ENABLED,
    'communication.rollout_retention.keep_last_n_rounds': ROLLOUT_KEEP_LAST_N_ROUNDS,
    'communication.rollout_retention.archive_old_rollouts': ROLLOUT_ARCHIVE_OLD,
}

init_experiment(
    gdrive_base_path=GDRIVE_BASE_PATH,
    experiment_name=EXPERIMENT_NAME,
    config_overrides=config_overrides
)

print(f"✓ Experiment initialized: {EXPERIMENT_NAME}")
print(f"  Path: {GDRIVE_BASE_PATH}/experiments/{EXPERIMENT_NAME}")
print(f"  Rollout frequency: {ROLLOUT_PUBLISH_FREQUENCY}")
print(f"  Cleanup enabled: {ROLLOUT_CLEANUP_ENABLED}")

In [None]:
import sys
import subprocess

%cd /content/rl-swarm

# Banner identifying this node in the cell output
print("="*60)
print(f"Starting Coordinator Node: {NODE_ID}")
print(f"Experiment: {EXPERIMENT_NAME}")
print(f"Model: {MODEL_NAME}")
print("="*60)
print()

# Run training in a child Python process. The child inherits os.environ;
# presumably that is how the settings exported by earlier cells reach the
# launcher - verify against rgym_exp.runner.swarm_launcher.
try:
    completed = subprocess.run([
        sys.executable, '-m', 'rgym_exp.runner.swarm_launcher',
        '--config-name', 'colab-gdrive'
    ])
    # subprocess.run does not raise on failure without check=True, so surface
    # a non-zero exit code instead of finishing silently.
    if completed.returncode != 0:
        print(f"\n❌ Training process exited with code {completed.returncode}")

except KeyboardInterrupt:
    # Raised when the Colab stop button interrupts the cell
    print("\n" + "="*60)
    print("Training interrupted by user")
    print("="*60)
except Exception as e:
    # Top-level boundary: report the failure (e.g. the process could not start)
    print(f"\n❌ Error: {e}")
    import traceback
    traceback.print_exc()

In [None]:
import sys
import subprocess

%cd /content/rl-swarm

# NOTE: this cell duplicates the training launcher cell directly above it.
# With "Run all", training is launched a second time once the first run ends;
# skip this cell unless a relaunch is intended.

# Banner identifying this node in the cell output
print("="*60)
print(f"Starting Coordinator Node: {NODE_ID}")
print(f"Experiment: {EXPERIMENT_NAME}")
print(f"Model: {MODEL_NAME}")
print("="*60)
print()

# Run training in a child Python process. The child inherits os.environ;
# presumably that is how the settings exported by earlier cells reach the
# launcher - verify against rgym_exp.runner.swarm_launcher.
try:
    completed = subprocess.run([
        sys.executable, '-m', 'rgym_exp.runner.swarm_launcher',
        '--config-name', 'colab-gdrive'
    ])
    # subprocess.run does not raise on failure without check=True, so surface
    # a non-zero exit code instead of finishing silently.
    if completed.returncode != 0:
        print(f"\n❌ Training process exited with code {completed.returncode}")

except KeyboardInterrupt:
    # Raised when the Colab stop button interrupts the cell
    print("\n" + "="*60)
    print("Training interrupted by user")
    print("="*60)
except Exception as e:
    # Top-level boundary: report the failure (e.g. the process could not start)
    print(f"\n❌ Error: {e}")
    import traceback
    traceback.print_exc()

## 9. Monitor Progress (Optional)

Run this cell in a separate window to monitor progress while training continues.

In [None]:
from rgym_exp.utils.experiment_manager import get_experiment_status, get_experiment_metrics
import pandas as pd

# Fetch the experiment status and print each field with its label
status = get_experiment_status(GDRIVE_BASE_PATH, EXPERIMENT_NAME)
for label, field in (
    ("Current Round", 'current_round'),
    ("Current Stage", 'current_stage'),
    ("Active Peers", 'active_peers'),
    ("Total Submissions", 'total_metric_entries'),
):
    print(f"{label}: {status[field]}")
print()

# Show this node's latest metric rows; any loading failure is reported, not raised
try:
    df = get_experiment_metrics(GDRIVE_BASE_PATH, EXPERIMENT_NAME)
    if df.empty:
        print("No metrics available yet")
    else:
        print("Recent metrics (last 10 rounds):")
        # Keep only rows written by this node, then take the final ten
        own_rows = df['node_id'] == NODE_ID
        recent = df[own_rows].tail(10)
        print(recent[['round', 'stage', 'my_reward']].to_string(index=False))
except Exception as e:
    print(f"Could not load metrics: {e}")

## 10. Resume Training (If Disconnected)

If your Colab session disconnects:
1. Re-run all cells above (keep same EXPERIMENT_NAME and NODE_ID)
2. The system will automatically resume from the last checkpoint
3. Training continues from the last saved round