# Svend Reasoning Model Training

This notebook trains Svend reasoning models on Google Colab with A100 GPUs.

**Requirements:**
- Colab Pro+ (for A100 access)
- Google Drive (for checkpoint persistence)
- WandB account (optional, for experiment tracking)

**Strategy:**
1. Mount Drive for checkpoint persistence
2. Validate infrastructure before training
3. Train with frequent checkpoints (Colab can time out)
4. Evaluate before scaling up

## 1. Setup Environment

In [None]:
# Check GPU
# `!` hands the rest of the line to the shell; nvidia-smi reports the GPU
# attached to this runtime (the command fails when no NVIDIA driver is present).
!nvidia-smi

In [None]:
# Mount Google Drive
# The checkpoint directory configured in this notebook lives under this mount
# point, so run this cell before any cell that reads or writes checkpoints.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Clone repository (or upload)
# Replace YOUR_USERNAME with the account that hosts the repository before running.
!git clone https://github.com/YOUR_USERNAME/reasoning-lab.git
# Make the clone the working directory; later cells import from `src.`, which
# presumably resolves relative to the repository root - confirm.
%cd reasoning-lab

In [None]:
# Install dependencies
# -q keeps pip's output quiet. wandb is only used when CONFIG["use_wandb"] is True.
!pip install -q torch transformers datasets accelerate wandb
!pip install -q sympy  # For math tools
!pip install -q sentencepiece tiktoken  # For tokenization

In [None]:
# Report the PyTorch build and, when CUDA is usable, details of the GPU
import torch

cuda_ok = torch.cuda.is_available()

print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {cuda_ok}")

if cuda_ok:
    # Device 0 is the GPU whose properties are queried for the memory figure.
    total_bytes = torch.cuda.get_device_properties(0).total_memory
    print(f"GPU: {torch.cuda.get_device_name()}")
    print(f"Memory: {total_bytes / 1e9:.1f} GB")
    print(f"bf16 supported: {torch.cuda.is_bf16_supported()}")

## 2. Configure Training

In [None]:
# Notebook-level settings; later cells read these keys from CONFIG.
CONFIG = dict(
    # Model scale: one of tiny, small, medium, large, xl, xxl, flagship.
    # Start small and scale up after validation.
    model_scale="medium",

    # Training
    experiment_name="svend-colab-v1",
    num_epochs=2,
    max_steps=None,  # Set to limit steps (e.g., 1000 for testing)

    # Checkpointing (critical for Colab!)
    save_steps=500,  # Save every 500 steps
    checkpoint_dir="/content/drive/MyDrive/svend-checkpoints",

    # Logging
    use_wandb=True,
    wandb_project="svend",

    # Resume from checkpoint (set to checkpoint path to resume)
    resume_from=None,
)

# Echo every setting, one indented "key: value" line each, in insertion order.
print(
    "Configuration:",
    *(f"  {key}: {setting}" for key, setting in CONFIG.items()),
    sep="\n",
)

In [None]:
# Initialize WandB (optional)
# This cell only authenticates the session; it does not create a run.
if CONFIG["use_wandb"]:
    # Imported inside the branch so the package is only needed when tracking is on.
    import wandb
    wandb.login()
    print("WandB initialized")

## 3. Validate Infrastructure

In [None]:
# Validate the infrastructure with the quick-test configuration before training
from src.pipeline import PipelineRunner, PipelineConfig
from src.pipeline.config import ModelScale, TrainingConfig, create_quick_test_config

# `config` and `runner` are rebound by later cells for the real training run.
config = create_quick_test_config()
runner = PipelineRunner(config)

infrastructure_ok = runner.validate_infrastructure()

# A failed validation is only reported here; it does not stop the notebook.
if not infrastructure_ok:
    print("\n[ERROR] Infrastructure validation failed - fix issues before continuing")
else:
    print("\n[SUCCESS] Infrastructure validated - ready for training!")

## 4. Prepare Data

In [None]:
# Load datasets
from datasets import load_dataset


def _try_load(label, *args, **kwargs):
    """Load one dataset via ``load_dataset``, reporting the outcome instead of raising.

    Args:
        label: Human-readable name used in the printed messages.
        *args: Positional arguments forwarded unchanged to ``load_dataset``.
        **kwargs: Keyword arguments forwarded unchanged to ``load_dataset``.

    Returns:
        The loaded dataset, or ``None`` when loading or sizing it failed.
    """
    try:
        dataset = load_dataset(*args, **kwargs)
        print(f"{label}: {len(dataset)} examples")
    except Exception as e:
        # Deliberately broad: an unavailable example dataset should be reported,
        # not abort the whole notebook.
        print(f"Could not load {label}: {e}")
        return None
    return dataset


# You can customize which datasets to use
print("Loading datasets...")

# Each name is always bound afterwards (to None on failure), so later cells can
# test for availability instead of hitting a NameError.

# Example: SlimOrca for reasoning
slimorca = _try_load("SlimOrca", "Open-Orca/SlimOrca", split="train")

# Example: GSM8K for math
gsm8k = _try_load("GSM8K", "gsm8k", "main", split="train")

## 5. Create Model and Start Training

In [None]:
from src.pipeline import PipelineRunner, PipelineConfig
from src.pipeline.config import ModelScale, TrainingConfig, DataConfig

# Map each supported scale string to the ModelScale member with the same
# upper-cased name.
scale_map = {
    scale_name: getattr(ModelScale, scale_name.upper())
    for scale_name in ("tiny", "small", "medium", "large", "xl", "xxl", "flagship")
}

model_scale = scale_map[CONFIG["model_scale"]]

# Gather the TrainingConfig arguments from CONFIG, then build the config
training_kwargs = {
    "experiment_name": CONFIG["experiment_name"],
    "model_scale": model_scale,
    "num_epochs": CONFIG["num_epochs"],
    "max_steps": CONFIG["max_steps"],
    "save_steps": CONFIG["save_steps"],
    "output_dir": CONFIG["checkpoint_dir"],
    "use_wandb": CONFIG["use_wandb"],
    "wandb_project": CONFIG["wandb_project"],
    "resume_from": CONFIG["resume_from"],
}
training = TrainingConfig(**training_kwargs)

# Apply scale defaults before reading the values reported below
training.apply_scale_defaults()

print("\nTraining Configuration:")
print(f"  Scale: {model_scale.value}")
for label, attribute in (
    ("Batch size", "batch_size"),
    ("Grad accumulation", "gradient_accumulation_steps"),
    ("Effective batch", "effective_batch_size"),
    ("Learning rate", "learning_rate"),
):
    print(f"  {label}: {getattr(training, attribute)}")

In [None]:
# Assemble the pipeline configuration and the runner used by the training cell
pipeline_kwargs = {
    "training": training,
    "checkpoint_dir": CONFIG["checkpoint_dir"],
    "strict_validation": False,  # Don't fail on validation for now
}
config = PipelineConfig(**pipeline_kwargs)

# The runner is what the following cells call into
runner = PipelineRunner(config)

print("Pipeline configured!")

In [None]:
# START TRAINING
# Long-running cell: it blocks until runner.run() returns.
# Checkpoints go to the directory printed below.

divider = "=" * 60

print("Starting training...")
print(f"Checkpoints will be saved to: {CONFIG['checkpoint_dir']}")
print("If Colab disconnects, re-run the notebook and set resume_from to continue.")
print(divider)

result = runner.run()

print(f"\n{divider}")
print("Training completed!")
print(f"Success: {result.success}")
print(f"Final step: {result.final_step}")
# NOTE(review): the .4f format needs a numeric final_loss - confirm the runner
# always sets one, including on unsuccessful runs.
print(f"Final loss: {result.final_loss:.4f}")
elapsed_hours = result.training_time_seconds / 3600
print(f"Training time: {elapsed_hours:.2f} hours")

## 6. Evaluate Model

In [None]:
# Run the quick evaluation on the trained model and print its metrics
from src.evaluation import EvaluationSuite, quick_eval

# Quick eval for validation
print("Running quick evaluation...")
metrics = quick_eval(runner.model, runner.model.tokenizer)

print("\nEvaluation Results:")
for name, value in metrics.items():
    # Floats are shown with four decimals; every other value is shown as-is.
    rendered = f"{value:.4f}" if isinstance(value, float) else value
    print(f"  {name}: {rendered}")

In [None]:
# Sample prompts for a manual generation check
test_prompts = [
    "What is 15% of 200?",
    "If a train travels at 60 mph for 2.5 hours, how far does it go?",
    "What is the derivative of x^2 + 3x?",
]

print("Testing model generation:", "=" * 60, sep="\n")

for prompt in test_prompts:
    # Generate response (implementation depends on your model)
    # response = runner.model.generate(prompt)
    # print(f"Response: {response}")
    # Until generation is wired in, each prompt is followed by a placeholder line.
    print(f"\nPrompt: {prompt}", "[Generation code would go here]", sep="\n")

## 7. Save and Export

In [None]:
# Save final model to Drive
# Target location for the final model, under the experiment's checkpoint folder.
final_path = f"{CONFIG['checkpoint_dir']}/{CONFIG['experiment_name']}/final"

print(f"Final model path: {final_path}")
# The save call is left disabled as a template. Uncomment it, and replace the
# placeholder message below with a success message, once the checkpoint-manager
# API has been confirmed.
# runner.checkpoint_manager.save(runner.model, step=result.final_step, name="final")

# Nothing is written by this cell as it stands, so say so rather than claiming
# the model was saved.
print("[Final save is disabled - uncomment the save call above to write the model]")

In [None]:
# List saved checkpoints
import os

checkpoint_base = CONFIG['checkpoint_dir']

# Collect sub-directory names in sorted order so the listing is stable between runs.
# isdir (rather than exists) also covers the case where the path is a plain file.
checkpoint_names = []
if os.path.isdir(checkpoint_base):
    checkpoint_names = sorted(
        item
        for item in os.listdir(checkpoint_base)
        if os.path.isdir(os.path.join(checkpoint_base, item))
    )

# An existing but empty directory is reported the same way as a missing one.
if checkpoint_names:
    print("Saved checkpoints:")
    for item in checkpoint_names:
        print(f"  {item}")
else:
    print("No checkpoints found")

## 8. Resume Training (if needed)

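If Colab disconnects, first re-run the setup and configuration cells (Sections 1–2) and the first cell of Section 5 (it imports the pipeline classes and defines `model_scale`), then run the following to resume: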

In [None]:
# RESUME TRAINING
# Uses CONFIG, model_scale, TrainingConfig, PipelineConfig and PipelineRunner
# from earlier cells, so those cells must have been run in this runtime.

# Set the checkpoint path to resume from
RESUME_FROM = "/content/drive/MyDrive/svend-checkpoints/svend-colab-v1/step_00001000"

# Update config
CONFIG["resume_from"] = RESUME_FROM

# Re-create the training config with the same arguments as the initial run
# (including max_steps), changing only the resume path.
training = TrainingConfig(
    experiment_name=CONFIG["experiment_name"],
    model_scale=model_scale,
    num_epochs=CONFIG["num_epochs"],
    max_steps=CONFIG["max_steps"],
    save_steps=CONFIG["save_steps"],
    output_dir=CONFIG["checkpoint_dir"],
    use_wandb=CONFIG["use_wandb"],
    wandb_project=CONFIG["wandb_project"],
    resume_from=RESUME_FROM,
)
training.apply_scale_defaults()

# Mirror the initial pipeline settings so the resumed run keeps writing to the
# same checkpoint directory with the same validation mode.
config = PipelineConfig(
    training=training,
    checkpoint_dir=CONFIG["checkpoint_dir"],
    strict_validation=False,  # Don't fail on validation for now
)
runner = PipelineRunner(config)

# Resume training
result = runner.run()

## Notes

### Colab Tips:
- A100 sessions can run up to 24 hours
- Always checkpoint frequently (every 500 steps)
- Mount Drive at the start to persist checkpoints
- Use WandB to track experiments across sessions

### Next Steps:
1. After validation at the current scale, increase `model_scale`
2. Run full evaluation before production
3. Train safety classifier separately
4. Deploy to production infrastructure