# Model Training and Comparison

This notebook implements and compares different dialog model architectures, with WandB logging and experiment tracking for every training run:
- Fine-tuned pre-trained models (GPT-2, DialoGPT)
- Custom transformer models built from scratch

## Objectives:
- Train multiple model variants for comparison
- Track experiments with WandB
- Evaluate model performance on dialog tasks
- Generate and test dialog responses

In [None]:
# Import libraries
import sys
import os
import torch
import wandb
import numpy as np
import pandas as pd
from datetime import datetime

# Add src to path
sys.path.append('../')
from src.core.trainer import DialogTrainer
from src.config.settings import get_model_config, get_test_config, get_development_config
from src.data.loaders import get_dataset_manager

# Select the compute device: CUDA when PyTorch can see a GPU, otherwise CPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(f"Using device: {device}")
if device.type == 'cuda':
    # Report the GPU name and the total memory of device 0 in GB (1e9 bytes)
    print(f"GPU: {torch.cuda.get_device_name()}")
    print(f"Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB")

In [None]:
# Fetch the dataset through the project's dataset manager
dataset_manager = get_dataset_manager()
df = dataset_manager.load_dataset()

# The development preset is used in the notebook for faster training
training_config = get_development_config()
print(
    "Training Configuration:",
    f"- Epochs: {training_config.num_epochs}",
    f"- Batch size: {training_config.batch_size}",
    f"- Max samples: {training_config.max_samples}",
    f"- Learning rate: {training_config.learning_rate}",
    sep="\n",
)

# Model variants to compare:
#   gpt2-small      - pre-trained GPT-2
#   custom-small    - custom model trained from scratch
#   dialogpt-small  - pre-trained DialoGPT
models_to_train = ["gpt2-small", "custom-small", "dialogpt-small"]

print(f"\nModels to train and compare: {models_to_train}")

## Training Loop with WandB Tracking

Train each model variant and log metrics to WandB for comparison.

In [None]:
def train_model_with_logging(model_name, df, training_config):
    """Train one model variant inside its own WandB run.

    Args:
        model_name: Model identifier passed to ``get_model_config``; it is
            also embedded in the WandB run name and tags.
        df: Dataset handed to ``DialogTrainer.prepare_dataset``.
        training_config: Object providing ``num_epochs``, ``batch_size``,
            ``learning_rate`` and ``max_samples``.

    Returns:
        The ``DialogTrainer`` instance after ``train`` has completed.

    Raises:
        Exception: Anything raised while building the trainer, preparing the
            data, training or logging propagates to the caller. The WandB run
            is closed first and reported with a non-zero exit code.
    """
    # Open a dedicated WandB run for this model; the run name carries a
    # minute-resolution timestamp to distinguish repeated trainings.
    wandb.init(
        project="dialog-model-training",
        name=f"training-{model_name}-{datetime.now().strftime('%Y%m%d-%H%M')}",
        tags=["training", model_name],
        config={
            "model": model_name,
            "epochs": training_config.num_epochs,
            "batch_size": training_config.batch_size,
            "learning_rate": training_config.learning_rate,
            "max_samples": training_config.max_samples
        }
    )

    # Flipped to True only once everything below has finished, so the
    # ``finally`` clause can tell a clean run from a crashed one.
    succeeded = False
    try:
        # Get model config and initialize trainer
        model_config = get_model_config(model_name)
        trainer = DialogTrainer(model_config=model_config)

        print(f"\n{'='*50}")
        print(f"Training {model_name}")
        print(f"{'='*50}")

        # Prepare dataset
        train_dataset, eval_dataset = trainer.prepare_dataset(
            df,
            max_samples=training_config.max_samples
        )

        # Time only the training call itself
        start_time = datetime.now()
        trainer.train(
            train_dataset,
            eval_dataset,
            num_epochs=training_config.num_epochs,
            batch_size=training_config.batch_size,
            learning_rate=training_config.learning_rate
        )
        end_time = datetime.now()

        training_time = (end_time - start_time).total_seconds()

        # Log final metrics
        wandb.log({
            "training_time_seconds": training_time,
            "training_time_minutes": training_time / 60,
            "model_parameters": sum(p.numel() for p in trainer.model.parameters()),
            "dataset_size": len(train_dataset),
            "eval_size": len(eval_dataset)
        })

        print(f"Training completed in {training_time/60:.1f} minutes")
        succeeded = True
        return trainer

    finally:
        # Always close the run; a non-zero exit code marks it as failed in
        # WandB instead of letting a crashed training look like a success.
        wandb.finish(exit_code=0 if succeeded else 1)

# Trainers of successfully trained models, keyed by model name
trained_models = {}

In [None]:
# Train every configured model; a failure in one model is reported and the
# loop moves on to the next one
for model_name in models_to_train:
    print(f"\nStarting training for {model_name}...")
    try:
        trainer = train_model_with_logging(model_name, df, training_config)
    except Exception as e:
        print(f"✗ Failed to train {model_name}: {e}")
    else:
        # Only successfully trained models are kept for later comparison
        trained_models[model_name] = trainer
        print(f"✓ Successfully trained {model_name}")

print("\nTraining Summary:")
print(f"Successfully trained: {list(trained_models)}")
print(f"Total models trained: {len(trained_models)}")

## Dialog Generation Testing

Test the trained models by generating responses to sample instructions.

In [None]:
# Sample prompts used to compare the trained models side by side
test_instructions = [
    "Explain what machine learning is in simple terms.",
    "How do I make a good cup of coffee?",
    "What are the benefits of exercise?",
    "Tell me about the solar system.",
    "How can I improve my communication skills?",
]

print("Dialog Generation Comparison")
print("=" * 60)

for instruction in test_instructions:
    # Header for this prompt, followed by one answer per trained model
    print(f"\nInstruction: {instruction}", "-" * 40, sep="\n")

    for model_name, trainer in trained_models.items():
        try:
            response = trainer.generate_response(instruction, max_length=100)
        except Exception as e:
            # Report the failure and keep going with the remaining models
            print(f"\n{model_name}: Error - {e}")
        else:
            print(f"\n{model_name}: {response}")

    print("=" * 60)

## Interactive Testing

Test your own custom instructions with the trained models.

In [None]:
def test_custom_instruction(instruction, model_name=None):
    """Print responses to ``instruction`` from one or all trained models.

    Args:
        instruction: Prompt text passed to each trainer's
            ``generate_response`` (with ``max_length=150``).
        model_name: Key in ``trained_models`` selecting a single model. When
            omitted, every trained model is queried. When given but not
            found, a notice listing the available names is printed and every
            trained model is queried instead.

    Returns:
        None. All results are printed.

    Raises:
        Exception: In single-model mode, errors from ``generate_response``
            propagate to the caller. In all-models mode they are printed per
            model so the remaining models still run.
    """
    if model_name and model_name in trained_models:
        # Test specific model
        trainer = trained_models[model_name]
        response = trainer.generate_response(instruction, max_length=150)
        print(f"{model_name}: {response}")
        return

    if model_name:
        # A misspelled or untrained name used to fall through silently;
        # say so before falling back to all models.
        print(
            f"Model '{model_name}' is not among the trained models "
            f"{list(trained_models)}; testing all models instead."
        )

    # Test all models
    print(f"Testing: {instruction}")
    print("-" * 50)
    if not trained_models:
        # Without this notice the output would be just the header above
        print("No trained models available - run the training cell first.")
        return

    for name, trainer in trained_models.items():
        try:
            response = trainer.generate_response(instruction, max_length=150)
            print(f"\n{name}: {response}")
        except Exception as e:
            print(f"\n{name}: Error - {e}")

# Example usage - modify the instruction below to test your own
custom_instruction = "What is the most important skill for a data scientist?"
test_custom_instruction(custom_instruction)

## Summary and Conclusions

### Training Results:
- Successfully trained multiple dialog model variants
- Compared pre-trained vs custom architectures
- All experiments tracked in WandB with metrics and visualizations

### Model Comparison Insights:
1. **Pre-trained models** (GPT-2, DialoGPT) benefit from existing language knowledge
2. **Custom models** offer more control but require more training data
3. **Training time** varies significantly between architectures

### Next Steps:
- Evaluate models on held-out test set
- Implement additional metrics (BLEU, perplexity)
- Fine-tune hyperparameters based on results
- Deploy best-performing model for production testing

Check your WandB dashboard for detailed training metrics and comparisons!