# SloughGPT Training on Google Colab

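Train your own SloughGPT model with GPU acceleration! In Colab, choose **Runtime → Change runtime type → GPU** before running the cells; otherwise training falls back to the CPU.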

## Quick Start

In [None]:
# Clone the repository
!git clone https://github.com/iamtowbee/sloughGPT.git
%cd sloughGPT

In [None]:
# Install dependencies
!pip install torch numpy

## Training Configuration

In [None]:
import sys

import torch

# Put the current directory first on the import path so the `domains`
# package below resolves from here.
sys.path.insert(0, '.')

from domains.training import TrainingConfig, Trainer

# Report which device is available; query CUDA once and reuse the answer.
has_gpu = torch.cuda.is_available()
device = "cuda" if has_gpu else "cpu"
print(f"Training on: {device}")
if has_gpu:
    print(f"GPU: {torch.cuda.get_device_name(0)}")

## Train Model

In [None]:
# Configuration - adjust these values.
# This dict is also stored alongside the weights when the model is saved,
# so every entry here should reflect what the model was actually built with.
CONFIG = {
    "data_path": "datasets/karpathy/corpus.jsonl",
    "epochs": 10,
    "batch_size": 64,
    "n_embed": 256,      # Embedding dimension
    "n_layer": 6,        # Number of layers
    "n_head": 8,         # Number of attention heads
    "vocab_size": 5000,  # Vocabulary size
    "learning_rate": 1e-3,
}

# Create config
config = TrainingConfig(
    data_path=CONFIG["data_path"],
    epochs=CONFIG["epochs"],
    batch_size=CONFIG["batch_size"],
    n_embed=CONFIG["n_embed"],
    n_layer=CONFIG["n_layer"],
    # Previously n_head was listed in CONFIG but never forwarded, so changing
    # it had no effect and the saved CONFIG could disagree with the model.
    # NOTE(review): assumes TrainingConfig accepts an `n_head` keyword like
    # n_embed/n_layer - confirm; if it does not, this fails loudly with a
    # TypeError instead of silently ignoring the setting.
    n_head=CONFIG["n_head"],
    vocab_size=CONFIG["vocab_size"],
    learning_rate=CONFIG["learning_rate"],
    max_batches=1000,  # Batches per epoch
)

print(f"Config: {CONFIG}")

In [None]:
# Build the trainer from the config defined above and prepare it.
print("Creating trainer...")
trainer = Trainer(config)
trainer.setup()

# Sum the element counts of every parameter exposed by trainer.model.model.
num_params = 0
for param in trainer.model.model.parameters():
    num_params += param.numel()
print(f"Model parameters: {num_params:,}")

# Run training and show whatever train() returns.
print("\nStarting training...")
results = trainer.train()
print(f"\nTraining complete! Results: {results}")

## Save Model

In [None]:
import os
from pathlib import Path

# Save trained model: weights plus the CONFIG dict, in a single checkpoint file.
output_dir = Path("models/sloughgpt")
output_dir.mkdir(parents=True, exist_ok=True)
# Build the checkpoint path once so save, message and download cannot drift apart.
checkpoint_path = output_dir / "sloughgpt_trained.pt"

torch.save({
    'model_state_dict': trainer.model.model.state_dict(),
    'config': CONFIG,
}, checkpoint_path)

print(f"Model saved to: {checkpoint_path}")

# Download to local machine.
# google.colab only exists inside Colab; elsewhere the checkpoint is already
# on disk, so skip the browser download instead of failing the cell.
try:
    from google.colab import files
except ImportError:
    print("google.colab is not available; skipping browser download.")
else:
    # Pass a plain string filename rather than a Path object.
    files.download(str(checkpoint_path))

## Generate Text (Test Model)

In [None]:
# Placeholder for text generation with the trained model.
# (Requires implementing generate method in your model)
for message in (
    "Training complete! Model saved.",
    "\nTo generate text, load the model and use it.",
):
    print(message)