# SloughGPT - Enterprise AI Framework

Train production-ready AI models with industry-standard features:

- ✅ LR Schedulers (Cosine, Warmup, OneCycle)
- ✅ Mixed Precision (FP16/BF16)
- ✅ Gradient Accumulation
- ✅ Personality Training
- ✅ .sou Model Format

In [None]:
# Clone the repository
# `!` runs the command in a shell; `%cd` then changes the notebook's working
# directory into the checkout. Later cells add '.' to sys.path and import the
# `domains` package, so they rely on the working directory being the repo root.
!git clone https://github.com/iamtowbee/sloughGPT.git
%cd sloughGPT

In [None]:
# Install dependencies
!pip install torch numpy

## 1. Dataset Setup

In [None]:
from pathlib import Path
import urllib.request

# Directory (relative to the current working directory) in which downloaded
# datasets are stored; created on first run, reused afterwards.
datasets_dir = Path("datasets")
datasets_dir.mkdir(exist_ok=True)

# CHOOSE YOUR DATASET
# Either one of the built-in names handled by get_dataset() or a path to a
# local text file.
DATASET_CHOICE = "shakespeare"  # Options: "shakespeare", "tiny", or "/path/to/file.txt"

# Built-in datasets: name -> (download URL, local file name, label used in the
# progress message).
_DATASET_SOURCES = {
    "shakespeare": (
        "https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt",
        "shakespeare.txt",
        "Shakespeare",
    ),
    "tiny": (
        "https://raw.githubusercontent.com/dariush-bahrami/TinyStories/main/TinyStories-data.txt",
        "tiny_stories.txt",
        "TinyStories",
    ),
}


def _download_if_missing(url, path, label):
    """Download *url* to *path* unless *path* already exists; return *path*.

    The payload is written to a sibling ``<name>.part`` file first and only
    renamed onto *path* once the transfer has finished. An interrupted
    download therefore never leaves a truncated file that a later run would
    mistake for a complete, cached dataset.
    """
    if path.exists():
        return path

    print(f"Downloading {label} dataset...")
    partial = path.with_name(path.name + ".part")
    try:
        urllib.request.urlretrieve(url, partial)
        partial.replace(path)
    except BaseException:
        # Clean up the incomplete file, then let the caller see the failure.
        if partial.exists():
            partial.unlink()
        raise
    return path


def get_dataset(choice):
    """Return the path of the text file selected by *choice*.

    Args:
        choice: ``"shakespeare"`` or ``"tiny"`` to use a built-in dataset
            (downloaded into ``datasets_dir`` when not already present), or
            any other value, which is interpreted as a path to a local file.

    Returns:
        A ``pathlib.Path`` pointing at the dataset file.

    Raises:
        FileNotFoundError: if *choice* is treated as a local path and that
            path does not exist.
    """
    source = _DATASET_SOURCES.get(choice)
    if source is not None:
        url, file_name, label = source
        return _download_if_missing(url, datasets_dir / file_name, label)

    path = Path(choice)
    if not path.exists():
        raise FileNotFoundError(f"Dataset not found: {path}")
    return path

# Resolve the selected dataset and read it fully into memory.
data_path = get_dataset(DATASET_CHOICE)
# Decode explicitly as UTF-8 so the loaded text does not depend on the
# platform's default locale encoding.
with open(data_path, encoding="utf-8") as f:
    text = f.read()
print(f"‚úÖ Dataset: {data_path.name}, {len(text):,} chars")

## 2. Check GPU & Import Modules

In [None]:
import sys

import torch

# Put the current working directory first on the import path so the
# project-local `domains` package imported below can be found.
sys.path.insert(0, ".")

# Select the compute device once and report what was found.
has_cuda = torch.cuda.is_available()
device = "cuda" if has_cuda else "cpu"
print(f"üñ•Ô∏è  Device: {device}")
if has_cuda:
    print(f"   GPU: {torch.cuda.get_device_name(0)}")
    gpu_props = torch.cuda.get_device_properties(0)
    print(f"   Memory: {gpu_props.total_memory / 1e9:.1f} GB")

# Project modules used by the remaining cells.
from domains.training import TrainingConfig, Trainer
from domains.training.lr_schedulers import create_scheduler
print("‚úÖ SloughGPT modules imported")

## 3. Training Configuration (Industry Standard)

In [None]:
# Production training configuration.
import torch

# Choose the default precision from what the hardware reports: bf16 only on
# CUDA devices that support it, fp16 on other CUDA devices, fp32 on CPU.
if device != "cuda":
    default_precision = "fp32"
elif torch.cuda.is_bf16_supported():
    default_precision = "bf16"
else:
    default_precision = "fp16"

CONFIG = {
    # Model
    "model_id": "nanogpt-nanogpt",
    "n_embed": 256,
    "n_layer": 6,
    "n_head": 8,
    "vocab_size": 5000,

    # Training
    "epochs": 5,
    "batch_size": 64,
    "learning_rate": 1e-3,
    "max_batches": 300,           # upper bound on batches per epoch

    # LR scheduler
    "scheduler": "cosine",        # none, cosine, warmup, onecycle, cyclic, polynomial
    "warmup_steps": 100,          # Linear warmup steps
    "min_lr": 1e-6,               # Minimum learning rate
    "max_lr": 1e-3,               # Maximum learning rate (for onecycle/cyclic)

    # Mixed precision
    "precision": default_precision,  # fp32, fp16, bf16

    # Gradient accumulation and clipping
    "gradient_accumulation_steps": 1,
    "max_grad_norm": 1.0,
}

print("üìã Configuration:")
for k, v in CONFIG.items():
    print(f"   {k}: {v}")

## 4. Create Trainer

In [None]:
# Build the TrainingConfig from CONFIG. Only the fields listed here are
# forwarded, each under the same name it has in CONFIG.
_CONFIG_FIELDS = (
    # Model
    "model_id",
    "n_embed",
    "n_layer",
    "n_head",
    "vocab_size",
    # Training
    "epochs",
    "batch_size",
    "learning_rate",
    "max_batches",
    # Scheduler, precision, accumulation, clipping
    "scheduler",
    "warmup_steps",
    "min_lr",
    "max_lr",
    "precision",
    "gradient_accumulation_steps",
    "max_grad_norm",
)
config = TrainingConfig(
    data_path=str(data_path),
    **{field: CONFIG[field] for field in _CONFIG_FIELDS},
)

# Instantiate the trainer and let it set itself up.
print("üîß Initializing trainer...")
trainer = Trainer(config)
trainer.setup()

# Report model size and the active scheduler / precision.
num_params = 0
for param in trainer.model.model.parameters():
    num_params += param.numel()

if trainer.scheduler:
    scheduler_name = trainer.scheduler.__class__.__name__
else:
    scheduler_name = 'None'

print(f"üìä Model parameters: {num_params:,}")
print(f"   Scheduler: {scheduler_name}")
print(f"   Precision: {CONFIG['precision']}")

## 5. Train Model (with LR Scheduler & Mixed Precision)

In [None]:
import time
from contextlib import nullcontext

# numpy is needed here for the int64 conversion of each batch.
import numpy as np
from tqdm.notebook import tqdm

print("\n" + "="*60)
print("üöÄ STARTING TRAINING (with LR Scheduler & Mixed Precision)")
print("="*60)

# Short aliases for the objects prepared by trainer.setup().
net = trainer.model.model
optimizer = trainer.optimizer
scheduler = trainer.scheduler          # may be None (scheduler "none")
scaler = getattr(trainer, "scaler", None)

net.train()

total_batches = CONFIG["epochs"] * CONFIG["max_batches"]
grad_accum = CONFIG["gradient_accumulation_steps"]
# Second positional argument passed to get_batch (presumably the sequence
# length -- confirm against the data loader).
block_size = 128

# The autocast dtype depends only on the configured precision, so resolve it
# once instead of on every batch. Unknown/"fp32" precision -> no autocast.
amp_dtype = {
    "fp16": torch.float16,
    "mixed": torch.float16,
    "bf16": torch.bfloat16,
}.get(config.precision)
# Loss scaling is used whenever the trainer provides a scaler for a reduced
# precision mode. bf16 is additionally allowed to autocast without a scaler.
use_scaler = amp_dtype is not None and scaler is not None
use_autocast = use_scaler or amp_dtype is torch.bfloat16

pbar = tqdm(total=total_batches, desc="Training", unit="batch")

epoch_losses = []
# Defined up front so the per-epoch summary works even if no batch ran.
current_lr = CONFIG["learning_rate"]
start_time = time.time()

for epoch in range(CONFIG["epochs"]):
    epoch_loss = 0.0
    batch_count = 0

    batch_gen = trainer.data_loader.get_batch(CONFIG["batch_size"], block_size)

    # Start every epoch with clean gradients; this also discards a partially
    # accumulated group left over from the previous epoch.
    optimizer.zero_grad(set_to_none=True)

    for batch_idx in range(config.max_batches):
        try:
            x, y = next(batch_gen)
        except StopIteration:
            break

        x_t = torch.tensor(x.astype(np.int64), dtype=torch.long).to(device)
        y_t = torch.tensor(y.astype(np.int64), dtype=torch.long).to(device)

        # Forward pass, under autocast when mixed precision is active.
        if use_autocast:
            forward_ctx = torch.autocast(device_type=device, dtype=amp_dtype)
        else:
            forward_ctx = nullcontext()
        with forward_ctx:
            logits, loss = net(x_t, y_t)

        batch_loss = 0.0
        if loss is not None:
            # Divide so that the accumulated gradient is the mean over the
            # accumulation group.
            scaled_loss = loss / grad_accum
            if use_scaler:
                scaler.scale(scaled_loss).backward()
            else:
                scaled_loss.backward()

            if (batch_idx + 1) % grad_accum == 0:
                if use_scaler:
                    # Clip on the true (unscaled) gradient magnitudes.
                    scaler.unscale_(optimizer)
                torch.nn.utils.clip_grad_norm_(
                    net.parameters(),
                    CONFIG["max_grad_norm"],
                )
                if use_scaler:
                    scaler.step(optimizer)
                    scaler.update()
                else:
                    optimizer.step()
                if scheduler is not None:
                    scheduler.step()
                # Gradients are cleared only after an optimizer step so that
                # micro-batches within a group actually accumulate.
                optimizer.zero_grad(set_to_none=True)

            batch_loss = loss.item()
            epoch_loss += batch_loss
            batch_count += 1

        pbar.update(1)
        if scheduler is not None:
            current_lr = scheduler.get_last_lr()[0]
        pbar.set_postfix({
            "loss": f"{batch_loss:.4f}",
            "epoch": epoch+1,
            "lr": f"{current_lr:.6f}"
        })

    avg_loss = epoch_loss / max(batch_count, 1)
    epoch_losses.append(avg_loss)

    elapsed = time.time() - start_time
    batches_done = (epoch + 1) * config.max_batches
    eta = (elapsed / batches_done) * (total_batches - batches_done) if batches_done > 0 else 0

    print(f"\nüìä Epoch {epoch+1}/{CONFIG['epochs']} | Loss: {avg_loss:.4f} | LR: {current_lr:.6f} | ETA: {eta/60:.1f}min")

pbar.close()

print("\n‚úÖ Training complete!")
print(f"   Total time: {(time.time()-start_time)/60:.1f} minutes")
print(f"   Final loss: {epoch_losses[-1]:.4f}")

## 6. Save Model

In [None]:
from pathlib import Path

# Save the trained weights together with the settings needed to rebuild the
# model later.
output_dir = Path("models/sloughgpt")
output_dir.mkdir(parents=True, exist_ok=True)

model_path = output_dir / "sloughgpt_trained.pt"
torch.save({
    'model_state_dict': trainer.model.model.state_dict(),
    'config': CONFIG,
    'training_config': config.to_dict(),
    'data_path': str(data_path),
}, model_path)

print(f"üíæ Model saved to: {model_path}")

# Offer a browser download when running inside Google Colab. Anywhere else
# the `google.colab` package is unavailable, so skip the download instead of
# failing after the checkpoint has already been written.
try:
    from google.colab import files
except ImportError:
    print("   Not running in Google Colab; skipping browser download.")
else:
    files.download(str(model_path))

## 7. Generate Text

In [None]:
# Generate text with the trained model.
from contextlib import nullcontext

print("="*60)
print("üéØ TEXT GENERATION")
print("="*60)

PROMPT = "The king"
MAX_TOKENS = 200
TEMPERATURE = 0.8
# Number of most recent tokens fed to the model at each step.
CONTEXT_LEN = 128

print(f"Prompt: '{PROMPT}'")

trainer.model.model.eval()

# Simple character-level tokenizer built from the first loaded text.
# NOTE(review): this assumes the trainer's data loader assigns token ids in
# the same sorted-character order -- confirm against the data loader.
chars = sorted(set(trainer.data_loader.texts[0]))
stoi = {c: i for i, c in enumerate(chars)}
itos = {i: c for i, c in enumerate(chars)}

# Characters not present in the vocabulary fall back to id 0.
idx = [stoi.get(c, 0) for c in PROMPT]
idx = torch.tensor([idx], dtype=torch.long).to(device)

# Run inference in the precision the model was configured with instead of a
# hard-coded fp16; full precision ("fp32" or unknown) uses no autocast.
gen_amp_dtype = {
    "fp16": torch.float16,
    "mixed": torch.float16,
    "bf16": torch.bfloat16,
}.get(config.precision)

with torch.no_grad():
    for _ in range(MAX_TOKENS):
        idx_cond = idx[:, -CONTEXT_LEN:]

        if gen_amp_dtype is not None:
            forward_ctx = torch.autocast(device_type=device, dtype=gen_amp_dtype)
        else:
            forward_ctx = nullcontext()
        with forward_ctx:
            logits, _ = trainer.model.model(idx_cond)

        # Sample from the last position; do the softmax in float32 so the
        # probabilities are not computed from reduced-precision logits.
        logits = logits[:, -1, :].float() / TEMPERATURE
        probs = torch.softmax(logits, dim=-1)
        idx_next = torch.multinomial(probs, num_samples=1)
        idx = torch.cat([idx, idx_next], dim=1)

# Ids without a character mapping are dropped from the output.
generated = ''.join([itos.get(i, '') for i in idx[0].tolist()])

print("\n" + "="*60)
print("üìù GENERATED TEXT:")
print("="*60)
print(generated)
print("="*60)

## 8. Personality Training (NEW!)

In [None]:
# Fit the neural personality module on a handful of labelled sentences and
# inspect what it predicts for a new one.
from domains.neural_personality import NeuralPersonality
import numpy as np

banner = "="*60
print(banner)
print("üé≠ PERSONALITY TRAINING")
print(banner)

# Model under training.
np_model = NeuralPersonality(vocab_size=100)

# Each sample pairs a text with the trait scores it should express.
personality_data = [
    {
        "text": "Hey friend! How wonderful to see you today!",
        "traits": {"warmth": 0.9, "formality": -0.8, "humor": 0.7},
    },
    {
        "text": "I would like to submit this proposal formally.",
        "traits": {"warmth": 0.0, "formality": 0.9, "confidence": 0.7},
    },
    {
        "text": "Imagine a world of endless possibilities!",
        "traits": {"warmth": 0.7, "creativity": 0.9, "humor": 0.5},
    },
    {
        "text": "I understand how you feel. Tell me more.",
        "traits": {"empathy": 0.9, "patience": 0.8, "warmth": 0.7},
    },
]

np_model.train(personality_data, epochs=100, lr=0.1)

# Predict traits for an unseen sentence and show only those whose magnitude
# exceeds 0.1.
test_text = "Hello! Great to meet you!"
traits = np_model.predict_traits(test_text)
print(f"\nüìä Personality traits for: '{test_text}'")
for trait_name, score in traits.items():
    if abs(score) <= 0.1:
        continue
    print(f"   {trait_name}: {score:.2f}")

## 9. .sou Model Format (NEW!)

In [None]:
# Create .sou model configuration (like Ollama Modelfile)
from domains.inference import SouModelFile, SouParser, GenerationParameters, PersonalityConfig

print("="*60)
print("üì¶ .sou MODEL FORMAT")
print("="*60)

# Describe the model: base model, sampling parameters, system prompt and
# personality trait values.
sou_config = SouModelFile(
    from_model="llama3.2",  # or path to your trained model
    parameters=GenerationParameters(
        temperature=0.7,
        top_p=0.9,
        max_tokens=2048,
    ),
    system="You are a helpful AI assistant.",
    personality=PersonalityConfig(
        warmth=0.8,
        creativity=0.6,
        empathy=0.7,
    )
)

# Render the configuration into the textual .sou representation.
sou_content = f"""FROM {sou_config.from_model}
PARAMETER temperature {sou_config.parameters.temperature}
PARAMETER top_p {sou_config.parameters.top_p}
PARAMETER max_tokens {sou_config.parameters.max_tokens}

PERSONALITY
    warmth {sou_config.personality.warmth}
    creativity {sou_config.personality.creativity}
    empathy {sou_config.personality.empathy}
    END

SYSTEM {sou_config.system}

METADATA author "SloughGPT User"
METADATA version "1.0.0"
"""

print("üìÑ Generated .sou file:")
print(sou_content)

# Save the .sou file. The encoding is fixed to UTF-8 so a system prompt with
# non-ASCII characters is written identically on every platform.
sou_path = Path("models/sloughgpt/model.sou")
sou_path.parent.mkdir(parents=True, exist_ok=True)
sou_path.write_text(sou_content, encoding="utf-8")
print(f"‚úÖ .sou file saved to: {sou_path}")

## 10. Personality Metrics Analysis

In [None]:
# Analyze personality metrics for a few sample sentences.
from domains.ai_personality_metrics import PersonalityMetrics, TextAnalyzer

print("="*60)
print("üìä PERSONALITY METRICS ANALYSIS")
print("="*60)

test_texts = [
    "Hey there! How are you doing today? üòä",
    "I understand your concern. Please allow me to assist you with a comprehensive solution.",
    "IDK lol idc TBH üòÖ",
    "This is a neutral statement about the weather.",
]

# The loop variable is deliberately not called `text`: that name holds the
# training corpus loaded in the dataset cell and must not be overwritten.
for sample_text in test_texts:
    metrics = PersonalityMetrics.compute_all_metrics(sample_text)
    print(f"\nüìù '{sample_text[:40]}...'")
    print(f"   Friendliness: {metrics['friendliness']:.2f}")
    print(f"   Helpfulness: {metrics['helpfulness']:.2f}")
    print(f"   Creativity:   {metrics['creativity']:.2f}")
    print(f"   Formality:   {metrics['formality']:.2f}")

## Summary

You've trained a model with:

✅ **Industry-Standard Training:**
   - LR Schedulers: Cosine annealing with warmup
   - Mixed Precision: BF16/FP16
   - Gradient Accumulation
   - Gradient Clipping

✅ **Personality System:**
   - Neural personality training
   - Real computational metrics
   - Configurable personality traits

✅ **.sou Format:**
   - Ollama-inspired model configuration
   - Personality embeddings
   - Quantization support