# AGENT LLM Fine-tuning Pipeline

This notebook fine-tunes the locally downloaded Llama models using user interaction data collected from the deployed AGENT system.

## Features:
- **Zero-cost training** using Google Colab's free GPU
- **LoRA fine-tuning** for efficient parameter updates
- **Multi-agent specialization** training
- **Quality filtering** based on user feedback
- **Automated evaluation** and model comparison

## Requirements:
- Upload your training data JSON file
- Ensure you have the base models downloaded locally
- Free Google Colab GPU runtime

In [None]:
# Install required packages
# `-q` keeps pip output quiet. No versions are pinned, so pip decides which
# releases are used at the time the cell runs.
!pip install -q accelerate peft bitsandbytes transformers trl datasets evaluate
!pip install -q sentence-transformers chromadb
!pip install -q huggingface_hub

# Mount Google Drive for model storage
# The mount point is /content/drive; the training cell later copies the
# fine-tuned model under /content/drive/MyDrive so it persists.
from google.colab import drive
drive.mount('/content/drive')

# Create directories
# `-p` makes these commands safe to re-run when the directories already exist.
!mkdir -p /content/models
!mkdir -p /content/training_data
!mkdir -p /content/fine_tuned_models

In [None]:
import os
import json
import torch
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    TrainingArguments,
    Trainer,
    DataCollatorForLanguageModeling,
    BitsAndBytesConfig
)
from peft import (
    LoraConfig,
    get_peft_model,
    prepare_model_for_kbit_training,
    TaskType
)
from datasets import Dataset
import pandas as pd
from datetime import datetime
import logging

# Send INFO-and-above records through the root logging configuration and
# create this notebook's module-level logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Pick the compute device once and report it; GPU details are printed only
# when CUDA is actually available.
_cuda_ready = torch.cuda.is_available()
device = torch.device("cuda") if _cuda_ready else torch.device("cpu")
print(f"Using device: {device}")
if _cuda_ready:
    _gpu_props = torch.cuda.get_device_properties(0)
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"GPU Memory: {_gpu_props.total_memory / 1024**3:.1f} GB")

In [None]:
# Upload training data
from google.colab import files

print("Please upload your training data JSON file:")
uploaded = files.upload()

# `uploaded` is used below as a mapping keyed by file name. Guard against it
# being empty (presumably what happens when the upload dialog is dismissed),
# which would otherwise surface as an opaque IndexError.
if not uploaded:
    raise ValueError(
        "No file was uploaded. Re-run this cell and select the training data JSON file."
    )

# Load training data (only the first uploaded file is used)
training_data_path = next(iter(uploaded))
print(f"Loading training data from: {training_data_path}")

# Explicit UTF-8 so decoding does not depend on the runtime's default locale.
with open(training_data_path, 'r', encoding='utf-8') as f:
    training_data = json.load(f)

# An empty example list would only fail later, inside the pandas groupby,
# with a KeyError on 'agent_mode'; fail here with a clear message instead.
if not training_data['examples']:
    raise ValueError(f"{training_data_path} contains no training examples.")

print(f"Loaded {len(training_data['examples'])} training examples")
print(f"Agent modes: {training_data['metadata']['agent_modes']}")

# Display data summary: per agent mode, the count/mean/std of quality_score
# and the mean of reasoning_steps.
df = pd.DataFrame(training_data['examples'])
print("\nData Summary:")
print(df.groupby('agent_mode').agg({
    'quality_score': ['count', 'mean', 'std'],
    'reasoning_steps': 'mean'
}))

In [None]:
# Model configuration
# All supported base models currently share the same sequence length and LoRA
# hyperparameters; only the model identifier differs between entries.
_SHARED_HYPERPARAMS = {
    "max_seq_length": 2048,
    "lora_r": 16,
    "lora_alpha": 32,
    "lora_dropout": 0.05,
}
_BASE_MODEL_IDS = (
    ("llama3.1-8b", "meta-llama/Llama-3.1-8B-Instruct"),
    ("codellama-7b", "codellama/CodeLlama-7b-Instruct-hf"),
    ("mistral-7b", "mistralai/Mistral-7B-Instruct-v0.1"),
)
# Each entry gets its own dict (the shared values are copied, not aliased).
MODEL_CONFIGS = {
    key: {"model_name": model_id, **_SHARED_HYPERPARAMS}
    for key, model_id in _BASE_MODEL_IDS
}

# Select model to fine-tune
SELECTED_MODEL = "llama3.1-8b"  # Change this to train different models
config = MODEL_CONFIGS[SELECTED_MODEL]

print(f"Selected model: {SELECTED_MODEL}")
print(f"Model config: {config}")

In [None]:
# Load and prepare model for training
def load_model_for_training(model_name, config):
    """Load a causal LM in 4-bit form together with its tokenizer.

    Args:
        model_name: Identifier handed to ``from_pretrained`` for both the
            model and the tokenizer.
        config: Accepted for the caller's convenience; this function does
            not read it.

    Returns:
        A ``(model, tokenizer)`` tuple. If the tokenizer defines no padding
        token, its end-of-sequence token is assigned as the padding token.
    """
    # Options shared by the model and tokenizer loaders.
    shared_load_options = {"trust_remote_code": True}

    # NF4 4-bit weights, double quantization, float16 compute.
    quantization = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_use_double_quant=True,
        bnb_4bit_compute_dtype=torch.float16,
    )

    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        quantization_config=quantization,
        device_map="auto",
        **shared_load_options,
    )
    tokenizer = AutoTokenizer.from_pretrained(model_name, **shared_load_options)

    # Reuse EOS for padding when the tokenizer ships without a pad token.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    return model, tokenizer

# Load the base model and tokenizer for the selected configuration
print(f"Loading {SELECTED_MODEL} model...")
model, tokenizer = load_model_for_training(config["model_name"], config)

# Ready the quantized model for k-bit training before adapters are attached
model = prepare_model_for_kbit_training(model)

# Names of the modules that receive LoRA adapters
_LORA_TARGET_MODULES = [
    "q_proj", "k_proj", "v_proj", "o_proj",
    "gate_proj", "up_proj", "down_proj",
]

# LoRA settings come from the selected model's config entry
lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    r=config["lora_r"],
    lora_alpha=config["lora_alpha"],
    lora_dropout=config["lora_dropout"],
    target_modules=_LORA_TARGET_MODULES,
    bias="none",
)

# Wrap the model with the adapters and report the trainable share
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

print("Model loaded and configured for training!")
print(f"Trainable parameters: {model.get_nb_trainable_parameters()}")

In [None]:
# Prepare training dataset
def _format_training_text(model_key, instruction, input_text, output_text):
    """Render one example as the full prompt-plus-response training string.

    The template is chosen by substring match on the lower-cased *model_key*.
    """
    family = model_key.lower()
    if "codellama" in family:
        # Checked before the plain "llama" test: "codellama" contains "llama",
        # but CodeLlama-Instruct follows the Llama 2 [INST]/<<SYS>> chat
        # format rather than the Llama 3 header-token format.
        return (
            f"<s>[INST] <<SYS>>\n{instruction}\n<</SYS>>\n\n"
            f"{input_text} [/INST] {output_text} </s>"
        )
    if "llama" in family:
        # Llama chat format
        return (
            f"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n{instruction}<|eot_id|>"
            f"<|start_header_id|>user<|end_header_id|>\n\n{input_text}<|eot_id|>"
            f"<|start_header_id|>assistant<|end_header_id|>\n\n{output_text}<|eot_id|>"
        )
    if "mistral" in family:
        # Mistral chat format
        return f"<s>[INST] {instruction}\n\n{input_text} [/INST] {output_text}</s>"
    # Generic format
    return f"System: {instruction}\n\nUser: {input_text}\n\nAssistant: {output_text}"


def prepare_training_dataset(examples, tokenizer, max_length=2048,
                             min_quality_score=None, model_key=None):
    """Prepare a tokenized dataset for instruction tuning.

    Args:
        examples: Iterable of dicts with the keys "instruction", "input",
            "output" and "quality_score".
        tokenizer: Callable tokenizer applied to the formatted text.
        max_length: Sequences are truncated and padded to this many tokens.
        min_quality_score: When given, examples whose "quality_score" is
            below this value are dropped (assumes the scores are numeric).
            ``None`` (the default) keeps every example.
        model_key: Key used to pick the prompt template. Defaults to the
            notebook-level ``SELECTED_MODEL``.

    Returns:
        A ``datasets.Dataset`` holding the "text" and "quality_score"
        columns plus the columns produced by the tokenizer.

    Raises:
        ValueError: If no examples remain to build the dataset from.
    """
    if model_key is None:
        model_key = SELECTED_MODEL
    family = model_key.lower()

    # The Llama/CodeLlama/Mistral templates already spell out their BOS/EOS
    # tokens, so the tokenizer must not add its own on top of them; otherwise
    # the sequence starts with a duplicated BOS token. The generic template
    # contains no special tokens and keeps the tokenizer default.
    template_has_special_tokens = "llama" in family or "mistral" in family

    formatted_data = []
    for example in examples:
        quality_score = example["quality_score"]
        if min_quality_score is not None and quality_score < min_quality_score:
            continue
        prompt = _format_training_text(
            model_key,
            example["instruction"],
            example["input"],
            example["output"],
        )
        formatted_data.append({"text": prompt, "quality_score": quality_score})

    if not formatted_data:
        raise ValueError(
            "No training examples available after formatting/filtering; "
            "check the input data and min_quality_score."
        )

    # Create dataset
    dataset = Dataset.from_list(formatted_data)

    # Tokenize in batches; each batch is a dict of column name -> list.
    def tokenize_function(batch):
        return tokenizer(
            batch["text"],
            truncation=True,
            padding="max_length",
            max_length=max_length,
            add_special_tokens=not template_has_special_tokens,
        )

    tokenized_dataset = dataset.map(tokenize_function, batched=True)

    return tokenized_dataset

# Build the tokenized dataset from the uploaded examples
print("Preparing training dataset...")
train_dataset = prepare_training_dataset(
    training_data["examples"],
    tokenizer,
    max_length=config["max_seq_length"],
)
print(f"Training dataset prepared with {len(train_dataset)} examples")

# Hold out 10% of the examples for validation; the fixed seed keeps the
# split the same across runs.
train_val_split = train_dataset.train_test_split(test_size=0.1, seed=42)
train_dataset, val_dataset = train_val_split["train"], train_val_split["test"]

for _split_label, _split in (("Train", train_dataset), ("Validation", val_dataset)):
    print(f"{_split_label}: {len(_split)} examples")

In [None]:
# Training configuration
import inspect

# transformers renamed the `evaluation_strategy` argument to `eval_strategy`,
# and the install cell does not pin a version. Pick whichever name the
# installed TrainingArguments accepts so this cell works on both sides of
# the rename instead of failing with a TypeError.
_eval_strategy_kwarg = (
    "eval_strategy"
    if "eval_strategy" in inspect.signature(TrainingArguments.__init__).parameters
    else "evaluation_strategy"
)

training_args = TrainingArguments(
    output_dir=f"/content/fine_tuned_models/{SELECTED_MODEL}",
    num_train_epochs=3,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    gradient_accumulation_steps=2,
    learning_rate=2e-4,
    weight_decay=0.01,
    warmup_steps=100,
    logging_steps=10,
    # Evaluate and checkpoint on the same step schedule so the best
    # checkpoint (lowest eval_loss) can be reloaded at the end.
    save_steps=500,
    eval_steps=500,
    save_strategy="steps",
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    greater_is_better=False,
    fp16=True,
    gradient_checkpointing=True,
    optim="adamw_torch",
    report_to="none",  # Disable wandb/tensorboard for simplicity
    **{_eval_strategy_kwarg: "steps"},
)

# Data collator
data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False  # Not using masked language modeling
)

# Initialize trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    data_collator=data_collator,
)

# Rough step count: examples per optimizer step is batch size times the
# gradient accumulation factor (single device assumed).
print("Training configuration:")
print(f"- Model: {SELECTED_MODEL}")
print(f"- Epochs: {training_args.num_train_epochs}")
print(f"- Batch size: {training_args.per_device_train_batch_size}")
print(f"- Learning rate: {training_args.learning_rate}")
print(f"- Training steps: ~{len(train_dataset) // (training_args.per_device_train_batch_size * training_args.gradient_accumulation_steps) * training_args.num_train_epochs}")

In [None]:
# Start training
import shutil

print("🚀 Starting fine-tuning...")
print("This may take 1-2 hours depending on your dataset size")

# Wall-clock duration of the training call is reported and reused by the
# evaluation and deployment cells.
start_time = datetime.now()
train_result = trainer.train()
end_time = datetime.now()

training_duration = end_time - start_time
print(f"\n✅ Training completed in {training_duration}")
print(f"Final training loss: {train_result.training_loss:.4f}")

# Save the fine-tuned model
model_save_path = f"/content/fine_tuned_models/{SELECTED_MODEL}_agent_finetuned"
trainer.save_model(model_save_path)
tokenizer.save_pretrained(model_save_path)

print(f"\n💾 Model saved to: {model_save_path}")

# Copy to Google Drive for persistence.
# shutil.copytree(..., dirs_exist_ok=True) replaces the former `cp -r` shell
# call: re-running the cell now refreshes the existing Drive folder instead
# of nesting a second copy inside it, and a failed copy raises instead of
# being followed by a success message.
drive_save_path = f"/content/drive/MyDrive/agent_models/{SELECTED_MODEL}_agent_finetuned"
os.makedirs("/content/drive/MyDrive/agent_models", exist_ok=True)
shutil.copytree(model_save_path, drive_save_path, dirs_exist_ok=True)

print(f"📁 Model also saved to Google Drive: {drive_save_path}")

In [None]:
# Evaluate the fine-tuned model
def evaluate_model(model, tokenizer, test_examples, max_examples=10, model_key=None):
    """Generate responses for a handful of examples and collect them.

    Generation uses sampling (``do_sample=True``, ``temperature=0.7``), so
    the generated text is not reproducible from run to run.

    Args:
        model: Model exposing ``eval()``, ``generate()`` and ``device``.
        tokenizer: Tokenizer used to encode prompts and decode outputs.
        test_examples: Sequence of dicts with the keys "instruction",
            "input", "output" and "agent_mode".
        max_examples: Only the first ``max_examples`` examples are evaluated.
        model_key: Key used to pick the prompt template. Defaults to the
            notebook-level ``SELECTED_MODEL``.

    Returns:
        A list of dicts with the keys "input", "expected", "generated" and
        "agent_mode", one per evaluated example.
    """
    if model_key is None:
        model_key = SELECTED_MODEL
    family = model_key.lower()

    # The Llama/CodeLlama/Mistral prompts already contain their BOS token, so
    # the tokenizer must not prepend a second one.
    template_has_special_tokens = "llama" in family or "mistral" in family

    model.eval()
    results = []

    for i, example in enumerate(test_examples[:max_examples]):
        instruction = example["instruction"]
        input_text = example["input"]
        expected_output = example["output"]

        # Create prompt (everything up to, but excluding, the response)
        if "codellama" in family:
            # Checked before the plain "llama" test: "codellama" contains
            # "llama", but CodeLlama-Instruct uses the Llama 2 [INST] format.
            prompt = f"<s>[INST] <<SYS>>\n{instruction}\n<</SYS>>\n\n{input_text} [/INST] "
        elif "llama" in family:
            prompt = f"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n{instruction}<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n{input_text}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
        elif "mistral" in family:
            prompt = f"<s>[INST] {instruction}\n\n{input_text} [/INST] "
        else:
            prompt = f"System: {instruction}\n\nUser: {input_text}\n\nAssistant: "

        # Place the inputs on the device the model actually lives on.
        inputs = tokenizer(
            prompt,
            return_tensors="pt",
            add_special_tokens=not template_has_special_tokens,
        ).to(model.device)
        prompt_token_count = inputs["input_ids"].shape[1]

        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=256,
                temperature=0.7,
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id
            )

        # The output sequence starts with the prompt tokens. Decode only what
        # follows them: slicing the decoded string by prompt length or
        # splitting on a marker breaks once special tokens are stripped.
        response = tokenizer.decode(
            outputs[0][prompt_token_count:],
            skip_special_tokens=True,
        ).strip()

        results.append({
            "input": input_text,
            "expected": expected_output,
            "generated": response,
            "agent_mode": example["agent_mode"]
        })

        print(f"\n--- Test Example {i+1} ({example['agent_mode']}) ---")
        print(f"Input: {input_text[:100]}...")
        print(f"Expected: {expected_output[:100]}...")
        print(f"Generated: {response[:100]}...")

    return results

# Run evaluation on the uploaded examples
print("🔍 Evaluating fine-tuned model...")
eval_results = evaluate_model(model, tokenizer, training_data["examples"])

# Write the evaluation report as JSON next to the fine-tuned model outputs
eval_save_path = f"/content/fine_tuned_models/{SELECTED_MODEL}_evaluation.json"
with open(eval_save_path, 'w') as f:
    _evaluation_report = dict(
        model=SELECTED_MODEL,
        training_duration=str(training_duration),  # timedelta stored as text
        final_loss=train_result.training_loss,
        evaluation_results=eval_results,
        timestamp=datetime.now().isoformat(),
    )
    json.dump(_evaluation_report, f, indent=2)

print(f"\n📊 Evaluation results saved to: {eval_save_path}")

In [None]:
# Create deployment package
def create_deployment_package(model_path, model_name):
    """Create a deployment package for the fine-tuned model.

    Builds ``/content/deployment/<model_name>`` containing a copy of the
    model directory, a ``model_info.json`` summary and a ``deploy.sh``
    script, then zips that directory.

    Reads the notebook-level ``config``, ``training_data``, ``train_result``
    and ``training_duration`` to fill in the model info.

    Args:
        model_path: Directory holding the saved fine-tuned model files.
        model_name: Name used for the deployment directory, the zip file
            and inside the generated script.

    Returns:
        A ``(zip_path, deploy_dir)`` tuple.
    """

    import shutil

    # Create deployment directory
    deploy_dir = f"/content/deployment/{model_name}"
    os.makedirs(deploy_dir, exist_ok=True)

    # Copy model files. dirs_exist_ok=True lets the cell be re-run; without
    # it copytree raises FileExistsError on the second run.
    shutil.copytree(model_path, f"{deploy_dir}/model", dirs_exist_ok=True)

    # Create model info
    model_info = {
        "model_name": model_name,
        "base_model": config["model_name"],
        "fine_tuned_date": datetime.now().isoformat(),
        "training_data_size": len(training_data["examples"]),
        "agent_modes": training_data["metadata"]["agent_modes"],
        "lora_config": {
            "r": config["lora_r"],
            "alpha": config["lora_alpha"],
            "dropout": config["lora_dropout"]
        },
        "performance": {
            "final_loss": train_result.training_loss,
            "training_duration": str(training_duration)
        }
    }

    with open(f"{deploy_dir}/model_info.json", 'w') as f:
        json.dump(model_info, f, indent=2)

    # Create deployment script.
    # The shebang must be the very first bytes of the file to take effect, so
    # the literal starts directly after the opening quotes. `set -e` stops
    # the script before the success message if the copy fails.
    # NOTE(review): the script only copies the saved model directory into
    # Ollama's models folder; confirm that this is enough for
    # `ollama run` to find the model in the target environment.
    deploy_script = f"""#!/bin/bash
# AGENT Model Deployment Script
set -e

MODEL_NAME="{model_name}"
MODEL_PATH="./model"

# Copy to Ollama models directory
cp -r "$MODEL_PATH" "/usr/local/lib/ollama/models/$MODEL_NAME"

# Update Ollama configuration
echo "Model $MODEL_NAME deployed successfully!"
echo "Run: ollama run $MODEL_NAME"
"""

    with open(f"{deploy_dir}/deploy.sh", 'w') as f:
        f.write(deploy_script)

    # Make script executable
    os.chmod(f"{deploy_dir}/deploy.sh", 0o755)

    # Create zip archive; make_archive returns the path of the file it wrote
    # (the base name plus ".zip").
    zip_path = shutil.make_archive(f"/content/{model_name}_deployment", 'zip', deploy_dir)

    print(f"📦 Deployment package created: {zip_path}")
    print(f"📁 Deployment directory: {deploy_dir}")

    return zip_path, deploy_dir

# Build the deployment bundle for the model that was just fine-tuned
print("📦 Creating deployment package...")
zip_path, deploy_dir = create_deployment_package(model_save_path, f"{SELECTED_MODEL}_agent")

# Hand the zip archive to the browser for download
from google.colab import files
files.download(zip_path)

# Final run summary followed by the manual follow-up steps
_closing_report = [
    "\n🎉 Fine-tuning complete!",
    f"✅ Model: {SELECTED_MODEL}",
    f"✅ Training Loss: {train_result.training_loss:.4f}",
    f"✅ Training Time: {training_duration}",
    f"✅ Deployment Package: {zip_path}",
    "\n📋 Next Steps:",
    "1. Download the deployment package",
    "2. Extract and run deploy.sh on your local machine",
    "3. Update your AGENT system to use the new model",
    "4. Test the improved responses!",
]
print("\n".join(_closing_report))