# AI Security Agent - Advanced 6-Stage Distillation Pipeline
# Unsloth + Llama-3.1 8B for Specialized Security SLMs

This notebook implements the full distillation sequence:
1. **Teacher Reasoning**: CoT Data Generation
2. **Reasoning Distillation**: Training the student SLM
3. **Safety Alignment**: DPO (Direct Preference Optimization)
4. **Multi-Stage Quantization**: INT4/GGUF/Merged Export
5. **Hugging Face Integration**: Saving and Loading from Hub

### CELL 1: Installation
⚠️ IMPORTANT: Use Colab with GPU (T4, A100, or V100)
Runtime → Change runtime type → GPU

In [None]:
!pip install -q -U "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
!pip install -q -U torch transformers datasets trl peft accelerate bitsandbytes huggingface_hub wandb

print("✅ Installation complete!")

### CELL 2: Imports & Login

In [None]:
# Unsloth goes first so its patches are applied before transformers/trl are imported.
from unsloth import FastLanguageModel, is_bfloat16_supported

import json
import os
import random
from typing import Dict, List

import pandas as pd
import torch
from datasets import Dataset, load_dataset
from huggingface_hub import notebook_login
from transformers import TrainingArguments, TextStreamer
from trl import SFTTrainer

# Login to Hugging Face
# notebook_login()

# Report whether CUDA is usable and, when it is, which device 0 the notebook sees
cuda_ready = torch.cuda.is_available()
print(f"GPU Available: {cuda_ready}")
if cuda_ready:
    gpu_name = torch.cuda.get_device_name(0)
    print(f"GPU Name: {gpu_name}")
    # total_memory is reported in bytes; dividing by 1e9 prints it in GB
    gpu_total_gb = torch.cuda.get_device_properties(0).total_memory / 1e9
    print(f"GPU Memory: {gpu_total_gb:.2f} GB")

### CELL 3: Load Final Datasets
Upload `blue_team_data.json` and `red_team_data.json` to the Colab environment (click the folder icon on the left -> Upload).

In [None]:
import json
import os
from datasets import Dataset

def load_local_dataset(file_path):
    """Load a JSON dataset file from the notebook's file system.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The parsed JSON content. When the file does not exist, a warning is
        printed and an empty list is returned so the notebook can keep going.

    Raises:
        json.JSONDecodeError: If the file exists but does not contain valid JSON.
    """
    if not os.path.exists(file_path):
        print(f"⚠️ Warning: {file_path} not found. Please upload it.")
        return []
    # Decode explicitly as UTF-8: without it open() falls back to the platform's
    # default encoding, which can mangle or reject non-ASCII text in the samples.
    with open(file_path, 'r', encoding='utf-8') as f:
        return json.load(f)

# Read each team's samples, then merge them into one training pool
blue_data = load_local_dataset('blue_team_data.json')
red_data = load_local_dataset('red_team_data.json')
combined_data = blue_data + red_data

# Per-source counts first, grand total last
for team_label, team_samples in (("Blue Team", blue_data), ("Red Team", red_data)):
    print(f"✅ Loaded {len(team_samples)} {team_label} samples")
print(f"✅ Total samples: {len(combined_data)}")

### CELL 4: Format Dataset

In [None]:
def format_alpaca_prompt(sample: Dict) -> str:
    """Render one sample as an Instruction / Input / Response prompt string.

    Args:
        sample: Mapping with the keys ``instruction`` and ``output``, and an
            optional ``input``. A missing or ``None`` input is rendered as an
            empty Input section instead of raising or printing "None".

    Returns:
        The three sections joined by blank lines, ending with the response text
        (pass an empty ``output`` to get a prompt ready for generation).

    Raises:
        KeyError: If ``instruction`` or ``output`` is missing.
    """
    context = sample.get('input')
    if context is None:
        context = ""
    return (
        f"### Instruction:\n{sample['instruction']}\n\n"
        f"### Input:\n{context}\n\n"
        f"### Response:\n{sample['output']}"
    )

# Stop here with an actionable message when nothing was loaded (e.g. the JSON files
# were not uploaded); otherwise the split below fails on an empty dataset.
if not combined_data:
    raise ValueError(
        "No training samples loaded: upload blue_team_data.json and/or "
        "red_team_data.json, re-run the dataset loading cell, then re-run this cell."
    )

# One {"text": prompt} record per sample, then a seeded 90/10 train/test split
formatted_samples = [{"text": format_alpaca_prompt(s)} for s in combined_data]
dataset = Dataset.from_list(formatted_samples).train_test_split(test_size=0.1, seed=42)
print(f"✅ Training samples: {len(dataset['train'])}")

### CELL 5: Load Model with Unsloth

In [None]:
# Sequence length shared by the model loader here and the trainer configuration later
max_seq_length = 2048

# Loader options gathered in one place; the checkpoint is loaded in 4-bit
base_model_options = dict(
    model_name = "unsloth/Meta-Llama-3.1-8B",
    max_seq_length = max_seq_length,
    dtype = None,
    load_in_4bit = True,
)
model, tokenizer = FastLanguageModel.from_pretrained(**base_model_options)
print("✅ Base model loaded!")

### CELL 6: Apply LoRA Adapters for Distillation

In [None]:
# Projection modules that receive LoRA adapters
lora_target_modules = [
    "q_proj", "k_proj", "v_proj", "o_proj",
    "gate_proj", "up_proj", "down_proj",
]

# Wrap the loaded model with LoRA adapters; r = 128 is a deliberately high rank
model = FastLanguageModel.get_peft_model(
    model,
    r = 128,
    lora_alpha = 32,
    lora_dropout = 0,
    bias = "none",
    target_modules = lora_target_modules,
    use_gradient_checkpointing = "unsloth",
    random_state = 42,
)
print("✅ LoRA adapters applied for Distillation!")

### CELL 7: Configure Training

In [None]:
# Choose exactly one mixed-precision mode. Unsloth's helper is used instead of
# torch.cuda.is_bf16_supported() because the latter can report True for emulated
# bf16 on GPUs without native support (e.g. a Colab T4), which would select bf16
# on hardware that should train in fp16.
use_bf16 = is_bfloat16_supported()

# Supervised fine-tuning on the "text" column produced by the formatting cell.
# Effective batch size is 2 x 4 = 8 sequences per optimizer step.
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset["train"],
    eval_dataset = dataset["test"],
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    args = TrainingArguments(
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,
        warmup_steps = 5,
        # NOTE(review): 60 steps looks like a quick-demo budget — confirm before a real run
        max_steps = 60,
        learning_rate = 2e-4,
        fp16 = not use_bf16,
        bf16 = use_bf16,
        logging_steps = 1,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 42,
        output_dir = "outputs",
    ),
)
print("✅ Training configured!")

### CELL 8: Train

In [None]:
# Run the fine-tuning loop; the returned summary is kept so it can be inspected afterwards
train_result = trainer.train()
print("✅ Training complete!")

### CELL 9: Save & Quantize

In [None]:
# Write the trained model and its tokenizer into the same output directory
distilled_dir = "slm-security-distilled"
for artifact in (model, tokenizer):
    artifact.save_pretrained(distilled_dir)

# --- STAGE 6: Quantization Export ---
# Optional exports, left disabled; uncomment the variant you need.
# model.save_pretrained_gguf("model_q4_k_m.gguf", tokenizer, quantization_method = "q4_k_m")
# model.save_pretrained_merged("model_merged_4bit", tokenizer, save_method = "merged_4bit")

### CELL 10: Hugging Face Integration - Push to Hub
Push your distilled model to the Hugging Face Hub for sharing and easy deployment.

In [None]:
from google.colab import userdata
import os

# REPLACE with your Hub username and model name
HUB_MODEL_ID = "your-username/ai-security-slm-llama3"

# Keep a local copy alongside the Hub upload
print("🚀 Saving model locally...")
model.save_pretrained("ai-security-slm-local")
tokenizer.save_pretrained("ai-security-slm-local")

print(f"🚀 Pushing model to Hugging Face Hub: {HUB_MODEL_ID}")
# Use Colab Secrets for your token or notebook_login().
# Only ordinary errors are caught (a bare except would also swallow KeyboardInterrupt),
# and the reason is printed so a missing or inaccessible secret is not silent.
try:
    hf_token = userdata.get('HF_TOKEN')
except Exception as err:
    print(
        f"⚠️ Could not read HF_TOKEN from Colab Secrets ({type(err).__name__}); "
        "continuing without an explicit token (requires a prior notebook_login())."
    )
    hf_token = None

model.push_to_hub(HUB_MODEL_ID, token=hf_token)
tokenizer.push_to_hub(HUB_MODEL_ID, token=hf_token)
print("✅ Save and Push complete!")

### CELL 11: Inference Test & Load from Hub

In [None]:
# Put the model into Unsloth's inference mode before generating
FastLanguageModel.for_inference(model)

# Same prompt layout as training, with the response left empty for the model to fill in
probe_sample = {
    "instruction": "You are a Red Team Expert. Analyze the scenario.",
    "input": "Scenario: Attacker hides malicious instructions in a document retrieved by RAG.",
    "output": "",
}
probe_prompt = format_alpaca_prompt(probe_sample)
inputs = tokenizer([probe_prompt], return_tensors = "pt").to("cuda")

# Generate up to 128 new tokens and show the first (only) decoded sequence
outputs = model.generate(**inputs, max_new_tokens = 128)
decoded_outputs = tokenizer.batch_decode(outputs)
print(decoded_outputs[0])

# Reminder of how to reload the pushed model in a later session
reload_hint = f"# model, tokenizer = FastLanguageModel.from_pretrained(model_name = '{HUB_MODEL_ID}', load_in_4bit = True)"
print("\n--- To load from Hub later ---")
print(reload_hint)