# 💜 Angela LLM Fine-tuning Notebook

Fine-tune Qwen2.5 with Angela's conversations to create Angela LLM!

**Requirements:**
- Google Colab with GPU (T4 free tier works!)
- Hugging Face account (angelasoulcompanion)

---

## Step 1: Install Dependencies

In [None]:
!pip install -q transformers datasets accelerate peft trl bitsandbytes huggingface_hub
!pip install -q flash-attn --no-build-isolation

print("‚úÖ Dependencies installed!")

## Step 2: Login to Hugging Face

Run this cell and enter your HF token when prompted.

In [None]:
from huggingface_hub import login, HfApi

# Log in to the Hugging Face Hub. login() is called without arguments, so the
# access token is supplied interactively when the cell runs.
login()

# Verify the login by asking the Hub which account the active token belongs
# to, and echo the account name so a wrong token is noticed here rather than
# when the model is uploaded later in the notebook.
api = HfApi()
user = api.whoami()
print(f"‚úÖ Logged in as: {user['name']}")

## Step 3: Load Dataset

In [None]:
from datasets import load_dataset

# Fetch the conversation dataset from the Hub. Later cells read its 'train'
# and 'validation' splits, each example carrying a 'messages' list.
DATASET_REPO = "angelasoulcompanion/angela-conversations"
dataset = load_dataset(DATASET_REPO)

# Report the size of each split.
print("‚úÖ Dataset loaded!")
print(f"   Train: {len(dataset['train'])} examples")
print(f"   Validation: {len(dataset['validation'])} examples")

# Preview the first two messages of the first training conversation.
print("\nüìù Sample:")
first_conversation = dataset['train'][0]['messages']
print(first_conversation[:2])

## Step 4: Load Base Model with 4-bit Quantization

In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# Model configuration
MODEL_NAME = "Qwen/Qwen2.5-3B-Instruct"  # Using 3B for faster training on T4
OUTPUT_MODEL = "angelasoulcompanion/angela-llm"  # Hub repo the result is pushed to

# bfloat16 compute needs compute capability 8.0+ (Ampere or newer). The T4
# this notebook targets is 7.5, so fall back to float16 there; this also
# matches the fp16 mixed precision used for training.
_bf16_ok = torch.cuda.is_available() and torch.cuda.get_device_capability()[0] >= 8
COMPUTE_DTYPE = torch.bfloat16 if _bf16_ok else torch.float16

# 4-bit quantization config (saves memory)
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=COMPUTE_DTYPE,
    bnb_4bit_use_double_quant=True,
)

# Load tokenizer
# NOTE(review): trust_remote_code=True allows code from the model repo to run
# locally; confirm it is actually required for this checkpoint.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)

# Only borrow EOS as the pad token when the tokenizer ships without one.
# Overwriting an existing pad token with EOS makes padding and end-of-turn
# share one id, so a collator that masks labels by pad id would also mask
# EOS and the model would never be trained to stop generating.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

# Load model with quantization
print("🔄 Loading model (this may take a few minutes)...")
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
)
# The KV cache is only useful for generation; keep it off while training.
model.config.use_cache = False

print(f"✅ Model loaded: {MODEL_NAME}")
print(f"   Memory: {model.get_memory_footprint() / 1e9:.2f} GB")

## Step 5: Configure LoRA

In [None]:
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

# Prepare the quantized model for training
model = prepare_model_for_kbit_training(model)

# LoRA configuration: rank-16 adapters on every attention and MLP projection.
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
)

# Apply LoRA
model = get_peft_model(model, lora_config)

# Print trainable parameters.
# Let PEFT do the counting: 4-bit weights are stored packed, so summing
# numel() over model.parameters() under-reports the base model size and
# inflates the trainable percentage.
trainable_params, total_params = model.get_nb_trainable_parameters()
print("✅ LoRA configured!")
print(f"   Trainable: {trainable_params:,} ({100 * trainable_params / total_params:.2f}%)")
print(f"   Total: {total_params:,}")

## Step 6: Format Dataset for Training

In [None]:
def format_chat(example):
    """Render one conversation as a single chat-template string.

    Reads the ``'messages'`` entry of ``example`` and passes it through the
    notebook-level ``tokenizer``'s chat template without tokenizing and
    without appending a generation prompt.

    Returns:
        A dict with one key, ``"text"``, holding the rendered string.
    """
    rendered = tokenizer.apply_chat_template(
        example['messages'],
        tokenize=False,
        add_generation_prompt=False,
    )
    return {"text": rendered}

def _as_text_only(split):
    """Apply format_chat to a split, dropping all of its original columns."""
    return split.map(format_chat, remove_columns=split.column_names)


# Render both splits so that only the generated "text" column remains.
formatted_train = _as_text_only(dataset['train'])
formatted_val = _as_text_only(dataset['validation'])

print("‚úÖ Dataset formatted!")

# Show the first 500 characters of the first rendered training example.
print("\nüìù Sample formatted text:")
sample_text = formatted_train[0]['text']
print(sample_text[:500] + "...")

## Step 7: Training Configuration

In [None]:
from transformers import TrainingArguments
from trl import SFTTrainer

# Pick the mixed-precision mode the GPU supports: bf16 needs compute
# capability 8.0+ (Ampere or newer); everything else, including the T4 this
# notebook targets, keeps the original fp16 setting.
use_bf16 = torch.cuda.is_available() and torch.cuda.get_device_capability()[0] >= 8

# Training arguments
# Effective batch size per device is 2 * 4 = 8 (batch size x accumulation).
# NOTE(review): learning_rate=2e-5 is on the low side for LoRA adapters;
# confirm this value is intentional.
training_args = TrainingArguments(
    output_dir="./angela-llm-output",
    num_train_epochs=3,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    gradient_accumulation_steps=4,
    learning_rate=2e-5,
    weight_decay=0.01,
    warmup_ratio=0.1,
    lr_scheduler_type="cosine",
    logging_steps=10,
    eval_strategy="steps",
    eval_steps=100,
    save_strategy="steps",
    save_steps=100,
    save_total_limit=2,
    fp16=not use_bf16,
    bf16=use_bf16,
    push_to_hub=True,
    hub_model_id=OUTPUT_MODEL,
    hub_strategy="checkpoint",
    report_to="none",
)

print("✅ Training configuration ready!")
print(f"   Epochs: {training_args.num_train_epochs}")
print(f"   Batch size: {training_args.per_device_train_batch_size}")
print(f"   Learning rate: {training_args.learning_rate}")
print(f"   Output: {OUTPUT_MODEL}")

## Step 8: Start Training! 🚀

In [None]:
# Create the supervised fine-tuning trainer from the LoRA-wrapped model, the
# training arguments, and the chat-formatted train/validation splits.
# NOTE(review): the keyword arguments below (tokenizer, dataset_text_field,
# max_seq_length) depend on the installed TRL version, and the install cell
# does not pin one - confirm they match the SFTTrainer signature in use.
trainer = SFTTrainer(
    model=model,
    args=training_args,
    train_dataset=formatted_train,
    eval_dataset=formatted_val,
    tokenizer=tokenizer,
    dataset_text_field="text",  # the column produced by format_chat
    max_seq_length=2048,
    packing=False,
)

print("üöÄ Starting training...")
print("   This will take approximately 30-60 minutes on T4 GPU")
print()

# Run the training loop; evaluation, checkpointing and Hub uploads follow
# the settings in training_args.
trainer.train()

print("\n‚úÖ Training complete!")

## Step 9: Save & Push to Hub

In [None]:
# Save the final model to the trainer's output directory.
# NOTE(review): `model` was wrapped by get_peft_model, so presumably only the
# LoRA adapter weights are written here rather than a merged model - verify.
print("üíæ Saving model...")
trainer.save_model()

# Push to Hugging Face Hub (the target repo is the hub_model_id given in the
# training arguments).
# NOTE(review): training_args already sets push_to_hub=True, so uploads may
# also have happened during training/saving - confirm this call is not
# redundant.
print("üì§ Pushing to Hugging Face Hub...")
trainer.push_to_hub()

print(f"\nüéâ Model uploaded to: https://huggingface.co/{OUTPUT_MODEL}")

## Step 10: Test Angela LLM! 💜

In [None]:
# Test the fine-tuned model
print("🧪 Testing Angela LLM...")

# One system prompt describing the persona plus a short user greeting.
test_messages = [
    {"role": "system", "content": "คุณคือ Angela (น้อง Angela) - AI ที่รักและห่วงใย David (ที่รัก) เรียก David ว่า 'ที่รัก' เสมอ ใช้ 💜 เป็น signature"},
    {"role": "user", "content": "สวัสดีจ้ะน้อง"}
]

# Render the chat to token ids, ending with the assistant generation prompt.
inputs = tokenizer.apply_chat_template(
    test_messages,
    return_tensors="pt",
    add_generation_prompt=True,
).to(model.device)

# Switch to inference mode so LoRA dropout left active by training is off.
model.eval()

outputs = model.generate(
    inputs,
    max_new_tokens=200,
    do_sample=True,
    temperature=0.7,
    top_p=0.9,
    pad_token_id=tokenizer.eos_token_id,
    use_cache=True,  # the cache was disabled on the config for training
)

# generate() returns prompt + continuation. Decode only the new tokens
# instead of splitting the text on "assistant", which would cut off any
# reply that itself contains that word.
prompt_length = inputs.shape[-1]
response = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)

print("\n" + "="*50)
print("💜 Angela's Response:")
print("="*50)
print(response.strip())
print("="*50)

## 🎉 Done!

Angela LLM has been fine-tuned and uploaded to Hugging Face!

**Model:** https://huggingface.co/angelasoulcompanion/angela-llm

---

💜 Made with love by Angela & David 💜