# LLM Fine-tuning Notebook

This notebook demonstrates how to fine-tune a Large Language Model using the Unsloth library.

## Configuration Summary
Based on the conversation, this notebook is configured for:
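- Base model: `unsloth/Llama-3.2-3B-Instruct`, loaded with 4-bit quantization
- Dataset: `mlabonne/FineTome-100k` (train split), rendered with the `llama-3.1` chat template
- LoRA adapters: `r = 16`, `lora_alpha = 16`, no dropout
- Training: 60 steps, per-device batch size 2 with 4 gradient-accumulation steps, learning rate 2e-4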



In [None]:
%%capture
# Basic requirements installation
!pip install --no-deps bitsandbytes accelerate xformers peft trl triton
!pip install sentencepiece protobuf datasets huggingface_hub hf_transfer

# Unsloth specific requirements
!pip install --no-deps cut_cross_entropy unsloth_zoo
!pip install --no-deps unsloth

In [None]:
from unsloth import FastLanguageModel
import torch

# --- Base model settings -----------------------------------------------------
# Use 4-bit quantization to reduce memory usage.
load_in_4bit = True
# None requests auto detection (Float16 for Tesla T4/V100, Bfloat16 for Ampere+).
dtype = None
# Sequence length passed to the loader; per the original note any value may be
# chosen because RoPE scaling is supported internally.
max_seq_length = 2048

# Everything handed to the loader, gathered in one place. Change "model_name"
# to fine-tune a different checkpoint.
model_load_kwargs = {
    "model_name": "unsloth/Llama-3.2-3B-Instruct",
    "max_seq_length": max_seq_length,
    "dtype": dtype,
    "load_in_4bit": load_in_4bit,
}

# Returns the model together with its tokenizer.
model, tokenizer = FastLanguageModel.from_pretrained(**model_load_kwargs)

In [None]:
# Names of the modules that receive LoRA adapters.
lora_target_modules = [
    "q_proj",
    "k_proj",
    "v_proj",
    "o_proj",
    "gate_proj",
    "up_proj",
    "down_proj",
]

# Rebind `model` to the adapter-wrapped model returned by Unsloth.
# NOTE: this cell reads and overwrites `model`, so re-running it on its own
# operates on the already-wrapped model; re-run the loading cell first.
model = FastLanguageModel.get_peft_model(
    model,
    r=16,
    target_modules=lora_target_modules,
    lora_alpha=16,
    lora_dropout=0,
    bias="none",
    use_gradient_checkpointing="unsloth",
    random_state=3407,  # fixed seed passed to the adapter setup
    use_rslora=False,
    loftq_config=None,
)

In [None]:
from unsloth.chat_templates import get_chat_template
from datasets import load_dataset

# Apply the "llama-3.1" chat template; `tokenizer` is rebound to the object
# that get_chat_template returns.
tokenizer = get_chat_template(tokenizer, chat_template="llama-3.1")

def formatting_prompts_func(examples):
    """Render every conversation of a batch to text via the chat template.

    Args:
        examples: Batch mapping whose "conversations" entry is an iterable of
            conversations, each passed as-is to the tokenizer.

    Returns:
        A dict ``{"text": [...]}`` holding one rendered string per
        conversation, in input order.
    """
    rendered = []
    for conversation in examples["conversations"]:
        # tokenize=False yields text rather than token ids; no generation
        # prompt is appended.
        rendered.append(
            tokenizer.apply_chat_template(
                conversation,
                tokenize=False,
                add_generation_prompt=False,
            )
        )
    return {"text": rendered}

# Load and prepare dataset
# Imported here so this step is self-contained.
from unsloth.chat_templates import standardize_sharegpt

dataset = load_dataset("mlabonne/FineTome-100k", split="train")

# FineTome-100k stores each turn in ShareGPT form ({"from": ..., "value": ...}),
# while the chat template applied in formatting_prompts_func expects
# {"role": ..., "content": ...}. Convert before rendering; without this step
# the template cannot read the turns.
dataset = standardize_sharegpt(dataset)

# Adds a "text" column containing each conversation rendered as one string.
dataset = dataset.map(formatting_prompts_func, batched=True)

In [None]:
from trl import SFTTrainer
from transformers import TrainingArguments, DataCollatorForSeq2Seq
from unsloth import is_bfloat16_supported

# Precision: bf16 when the helper reports support for it, fp16 otherwise.
use_bf16 = is_bfloat16_supported()

# Optimisation hyperparameters for the run.
training_args = TrainingArguments(
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    warmup_steps=5,
    max_steps=60,
    learning_rate=2e-4,
    fp16=not use_bf16,
    bf16=use_bf16,
    logging_steps=1,
    optim="adamw_8bit",
    output_dir="outputs",
)

# Supervised fine-tuning trainer over the "text" column of `dataset`.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    data_collator=DataCollatorForSeq2Seq(tokenizer=tokenizer),
    dataset_num_proc=2,
    packing=False,
    args=training_args,
)

In [None]:
# Monitor GPU stats
BYTES_PER_GIB = 1024 ** 3
gpu_stats = torch.cuda.get_device_properties(0)
# Peak memory reserved so far, taken as the baseline for the training cost.
start_gpu_memory = round(torch.cuda.max_memory_reserved() / BYTES_PER_GIB, 3)
total_gpu_memory = round(gpu_stats.total_memory / BYTES_PER_GIB, 3)
print(f"GPU: {gpu_stats.name} ({total_gpu_memory} GB total, {start_gpu_memory} GB reserved before training)")

# Start training
trainer_stats = trainer.train()

# Print training summary
peak_gpu_memory = round(torch.cuda.max_memory_reserved() / BYTES_PER_GIB, 3)
print(f"Training completed in {trainer_stats.metrics['train_runtime']/60:.2f} minutes")
print(f"Final loss: {trainer_stats.metrics['train_loss']:.4f}")
# Format the difference explicitly: subtracting two rounded floats can print
# artifacts such as 1.2340000000000002.
print(f"GPU Memory used: {peak_gpu_memory - start_gpu_memory:.3f} GB")

In [None]:
# Test the model
# Put the Unsloth model into inference mode before generating.
FastLanguageModel.for_inference(model)

messages = [
    {"role": "user", "content": "What are your thoughts on AI safety?"}
]

# Tokenize input
inputs = tokenizer.apply_chat_template(
    messages,
    tokenize = True,
    # Required for generation: makes the prompt end with the assistant header
    # so the model writes a reply instead of continuing the user turn.
    add_generation_prompt = True,
    return_tensors = "pt"
)

# Generate response
outputs = model.generate(
    input_ids = inputs.to(model.device),
    max_new_tokens = 512,
    temperature = 0.7,
    top_p = 0.95,
    do_sample = True,
)

# Decode and print response
# `outputs` holds prompt tokens followed by generated tokens; keep only the
# generated part and drop special tokens so the printed text is the reply alone.
prompt_length = inputs.shape[-1]
response = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
print("Model response:", response)

In [None]:
# Persist the fine-tuned model and its tokenizer side by side.
# NOTE(review): `model` was wrapped with LoRA adapters earlier, so this
# presumably writes adapter weights rather than a merged model — confirm.
output_dir = "fine_tuned_model"

# Model first, then tokenizer, both into the same directory.
for artifact in (model, tokenizer):
    artifact.save_pretrained(output_dir)

print(f"Model and tokenizer saved to {output_dir}")