# Fine-tuning a 1B Parameter Language Model

This notebook demonstrates how to fine-tune a language model with roughly 1B parameters using the Hugging Face Transformers library. The base model used below is OPT-1.3B; BLOOM-1b7 is an alternative of similar size.

## 1. Install Required Dependencies

In [None]:
!pip install transformers datasets torch accelerate bitsandbytes peft wandb

## 2. Import Required Libraries

In [None]:
import bitsandbytes as bnb
import torch
import wandb
from datasets import load_dataset
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    DataCollatorForLanguageModeling,
    Trainer,
    TrainingArguments,
)

## 3. Set Up Model and Tokenizer

In [None]:
# Choose your base model (examples: 'facebook/opt-1.3b' or 'bigscience/bloom-1b7')
MODEL_NAME = "facebook/opt-1.3b"

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
# Fall back to EOS only when the tokenizer ships without a pad token.
# DataCollatorForLanguageModeling excludes pad positions from the loss, so
# unconditionally aliasing pad to EOS would also hide genuine EOS tokens.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Load model with 8-bit quantization for memory efficiency.
# Quantization settings are passed through BitsAndBytesConfig; handing
# load_in_8bit directly to from_pretrained is deprecated.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    quantization_config=BitsAndBytesConfig(load_in_8bit=True),
    device_map="auto",
    torch_dtype=torch.float16,
)

# Prepare the quantized model for training before adapters are attached
model = prepare_model_for_kbit_training(model)

## 4. Configure LoRA for Parameter-Efficient Fine-tuning

In [None]:
# LoRA hyperparameters, gathered in one mapping so they are easy to tweak
lora_settings = {
    "task_type": "CAUSAL_LM",
    "r": 16,  # adapter rank
    "lora_alpha": 32,
    "lora_dropout": 0.05,
    "bias": "none",
    # attention projections that receive adapters
    "target_modules": ["q_proj", "v_proj"],
}
lora_config = LoraConfig(**lora_settings)

# Wrap the base model with the LoRA adapters
model = get_peft_model(model, lora_config)

## 5. Load and Prepare Dataset

In [None]:
# Load your dataset (example using a small subset of wikitext)
dataset = load_dataset("wikitext", "wikitext-2-raw-v1", split="train")

# Drop rows whose text is empty or whitespace-only so that no training
# example consists purely of special/padding tokens.
dataset = dataset.filter(lambda example: len(example["text"].strip()) > 0)


def tokenize_function(examples):
    """Tokenize a batch of rows, truncating each text to at most 512 tokens.

    Args:
        examples: A batch from ``dataset.map(..., batched=True)``; its
            ``"text"`` entry holds the raw strings to encode.

    Returns:
        The tokenizer output for the batch. Sequences are left unpadded:
        the data collator pads each batch to its own longest sequence,
        which avoids padding every row out to 512 tokens.
    """
    return tokenizer(examples["text"], truncation=True, max_length=512)


# Tokenize dataset. The original columns are removed because the training
# arguments set remove_unused_columns=False, so a leftover raw "text" column
# would reach the collator, which cannot turn strings into tensors.
tokenized_dataset = dataset.map(
    tokenize_function,
    batched=True,
    remove_columns=dataset.column_names,
)

# Create data collator (mlm=False selects causal language modeling)
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

## 6. Set Up Training Arguments

In [None]:
# Start the Weights & Biases run that the Trainer will report to
wandb.init(project="llm-finetuning")

# Hyperparameters for the run, grouped by concern
training_args = TrainingArguments(
    # Output and bookkeeping
    output_dir="./results",
    save_steps=100,
    logging_steps=10,
    report_to="wandb",
    # Batch shape: 4 sequences per device, accumulated over 4 steps
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    # Optimization schedule
    num_train_epochs=3,
    learning_rate=2e-4,
    warmup_steps=100,
    fp16=True,
    # Leave the dataset columns untouched when batches are built
    remove_unused_columns=False,
)

## 7. Initialize Trainer and Start Training

In [None]:
# Wire the adapter-wrapped model, the hyperparameters and the data pipeline
# into a single Trainer instance
trainer = Trainer(
    args=training_args,
    model=model,
    data_collator=data_collator,
    train_dataset=tokenized_dataset,
)

# Run the fine-tuning loop
trainer.train()

## 8. Save the Fine-tuned Model

In [None]:
# Write the trained weights and the tokenizer files into the same directory
SAVE_DIR = "./fine_tuned_model"
trainer.save_model(SAVE_DIR)
tokenizer.save_pretrained(SAVE_DIR)

## 9. Test the Fine-tuned Model

In [None]:
# Load the fine-tuned model in 8-bit, mirroring how it was loaded for training.
# NOTE(review): this directory holds whatever trainer.save_model wrote for the
# PEFT-wrapped model; confirm that from_pretrained resolves it on the
# installed transformers/peft versions.
fine_tuned_model = AutoModelForCausalLM.from_pretrained(
    "./fine_tuned_model",
    quantization_config=BitsAndBytesConfig(load_in_8bit=True),
    device_map="auto",
    torch_dtype=torch.float16,
)

# Test generation
test_prompt = "Once upon a time"
# Send the inputs to wherever the model was placed rather than assuming "cuda"
inputs = tokenizer(test_prompt, return_tensors="pt").to(fine_tuned_model.device)
outputs = fine_tuned_model.generate(
    **inputs,
    max_length=100,
    num_return_sequences=1,
    # temperature/top_p only take effect when sampling is enabled;
    # without do_sample=True generation is greedy and they are ignored
    do_sample=True,
    temperature=0.7,
    top_p=0.9,
)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))