# Finetune Llama-3-1B-Chat

## Load Data

In [1]:
from datasets import load_dataset

# Load dataset
dataset = load_dataset('billingsmoore/LotsawaHouse-bo-en', split='train')
dataset = dataset.train_test_split(.1)

## Load the Model With QLoRA

In [3]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model
from trl import SFTTrainer

model_id = "meta-llama/Llama-3.2-1B-Instruct"

# 4-bit quantization for efficient training
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True
)

# Load model and tokenizer
model = AutoModelForCausalLM.from_pretrained(model_id, quantization_config=bnb_config, device_map="auto")
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.pad_token = tokenizer.eos_token  # Handle padding

# Define LoRA configuration
lora_config = LoraConfig(
    r=16, 
    lora_alpha=32,
    target_modules=["q_proj", "v_proj"],  # Target attention layers
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

# Apply LoRA
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()  # Check trainable params



trainable params: 1,703,936 || all params: 1,237,518,336 || trainable%: 0.1377


## Tokenize Data in Message Format

In [8]:
def tokenize_function(examples):

    messages = [
        str({"messages": [
            {"role": "system", "content": "You are a skilled translator of Tibetan texts. You answer only with the requested translation and nothing else."}, 
            {"role": "user", "content": f"Translate from Tibetan to English: {bo}"}, 
            {"role": "assistant", "content": f"{en}"}]
            }) for bo, en in zip(examples['bo'],examples['en'])]

    return tokenizer(messages, truncation=True, padding="max_length", max_length=512)

tokenized_datasets = dataset.map(tokenize_function, batched=True)


Map:   0%|          | 0/96772 [00:00<?, ? examples/s]

Map:   0%|          | 0/10753 [00:00<?, ? examples/s]

## Train the Model

In [9]:
%env WANDB_PROJECT=llama-tests

env: WANDB_PROJECT=llama-tests


In [12]:
training_args = TrainingArguments(
    output_dir="llama3-Chat-1B-finetune-test",
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    num_train_epochs=1,
    learning_rate=2e-4,
    fp16=True,
    logging_steps=10,
    save_strategy="epoch",
    evaluation_strategy="epoch",  # Run evaluation at the end of each epoch
    eval_steps=100,  # Evaluate every 100 steps (optional)
    save_total_limit=2,  # Keep only 2 best checkpoints
    metric_for_best_model="loss",  # Track loss for early stopping
    greater_is_better=False,  # Lower loss is better
    report_to="wandb"
)


trainer = SFTTrainer(
    model=model,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["test"],  # Add evaluation dataset
    args=training_args,
    peft_config=lora_config
    )


trainer.train()




Converting train dataset to ChatML:   0%|          | 0/96772 [00:00<?, ? examples/s]

Applying chat template to train dataset:   0%|          | 0/96772 [00:00<?, ? examples/s]

Truncating train dataset:   0%|          | 0/96772 [00:00<?, ? examples/s]

Converting eval dataset to ChatML:   0%|          | 0/10753 [00:00<?, ? examples/s]

Applying chat template to eval dataset:   0%|          | 0/10753 [00:00<?, ? examples/s]

Truncating eval dataset:   0%|          | 0/10753 [00:00<?, ? examples/s]

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mbillingsmoore[0m. Use [1m`wandb login --relogin`[0m to force relogin


Epoch,Training Loss,Validation Loss
0,0.5151,0.50423




TrainOutput(global_step=6048, training_loss=0.6055477753518119, metrics={'train_runtime': 9975.9978, 'train_samples_per_second': 9.7, 'train_steps_per_second': 0.606, 'total_flos': 2.897949371746222e+17, 'train_loss': 0.6055477753518119})

In [13]:
trainer.model.save_pretrained("qlora-llama3.2-1b")

