# Setup

## Installs & Imports

In [None]:
# Install the packages used below: wandb, huggingface-hub, trl, bitsandbytes, peft.
# NOTE(review): no versions are pinned, so each fresh run installs whatever
# release is current — consider pinning for reproducibility.
%pip install wandb -qU
%pip install huggingface-hub
%pip install trl
%pip install -U bitsandbytes
%pip install peft

In [None]:
import os
import huggingface_hub
import wandb
import torch
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TrainingArguments
import bitsandbytes as bnb
from peft import LoraConfig, get_peft_model
from trl import SFTTrainer

## Verifying the environment

In [None]:
# Resolve the API tokens and the directory model outputs are written to.
# The runtime is treated as Colab when COLAB_RELEASE_TAG is set; tokens then
# come from `google.colab.userdata`. Otherwise they are read from environment
# variables.
def _require_env(name):
    """Return the value of environment variable `name`.

    Raises:
        KeyError: if the variable is unset or empty. The exception type matches
            a plain `os.environ[name]` lookup, but the message says what to do.
    """
    value = os.environ.get(name)
    if not value:
        raise KeyError(
            f"Environment variable {name} is not set; export it before running this notebook."
        )
    return value


if os.getenv("COLAB_RELEASE_TAG"):
    from google.colab import userdata
    wandb_token = userdata.get('WANDB_API_KEY')
    hf_token = userdata.get('HF_TOKEN')
    model_dir = "./"
else:
    # Both tokens are mandatory: fail early with a readable message.
    wandb_token = _require_env("WANDB_API_KEY")
    hf_token = _require_env("HF_TOKEN")
    # HF_HOME is optional; when it is unset fall back to the documented default
    # cache location instead of raising.
    hf_home = os.environ.get("HF_HOME") or os.path.expanduser("~/.cache/huggingface")
    model_dir = os.path.join(hf_home, "models")

### Wandb & HF

In [None]:
# Authenticate with Weights & Biases and the Hugging Face Hub using the tokens
# resolved earlier, then start the W&B run for this notebook.
wandb.login(key=wandb_token)
huggingface_hub.login(token=hf_token)
wandb.init(project="applied-ai-lecture")

# Pipeline

## Load Dataset

In [None]:
# Load the "train" split of the HWGuncoverAI/my_name_is dataset.
dataset = load_dataset(path="HWGuncoverAI/my_name_is", split="train")
# Bare expression so the notebook shows the dataset summary as the cell output.
dataset

In [None]:
def add_name(sample, name="John Doe", placeholder="<NAME>"):
    """Substitute the name placeholder in the second message of a conversation.

    Args:
        sample: Mapping with a "conversation" list of message dicts; the
            message at index 1 must hold a "content" string.
        name: Text inserted wherever the placeholder occurs. Defaults to
            "John Doe", the value previously hard-coded here.
        placeholder: Marker to replace. Defaults to "<NAME>".

    Returns:
        The same `sample` object, modified in place. Only the message at
        index 1 is touched; other messages are left unchanged.
    """
    message = sample["conversation"][1]
    message["content"] = message["content"].replace(placeholder, name)
    return sample
# Run the placeholder substitution over every row, then display the first row
# as a spot check.
dataset = dataset.map(function=add_name)
dataset[0]

## Load Model

In [None]:
# Quantization config
# bitsandbytes quantization settings passed to the model loader.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                     # load the weights in 4-bit
    bnb_4bit_use_double_quant=True,        # enable double quantization
    bnb_4bit_quant_type="nf4",             # 4-bit quantization data type
    bnb_4bit_compute_dtype=torch.float16,  # dtype used for computation
)

In [None]:
MODEL_ID = "meta-llama/Llama-3.2-1B-Instruct"

# NOTE(review): trust_remote_code=True allows code shipped in the model repo to
# run locally — confirm it is actually required for this checkpoint.
tokenizer = AutoTokenizer.from_pretrained(
    MODEL_ID,
    trust_remote_code=True,
)
# The base model is loaded with the quantization config defined earlier;
# device placement is left to device_map="auto".
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    trust_remote_code=True,
    device_map="auto",
    quantization_config=bnb_config,
)
# Bare expression so the notebook prints the module tree.
model

In [None]:
def find_all_linear_names(model, linear_cls=None):
    """Collect the leaf names of a model's linear layers for use as LoRA targets.

    Args:
        model: Object exposing `named_modules()` that yields
            `(qualified_name, module)` pairs.
        linear_cls: Class (or tuple of classes) counted as a linear layer.
            Defaults to `bnb.nn.Linear4bit`, the class previously hard-coded.

    Returns:
        Sorted list of the unique last dotted-path components of all matching
        modules, with "lm_head" excluded. Sorting makes the result
        deterministic; the previous list-of-set order was arbitrary.
    """
    if linear_cls is None:
        linear_cls = bnb.nn.Linear4bit
    # The last path component is the layer's own name (e.g. "q_proj"); for an
    # undotted name this is the name itself.
    lora_module_names = {
        name.rsplit('.', 1)[-1]
        for name, module in model.named_modules()
        if isinstance(module, linear_cls)
    }
    # Never target the output head (original note: needed for 16 bit).
    lora_module_names.discard('lm_head')
    return sorted(lora_module_names)

# Layer names that the LoRA adapters will be attached to.
modules = find_all_linear_names(model)

# LoRA adapter hyperparameters.
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=modules,
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
)

# Wrap the loaded model with the adapter configuration; `model` now refers to
# the PEFT-wrapped model.
model = get_peft_model(model, peft_config)

### Test Tokenizer

In [None]:
first_conversation = dataset[0]["conversation"]
# Chat template rendered as a string (tokenize=False) ...
print(tokenizer.apply_chat_template(conversation=first_conversation, tokenize=False))
# ... and the same conversation with the default tokenized output.
print(tokenizer.apply_chat_template(conversation=first_conversation))

### Test Model

In [None]:
# Drop the final message: it is the expected model response, so the model has
# to produce it on its own.
conversation = dataset[0]["conversation"][:-1]
prompt_ids = tokenizer.apply_chat_template(
    conversation=conversation,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
response = model.generate(prompt_ids, max_length=100)
print(tokenizer.decode(response[0]))

## Prepare tokenizer & dataset

In [None]:
def prepare_sample(sample):
    """Attach the chat-formatted text and its tokenized length to a dataset row.

    Adds two keys to `sample` and returns it:
      * "len"  – length of the result of applying the chat template with its
                 default (tokenizing) behavior.
      * "text" – the chat template rendered as a string (tokenize=False).
    """
    messages = sample["conversation"]
    tokenized = tokenizer.apply_chat_template(conversation=messages)
    sample["len"] = len(tokenized)
    sample["text"] = tokenizer.apply_chat_template(conversation=messages, tokenize=False)
    return sample

# Apply to every row; num_proc is whatever os.cpu_count() reports.
dataset = dataset.map(prepare_sample, num_proc=os.cpu_count())

In [None]:
# Show the tokenizer's current limit next to the longest prepared sample, then
# set the limit to that longest length.
longest_sample_len = max(dataset["len"])
print(tokenizer.model_max_length)
print(longest_sample_len)
tokenizer.model_max_length = longest_sample_len

# Define Training

In [None]:
# Hold out 20% of the rows for evaluation. The fixed seed keeps the split the
# same across kernel restarts, so results are comparable between runs.
# NOTE(review): `dataset` is rebound to the split result here, so re-running
# this cell without re-running the cells above will presumably fail — confirm.
dataset = dataset.train_test_split(test_size=0.2, seed=42)
train_dataset, test_dataset = dataset["train"], dataset["test"]

In [None]:
# Training configuration for the fine-tuning run.
args = TrainingArguments(
    run_name="lecture-test-run",
    # os.path.join avoids the doubled separator ("./" + "/tuned_llama") that
    # plain concatenation produced when model_dir ends with a slash.
    output_dir=os.path.join(model_dir, "tuned_llama"),
    num_train_epochs=1,
    per_device_train_batch_size=1,
    report_to="wandb",
    # `eval_strategy` is the current name of the deprecated
    # `evaluation_strategy` argument.
    eval_strategy="steps",
    # NOTE(review): no eval_steps is given, so evaluation presumably follows
    # logging_steps (every step) — confirm this cost is intended.
    logging_steps=1,
)

In [None]:
# Build the supervised fine-tuning trainer from the PEFT-wrapped model, the
# two dataset splits and the training arguments.
# NOTE(review): the `tokenizer` configured earlier (including its adjusted
# model_max_length) is not passed here — confirm the trainer ends up using the
# intended tokenizer and length limit.
trainer = SFTTrainer(
    args=args,
    model=model,
    eval_dataset=test_dataset,
    train_dataset=train_dataset,
)

# Run Training

In [None]:
# Start training; as the last expression, the call's return value is shown as
# the cell output.
trainer.train()

## Check Tuned Model

In [None]:
# Put the model in evaluation mode and prompt it with a held-out conversation,
# leaving out the final message so the model has to generate it.
model.eval()
conversation = test_dataset[0]["conversation"][:-1]
prompt_ids = tokenizer.apply_chat_template(
    conversation=conversation,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)
response = model.generate(prompt_ids, max_length=100)
print(tokenizer.decode(response[0]))