# Setup

## Installs & Imports

In [None]:
%pip install wandb -qU
%pip install huggingface-hub
%pip install trl

In [None]:
import os
import huggingface_hub
import wandb
import torch
import trl

## Verifying the Environment

In [None]:
# Fail fast, before any login or download, if the environment is incomplete.
# All problems are collected and reported in one go, and only variable *names*
# are mentioned so that secret values never end up in the cell output.
required_env_vars = ("WANDB_API_KEY", "HF_TOKEN", "HF_HOME")
missing_env_vars = [name for name in required_env_vars if not os.environ.get(name)]
if missing_env_vars:
    # KeyError is the exception type a plain os.environ[...] lookup raises.
    raise KeyError(
        "Missing or empty environment variable(s): " + ", ".join(missing_env_vars)
    )

# Last expression of the cell: displays the configured HF_HOME value (not a secret).
os.environ["HF_HOME"]

In [None]:
# Authenticate with Weights & Biases and the Hugging Face Hub using the
# credentials taken from the environment.
wandb.login(key=os.environ["WANDB_API_KEY"])
huggingface_hub.login(token=os.environ["HF_TOKEN"])

# Start the W&B run for this notebook; as the last expression, the run object is displayed.
wandb.init(project="applied-ai-lecture")

# Pipeline

## Load Dataset

In [None]:
from datasets import Dataset

# Read the training examples from the local JSON file and print the first row.
train_json_path = "./chris_train.json"
dataset = Dataset.from_json(train_json_path)
first_row = dataset[0]
print(first_row)
def add_conversation(sample):
    """Attach a two-turn chat to ``sample`` under the ``conversation`` key.

    The ``instruction`` field becomes the user message and the ``output``
    field becomes the assistant message. ``sample`` is modified in place and
    the same object is returned.
    """
    user_turn = {"role": "user", "content": sample["instruction"]}
    assistant_turn = {"role": "assistant", "content": sample["output"]}
    sample["conversation"] = [user_turn, assistant_turn]
    return sample
# Add the `conversation` column to every row, using os.cpu_count() worker processes.
worker_count = os.cpu_count()
dataset = dataset.map(add_conversation, num_proc=worker_count)

# Display the first row, which now carries the conversation.
dataset[0]

## Load Model

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer

# Hub identifier of the base checkpoint that gets fine-tuned below.
MODEL_ID = "meta-llama/Llama-3.2-1B-Instruct"

# trust_remote_code is intentionally NOT enabled: it allows Python code shipped
# in the model repository to run locally, and the Llama architecture is
# implemented in transformers itself, so nothing here requires it.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
# device_map="auto" leaves the placement of the weights to the library.
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, device_map="auto")

# Last expression of the cell: display the model.
model

### Test Tokenizer

In [None]:
# Run the first conversation through the chat template twice:
# once as the rendered string, once with the default (tokenized) output.
first_conversation = dataset[0]["conversation"]
print(tokenizer.apply_chat_template(conversation=first_conversation, tokenize=False))
print(tokenizer.apply_chat_template(conversation=first_conversation))

### Test Model

In [None]:
# Drop the final (assistant) message: it is the reference answer the model
# is expected to produce, so it must not be part of the prompt.
conversation = dataset[0]["conversation"][:-1]

# return_dict=True yields both input_ids and attention_mask, so generate()
# receives an explicit mask instead of having to infer one.
inputs = tokenizer.apply_chat_template(
    conversation=conversation,
    add_generation_prompt=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)

# max_new_tokens limits only the generated continuation. max_length would also
# count the prompt tokens, so a long templated prompt could leave little or no
# room for the actual answer.
response = model.generate(**inputs, max_new_tokens=100)
print(tokenizer.decode(response[0]))

## Prepare Tokenizer & Dataset

In [None]:
def prepare_sample(sample):
    """Add the chat-templated form of ``sample["conversation"]`` to ``sample``.

    Two keys are written, using the notebook-level ``tokenizer``:

    - ``len``: length of the chat template's default (tokenized) output
    - ``text``: the conversation rendered as a string (``tokenize=False``)

    ``sample`` is modified in place and the same object is returned.
    """
    messages = sample["conversation"]
    token_ids = tokenizer.apply_chat_template(conversation=messages)
    rendered_text = tokenizer.apply_chat_template(conversation=messages, tokenize=False)
    sample["len"] = len(token_ids)
    sample["text"] = rendered_text
    return sample

# Add the `len` and `text` columns to every row, using os.cpu_count() worker processes.
dataset = dataset.map(
    function=prepare_sample,
    num_proc=os.cpu_count(),
)

In [None]:
# Largest value in the `len` column, i.e. the longest templated conversation.
longest_sample_len = max(dataset["len"])

print(tokenizer.model_max_length)  # value currently configured on the tokenizer
print(longest_sample_len)

# Set the tokenizer's maximum length to that of the longest sample.
tokenizer.model_max_length = longest_sample_len

# Define Training

In [None]:
from transformers import TrainingArguments

# Gather the settings first so that each one can carry its own note.
# NOTE(review): a W&B run is already started earlier via wandb.init(...);
# confirm that `run_name` is still applied to that run.
training_kwargs = {
    "output_dir": "./tuned_model",
    "num_train_epochs": 1,
    "per_device_train_batch_size": 1,
    "report_to": "wandb",  # send training logs to W&B
    "run_name": "lecture-test-run",  # optional name of the W&B run
    "logging_steps": 1,  # logging interval, in steps
}
args = TrainingArguments(**training_kwargs)

In [None]:
from trl import SFTTrainer

# Build the supervised fine-tuning trainer.
# The tokenizer is passed explicitly so that training uses the same tokenizer
# object that was prepared earlier in this notebook (including its adjusted
# model_max_length); otherwise the trainer loads a fresh default tokenizer for
# the model on its own.
# NOTE(review): `processing_class` is the argument name in current TRL
# releases; older releases called it `tokenizer` - confirm against the
# installed version.
trainer = SFTTrainer(
    model=model,
    args=args,
    train_dataset=dataset,
    processing_class=tokenizer,
)

# Run Training

In [None]:
# Run the fine-tuning loop; the returned training summary is displayed as the cell output.
train_output = trainer.train()
train_output

## Check Tuned Model

In [None]:
# Switch the model to evaluation mode before generating.
model.eval()

# Drop the final (assistant) message: it is the reference answer the model
# is expected to produce, so it must not be part of the prompt.
conversation = dataset[5]["conversation"][:-1]

# return_dict=True yields both input_ids and attention_mask, so generate()
# receives an explicit mask instead of having to infer one.
inputs = tokenizer.apply_chat_template(
    conversation=conversation,
    add_generation_prompt=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)

# max_new_tokens limits only the generated continuation. max_length would also
# count the prompt tokens, so a long templated prompt could leave little or no
# room for the actual answer.
response = model.generate(**inputs, max_new_tokens=100)
print(tokenizer.decode(response[0]))