## Install Required Libraries

In [None]:
!pip install torch==2.3.1
!pip install --user trl==0.11.4
!pip install peft==0.14.0
!pip install matplotlib==3.9.0
!pip install pandas
!pip install numpy==1.26.0
!pip install --user datasets==3.2.0
!pip install transformers==4.45.2

## Import Libraries

In [None]:
import multiprocessing
import os
import requests
import tarfile
import pandas as pd
import matplotlib.pyplot as plt
import torch

from datasets import load_dataset
from peft import LoraConfig
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    GPT2Tokenizer,
    set_seed,
    GenerationConfig,
)
from trl import DPOConfig, DPOTrainer

## Load and Configure Model & Tokenizer

In [None]:
#Single checkpoint name shared by the tokenizer and both model copies
base_checkpoint = "gpt2"

#Tokenizer: reuse the end-of-sequence token for padding and pad on the right
tokenizer = GPT2Tokenizer.from_pretrained(base_checkpoint)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

#Policy model to be fine-tuned; its use_cache config flag is switched off before training
model = AutoModelForCausalLM.from_pretrained(base_checkpoint)
model.config.use_cache = False

#Second copy of the same checkpoint.
#NOTE(review): the trainer cell passes ref_model=None, so model_ref appears unused — confirm before removing.
model_ref = AutoModelForCausalLM.from_pretrained(base_checkpoint)

#Display the model architecture as the cell output
model

## Load and Preprocess Dataset

In [None]:
#Load dataset
ds = load_dataset("BarraHome/ultrafeedback_binarized")

#Reduce dataset size: keep at most the first `cnt` rows of every split.
#The limit is set once outside the loop and clamped to the split length so a
#split with fewer than `cnt` rows does not make select() fail on out-of-range indices.
cnt = 50
for key in ds:
    ds[key] = ds[key].select(range(min(cnt, len(ds[key]))))

#Process dataset
def process(row):
    """Reduce a preference example to the fields kept for DPO training.

    Removes the "prompt_id", "messages", "score_chosen" and "score_rejected"
    entries from ``row`` and replaces "chosen" / "rejected" with the
    "content" of the last element of each list.

    The mapping is modified in place and the same object is returned.
    Raises KeyError if any of the expected keys is missing.
    """
    for unused_key in ("prompt_id", "messages", "score_chosen", "score_rejected"):
        del row[unused_key]

    #Keep only the text of the final message on each side of the pair
    for side in ("chosen", "rejected"):
        final_message = row[side][-1]
        row[side] = final_message["content"]

    return row

#Run `process` over the dataset with as many worker processes as there are CPUs,
#recomputing the result instead of loading it from the datasets cache
worker_count = multiprocessing.cpu_count()
ds = ds.map(process, num_proc=worker_count, load_from_cache_file=False)

#Preference splits used for training and evaluation
train_dataset, eval_dataset = ds['train_prefs'], ds['test_prefs']

#Show the first processed training example as the cell output
train_dataset[0]

## LoRA Configuration

In [None]:
#LoRA adapter settings; this object is later handed to the DPO trainer
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    #Names of the model sub-modules that receive LoRA adapters
    target_modules=['c_proj', 'c_attn'],
    #Adapter rank and scaling factor
    r=4,
    lora_alpha=8,
    #Dropout applied inside the adapter layers
    lora_dropout=0.1,
    #Bias terms are not trained
    bias="none",
)

## DPO Configuration

In [None]:
#Training hyper-parameters for the DPO run
training_args = DPOConfig(
    #DPO beta coefficient
    beta=0.1,
    #Directory where checkpoints are written
    output_dir="dpo",
    num_train_epochs=5,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    #Keep every dataset column instead of dropping the ones the model's forward() does not accept
    remove_unused_columns=False,
    logging_steps=10,
    gradient_accumulation_steps=1,
    learning_rate=1e-4,
    #`evaluation_strategy` is deprecated in the pinned transformers release;
    #`eval_strategy` is its replacement and behaves the same (evaluate once per epoch)
    eval_strategy="epoch",
    warmup_steps=2,
    fp16=False,
    save_steps=500,
    #Do not send metrics to any external experiment tracker
    report_to='none'
)

## DPO Trainer Initialization

In [None]:
#Assemble the DPO trainer from the model, tokenizer, datasets and configs defined earlier
trainer = DPOTrainer(
    #Policy model plus the LoRA settings applied to it
    model=model,
    peft_config=peft_config,
    #No separate reference model is supplied
    #NOTE(review): with a peft_config TRL is expected to derive the reference from the base weights — confirm against the TRL docs
    ref_model=None,
    args=training_args,
    tokenizer=tokenizer,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    #Maximum sequence length passed to the trainer
    max_length=512,
)

## Model Training

In [None]:
#Run DPO fine-tuning with the trainer built in the previous cell.
#As the last expression in the cell, the value returned by train() is shown as the cell output.
trainer.train()

## Plot Training vs Evaluation Loss

In [None]:
#Turn the trainer's log history into a table and split it into the rows that
#carry a training loss and the rows that carry an evaluation loss
log = pd.DataFrame(trainer.state.log_history)
log_t = log[log['loss'].notna()]
log_e = log[log['eval_loss'].notna()]

#Plot both curves against the epoch on one labelled axis so the figure
#can be read on its own
fig, ax = plt.subplots()
ax.plot(log_t["epoch"], log_t["loss"], label="train_loss")
ax.plot(log_e["epoch"], log_e["eval_loss"], label="eval_loss")
ax.set_xlabel("epoch")
ax.set_ylabel("loss")
ax.set_title("DPO training vs evaluation loss")
ax.legend()
plt.show()

## Load Trained or Pre-Trained DPO Model

In [None]:
#Try loading the model trained in this notebook
try:
    dpo_model = AutoModelForCausalLM.from_pretrained('./dpo/checkpoint-250')
except Exception as load_error:
    #Catch Exception rather than using a bare except so that interrupting the
    #kernel (KeyboardInterrupt) is not mistaken for a missing checkpoint
    print(f"Local checkpoint not loaded ({type(load_error).__name__}); falling back to the pre-trained DPO model.")

    #If unavailable, load pre-trained checkpoint
    url = 'https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/YIDeT3qihEpWChdXN_RmTg/DPO-tar.gz'
    filename = './DPO.tar'

    #Download and unpack only when the extracted model directory is not already present
    if not os.path.isdir('./DPO'):
        response = requests.get(url, timeout=60)
        #Fail here on an HTTP error instead of writing an error page to disk
        response.raise_for_status()
        with open(filename, 'wb') as f:
            f.write(response.content)

        if not tarfile.is_tarfile(filename):
            raise RuntimeError(f"Downloaded file {filename} is not a valid tar archive")

        with tarfile.open(filename, 'r') as tar:
            #The archive comes from the network: use the 'data' extraction filter
            #where this Python version provides it, to block entries that would
            #be written outside the target directory
            if hasattr(tarfile, 'data_filter'):
                tar.extractall(filter='data')
            else:
                tar.extractall()
            print("Files extracted:", tar.getnames())
    dpo_model = AutoModelForCausalLM.from_pretrained('./DPO')

## Generate and Compare Responses

In [None]:
#Seed the random number generators so the sampled outputs are repeatable
set_seed(42)
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')

#Generation settings shared by both models: sampling is enabled, but top_k=1
#restricts each step to the single highest-probability token
generation_config = GenerationConfig(
    max_new_tokens=25,
    do_sample=True,
    temperature=0.1,
    top_k=1,
    pad_token_id=tokenizer.eos_token_id,
)

#The same tokenized prompt is fed to both models
PROMPT = "Is a higher octane gasoline better for your car?"
inputs = tokenizer(PROMPT, return_tensors='pt')

#DPO model response
outputs_dpo = dpo_model.generate(generation_config=generation_config, **inputs)
dpo_text = tokenizer.decode(outputs_dpo[0], skip_special_tokens=True)
print("DPO response:\t", dpo_text)

#GPT-2 baseline response, from a freshly loaded base checkpoint
gpt2_model = AutoModelForCausalLM.from_pretrained('gpt2')
outputs_gpt2 = gpt2_model.generate(generation_config=generation_config, **inputs)
gpt2_text = tokenizer.decode(outputs_gpt2[0], skip_special_tokens=True)
print("\nGPT2 response:\t", gpt2_text)
