In [None]:
# Cell 1: Setup and Dependencies
# Install required libraries
!pip install -q pandas datasets torch trl transformers==4.48.0 accelerate

import warnings
warnings.filterwarnings("ignore")

# Import libraries
import os
import torch
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from trl import DPOConfig, DPOTrainer
from google.colab import userdata

In [None]:
# Cell 2: Load Dataset from Hugging Face Hub
# Hub namespace that hosts the dataset; the value below looks like a
# placeholder and presumably has to be replaced before running.
hf_username = "your_user_name"

# Repo id has the form "<namespace>/<dataset name>"
dataset_repo_id = "/".join([hf_username, "youtube-titles-dpo"])
dataset = load_dataset(dataset_repo_id)

# Show the available splits and their columns
print(dataset)

In [None]:
# Cell 3: Load Base Model and Tokenizer
# Load the base model to be fine-tuned (weights are loaded in bfloat16)
model_name = "Qwen/Qwen2.5-0.5B-Instruct"

model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Fall back to EOS for padding only when the tokenizer has no pad token of its
# own. Overwriting an existing pad token unconditionally would make padding
# indistinguishable from the end-of-sequence token, and that altered tokenizer
# is what gets saved alongside the fine-tuned model.
# NOTE(review): the Qwen2.5 tokenizer is believed to ship with a dedicated pad
# token, in which case this branch is a no-op — confirm against the model's
# tokenizer_config.json.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

In [None]:
# Cell 4: Configure and Run DPOTrainer

# Used as the local output directory here and as the repo name when pushing
ft_model_name = "Qwen2.5-0.5B-DPO-YouTube-Titles"

# Hyperparameters grouped by concern; all of them are unpacked into DPOConfig.

# 8 samples per device per step, accumulated over 2 steps before each update
batching_kwargs = dict(
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    gradient_accumulation_steps=2,
)

# Optimisation schedule and the DPO beta coefficient
optimisation_kwargs = dict(
    num_train_epochs=3,
    learning_rate=5e-6,
    beta=0.1,
)

# Evaluate and checkpoint once per epoch, then restore the checkpoint with the
# best eval_loss when training finishes
checkpoint_kwargs = dict(
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
)

training_args = DPOConfig(
    output_dir=ft_model_name,
    logging_steps=25,
    **batching_kwargs,
    **optimisation_kwargs,
    **checkpoint_kwargs,
)

# Build the trainer from the model, tokenizer and dataset splits loaded earlier
trainer = DPOTrainer(
    model=model,
    args=training_args,
    processing_class=tokenizer,
    train_dataset=dataset['train'],
    eval_dataset=dataset['valid'],
)

print("Starting DPO training...")
trainer.train()
print("Training complete.")

In [None]:
# Cell 5: Push Fine-Tuned Model to Hub
from huggingface_hub import notebook_login

# Interactive login; the token entered here needs write access to the target repo
notebook_login()

model_id = f"{hf_username}/{ft_model_name}"

# Trainer.push_to_hub() takes a *commit message* as its first positional
# argument, not a repo id, so passing model_id there would only change the
# commit message. The destination repo is taken from args.hub_model_id, so set
# it explicitly to make the upload land in the repo reported below.
trainer.args.hub_model_id = model_id
trainer.push_to_hub()
print(f"Model pushed to {model_id}")