# Direct Preference Optimization (DPO) Training with SageMaker

This notebook demonstrates how to use the DPOTrainer to fine-tune large language models using Direct Preference Optimization (DPO). DPO is a technique that trains models to align with human preferences by learning from preference data without requiring a separate reward model.

## Lab 1 - Data preparation

In this notebook, we prepare the dataset that will later be used to fine-tune Qwen 2.5 - 7B Instruct.

***

### Prerequisites

### Install requirements

In [None]:
%pip install -r requirements.txt

#### Setup and dependencies

In [None]:
import boto3
from sagemaker.core.helper.session_helper import Session, get_execution_role

# Bootstrap session: used below only to look up the default bucket when no
# explicit bucket name has been configured.
sess = Session()
# Set this to a bucket name to override the session default; None means
# "use the session's default bucket".
sagemaker_session_bucket = None

if sagemaker_session_bucket is None and sess is not None:
    # No bucket name given: fall back to the session's default bucket.
    sagemaker_session_bucket = sess.default_bucket()

try:
    role = get_execution_role()
except ValueError:
    # Fallback when get_execution_role() raises ValueError (presumably when
    # running outside a SageMaker-managed environment -- TODO confirm):
    # resolve the ARN of the IAM role named "sagemaker_execution_role".
    iam = boto3.client("iam")
    role = iam.get_role(RoleName="sagemaker_execution_role")["Role"]["Arn"]

s3_client = boto3.client("s3")
# Re-create the session bound to the chosen bucket; later cells read
# `bucket_name` and `default_prefix` to build S3 locations.
sess = Session(default_bucket=sagemaker_session_bucket)
bucket_name = sess.default_bucket()
default_prefix = sess.default_bucket_prefix

# Echo the resolved configuration for the reader.
print(f"sagemaker role arn: {role}")
print(f"sagemaker bucket: {sess.default_bucket()}")
print(f"sagemaker session region: {sess.boto_region_name}")

***

### Prepare the dataset

In [None]:
import datasets
from datasets import load_dataset

# Seed for the streaming shuffle so that "Restart Kernel -> Run All" always
# produces the same record order (the original shuffle was unseeded).
SHUFFLE_SEED = 42

# Stream the first 3000 records of the train split and shuffle them through a
# 1000-element buffer. The stream gets its own name so the generator below
# does not depend on `dataset`, which is rebound at the end of this cell.
streamed_dataset = (
    load_dataset(
        "HumanLLMs/Human-Like-DPO-Dataset",
        split="train",
        streaming=True,
    )
    .take(3000)
    .shuffle(seed=SHUFFLE_SEED, buffer_size=1000)
)


def _iter_streamed_examples():
    """Yield every example from the streamed (iterable) dataset."""
    yield from streamed_dataset


# Materialise the stream into a regular in-memory/on-disk Dataset, reusing the
# feature schema reported by the stream.
dataset = datasets.Dataset.from_generator(
    _iter_streamed_examples, features=streamed_dataset.features
)

In [None]:
import pandas as pd

# Convert the loaded dataset into a pandas DataFrame; the split cell below
# operates on `df`.
df = pd.DataFrame(dataset)

# Preview the first rows (rendered as this cell's output).
df.head()

In [None]:
from sklearn.model_selection import train_test_split

# Step 1: hold out 20% of all records for validation.
train, val = train_test_split(df, test_size=0.2, random_state=42)
# Step 2: carve the test set (10%) out of the REMAINING training rows.
# Splitting `df` again here (as before) would overwrite `train` with 90% of
# all rows, which overlaps `val` and leaks validation data into training.
train, test = train_test_split(train, test_size=0.1, random_state=42)

# Sanity checks: the three splits must partition `df` without overlap.
assert len(train) + len(val) + len(test) == len(df), "splits do not cover df"
assert set(train.index).isdisjoint(val.index), "train/val overlap"
assert set(train.index).isdisjoint(test.index), "train/test overlap"
assert set(val.index).isdisjoint(test.index), "val/test overlap"

print("Number of train elements: ", len(train))
print("Number of validation elements: ", len(val))
print("Number of test elements: ", len(test))

In [None]:
from datasets import Dataset
from tqdm import tqdm

def prepare_dataset_sm_dpo_train_val(sample):
    """Reduce a record to the fields used for DPO training/validation.

    Args:
        sample: Mapping that provides the "prompt", "chosen" and "rejected" keys.

    Returns:
        A new dict holding only "prompt", "chosen" and "rejected".

    Raises:
        Exception: Any error raised while reading the keys is printed and
            then re-raised unchanged.
    """
    wanted_keys = ("prompt", "chosen", "rejected")
    try:
        return {key: sample[key] for key in wanted_keys}
    except Exception as err:
        print(f"Error: {err}")
        raise

def prepare_dataset_sm_dpo_test(sample):
    """Reshape a record into the query/response layout used for the test split.

    Args:
        sample: Mapping that provides the "prompt" and "chosen" keys.

    Returns:
        A dict with "query" (taken from "prompt") and "response" (taken
        from "chosen").

    Raises:
        Exception: Any error raised while reading the keys is printed and
            then re-raised unchanged.
    """
    try:
        query, response = sample["prompt"], sample["chosen"]
    except Exception as err:
        print(f"Error: {err}")
        raise
    return {"query": query, "response": response}

In [None]:
from datasets import Dataset, DatasetDict
from random import randint

# Wrap each pandas split in a Hugging Face Dataset and group them by split name.
# Note from the original author: LLMAJ supports a maximum of 1000 records
# (relevant to the test split; this cell does not enforce the limit).
dataset = DatasetDict(
    {
        "train": Dataset.from_pandas(train),
        "val": Dataset.from_pandas(val),
        "test": Dataset.from_pandas(test),
    }
)

# Train/validation keep the preference triple; every pre-existing column is
# dropped so only the mapped fields remain.
train_dataset = dataset["train"].map(
    prepare_dataset_sm_dpo_train_val,
    remove_columns=list(dataset["train"].features),
)
val_dataset = dataset["val"].map(
    prepare_dataset_sm_dpo_train_val,
    remove_columns=list(dataset["val"].features),
)

# The test split is reshaped into query/response records instead.
test_dataset = dataset["test"].map(
    prepare_dataset_sm_dpo_test,
    remove_columns=list(dataset["test"].features),
)

#### Upload to Amazon S3

In [None]:
import shutil

In [None]:
# Build the S3 key prefix for the dataset files, nesting it under the
# session's default bucket prefix when one is configured.
datasets_subpath = "datasets/serverless-model-customization-sft"
input_path = f"{default_prefix}/{datasets_subpath}" if default_prefix else datasets_subpath

# Full S3 URIs of the three JSONL files (one per split).
s3_base_uri = f"s3://{bucket_name}/{input_path}"
train_dataset_s3_path = f"{s3_base_uri}/train/humanlike_dpo_train.jsonl"
val_dataset_s3_path = f"{s3_base_uri}/val/humanlike_dpo_val.jsonl"
test_dataset_s3_path = f"{s3_base_uri}/test/humanlike_dpo_test.jsonl"

In [None]:
import os
import tempfile

# (split name, dataset, file name) for every file that must land in S3.
splits_to_upload = [
    ("train", train_dataset, "humanlike_dpo_train.jsonl"),
    ("val", val_dataset, "humanlike_dpo_val.jsonl"),
    ("test", test_dataset, "humanlike_dpo_test.jsonl"),
]

# Stage the JSONL files in a throw-away temporary directory instead of
# "./data": this never deletes a pre-existing ./data folder, and the staging
# area is removed even when an export or upload raises.
with tempfile.TemporaryDirectory() as staging_dir:
    for split_name, split_dataset, file_name in splits_to_upload:
        local_dir = os.path.join(staging_dir, split_name)
        # Create the parent directory explicitly rather than relying on
        # to_json() to do it.
        os.makedirs(local_dir, exist_ok=True)
        local_path = os.path.join(local_dir, file_name)

        split_dataset.to_json(local_path, orient="records")
        # Same S3 keys as before: <input_path>/<split>/<file name>.
        s3_client.upload_file(
            local_path, bucket_name, f"{input_path}/{split_name}/{file_name}"
        )

print("Training data uploaded to:")
print(train_dataset_s3_path)
print(val_dataset_s3_path)
print(test_dataset_s3_path)

#### Create training, validation, and test datasets

In [None]:
from sagemaker.ai_registry.dataset import DataSet
from sagemaker.ai_registry.dataset_utils import CustomizationTechnique

In [None]:
# Keyword arguments shared by the two DPO preference datasets (train and
# validation). wait=True presumably blocks until creation completes -- verify
# against the SDK documentation.
dpo_create_kwargs = {
    "customization_technique": CustomizationTechnique.DPO,
    "wait": True,
}

# Training split.
dataset_train = DataSet.create(
    name="humanlike-dpo-train", source=train_dataset_s3_path, **dpo_create_kwargs
)
print(f"TRAINING_DATASET ARN: {dataset_train.arn}")

# Validation split.
dataset_val = DataSet.create(
    name="humanlike-dpo-val", source=val_dataset_s3_path, **dpo_create_kwargs
)
print(f"VALIDATION_DATASET ARN: {dataset_val.arn}")

# Test split: created without a customization technique, as in the original.
dataset_test = DataSet.create(
    name="humanlike-dpo-test", source=test_dataset_s3_path, wait=True
)
print(f"TEST_DATASET ARN: {dataset_test.arn}")