In [None]:
# Load the 'train' split of the preference dataset and report how many rows it has.

import datasets # type: ignore

DATASET_ID = 'ZSvedic/phi3-arena-short-dpo'

dataset = datasets.load_dataset(DATASET_ID, split='train')
n_rows = len(dataset)
print(n_rows)

In [None]:
# Load the tokenizer/model pair for the chosen checkpoint, then report GPU memory in use.
import torch # type: ignore
import utils.llm_utils as llm

model_name = 'microsoft/Phi-3-mini-4k-instruct'
tokenizer, model = llm.load_tokenizer_and_model(model_name)

# memory_allocated() is reported in bytes; 2**20 bytes per MB.
allocated_mb = torch.cuda.memory_allocated() / 2**20
print(f'Allocated GPU memory: {allocated_mb:,.1f} MB')

In [3]:
# Configure DPOTrainer.

from trl import DPOTrainer, DPOConfig

# Where training artifacts go (relative to the notebook's working directory).
OUTPUT_DIR = "../results"
LOGGING_DIR = "../logs"
BASE_MODEL_DIR = "../results/base-model"

# Token budgets, estimated as: words x 6 ch x 1.2 buffer / 4 chars_in_token.
MAX_PROMPT_TOKENS = 108  # 60 words
MAX_TOTAL_TOKENS = 324   # (60+120) words -- presumably prompt + response; confirm against the dataset

# Set up the training arguments.
training_args = DPOConfig(
    output_dir=OUTPUT_DIR,
    logging_dir=LOGGING_DIR,
    num_train_epochs=3,
    per_device_train_batch_size=4,
    max_length=MAX_TOTAL_TOKENS,
    max_prompt_length=MAX_PROMPT_TOKENS,
    remove_unused_columns=False,
)

# No explicit reference model is supplied. NOTE(review): per the TRL docs the
# trainer then derives its own reference from `model` -- confirm for the
# installed TRL version.
trainer = DPOTrainer(
    model=model,
    ref_model=None,
    args=training_args,
    train_dataset=dataset,
    tokenizer=tokenizer,
)

# For debugging purposes, save the initial model before training.
trainer.save_model(BASE_MODEL_DIR)

In [None]:
# Run training; the bare final expression makes the notebook display train()'s return value.
train_output = trainer.train()
train_output

In [5]:
# Save the trained model into the output directory configured on training_args.
final_model_dir = training_args.output_dir
trainer.save_model(final_model_dir)