In [1]:
import os

os.environ["HF_TOKEN"] = "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"

In [2]:
import json
import torch
import pandas as pd

from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, BitsAndBytesConfig
from datasets import load_dataset, Dataset
from peft import PeftModel
from trl import DPOTrainer, DPOConfig
from typing import Dict, List, Union
from accelerate import Accelerator

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
with open("dpo_pairs_bacchus_v2.jsonl") as json_file:
    dataset = list(json_file)

In [4]:
dataset = list(map(json.loads, dataset))
dataset = Dataset.from_pandas(pd.DataFrame(dataset))

dataset

Dataset({
    features: ['prompt', 'chosen', 'rejected'],
    num_rows: 500
})

In [5]:
# # Set the device
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Set model
MODEL_NAME = "meta-llama/Meta-Llama-3.1-8B-Instruct"

In [6]:
# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

accelerator = Accelerator()

Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


In [7]:
# Setting pad_token
tokenizer.eos_token = "<|end_of_text|>"
tokenizer.pad_token = tokenizer.eos_token

# tokenizer.add_special_tokens({"eos_token": "<|end_of_text|>"})

In [8]:
# LoRA configuration
from peft import LoraConfig

peft_config = LoraConfig(
    r=16,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=['k_proj', 'gate_proj', 'v_proj', 'up_proj', 'q_proj', 'o_proj', 'down_proj']
)

# Model to fine-tune
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float16,
    load_in_4bit=True,
    device_map="auto"
)
model.config.use_cache = False

# Training arguments
dpo_config = DPOConfig(
    output_dir="./results_standard_dpo",
    num_train_epochs=1,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=8,
    save_steps=1000,
    logging_steps=2,
    save_total_limit=2,
    learning_rate=2e-4,
    fp16=False,
    bf16=True,
    max_grad_norm=1.0,
    warmup_ratio=0.1,
    group_by_length=False,
    lr_scheduler_type="cosine",
    beta=0.1,
    max_prompt_length=512,
    max_length=4096,
    remove_unused_columns=False,
)

# Create DPO trainer
dpo_trainer = DPOTrainer(
    model,
    args=dpo_config,
    train_dataset=dataset,
    tokenizer=tokenizer,
    peft_config=peft_config,
    beta=0.1,
    max_prompt_length=1024,
    max_length=1536,
)

# Fine-tune model with DPO
dpo_trainer.train()


The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.
Loading checkpoint shards: 100%|██████████| 4/4 [00:09<00:00,  2.27s/it]

Deprecated positional argument(s) used in DPOTrainer, please use the DPOConfig to set these arguments instead.
Map: 100%|██████████| 500/500 [00:12<00:00, 38.97 examples/s]
Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
  with compute_loss_context_manager():
Could not estimate the number of tokens of the input, floating-point operations will not be computed


Step,Training Loss
2,0.6931
4,0.3911
6,0.045
8,0.0025
10,0.0
12,0.0


KeyboardInterrupt: 

In [None]:
# Prepare for distributed training
trainer = accelerator.prepare(trainer)

In [None]:
# Train the model
trainer.train()

In [None]:
# Save the final model
trainer.save_model("./standard_dpo_llama3.1-8b")