In [1]:
import torch
import pandas as pd
from transformers import AutoModelForCausalLM, BartTokenizer, BartForConditionalGeneration
from transformers import Trainer, TrainingArguments
from peft import LoraConfig
from datasets import load_dataset, Dataset, DatasetDict
from trl import (
    DPOConfig,
    DPOTrainer,
    ModelConfig,
    RichProgressCallback,
    get_kbit_device_map,
    get_peft_config,
    get_quantization_config,
)

import logging
import multiprocessing
import os
from contextlib import nullcontext
import bitsandbytes as bnb


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Function to process a single split of the dataset
def reformat_split(split, split_name):
    prompts = []
    chosen_answers = []
    rejected_answers = []
    split_column = []

    for entry in split:
        post = entry['info']['post']
        summary1 = entry['summaries'][0]['text']
        summary2 = entry['summaries'][1]['text']
        
        if entry['choice'] == 0:
            chosen = summary1
            rejected = summary2
        else:
            chosen = summary2
            rejected = summary1
        
        prompts.append(f"Write a TLDR for the following text: {post}")
        chosen_answers.append(chosen)
        rejected_answers.append(rejected)
        split_column.append(split_name)

    reformatted_split = pd.DataFrame({
        "Prompt": prompts,
        "Chosen": chosen_answers,
        "Rejected": rejected_answers,
        "Split": split_column
    })
    return reformatted_split

# Load the dataset splits
dataset = load_dataset('openai/summarize_from_feedback', 'comparisons')
train_split = dataset['train']
validation_split = dataset['validation']

# Reformat each split
reformatted_train = reformat_split(train_split, "train")
reformatted_validation = reformat_split(validation_split, "validation")

# Combine the splits
combined_data = pd.concat([reformatted_train, reformatted_validation], ignore_index=True)


In [3]:
# load dataset
dataset = Dataset.from_pandas(combined_data)

# Split the dataset back into train and validation sets based on the 'Split' column
train_dataset = dataset.filter(lambda example: example['Split'] == 'train')
validation_dataset = dataset.filter(lambda example: example['Split'] == 'validation')

# Create a DatasetDict to hold the train and validation datasets
dataset_dict = DatasetDict({
    'train': train_dataset,
    'validation': validation_dataset
})

Filter: 100%|████████████████| 178944/178944 [00:00<00:00, 267668.92 examples/s]
Filter: 100%|████████████████| 178944/178944 [00:00<00:00, 266096.45 examples/s]


In [4]:
# Check if GPU is available and set device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load the BART model and tokenizer
# or 'facebook/bart-large' or facebook/bart-large-cnn' for a larger model
model_name = 'facebook/bart-large-cnn'
tokenizer = BartTokenizer.from_pretrained(model_name)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
if tokenizer.chat_template is None:
    tokenizer.chat_template = "{% for message in messages %}{{message['role'] + ': ' + message['content'] + '\n\n'}}{% endfor %}{{ eos_token }}"
tokenizer.padding_side = "left"

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
)
model.config.use_cache = False

# Reference model
ref_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    load_in_4bit=True
)
model.to(device)




BartForCausalLM(
  (model): BartDecoderWrapper(
    (decoder): BartDecoder(
      (embed_tokens): Embedding(50264, 1024, padding_idx=1)
      (embed_positions): BartLearnedPositionalEmbedding(1026, 1024)
      (layers): ModuleList(
        (0-11): 12 x BartDecoderLayer(
          (self_attn): BartAttention(
            (k_proj): Linear(in_features=1024, out_features=1024, bias=True)
            (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
            (q_proj): Linear(in_features=1024, out_features=1024, bias=True)
            (out_proj): Linear(in_features=1024, out_features=1024, bias=True)
          )
          (activation_fn): GELUActivation()
          (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
          (encoder_attn): BartAttention(
            (k_proj): Linear(in_features=1024, out_features=1024, bias=True)
            (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
            (q_proj): Linear(in_features=

In [5]:
def format_dataset(example):
    # Format instruction
    message = {"role": "user", "content": example['Prompt']}
    prompt = tokenizer.apply_chat_template([message], tokenize=False, add_generation_prompt=True)

    # Format chosen answer
    message = {"role": "assistant", "content": example['Chosen']}
    chosen = tokenizer.apply_chat_template([message], tokenize=False)

    # Format rejected answer
    message = {"role": "assistant", "content": example['Rejected']}
    rejected = tokenizer.apply_chat_template([message], tokenize=False)

    return {
        "prompt": prompt,
        "chosen": chosen,
        "rejected": rejected,
    }



In [7]:
# Save columns
original_columns = train_dataset.column_names
dataset_train = train_dataset.map(
    format_dataset,
    remove_columns=original_columns
)
original_columns = validation_dataset.column_names
dataset_validation = validation_dataset.map(
    format_dataset,
    remove_columns=original_columns
)

Map: 100%|███████████████████████| 92858/92858 [00:18<00:00, 5046.14 examples/s]
Map: 100%|███████████████████████| 86086/86086 [00:16<00:00, 5198.10 examples/s]


In [17]:
# Define the training arguments
training_args = TrainingArguments(
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    gradient_checkpointing=True,
    learning_rate=5e-5,
    lr_scheduler_type="cosine",
    num_train_epochs=3,
    max_steps=200,
    save_strategy="no",
    evaluation_strategy="epoch",
    logging_steps=1,
    output_dir="./results",
    optim="paged_adamw_32bit",
    warmup_steps=100,
    bf16=True,
    model_init_kwargs={}
)

peft_config = LoraConfig(
    r=16,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=['k_proj', 'gate_proj', 'v_proj', 'up_proj', 'q_proj', 'o_proj', 'down_proj']
)

# Define the DPO configuration
dpo_config = DPOConfig(
    output_dir="./results",
    model_init_kwargs={},
    ref_model_init_kwargs={},
)

TypeError: __init__() got an unexpected keyword argument 'model_init_kwargs'

In [16]:
# Create the DPO trainer
trainer = DPOTrainer(
    model=model,
    args=training_args,
    beta=0.1,
    train_dataset=dataset_train,
    eval_dataset=dataset_validation,
    tokenizer=tokenizer,
    peft_config= peft_config,
    **dpo_config.model_init_kwargs,
)

# Train the model
trainer.train()

# Save the model
model.save_pretrained("./fine-tuned-bart")
tokenizer.save_pretrained("./fine-tuned-bart")

AttributeError: 'TrainingArguments' object has no attribute 'model_init_kwargs'