In [None]:
# Fetch the mergekit sources into ./mergekit; the cells below change into that
# directory, install the package from it and run its mergekit-yaml command.
!git clone https://github.com/cg123/mergekit.git

In [None]:
# Enter the cloned repository and install it in editable mode.
# NOTE: %cd changes the kernel's working directory, so every relative path used
# later in this notebook resolves inside the mergekit checkout.
%cd mergekit
# %pip (rather than !pip) installs into the environment of the running kernel,
# which is not necessarily the one the shell's `pip` belongs to.
%pip install -e .

In [None]:
## Create /gpt2-small.yml (the config that the mergekit-yaml cell below reads) with the following contents, uncommented:

#slices:
#  - sources:
#    - model: gpt2
#      layer_range: [0, 6]
#merge_method: passthrough
#dtype: float16


In [None]:
# Run the merge described by /gpt2-small.yml (absolute path, filesystem root) and
# write the result to ./gpt2-small. The output path is relative, so it resolves
# against the kernel's current directory (the mergekit checkout once the earlier
# %cd cell has been run).
!mergekit-yaml /gpt2-small.yml ./gpt2-small

In [None]:
from transformers import GPT2ForSequenceClassification, GPT2Tokenizer
import torch
from torch.utils.data import DataLoader, TensorDataset
import torch.nn.functional as F
import torch.optim as optim
from torch import nn
import json
import random
from datasets import load_dataset

# Release unused cached CUDA memory, then pick the GPU when one is available
# and fall back to the CPU otherwise.
torch.cuda.empty_cache()
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [None]:
# Directory holding the merged checkpoint. The mergekit-yaml cell writes its output
# to the relative path ./gpt2-small, so the same relative path must be used here;
# "/gpt2-small" would point at the filesystem root, where nothing is written.
MERGED_MODEL_DIR = "./gpt2-small"

tokenizer = GPT2Tokenizer.from_pretrained(MERGED_MODEL_DIR)
# Reuse EOS as the padding token so that padding='max_length' can be used later.
tokenizer.pad_token = tokenizer.eos_token

# NOTE(review): num_labels=2 yields two scores per sequence, and the training cell
# averages its loss over both. A single scalar reward head (num_labels=1) is the
# usual choice for a reward model -- confirm intent before changing.
model = GPT2ForSequenceClassification.from_pretrained(MERGED_MODEL_DIR, num_labels=2)
model.to(device)
# Keep the model's padding id in sync with the token the tokenizer actually pads
# with (presumably the classification head relies on it to find the last
# non-padding token of each sequence -- see the GPT2ForSequenceClassification docs).
model.config.pad_token_id = tokenizer.pad_token_id

In [None]:
# Build the pairwise preference data: every prompt is tokenized once together with
# its chosen response and once together with its rejected response, always padded /
# truncated to max_length tokens.
max_length = 512

train_dataset = load_dataset("winglian/no_robots_rlhf")['train']
# Read whole columns instead of iterating over the rows three times; list(...) turns
# whatever column indexing returns into a plain Python list.
prompts = list(train_dataset['prompt'])
chosen = list(train_dataset['chosen'])
rejected = list(train_dataset['rejected'])


def _encode_pairs(responses):
    """Tokenize every (prompt, response) pair in one batched call.

    Args:
        responses: list of responses, aligned index-by-index with ``prompts``.

    Returns:
        Tuple ``(input_ids, attention_mask)`` of tensors placed on ``device``.
    """
    encoded = tokenizer(
        prompts,
        responses,
        truncation=True,
        padding='max_length',
        max_length=max_length,
        return_tensors='pt',
    )
    return encoded['input_ids'].to(device), encoded['attention_mask'].to(device)


# One tokenizer pass per response kind yields both the ids and the mask, so no pair
# is tokenized twice.
chosen_input_ids, chosen_attention_mask = _encode_pairs(chosen)
rejected_input_ids, rejected_attention_mask = _encode_pairs(rejected)

# Sanity check: one fixed-length row per prompt in both halves of each pair.
assert chosen_input_ids.shape == rejected_input_ids.shape == (len(prompts), max_length)

dataset = TensorDataset(chosen_input_ids, chosen_attention_mask, rejected_input_ids, rejected_attention_mask)
loader = DataLoader(dataset, batch_size=16, shuffle=True)

In [None]:
# Fine-tune the classifier as a pairwise reward model: for each pair, the score of
# the chosen response is pushed above the score of the rejected one.
optimizer = optim.AdamW(model.parameters(), lr=1e-5)
epochs = 2

# from_pretrained() hands the model back in evaluation mode; switch to training mode
# so that dropout is active while fine-tuning.
model.train()
for epoch in range(epochs):
    for i, batch in enumerate(loader):
        # Batch-local names, so the full-dataset tensors built in the data cell are
        # not overwritten. .to(device) is a no-op when the batch already lives there.
        batch_chosen_ids, batch_chosen_mask, batch_rejected_ids, batch_rejected_mask = (
            tensor.to(device) for tensor in batch
        )
        optimizer.zero_grad()

        rewards_chosen = model(input_ids=batch_chosen_ids, attention_mask=batch_chosen_mask).logits
        rewards_rejected = model(input_ids=batch_rejected_ids, attention_mask=batch_rejected_mask).logits

        # Pairwise ranking loss: -log(sigmoid(r_chosen - r_rejected)), averaged over
        # every element of the score tensor.
        loss = -F.logsigmoid(rewards_chosen - rewards_rejected).mean()

        loss.backward()
        optimizer.step()

        print(f"Epoch {epoch+1}/{epochs}, Batch {i+1}/{len(loader)}, Loss: {loss.item()}")

# Return to evaluation mode so later inference with this model is deterministic.
model.eval();

In [None]:
# Write the fine-tuned model and then its tokenizer into the same directory.
# The path is relative, so it resolves against the kernel's current working directory.
save_directory = "reward_model"
for artifact in (model, tokenizer):
    artifact.save_pretrained(save_directory)