# Imports and deciding on the model
Load the dataset, then the model and its tokenizer.

In [1]:
import torch
import torch.nn.functional as F
from transformers import GPT2Tokenizer, GPT2LMHeadModel
from transformers import BartTokenizer, BartForConditionalGeneration
from torch.utils.data import DataLoader, Dataset
from torch.optim import AdamW
from datasets import load_dataset

# Load the dataset ('comparisons' configuration: pairs of candidate
# summaries for a post together with the annotator's choice)
dataset = load_dataset('openai/summarize_from_feedback', 'comparisons')

# Check if GPU is available and set device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load the BART model and tokenizer
# or 'facebook/bart-large' or 'facebook/bart-large-cnn' for a larger model
model_name = 'facebook/bart-large-cnn'
tokenizer = BartTokenizer.from_pretrained(model_name)
model = BartForConditionalGeneration.from_pretrained(model_name)

# BART's tokenizer already defines its own padding token, so overriding it
# with EOS would make the tokenizer disagree with the model configuration.
# Only fall back to EOS for tokenizers that have no pad token at all
# (e.g. GPT-2 style tokenizers).
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
# Keep the embedding matrix in sync with the tokenizer's vocabulary size.
model.resize_token_embeddings(len(tokenizer))
model.to(device)  # Move model to the selected device (GPU when available)

import subprocess

def print_gpu_usage():
    """Print the output of ``nvidia-smi`` to stdout.

    If the ``nvidia-smi`` executable cannot be found (for example on a
    CPU-only machine), a short notice is printed instead of raising, so the
    helper is safe to call regardless of which device was selected.

    Returns:
        None
    """
    try:
        result = subprocess.run(['nvidia-smi'], stdout=subprocess.PIPE)
    except FileNotFoundError:
        print('nvidia-smi not found; GPU usage is unavailable.')
        return
    # Decode leniently so unexpected bytes in the tool output cannot abort
    # the report.
    print(result.stdout.decode('utf-8', errors='replace'))



  from .autonotebook import tqdm as notebook_tqdm


# Processing the Dataset
Next, we create a `SummarizationDataset` class that turns each raw record into a form that is easier for the model and the training loop to use.
We also define a function that tokenizes the data for BART training.

In [2]:
# Number of examples per training batch.
batch_size = 2
# Custom Dataset class for BART to handle our data format
class SummarizationDataset(Dataset):
    """Map-style dataset that flattens each comparison record.

    ``data`` must support ``len()`` and integer indexing. Every record is
    read as a nested mapping with ``info.post``, two ``summaries`` entries
    that each carry ``text``, and ``choice``.
    """

    def __init__(self, data):
        # Keep a reference to the underlying records; nothing is copied.
        self.data = data

    def __len__(self):
        """Return the number of records."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return record ``idx`` as a flat dict.

        Keys: ``text`` (the post), ``summary1`` / ``summary2`` (the two
        candidate summaries) and ``preference`` (the record's ``choice``).
        """
        record = self.data[idx]
        return {
            'text': record['info']['post'],
            'summary1': record['summaries'][0]['text'],
            'summary2': record['summaries'][1]['text'],
            'preference': record['choice'],
        }

# Prepare the DataLoader
# Materialize the training split into a list once so that per-item access is
# plain list indexing.
train_data = SummarizationDataset(list(dataset['train']))
# Use the shared `batch_size` setting instead of repeating the literal, so
# changing it in one place actually changes the batches that are produced.
train_dataloader = DataLoader(train_data, batch_size=batch_size, shuffle=True)

# Function to prepare inputs for BART
def prepare_bart_inputs(text, summary, tokenizer, device,
                        max_input_length=512, max_target_length=150):
    """Tokenize source text and target summary and move both to ``device``.

    Args:
        text: Source document(s) handed to ``tokenizer``.
        summary: Target summary/summaries handed to ``tokenizer``.
        tokenizer: Callable that accepts the text plus ``return_tensors``,
            ``max_length``, ``truncation`` and ``padding`` keyword arguments
            and returns a mapping of tensors containing ``'input_ids'``.
        device: Device that every returned tensor is moved to.
        max_input_length: Truncation length for ``text`` (default 512).
        max_target_length: Truncation length for ``summary`` (default 150).

    Returns:
        A tuple ``(inputs, targets)``: ``inputs`` is a dict holding every
        tensor the tokenizer produced for ``text``; ``targets`` is the
        ``'input_ids'`` tensor produced for ``summary``.
    """
    encoded_text = tokenizer(
        text,
        return_tensors='pt',
        max_length=max_input_length,
        truncation=True,
        padding=True,
    )
    encoded_summary = tokenizer(
        summary,
        return_tensors='pt',
        max_length=max_target_length,
        truncation=True,
        padding=True,
    )
    inputs = {key: value.to(device) for key, value in encoded_text.items()}
    # Only the token ids of the summary are needed downstream.
    targets = encoded_summary['input_ids'].to(device)
    return inputs, targets

# Reward function
For our reward function, we apply a softmax to the logits of each summary, take the logarithm, and average the result. The reward is the mean for the preferred summary minus the mean for the other summary.

In [3]:
# Define reward function with numerical stability
def reward_function(logits1, logits2, preference, epsilon=1e-10):
    """Return the preference reward for a pair of logits tensors.

    Each tensor is scored as the mean of ``log(softmax(logits) + epsilon)``,
    where the softmax runs over the last dimension and the mean runs over
    every element of the tensor (all positions and all entries of the last
    dimension).

    Args:
        logits1: Logits for the first summary.
        logits2: Logits for the second summary.
        preference: ``0`` when the first summary is preferred; any other
            value treats the second summary as preferred.
        epsilon: Small constant added before the logarithm to avoid log(0).

    Returns:
        Scalar tensor: score of the preferred summary minus the score of the
        other summary.
    """

    def _mean_log_prob(logits):
        # softmax -> +epsilon -> log -> mean over all elements
        return torch.mean(torch.log(F.softmax(logits, dim=-1) + epsilon))

    score1 = _mean_log_prob(logits1)
    score2 = _mean_log_prob(logits2)

    if preference == 0:
        return score1 - score2
    return score2 - score1

# Training the model and saving it
We train the model on our training data using the reward function above, which is meant to favor the preferred summary.

In [None]:
# BART
# Training configuration
num_epochs = 3
max_grad_norm = 1.0  # gradient clipping value
optimizer = AdamW(model.parameters(), lr=1e-5)

model.train()
for epoch in range(num_epochs):
    for batch in train_dataloader:
        text = batch['text']
        preferences = batch['preference'].to(device)

        # Run the same posts through the model once per candidate summary,
        # feeding that summary's token ids as `labels`, and keep the logits.
        inputs1, targets1 = prepare_bart_inputs(text, batch['summary1'], tokenizer, device)
        logits1 = model(**inputs1, labels=targets1).logits

        inputs2, targets2 = prepare_bart_inputs(text, batch['summary2'], tokenizer, device)
        logits2 = model(**inputs2, labels=targets2).logits

        # Sum the per-example rewards over the batch; the loss is the
        # negated total so that minimizing it increases the reward.
        reward = sum(
            reward_function(logits1[i], logits2[i], choice)
            for i, choice in enumerate(preferences)
        )
        loss = -reward

        # Standard optimization step with gradient-norm clipping.
        optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)
        optimizer.step()

        print(f'Epoch: {epoch}, Loss: {loss.item()}')
        # Emptying the GPU cache each step is optional; the author ran into
        # issues on a 3060 super without it.
        torch.cuda.empty_cache()

# Save the fine-tuned model together with its tokenizer
model.save_pretrained('./fine-tuned-bart-large-cnn')
tokenizer.save_pretrained('./fine-tuned-bart-large-cnn')

Epoch: 0, Loss: -0.4731292724609375
Epoch: 0, Loss: -0.3854236602783203
Epoch: 0, Loss: 0.07097148895263672
Epoch: 0, Loss: -0.08627510070800781
Epoch: 0, Loss: -0.15632343292236328
Epoch: 0, Loss: 0.027915000915527344
Epoch: 0, Loss: -3.1625165939331055
Epoch: 0, Loss: 3.805112838745117
Epoch: 0, Loss: -5.229035377502441
Epoch: 0, Loss: 0.9829587936401367
Epoch: 0, Loss: -3.8709659576416016
Epoch: 0, Loss: -5.159585952758789
Epoch: 0, Loss: 4.029119491577148
Epoch: 0, Loss: 3.0377979278564453
Epoch: 0, Loss: -4.709360122680664
Epoch: 0, Loss: 6.1434221267700195
Epoch: 0, Loss: -2.6728925704956055
Epoch: 0, Loss: -3.5996809005737305
Epoch: 0, Loss: -2.4040584564208984
Epoch: 0, Loss: 4.4459638595581055
Epoch: 0, Loss: -1.1931400299072266
Epoch: 0, Loss: -4.738104820251465
Epoch: 0, Loss: 5.0283050537109375
Epoch: 0, Loss: -3.918550491333008
Epoch: 0, Loss: -4.182159423828125
Epoch: 0, Loss: 2.9199438095092773
Epoch: 0, Loss: 1.4412050247192383
Epoch: 0, Loss: 2.3835649490356445
Epoch: 