In [1]:
import torch
import torch.nn.functional as F
from transformers import GPT2Tokenizer, GPT2LMHeadModel
from transformers import BartTokenizer, BartForConditionalGeneration
from torch.utils.data import DataLoader, Dataset
from torch.optim import AdamW
from datasets import load_dataset

# Load the dataset (the 'comparisons' configuration of openai/summarize_from_feedback)
dataset = load_dataset('openai/summarize_from_feedback', 'comparisons')

# Check if GPU is available and set device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# GPT-2 alternative, kept for reference:
# tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
# model = GPT2LMHeadModel.from_pretrained('gpt2')

# Load the BART model and tokenizer
model_name = 'facebook/bart-base'  # or 'facebook/bart-large' for a larger model
tokenizer = BartTokenizer.from_pretrained(model_name)
model = BartForConditionalGeneration.from_pretrained(model_name)

# Only fall back to EOS-as-padding when the tokenizer defines no pad token
# (the GPT-2 case). BART's tokenizer already has a dedicated <pad> token;
# overwriting it with EOS would make padding indistinguishable from the real
# end-of-sequence marker.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Keep the embedding matrix in sync with the tokenizer's vocabulary size
model.resize_token_embeddings(len(tokenizer))
model.to(device)  # Move model to the selected device (GPU when available)

import subprocess

def print_gpu_usage():
    """Print the report produced by the ``nvidia-smi`` command.

    The notebook can run on CPU-only machines, where ``nvidia-smi`` may not be
    installed. In that case a short notice is printed instead of raising, so a
    monitoring call never aborts a training loop.

    Returns:
        None.
    """
    try:
        result = subprocess.run(['nvidia-smi'], stdout=subprocess.PIPE)
    except OSError as exc:
        # Raised when the executable is missing or cannot be launched
        print(f'nvidia-smi unavailable, skipping GPU usage report: {exc}')
        return
    print(result.stdout.decode('utf-8'))



  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Batch-size setting shared by the cells below (e.g. echoed in the GPT-2 loop's progress line)
batch_size = 2
# Preprocess the dataset
class SummaryDataset(Dataset):
    """Dataset that tokenizes a post and its two candidate summaries on access.

    Each record in ``data`` is read as ``record['info']['post']``,
    ``record['summaries'][0|1]['text']`` and ``record['choice']``.
    """

    def __init__(self, data, tokenizer, max_length=512):
        """Store the records, the tokenizer and the fixed padding/truncation length."""
        self.data, self.tokenizer, self.max_length = data, tokenizer, max_length

    def __len__(self):
        """Return the number of records."""
        return len(self.data)

    def _encode(self, string):
        """Tokenize one string, padded/truncated to ``self.max_length``, as 'pt' tensors."""
        return self.tokenizer(
            string,
            return_tensors='pt',
            max_length=self.max_length,
            truncation=True,
            padding='max_length',
        )

    def __getitem__(self, idx):
        """Return token ids and attention masks for record ``idx``.

        The tokenizer output has its leading dimension removed with
        ``squeeze(0)``; the preference is wrapped in a tensor.
        """
        record = self.data[idx]

        # Pull every field out first, then tokenize in post / summary1 / summary2 order
        post = record['info']['post']
        first_summary = record['summaries'][0]['text']
        second_summary = record['summaries'][1]['text']
        choice = record['choice']

        encodings = [self._encode(piece) for piece in (post, first_summary, second_summary)]
        prefixes = (('input_ids', 'attention_mask'),
                    ('summary1_ids', 'summary1_attention_mask'),
                    ('summary2_ids', 'summary2_attention_mask'))

        sample = {}
        for (ids_key, mask_key), encoded in zip(prefixes, encodings):
            sample[ids_key] = encoded['input_ids'].squeeze(0)
            sample[mask_key] = encoded['attention_mask'].squeeze(0)
        sample['preference'] = torch.tensor(choice)
        return sample



# Custom Dataset class for BART to handle our data format
class SummarizationDataset(Dataset):
    """Dataset exposing each record as raw values (no tokenization).

    Items are dicts with the keys ``'text'``, ``'summary1'``, ``'summary2'``
    and ``'preference'``.
    """

    def __init__(self, data):
        """Keep a reference to the indexable collection of records."""
        self.data = data

    def __len__(self):
        """Return the number of records."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the post, both summary texts and the choice for record ``idx``."""
        record = self.data[idx]
        sample = {'text': record['info']['post']}
        for position, key in enumerate(('summary1', 'summary2')):
            sample[key] = record['summaries'][position]['text']
        sample['preference'] = record['choice']
        return sample

# Prepare the DataLoader
# Materialize the train split into a plain list for the Dataset wrapper to index
train_data = SummarizationDataset(list(dataset['train']))
# Use the notebook's batch_size setting rather than a duplicated literal, so the
# loader and the value reported in the training log cannot drift apart
train_dataloader = DataLoader(train_data, batch_size=batch_size, shuffle=True)

# Function to prepare inputs for BART
def prepare_bart_inputs(text, summary, tokenizer, device, max_input_length=512, max_target_length=150):
    """Tokenize source text and target summaries and move the tensors to ``device``.

    Args:
        text: Source text (or batch of texts) passed straight to ``tokenizer``.
        summary: Target summary (or batch of summaries) passed to ``tokenizer``.
        tokenizer: Callable tokenizer, invoked with ``return_tensors='pt'``,
            ``truncation=True`` and ``padding=True``; its result must support
            ``.items()`` and contain an ``'input_ids'`` entry.
        device: Device the resulting tensors are moved to.
        max_input_length: Truncation limit for ``text`` (default 512, the
            previously hard-coded value).
        max_target_length: Truncation limit for ``summary`` (default 150, the
            previously hard-coded value).

    Returns:
        A ``(inputs, targets)`` tuple: ``inputs`` is a dict holding every tensor
        the tokenizer produced for ``text``; ``targets`` is the ``input_ids``
        tensor produced for ``summary``.
    """
    encoded_text = tokenizer(
        text, return_tensors='pt', max_length=max_input_length, truncation=True, padding=True
    )
    encoded_summary = tokenizer(
        summary, return_tensors='pt', max_length=max_target_length, truncation=True, padding=True
    )
    inputs = {key: value.to(device) for key, value in encoded_text.items()}
    # Only the token ids of the summary are returned; its attention mask is dropped
    targets = encoded_summary['input_ids'].to(device)
    return inputs, targets



# # Prepare dataset
# train_data = [item for item in dataset['train']]
# val_data = [item for item in dataset['validation']]

# train_dataset = SummaryDataset(train_data, tokenizer)
# val_dataset = SummaryDataset(val_data, tokenizer)

# train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
# val_dataloader = DataLoader(val_dataset, batch_size=batch_size)

In [3]:
# def reward_function(logits1, logits2, preference):
#     # Use the mean of logits as a proxy for quality
#     score1 = logits1.mean(dim=-1)
#     score2 = logits2.mean(dim=-1)
    
#     # Compute reward based on preference
#     if preference == 0:
#         reward = score1 - score2
#     else:
#         reward = score2 - score1
    
#     return reward.mean()

# Define reward function with numerical stability
def reward_function(logits1, logits2, preference, epsilon=1e-10):
    """Return the mean log-probability gap between the preferred and the other logits.

    Each logits tensor is turned into probabilities with a softmax over its last
    dimension, ``epsilon`` is added before taking the log (avoids ``log(0)``),
    and the result is averaged over all elements.

    Args:
        logits1: Logits for the first candidate.
        logits2: Logits for the second candidate.
        preference: ``0`` marks the first candidate as preferred; any other
            value marks the second.
        epsilon: Small constant added to the probabilities before the log.

    Returns:
        Scalar tensor: preferred mean log-probability minus the other one.
    """
    def _mean_log_prob(logits):
        return torch.mean(torch.log(F.softmax(logits, dim=-1) + epsilon))

    first_score = _mean_log_prob(logits1)
    second_score = _mean_log_prob(logits2)

    if preference == 0:
        preferred, other = first_score, second_score
    else:
        preferred, other = second_score, first_score
    return preferred - other

In [None]:
# GPT2
# Training loop with gradient clipping and reduced learning rate
#
# NOTE(review): this cell reads pre-tokenized batch keys ('input_ids',
# 'summary1_ids', ...), i.e. the keys SummaryDataset.__getitem__ returns. The
# train_dataloader built earlier in this notebook wraps SummarizationDataset,
# which yields raw values under 'text' / 'summary1' / 'summary2' / 'preference'
# -- confirm which loader is in scope before running this cell.
# NOTE(review): `model` is loaded as BART in the first cell while this cell
# saves to './fine-tuned-gpt2' -- confirm the intended model.
optimizer = AdamW(model.parameters(), lr=1e-5)  # Reduced learning rate
num_epochs = 3
max_grad_norm = 1.0  # Gradient clipping

model.train()
for epoch in range(num_epochs):
    for batch in train_dataloader:
        # Move every tensor of the batch to the selected device
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        summary1_ids = batch['summary1_ids'].to(device)
        summary1_attention_mask = batch['summary1_attention_mask'].to(device)
        summary2_ids = batch['summary2_ids'].to(device)
        summary2_attention_mask = batch['summary2_attention_mask'].to(device)
        preferences = batch['preference'].to(device)
        
        # Forward pass per candidate summary; only the logits are used below
        # (the summary attention masks above are not passed to the model)
        outputs1 = model(input_ids, attention_mask=attention_mask, labels=summary1_ids)
        logits1 = outputs1.logits
        
        outputs2 = model(input_ids, attention_mask=attention_mask, labels=summary2_ids)
        logits2 = outputs2.logits
        
        # Sum the per-example rewards over the batch
        reward = 0
        for i in range(len(preferences)):
            reward += reward_function(logits1[i], logits2[i], preferences[i])
        
        loss = -reward  # Maximize reward by minimizing negative reward

        optimizer.zero_grad()
        loss.backward()
        
        # Gradient clipping
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)
        
        optimizer.step()

        # Print GPU usage (runs the nvidia-smi command once per batch)
        print_gpu_usage()

        # Print and empty cache
        print(f'Epoch: {epoch}, Batch Size: {batch_size}, Loss: {loss.item()}')
        torch.cuda.empty_cache()  # Empty GPU cache

# Save the fine-tuned model
model.save_pretrained('./fine-tuned-gpt2')
tokenizer.save_pretrained('./fine-tuned-gpt2')

Epoch: 0, Batch Size: 2, Loss: 0.2357616424560547
Epoch: 0, Batch Size: 2, Loss: -0.10102081298828125
Epoch: 0, Batch Size: 2, Loss: 0.2595386505126953
Epoch: 0, Batch Size: 2, Loss: -0.12047958374023438
Epoch: 0, Batch Size: 2, Loss: 0.07787704467773438
Epoch: 0, Batch Size: 2, Loss: 0.22821807861328125
Epoch: 0, Batch Size: 2, Loss: 0.3718605041503906
Epoch: 0, Batch Size: 2, Loss: -0.12065696716308594
Epoch: 0, Batch Size: 2, Loss: 0.21064376831054688
Epoch: 0, Batch Size: 2, Loss: 0.1778125762939453
Epoch: 0, Batch Size: 2, Loss: -0.3082084655761719
Epoch: 0, Batch Size: 2, Loss: 0.054584503173828125
Epoch: 0, Batch Size: 2, Loss: -0.013692855834960938
Epoch: 0, Batch Size: 2, Loss: 0.2678050994873047
Epoch: 0, Batch Size: 2, Loss: 0.25801849365234375
Epoch: 0, Batch Size: 2, Loss: 0.19888877868652344
Epoch: 0, Batch Size: 2, Loss: 0.3365955352783203
Epoch: 0, Batch Size: 2, Loss: 0.06058311462402344
Epoch: 0, Batch Size: 2, Loss: -0.10507583618164062
Epoch: 0, Batch Size: 2, Loss:

In [4]:
# BART
# Preference training loop: one optimizer step per batch of raw-text examples
optimizer = AdamW(model.parameters(), lr=1e-5)
num_epochs, max_grad_norm = 3, 1.0

model.train()
for epoch in range(num_epochs):
    for batch in train_dataloader:
        posts, first_summaries, second_summaries = batch['text'], batch['summary1'], batch['summary2']
        preferences = batch['preference'].to(device)

        # Tokenize the posts against each candidate summary, then run both forward passes
        prepared = [
            prepare_bart_inputs(posts, candidate, tokenizer, device)
            for candidate in (first_summaries, second_summaries)
        ]
        logits1, logits2 = (
            model(**encoded, labels=labels).logits for encoded, labels in prepared
        )

        # Sum the per-example rewards; the loss is the negated total
        reward = sum(
            reward_function(first, second, choice)
            for first, second, choice in zip(logits1, logits2, preferences)
        )
        loss = -reward

        optimizer.zero_grad()
        loss.backward()

        # Clip gradients before the parameter update
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)

        optimizer.step()

        print(f'Epoch: {epoch}, Loss: {loss.item()}')
        torch.cuda.empty_cache()  # Empty GPU cache

# Save the fine-tuned model
model.save_pretrained('./fine-tuned-bart')
tokenizer.save_pretrained('./fine-tuned-bart')

ValueError: Unable to create tensor, you should probably activate truncation and/or padding with 'padding=True' 'truncation=True' to have batched tensors with the same length. Perhaps your features (`input_ids` in this case) have excessive nesting (inputs type `list` where type `int` is expected).