In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from transformers import GPT2Tokenizer, GPT2LMHeadModel
import torch
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm

In [15]:
data = pd.read_csv('/kaggle/input/amazon-fine-food-reviews/Reviews.csv')

data = data[['Text', 'Summary']].dropna()

data['Text'] = data['Text'].str.lower()
data['Summary'] = data['Summary'].str.lower()

tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2LMHeadModel.from_pretrained('gpt2')

In [16]:
# Divide the dataset into training and testing (75:25)
data_used, waste_data = train_test_split(data, test_size=0.6, random_state=42)
train_data, test_data = train_test_split(data_used, test_size=0.25, random_state=42)

In [17]:
# Implement a custom dataset class
class ReviewDataset(Dataset):
    def __init__(self, texts, summaries, tokenizer, max_length=40):
        self.texts = texts
        self.summaries = summaries
        self.tokenizer = tokenizer
        self.max_length = max_length
    
    def __len__(self):
        return len(self.texts)
    
    def __getitem__(self, idx):
        text = self.texts.iloc[idx]
        summary = self.summaries.iloc[idx]

        input_ids = self.tokenizer.encode(text, truncation=True, max_length=self.max_length, padding='max_length')
        labels = self.tokenizer.encode(summary, truncation=True, max_length=self.max_length, padding='max_length')
        
        return torch.tensor(input_ids), torch.tensor(labels)

# Initialize datasets and dataloaders
train_dataset = ReviewDataset(train_data['Text'], train_data['Summary'], tokenizer)
test_dataset = ReviewDataset(test_data['Text'], test_data['Summary'], tokenizer)
train_dataloader = DataLoader(train_dataset, batch_size=16, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=16, shuffle=False)



In [18]:
torch.cuda.empty_cache()

In [19]:
# Fine-tune the GPT-2 model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
model.to(device)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-5)
epochs = 3
tokenizer.pad_token = tokenizer.eos_token

torch.cuda.empty_cache()
for epoch in range(epochs):
    model.train()
    total_loss = 0
    c=0
    for input_ids, labels in tqdm(train_dataloader, desc=f'Epoch {epoch + 1}/{epochs}'):
        c+=1
        if c%500==0:
            print(c)
        input_ids = input_ids.to(device)
        labels = labels.to(device)
        
        optimizer.zero_grad()
        
        outputs = model(input_ids=input_ids, labels=labels)
        loss = outputs.loss
        total_loss += loss.item()
        
        loss.backward()
        optimizer.step()
        
        torch.cuda.empty_cache()
    avg_train_loss = total_loss / len(train_dataloader)
    print(f'Average training loss: {avg_train_loss}')

# Save the fine-tuned model
model.save_pretrained('fine_tuned_gpt2_model')


cuda


Epoch 1/3:   5%|▍         | 500/10658 [01:13<24:43,  6.85it/s]

500


Epoch 1/3:   9%|▉         | 1000/10658 [02:25<23:32,  6.84it/s]

1000


Epoch 1/3:  14%|█▍        | 1500/10658 [03:38<22:46,  6.70it/s]

1500


Epoch 1/3:  19%|█▉        | 2000/10658 [04:50<21:15,  6.79it/s]

2000


Epoch 1/3:  23%|██▎       | 2500/10658 [06:02<19:38,  6.92it/s]

2500


Epoch 1/3:  28%|██▊       | 3000/10658 [07:15<18:25,  6.93it/s]

3000


Epoch 1/3:  33%|███▎      | 3500/10658 [08:27<18:14,  6.54it/s]

3500


Epoch 1/3:  38%|███▊      | 4000/10658 [09:39<16:08,  6.87it/s]

4000


Epoch 1/3:  42%|████▏     | 4500/10658 [10:51<14:48,  6.93it/s]

4500


Epoch 1/3:  47%|████▋     | 5000/10658 [12:03<13:29,  6.99it/s]

5000


Epoch 1/3:  52%|█████▏    | 5500/10658 [13:15<12:15,  7.01it/s]

5500


Epoch 1/3:  56%|█████▋    | 6000/10658 [14:27<11:04,  7.01it/s]

6000


Epoch 1/3:  61%|██████    | 6500/10658 [15:39<09:55,  6.98it/s]

6500


Epoch 1/3:  66%|██████▌   | 7000/10658 [16:51<08:49,  6.91it/s]

7000


Epoch 1/3:  70%|███████   | 7500/10658 [18:03<07:48,  6.74it/s]

7500


Epoch 1/3:  75%|███████▌  | 8000/10658 [19:15<06:37,  6.68it/s]

8000


Epoch 1/3:  80%|███████▉  | 8500/10658 [20:27<05:11,  6.93it/s]

8500


Epoch 1/3:  84%|████████▍ | 9000/10658 [21:39<03:57,  6.98it/s]

9000


Epoch 1/3:  89%|████████▉ | 9500/10658 [22:51<02:49,  6.82it/s]

9500


Epoch 1/3:  94%|█████████▍| 10000/10658 [24:03<01:34,  6.96it/s]

10000


Epoch 1/3:  99%|█████████▊| 10500/10658 [25:16<00:23,  6.83it/s]

10500


Epoch 1/3: 100%|██████████| 10658/10658 [25:39<00:00,  6.93it/s]


Average training loss: 0.9779283038154734


Epoch 2/3:   5%|▍         | 500/10658 [01:12<24:48,  6.82it/s]

500


Epoch 2/3:   9%|▉         | 1000/10658 [02:24<23:24,  6.87it/s]

1000


Epoch 2/3:  14%|█▍        | 1500/10658 [03:36<21:47,  7.00it/s]

1500


Epoch 2/3:  19%|█▉        | 2000/10658 [04:48<21:03,  6.85it/s]

2000


Epoch 2/3:  23%|██▎       | 2500/10658 [06:01<20:09,  6.74it/s]

2500


Epoch 2/3:  28%|██▊       | 3000/10658 [07:13<18:09,  7.03it/s]

3000


Epoch 2/3:  33%|███▎      | 3500/10658 [08:25<17:51,  6.68it/s]

3500


Epoch 2/3:  38%|███▊      | 4000/10658 [09:38<16:05,  6.90it/s]

4000


Epoch 2/3:  42%|████▏     | 4500/10658 [10:50<14:31,  7.07it/s]

4500


Epoch 2/3:  47%|████▋     | 5000/10658 [12:02<13:43,  6.87it/s]

5000


Epoch 2/3:  52%|█████▏    | 5500/10658 [13:15<12:25,  6.91it/s]

5500


Epoch 2/3:  56%|█████▋    | 6000/10658 [14:27<11:07,  6.98it/s]

6000


Epoch 2/3:  61%|██████    | 6500/10658 [15:39<09:54,  6.99it/s]

6500


Epoch 2/3:  66%|██████▌   | 7000/10658 [16:51<08:39,  7.04it/s]

7000


Epoch 2/3:  70%|███████   | 7500/10658 [18:03<07:23,  7.12it/s]

7500


Epoch 2/3:  75%|███████▌  | 8000/10658 [19:15<06:27,  6.86it/s]

8000


Epoch 2/3:  80%|███████▉  | 8500/10658 [20:27<05:11,  6.93it/s]

8500


Epoch 2/3:  84%|████████▍ | 9000/10658 [21:39<04:02,  6.85it/s]

9000


Epoch 2/3:  89%|████████▉ | 9500/10658 [22:51<02:45,  7.00it/s]

9500


Epoch 2/3:  94%|█████████▍| 10000/10658 [24:02<01:33,  7.00it/s]

10000


Epoch 2/3:  99%|█████████▊| 10500/10658 [25:14<00:22,  7.00it/s]

10500


Epoch 2/3: 100%|██████████| 10658/10658 [25:37<00:00,  6.93it/s]


Average training loss: 0.939564469412492


Epoch 3/3:   5%|▍         | 500/10658 [01:11<24:18,  6.96it/s]

500


Epoch 3/3:   9%|▉         | 1000/10658 [02:23<23:08,  6.96it/s]

1000


Epoch 3/3:  14%|█▍        | 1500/10658 [03:35<21:58,  6.95it/s]

1500


Epoch 3/3:  19%|█▉        | 2000/10658 [04:46<21:08,  6.83it/s]

2000


Epoch 3/3:  23%|██▎       | 2500/10658 [05:58<19:38,  6.92it/s]

2500


Epoch 3/3:  28%|██▊       | 3000/10658 [07:10<17:55,  7.12it/s]

3000


Epoch 3/3:  33%|███▎      | 3500/10658 [08:21<17:00,  7.02it/s]

3500


Epoch 3/3:  38%|███▊      | 4000/10658 [09:33<15:36,  7.11it/s]

4000


Epoch 3/3:  42%|████▏     | 4500/10658 [10:45<14:41,  6.99it/s]

4500


Epoch 3/3:  47%|████▋     | 5000/10658 [11:56<13:36,  6.93it/s]

5000


Epoch 3/3:  52%|█████▏    | 5500/10658 [13:08<12:18,  6.98it/s]

5500


Epoch 3/3:  56%|█████▋    | 6000/10658 [14:20<11:32,  6.72it/s]

6000


Epoch 3/3:  61%|██████    | 6500/10658 [15:32<09:58,  6.94it/s]

6500


Epoch 3/3:  66%|██████▌   | 7000/10658 [16:44<08:43,  6.98it/s]

7000


Epoch 3/3:  70%|███████   | 7500/10658 [17:56<07:29,  7.03it/s]

7500


Epoch 3/3:  75%|███████▌  | 8000/10658 [19:08<06:21,  6.97it/s]

8000


Epoch 3/3:  80%|███████▉  | 8500/10658 [20:20<05:08,  7.00it/s]

8500


Epoch 3/3:  84%|████████▍ | 9000/10658 [21:32<04:00,  6.90it/s]

9000


Epoch 3/3:  89%|████████▉ | 9500/10658 [22:43<02:46,  6.96it/s]

9500


Epoch 3/3:  94%|█████████▍| 10000/10658 [23:55<01:35,  6.90it/s]

10000


Epoch 3/3:  99%|█████████▊| 10500/10658 [25:07<00:23,  6.79it/s]

10500


Epoch 3/3: 100%|██████████| 10658/10658 [25:30<00:00,  6.96it/s]


Average training loss: 0.9291138535543373


In [20]:
model.save_pretrained('fine_tuned_gpt2_model_base')

In [43]:
!pip install rouge
from rouge import Rouge

def calculate_rouge_scores(hypotheses, references):
    rouge = Rouge()
    scores = rouge.get_scores(hypotheses, references, avg=True)
    return scores




In [64]:
from transformers.utils import logging
logging.set_verbosity_error()
list_of_rouge = []

if torch.cuda.is_available():
    torch.cuda.empty_cache()

# for input_ids, labels in tqdm(test_dataloader, desc=f'Epoch {epoch + 1}/{epochs}'):
for input_ids, labels in tqdm(test_dataloader):
        
    input_ids = input_ids.to(device)
    labels = labels.to(device)
    attention_mask = torch.ones(input_ids.shape, dtype=torch.long).to(device)
    pad_token_id = tokenizer.eos_token_id
    
    outputs = model.generate(input_ids, attention_mask=attention_mask, pad_token_id=pad_token_id, max_length=80)
    output_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    labels_text = tokenizer.decode(labels[0], skip_special_tokens=True)
    roug_score = calculate_rouge_scores(output_text, labels_text)
    list_of_rouge.append(roug_score)
    torch.cuda.empty_cache()

100%|██████████| 3553/3553 [03:02<00:00, 19.42it/s]


In [65]:
len(list_of_rouge)

3553

In [66]:
list_of_rouge[0]

{'rouge-1': {'r': 0.3333333333333333,
  'p': 0.06896551724137931,
  'f': 0.11428571144489805},
 'rouge-2': {'r': 0.0, 'p': 0.0, 'f': 0.0},
 'rouge-l': {'r': 0.3333333333333333,
  'p': 0.06896551724137931,
  'f': 0.11428571144489805}}

In [67]:
final_rouge = {'rouge-1': {'f': 0.0, 'p': 0.0, 'r': 0.0},'rouge-2': {'f': 0.0, 'p': 0.0, 'r': 0.0},'rouge-l': {'f': 0.0, 'p': 0.0, 'r': 0.0}}
for rg in list_of_rouge:
    final_rouge['rouge-1']['r']+=rg['rouge-1']['r']
    final_rouge['rouge-1']['p']+=rg['rouge-1']['p']
    final_rouge['rouge-1']['f']+=rg['rouge-1']['f']
    final_rouge['rouge-2']['r']+=rg['rouge-2']['r']
    final_rouge['rouge-2']['p']+=rg['rouge-2']['p']
    final_rouge['rouge-2']['f']+=rg['rouge-2']['f']
    final_rouge['rouge-l']['r']+=rg['rouge-l']['r']
    final_rouge['rouge-l']['p']+=rg['rouge-l']['p']
    final_rouge['rouge-l']['f']+=rg['rouge-l']['f']

final_rouge['rouge-1']['r']/=len(list_of_rouge)
final_rouge['rouge-1']['p']/=len(list_of_rouge)
final_rouge['rouge-1']['f']/=len(list_of_rouge)
final_rouge['rouge-2']['r']/=len(list_of_rouge)
final_rouge['rouge-2']['p']/=len(list_of_rouge)
final_rouge['rouge-2']['f']/=len(list_of_rouge)
final_rouge['rouge-l']['r']/=len(list_of_rouge)
final_rouge['rouge-l']['p']/=len(list_of_rouge)
final_rouge['rouge-l']['f']/=len(list_of_rouge)

print(final_rouge)

{'rouge-1': {'f': 0.0841702417285644, 'p': 0.051084033100745045, 'r': 0.3079353399021223}, 'rouge-2': {'f': 0.020264212268463918, 'p': 0.012273728389982757, 'r': 0.0889187865602414}, 'rouge-l': {'f': 0.07986952382854193, 'p': 0.04838538159016817, 'r': 0.2949689895110542}}
