In [1]:
import torch
import pandas as pd
import json
import gzip
import re
import os

from utils import testing_utils
from torch.utils.data import DataLoader, Dataset
from utils import model_utils
from tqdm import tqdm

In [2]:
save_path = "Saved_weights/EXP3/"
os.makedirs(save_path, exist_ok= True)

## Initializing Model

In [3]:
MODEL_NAME = "gpt2-medium"
mt = model_utils.ModelAndTokenizer(MODEL_NAME, low_cpu_mem_usage=False)
model = mt.model
tokenizer = mt.tokenizer
tokenizer.pad_token = tokenizer.eos_token
print(f"Model {MODEL_NAME} initialized")
print()

Model gpt2-medium initialized



## Define dataset and dataloader

In [4]:
def parse(path):
  g = gzip.open(path, 'rb')
  for l in g:
    yield eval(l)

def getDF(path):
  i = 0
  df = {}
  for d in parse(path):
    df[i] = d
    i += 1
  return pd.DataFrame.from_dict(df, orient='index')

test_df = getDF('Data/Amazon_reviews/reviews_Electronics_5.json.gz')

In [5]:
test_df = test_df[(test_df['overall']==1.0) | (test_df['overall']==5.0)]

In [6]:
import re
CLEANR = re.compile('<.*?>|&([a-z0-9]+|#[0-9]{1,6}|#x[0-9a-f]{1,6});|/.*/')

def cleanhtml(raw_html):
    raw_html = raw_html.replace("\\", "")
    raw_html = raw_html.replace("&#039;", "\'")
    cleantext = re.sub(CLEANR, ' ', raw_html)
    split = cleantext.strip().split(" ")
    if(split[0].isnumeric()):
      split = split[1:]
    return " ".join([w for w in split if len(w.strip()) > 0])

class Amazon_Review(Dataset):
    def __init__(self, data_frame, limiter = 20000):
        self.x = []
        self.y = []
        self.limiter = limiter
        self.counter = {
            "positive": 0,
            "negative": 0
        }

        for index, row in tqdm(data_frame.iterrows()):
            sentiment = "positive" if row["overall"] == 5.0 else "negative"
            if(self.counter[sentiment] == self.limiter):
                break_loop = True
                for k in self.counter:
                    if(self.counter[k] != self.limiter):
                      break_loop = False
                      break
                if(break_loop):
                    break
                else:
                    continue

            self.x.append("<REVIEW>: " + cleanhtml(row["reviewText"]) + " <SENTIMENT>")
            self.y.append(f" {sentiment}")
            self.counter[sentiment] += 1
        
    def __len__(self):
        return len(self.y)

    def __getitem__(self, idx):
        return self.x[idx], self.y[idx]

In [7]:
test_dataset = Amazon_Review(test_df.sample(frac = 1))
len(test_dataset)

205923it [00:05, 38544.30it/s]


40000

In [8]:
test_dataset[0]

("<REVIEW>: I have been using theTranscend 32 GB Class 10 SDHC Flash Memory Card (TS32GSDHC10E)for some time now, and have been quite pleased on a price to performance basis. However, as the photo count on an individual card goes up, so does the amount of time I spend staring blankly at my computer screen while the photos transfer. I was interested to see how much of an improvement could be realized by stepping up to the SanDisk Extreme.While I realize that I am comparing apples to oranges, the performance of the SanDisk Extreme really blew my hair back. I now understand the price differential. The read speed, which I expected to be substantially better, did not disappoint. Though, it was the write speed that really opened my eyes.An occasion arose to photograph a large family with eight children under the age of ten. I figured this would provide the perfect proving ground to test the Extreme's mettle. I realized almost instantly that I would not have been able to shoot at anywhere clo

In [9]:
testing_dataloader = DataLoader(test_dataset, batch_size = 1)

## Adapter-Tuning

In [10]:
from utils.tuning_utils import Adapter
adapters = torch.load("Saved_weights/Final/Adapters/gpt2-medium/adapter_dim__32.pth")

test_results = testing_utils.test(
    testing_dataloader,
    model, tokenizer,
    light_weight_tuning = adapters, algo = "adapter",
    # limit = 1000
)

with open(save_path + "adapter_extrapolated.json", "w") as f:
    json.dump(test_results, f)

print(test_results["confusion_matrix"])
print(test_results["balanced_accuracy"])


testing .... 



  1%|▏         | 573/40000 [00:26<29:25, 22.34it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (2369 > 1024). Running this sequence through the model will result in indexing errors
100%|██████████| 40000/40000 [30:37<00:00, 21.77it/s]


{'tp': 18540, 'fn': 1398, 'fp': 1677, 'tn': 18240}
0.9228416042737457


## Prefix-Tuning

In [11]:
prefix_embeddings = torch.load("Saved_weights/Final/Prefix_Tuning/gpt2-medium/prefix_size__2.pth")

test_results = testing_utils.test(
    testing_dataloader,
    model, tokenizer,
    light_weight_tuning = prefix_embeddings, algo = "prefix",
    prefix_size = 2
)

with open(save_path + "prefix_extrapolated.json", "w") as f:
    json.dump(test_results, f)

print(test_results["confusion_matrix"])
print(test_results["balanced_accuracy"])


testing .... 



100%|██████████| 40000/40000 [28:38<00:00, 23.28it/s]


{'tp': 19668, 'fn': 270, 'fp': 4163, 'tn': 15754}
0.8887202987795076


## Prompt-Tuning

In [12]:
soft_tokens = torch.load("Saved_weights/Final/Prompt_Tuning/gpt2-medium/prompt_size__8.pth", map_location='cuda:0')

test_results = testing_utils.test(
    testing_dataloader,
    model, tokenizer,
    light_weight_tuning = soft_tokens, algo = "prompt",
    prefix_size = soft_tokens.shape[1]
)

with open(save_path + "promt_extrapolated.json", "w") as f:
    json.dump(test_results, f)

print(test_results["confusion_matrix"])
print(test_results["balanced_accuracy"])


testing .... 



100%|██████████| 40000/40000 [24:07<00:00, 27.63it/s]


{'tp': 19694, 'fn': 244, 'fp': 4203, 'tn': 15714}
0.8883681527511608


## Full Fine-tuning

In [13]:
mt_2 = model_utils.ModelAndTokenizer(MODEL_NAME, low_cpu_mem_usage=False)
model_2 = mt.model
tokenizer_2 = mt.tokenizer
tokenizer_2.pad_token = tokenizer_2.eos_token
print(f"Duplicate Model {MODEL_NAME} initialized")

Duplicate Model gpt2-medium initialized


In [14]:
weights = torch.load("Saved_weights/Final/Finetune/gpt2-medium/gpt2-medium____data_40000.pth")
model_2.load_state_dict(weights)

<All keys matched successfully>

In [15]:
test_results = testing_utils.test(
    testing_dataloader,
    model_2, tokenizer_2,
)

with open(save_path + "finetune_extrapolated.json", "w") as f:
    json.dump(test_results, f)

print(test_results["confusion_matrix"])
print(test_results["balanced_accuracy"])


testing .... 



100%|██████████| 40000/40000 [23:47<00:00, 28.02it/s]


{'tp': 17406, 'fn': 2532, 'fp': 8475, 'tn': 11442}
0.7237452143216497
