In [21]:
import pandas as pd
import numpy as np
import torch
from transformers import GPT2Tokenizer, GPT2LMHeadModel
from nltk.translate.bleu_score import sentence_bleu
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
from tqdm import tqdm
import os



In [22]:
# Prototype option:
os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1' # Set a fallback to CPU if MPS is not yet implemented

# Assign cuda to the device to use for training
if torch.cuda.is_available(): 
 dev = "cuda:0" 
 print("This model will run on CUDA")
# elif  torch.backends.mps.is_available(): 
#  dev = "mps:0"
#  print("This model will run on MPS")
else:
 dev = "cpu" 
 print("This model will run on CPU")
device = torch.device(dev) 

This model will run on CPU


In [23]:
# Load the desired model and tokenizer
model_name = "20K"
output_dir = f"../7. Models/{model_name}/"
model = GPT2LMHeadModel.from_pretrained(output_dir)
tokenizer = GPT2Tokenizer.from_pretrained(output_dir)

In [24]:
# Load the validation dataset
val_file = f'./../3. Cleaned Data/{model_name}_val.csv'
val_df = pd.read_csv(val_file)

In [25]:
# Function to generate predictions using the model
def generate_prediction(prompt):
    input_ids = tokenizer.encode(prompt, return_tensors='pt').to(device)
    attention_mask = torch.ones(input_ids.shape, dtype=torch.long, device=device)
    with torch.no_grad():
        output = model.generate(input_ids, attention_mask=attention_mask, max_length=100, num_return_sequences=1, no_repeat_ngram_size=2)
    return tokenizer.decode(output[0], skip_special_tokens=True)

In [26]:
# Calculate BLEU score and cosine distance
bleu_scores = []
cosine_distances = []

vectorizer = TfidfVectorizer()

In [27]:
val_df['predicted'] = ''
val_df['BLEU'] = 0.0
val_df['cosine'] = 0.0

for index, row in tqdm(val_df.iterrows(), total=val_df.shape[0]):
    original = row['original']
    corrected = row['corrected']
    
    # Generate prediction
    prompt = f"input: {original} output:"
    prediction = generate_prediction(prompt)
    
    # Save the predicted result
    val_df.loc[index, 'predicted'] = prediction

    # Calculate BLEU score
    bleu_score = sentence_bleu([corrected.split()], prediction.split(), weights=(0.33, 0.33, 0.33))
    
    # Save the BLEU score
    val_df.loc[index, 'BLEU'] = bleu_score

    # Calculate cosine distance
    vector_matrix = vectorizer.fit_transform([corrected, prediction])
    cosine_distance = cosine_similarity(vector_matrix[0:1], vector_matrix[1:2])[0][0]

    # Save the cosine distance
    val_df.loc[index, 'cosine'] = cosine_distance


  0%|          | 0/2000 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
  0%|          | 1/2000 [00:01<1:04:01,  1.92s/it]Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
  0%|          | 2/2000 [00:03<1:03:30,  1.91s/it]Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
  0%|          | 3/2000 [00:05<1:04:12,  1.93s/it]Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
  0%|          | 4/2000 [00:07<1:05:11,  1.96s/it]Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
  0%|          | 5/2000 [00:09<1:05:47,  1.98s/it]Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
  0%|          | 6/2000 [00:11<1:00:52,  1.83s/it]Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The hypothesis contains 0 counts of 2-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it co

In [None]:
val_gen = f'./../3. Cleaned Data/{model_name}_val_gen.csv'
val_df.to_csv(val_gen)