In [1]:
import numpy as np
import nltk
import pandas as pd
import torch
from torch.utils.data import DataLoader
from evaluator import Evaluator
from transformers import AutoTokenizer
from models.EmpathicSimilarityModel import EmpathicSimilarityModel
from dataset import EmpathicStoriesDataset
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import ModelCheckpoint

In [2]:
from torch import multiprocessing as mp

# Use the 'file_system' sharing strategy for tensors passed between processes
# (e.g. by DataLoader workers) instead of the platform default.
mp.set_sharing_strategy('file_system')

In [3]:
# Floating-point tensors created without an explicit dtype will be float32.
DEFAULT_DTYPE = torch.float32
torch.set_default_dtype(DEFAULT_DTYPE)

In [4]:
import ssl

# WARNING (security): this replaces the default HTTPS context factory with the
# unverified one, which turns off TLS certificate verification for every
# default-context HTTPS request made by this kernel from here on.
# Presumably this is a workaround so resource downloads succeed on machines
# with a broken certificate store - confirm it is still needed before sharing.
# Interpreters that do not expose the unverified factory are left untouched.
if hasattr(ssl, "_create_unverified_context"):
    ssl._create_default_https_context = ssl._create_unverified_context

In [5]:
# Load the two test-split CSV files (paths are relative to the notebook's
# working directory).
pairs_csv_path = 'data/PAIRS (test).csv'
stories_csv_path = 'data/STORIES (test).csv'

df_test_pairs = pd.read_csv(pairs_csv_path)
df_test_stories = pd.read_csv(stories_csv_path)

In [6]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# The evaluator is given only the pairs table; the stories table is
# explicitly passed as None.
evaluator = Evaluator(
    df_test_stories=None,
    df_test_pairs=df_test_pairs,
    device=device,
)

[nltk_data] Downloading package wordnet to /Users/mili/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt to /Users/mili/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /Users/mili/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


In [7]:
# Build the similarity-task dataset from the test pairs only (no stories
# table, empty labels list) and iterate over it in fixed order, 8 items per batch.
test_dataset = EmpathicStoriesDataset(
    task="similarity",
    data_pairs=df_test_pairs,
    data_stories=None,
    labels=[],
)
test_dataloader = DataLoader(
    test_dataset,
    shuffle=False,
    batch_size=8,
)

In [8]:
# Two independent, initially empty accumulators: one for model outputs and
# one for the matching labels.
all_predictions, all_labels = [], []

In [9]:
# Read the saved checkpoint into CPU memory regardless of where it was saved.
# NOTE (security): torch.load unpickles the file - only open checkpoints from
# a trusted source.
model_checkpoint = "models/EmpathicStoriesBART.ckpt"
checkpoint = torch.load(model_checkpoint, map_location="cpu")

# The weights stored under 'state_dict' are pulled out here but are not passed
# to the model in this cell.
# NOTE(review): confirm that EmpathicSimilarityModel(checkpoint) really
# restores these weights; if the constructor does not consume the checkpoint,
# an explicit model.load_state_dict(model_state_dict) is still required.
model_state_dict = checkpoint["state_dict"]
model = EmpathicSimilarityModel(checkpoint)



In [10]:
# Move the model to the selected device and switch it to evaluation mode.
# Both calls return the module itself, so the cell still displays the
# model's repr as its output.
model.to(device=device).eval()

EmpathicSimilarityModel(
  (lm_model): BartForConditionalGeneration(
    (model): BartModel(
      (shared): Embedding(50265, 768, padding_idx=1)
      (encoder): BartEncoder(
        (embed_tokens): Embedding(50265, 768, padding_idx=1)
        (embed_positions): BartLearnedPositionalEmbedding(1026, 768)
        (layers): ModuleList(
          (0-5): 6 x BartEncoderLayer(
            (self_attn): BartSdpaAttention(
              (k_proj): Linear(in_features=768, out_features=768, bias=True)
              (v_proj): Linear(in_features=768, out_features=768, bias=True)
              (q_proj): Linear(in_features=768, out_features=768, bias=True)
              (out_proj): Linear(in_features=768, out_features=768, bias=True)
            )
            (self_attn_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
            (activation_fn): GELUActivation()
            (fc1): Linear(in_features=768, out_features=3072, bias=True)
            (fc2): Linear(in_features=3072, out_

In [11]:
# Run the model over the whole test set, one batch at a time.
#
# The accumulators are (re)initialised in this cell so that running it again
# starts from empty lists instead of appending a second copy of every batch.
all_predictions = []
all_labels = []

for batch in test_dataloader:
    # Each batch unpacks into the two story collections and their labels.
    stories_a, stories_b, labels = batch

    # The model is called with a list of (story_a, story_b) tuples.
    combined_stories = list(zip(stories_a, stories_b))

    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        predictions = model(combined_stories)

    # Results stay grouped per batch: one list entry per batch.
    all_predictions.append(predictions)
    all_labels.append(labels)

In [12]:
# Sanity-check the nesting of the collected predictions, innermost first:
# length of one prediction, predictions in the first batch, number of batches
# (768, 8 and 50 in the recorded run).
first_prediction_batch = all_predictions[0]
print(
    len(first_prediction_batch[0]),
    len(first_prediction_batch),
    len(all_predictions),
    sep="\n",
)

768
8
50


In [13]:
# Same check for the labels: number of batches, then labels in the first
# batch (50 and 8 in the recorded run).
print(len(all_labels), len(all_labels[0]), sep="\n")

50
8


In [None]:
# Convert the list of per-batch predictions to one array and average over
# axis 1.
# NOTE(review): with the sizes printed earlier (50 batches x 8 items x 768)
# axis 1 looks like the within-batch item axis - confirm this is the intended
# axis, and that every batch has the same size so the conversion is regular.
average_predictions = np.asarray(all_predictions).mean(axis=1)

In [None]:
# Build a fresh evaluator (pairs table only) and score the collected
# predictions against the collected labels.
# NOTE(review): both arguments are lists of per-batch results - verify this
# is the layout get_pearson expects.
evaluator = Evaluator(
    df_test_stories=None,
    df_test_pairs=df_test_pairs,
    device=device,
)
pearson_corr = evaluator.get_pearson(all_predictions, all_labels)

# Report the result.
print("Pearson Correlation Coefficient:", pearson_corr)

In [None]:
# Debug aid: dump every collected label batch as plain text. This prints the
# whole list, so the output grows with the size of the test set.
print(all_labels)

In [None]:
# Peek at the first few components of the very first prediction.
# `.item()` only converts a single-element container to a Python scalar, and
# the size checks recorded earlier in this notebook show all_predictions[0]
# holds 8 entries of length 768, so calling it on the whole batch raises
# instead of printing. Indexing and slicing work for that nested layout.
print(all_predictions[0][0][:5])

In [None]:
# Number of entries accumulated in all_predictions (one per processed batch);
# shown as the cell's output.
len(all_predictions)