In [1]:
import pandas as pd
import numpy as np
import sys

In [2]:
# Read the train / test / validation splits from their CSV files, in that order.
train_set, test_set, val_set = map(
    pd.read_csv,
    ('../data/train.csv', '../data/test.csv', '../data/val.csv'),
)

In [3]:
# Encode the string class labels in the 'preferred' column as integer ids:
# 'T' -> 0, 'N' -> 1, 'O' -> 2.
#
# The encoded column is assigned back to the frame rather than calling
# `frame['preferred'].replace(..., inplace=True)`. That chained in-place form
# operates on a temporary Series under pandas Copy-on-Write, so the DataFrame
# itself would keep its original string labels.
label_to_id = {'T':0, 'N':1,'O':2}
for split_frame in (train_set, val_set, test_set):
    # `replace` leaves values that are not keys of the mapping untouched, so
    # re-running this cell on already-encoded labels is a no-op. The cast makes
    # the integer dtype explicit instead of relying on `replace` to downcast,
    # and fails here if a label outside the mapping is present.
    split_frame['preferred'] = split_frame['preferred'].replace(label_to_id).astype('int64')

In [4]:
import torch

class getDataset(torch.utils.data.Dataset):
    """Map-style dataset pairing per-example encodings with their labels.

    `encodings` is a mapping whose values can be indexed by example position;
    `labels` is an indexable sequence with one entry per example.
    """

    def __init__(self, encodings, labels):
        """Store the encodings mapping and the label sequence as given."""
        self.labels = labels
        self.encodings = encodings

    def __len__(self):
        """Return the number of examples, i.e. the number of labels."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return a dict of tensors for example `idx`.

        The dict holds one tensor per key of `encodings`, followed by the
        example's label under the key 'labels'.
        """
        sample = {}
        for field, column in self.encodings.items():
            sample[field] = torch.tensor(column[idx])
        sample['labels'] = torch.tensor(self.labels[idx])
        return sample

### Load from saved fine-tuned models

In [5]:
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer, Trainer

# Candidate checkpoint names; index 3 selects 'roberta-large'.
model_list = ['distilbert-base-uncased', 'xlnet-base-cased', 'roberta-base', 'roberta-large', 'albert-base-v2', 'microsoft/deberta-base']
cmodel = model_list[3]

# Location under ../models from which the classifier weights are loaded.
filename = "../models/" + cmodel
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# The tokenizer is resolved from the checkpoint name (`cmodel`), whereas the
# 3-label classifier is loaded from `filename`.
tokenizer = AutoTokenizer.from_pretrained(cmodel)
model = AutoModelForSequenceClassification.from_pretrained(filename, num_labels = 3)
model.to(device)
print("Model loaded")

# Trainer is constructed with the model only and is used below via `trainer.predict`.
trainer = Trainer(model=model)

Model loaded


### Performance Evaluation on Pixie

In [6]:
from sklearn.metrics import classification_report, confusion_matrix

# Tokenize every test sentence; truncation plus 'max_length' padding gives each
# encoded sequence a fixed length of 100.
test_encodings = tokenizer(
    test_set['proc_sent'].tolist(),
    truncation=True,
    padding='max_length',
    max_length=100,
)
test_dataset = getDataset(test_encodings, test_set['preferred'].tolist())

# Predicted class = index of the largest score along axis 1 of the predictions.
outputs = trainer.predict(test_dataset)
y_pred = outputs.predictions.argmax(axis=1)
y_test = test_dataset.labels

# Per-class precision / recall / F1 (4 decimals), followed by the confusion matrix.
print(classification_report(y_test, y_pred, digits=4))
print(confusion_matrix(y_test, y_pred))

***** Running Prediction *****
  Num examples = 1778
  Batch size = 16


              precision    recall  f1-score   support

           0     0.8860    0.9302    0.9075       802
           1     0.6899    0.6130    0.6492       323
           2     0.8875    0.8821    0.8848       653

    accuracy                         0.8549      1778
   macro avg     0.8211    0.8084    0.8138      1778
weighted avg     0.8509    0.8549    0.8523      1778

[[746  40  16]
 [ 68 198  57]
 [ 28  49 576]]


In [7]:
### Performance evaluation for Implicit Comparisons

In [8]:
from sklearn.metrics import classification_report, confusion_matrix

# Restrict the test set to rows with comparison == 1
# (presumably the implicit comparisons, going by the `_imp` variable names).
is_implicit = test_set['comparison'] == 1
X_test_imp = test_set.loc[is_implicit, 'proc_sent']
y_test_imp = test_set.loc[is_implicit, 'preferred']

# Tokenize the subset with the same settings as the full test set (fixed length 100).
test_encodings = tokenizer(
    X_test_imp.tolist(),
    truncation=True,
    padding='max_length',
    max_length=100,
)
test_dataset = getDataset(test_encodings, y_test_imp.tolist())

# Predicted class = index of the largest score along axis 1 of the predictions.
outputs = trainer.predict(test_dataset)
y_pred_imp = outputs.predictions.argmax(axis=1)

# Per-class precision / recall / F1 (4 decimals), followed by the confusion matrix.
print(classification_report(y_test_imp, y_pred_imp, digits=4))
print(confusion_matrix(y_test_imp, y_pred_imp))

***** Running Prediction *****
  Num examples = 1004
  Batch size = 16


              precision    recall  f1-score   support

           0     0.8684    0.9260    0.8963       392
           1     0.6159    0.5312    0.5705       160
           2     0.8996    0.8916    0.8956       452

    accuracy                         0.8476      1004
   macro avg     0.7946    0.7830    0.7874      1004
weighted avg     0.8422    0.8476    0.8440      1004

[[363  22   7]
 [ 37  85  38]
 [ 18  31 403]]


In [9]:
### Performance evaluation for Explicit Comparisons

In [10]:
from sklearn.metrics import classification_report, confusion_matrix

# Restrict the test set to rows with comparison == 2
# (presumably the explicit comparisons, going by the `_exp` variable names).
is_explicit = test_set['comparison'] == 2
X_test_exp = test_set.loc[is_explicit, 'proc_sent']
y_test_exp = test_set.loc[is_explicit, 'preferred']

# Tokenize the subset with the same settings as the full test set (fixed length 100).
test_encodings = tokenizer(
    X_test_exp.tolist(),
    truncation=True,
    padding='max_length',
    max_length=100,
)
test_dataset = getDataset(test_encodings, y_test_exp.tolist())

# Predicted class = index of the largest score along axis 1 of the predictions.
outputs = trainer.predict(test_dataset)
y_pred_exp = outputs.predictions.argmax(axis=1)

# Per-class precision / recall / F1 (4 decimals), followed by the confusion matrix.
print(classification_report(y_test_exp, y_pred_exp, digits=4))
print(confusion_matrix(y_test_exp, y_pred_exp))

***** Running Prediction *****
  Num examples = 774
  Batch size = 16


              precision    recall  f1-score   support

           0     0.9033    0.9341    0.9185       410
           1     0.7584    0.6933    0.7244       163
           2     0.8607    0.8607    0.8607       201

    accuracy                         0.8643       774
   macro avg     0.8408    0.8294    0.8345       774
weighted avg     0.8617    0.8643    0.8626       774

[[383  18   9]
 [ 31 113  19]
 [ 10  18 173]]
