In [None]:
import torch
import pandas
import numpy
import matplotlib.pyplot as plt
import warnings
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from functions import get_dataset, get_model_accuracy, plot_chart
from constants import DEVICE, SAMPLES_PER_DATASET, RANDOM_STATE, MESSAGES
from multiagent import WeightedAverage, Plurality, ProbabilitiesSum, Borda, MaxProb
from tqdm import tqdm
from sklearn.metrics import ConfusionMatrixDisplay

# Notebook-wide setup: hide FutureWarning noise emitted by the libraries,
# then enable tqdm's pandas integration.
warnings.simplefilter('ignore', FutureWarning)
tqdm.pandas()

# Loading the fine-tuned classifiers (from the local machine) and their
# tokenizers (from the HuggingFace hub). Every model is built with a
# 3-class classification head and moved to DEVICE.
tokenizer_xlnet = AutoTokenizer.from_pretrained('xlnet/xlnet-base-cased')
model_xlnet = AutoModelForSequenceClassification.from_pretrained('models/xlnet-saved', num_labels=3).to(DEVICE)

tokenizer_bert = AutoTokenizer.from_pretrained('google-bert/bert-base-uncased')
# The BERT agent must load its own checkpoint: it used to point at
# 'models/xlnet-saved', which silently paired the XLNet weights with the
# BERT tokenizer and made the "BERT" results meaningless.
# NOTE(review): 'models/bert-saved' follows the naming of the other two
# checkpoint directories -- confirm that this is where the BERT model was saved.
model_bert = AutoModelForSequenceClassification.from_pretrained('models/bert-saved', num_labels=3).to(DEVICE)

tokenizer_gpt2 = AutoTokenizer.from_pretrained('openai-community/gpt2')
model_gpt2 = AutoModelForSequenceClassification.from_pretrained('models/gpt2-saved', num_labels=3).to(DEVICE)

# Getting SAMPLES_PER_DATASET random rows from the 'test' portion of each dataset.
# The sampling is seeded with RANDOM_STATE so that a kernel restart evaluates
# exactly the same sentences; without a seed every run drew a different sample
# and the reported accuracies were not reproducible.
df1 = get_dataset('DS1.csv', 'test').sample(n=SAMPLES_PER_DATASET, random_state=RANDOM_STATE)
df2 = get_dataset('DS2.csv', 'test').sample(n=SAMPLES_PER_DATASET, random_state=RANDOM_STATE)
df3 = get_dataset('DS3.csv', 'test').sample(n=SAMPLES_PER_DATASET, random_state=RANDOM_STATE)

# Merge the three test datasets in a single one
df = pandas.concat([df1, df2, df3])

# Plotting the labels distribution in the three test datasets.
# Counts are ordered by label value (sort_index) so that the i-th bar sits on
# the i-th tick; value_counts() on its own orders by frequency or by first
# appearance, which can attach a count to the wrong label name.
# NOTE(review): assumes MESSAGES is ordered by ascending label value -- confirm.
X_axis = numpy.arange(len(MESSAGES))
bar_width = 0.2
# (dataset, horizontal offset, legend entry): bars are laid out left to right
# in the same order as the legend.
grouped_bars = (
    (df1, -bar_width, 'Test dataset 0'),
    (df2, 0.0, 'Test dataset 1'),
    (df3, bar_width, 'Test dataset 2'),
)
for test_frame, bar_offset, legend_entry in grouped_bars:
    counts_by_label = test_frame['labels'].value_counts().sort_index()
    plt.bar(X_axis + bar_offset, list(counts_by_label), bar_width, label=legend_entry)
plt.xticks(X_axis, MESSAGES)
plt.xlabel("Labels")
plt.ylabel("Number of sentences")
plt.title("Labels distribution")
# Legend placed below the axes so it does not cover the bars
plt.legend(ncol=3, loc='lower center', bbox_to_anchor=(0.5, -0.25))
plt.show()

# Plotting the labels distribution in the merged dataset.
# NOTE(review): assumes the label values sort as Hate < Offensive < None
# (e.g. 0, 1, 2) -- confirm against the dataset encoding.
merged_counts = df['labels'].value_counts().sort_index()
plot_chart(['Hate', 'Offensive', 'None'], list(merged_counts), 'Labels distribution in merged test dataset', y_label='Number of sentences')

In [None]:
# Calculating GPT2's accuracy and confusion matrix on each test set separately
gpt_df1_acc, gpt_df1_cm = get_model_accuracy(model_gpt2, tokenizer_gpt2, df1)
gpt_df2_acc, gpt_df2_cm = get_model_accuracy(model_gpt2, tokenizer_gpt2, df2)
gpt_df3_acc, gpt_df3_cm = get_model_accuracy(model_gpt2, tokenizer_gpt2, df3)

# One confusion-matrix display per test set, with the class ids as tick labels
disp_df1_cm = ConfusionMatrixDisplay(confusion_matrix=gpt_df1_cm, display_labels=['0','1','2'])
disp_df2_cm = ConfusionMatrixDisplay(confusion_matrix=gpt_df2_cm, display_labels=['0','1','2'])
disp_df3_cm = ConfusionMatrixDisplay(confusion_matrix=gpt_df3_cm, display_labels=['0','1','2'])

# Plotting the accuracies (the chart title previously read 'GP2')
plot_chart(['Dataset 1','Dataset 2','Dataset 3'], [gpt_df1_acc, gpt_df2_acc, gpt_df3_acc], 'GPT2 accuracies')

# Plotting the confusion matrices stacked vertically, one axis per test set
fig, axes = plt.subplots(3, 1, figsize=(10, 15))
gpt2_panels = (
    ('GPT-2 Confusion Matrix - Dataset 1', disp_df1_cm),
    ('GPT-2 Confusion Matrix - Dataset 2', disp_df2_cm),
    ('GPT-2 Confusion Matrix - Dataset 3', disp_df3_cm),
)
for axis, (panel_title, cm_display) in zip(axes, gpt2_panels):
    axis.set_title(panel_title)
    cm_display.plot(cmap=plt.cm.Blues, ax=axis)

plt.tight_layout()
plt.show()

In [None]:
# Evaluate BERT on each of the three test sets individually
(bert_df1_acc, bert_df1_cm), (bert_df2_acc, bert_df2_cm), (bert_df3_acc, bert_df3_cm) = [
    get_model_accuracy(model_bert, tokenizer_bert, test_set)
    for test_set in (df1, df2, df3)
]

# Wrap every confusion matrix in a display object labelled with the class ids
disp_df1_cm, disp_df2_cm, disp_df3_cm = [
    ConfusionMatrixDisplay(confusion_matrix=matrix, display_labels=['0','1','2'])
    for matrix in (bert_df1_cm, bert_df2_cm, bert_df3_cm)
]

# Bar chart comparing the three accuracies
plot_chart(['Dataset 1','Dataset 2','Dataset 3'], [bert_df1_acc, bert_df2_acc, bert_df3_acc], 'BERT accuracies')

# Confusion matrices stacked vertically: titles first, then the matrices
fig, axes = plt.subplots(3, 1, figsize=(10, 15))
bert_titles = (
    'BERT Confusion Matrix - Dataset 1',
    'BERT Confusion Matrix - Dataset 2',
    'BERT Confusion Matrix - Dataset 3',
)
for axis, panel_title in zip(axes, bert_titles):
    axis.set_title(panel_title)
for axis, cm_display in zip(axes, (disp_df1_cm, disp_df2_cm, disp_df3_cm)):
    cm_display.plot(cmap=plt.cm.Blues, ax=axis)

plt.tight_layout()
plt.show()

In [None]:
# Evaluate XLNet on each of the three test sets individually
(xlnet_df1_acc, xlnet_df1_cm), (xlnet_df2_acc, xlnet_df2_cm), (xlnet_df3_acc, xlnet_df3_cm) = [
    get_model_accuracy(model_xlnet, tokenizer_xlnet, test_set)
    for test_set in (df1, df2, df3)
]

# Wrap every confusion matrix in a display object labelled with the class ids
disp_df1_cm, disp_df2_cm, disp_df3_cm = [
    ConfusionMatrixDisplay(confusion_matrix=matrix, display_labels=['0','1','2'])
    for matrix in (xlnet_df1_cm, xlnet_df2_cm, xlnet_df3_cm)
]

# Bar chart comparing the three accuracies
plot_chart(['Dataset 1','Dataset 2','Dataset 3'], [xlnet_df1_acc, xlnet_df2_acc, xlnet_df3_acc], 'XLNet accuracies')

# Confusion matrices stacked vertically: titles first, then the matrices
fig, axes = plt.subplots(3, 1, figsize=(10, 15))
xlnet_titles = (
    'XLNET Confusion Matrix - Dataset 1',
    'XLNET Confusion Matrix - Dataset 2',
    'XLNET Confusion Matrix - Dataset 3',
)
for axis, panel_title in zip(axes, xlnet_titles):
    axis.set_title(panel_title)
for axis, cm_display in zip(axes, (disp_df1_cm, disp_df2_cm, disp_df3_cm)):
    cm_display.plot(cmap=plt.cm.Blues, ax=axis)

plt.tight_layout()
plt.show()

In [None]:
# Evaluate every model on the merged test set (GPT2, then BERT, then XLNet)
(gpt2_all_acc, gpt2_cm), (bert_all_acc, bert_cm), (xlnet_all_acc, xlnet_cm) = [
    get_model_accuracy(agent_model, agent_tokenizer, df)
    for agent_model, agent_tokenizer in (
        (model_gpt2, tokenizer_gpt2),
        (model_bert, tokenizer_bert),
        (model_xlnet, tokenizer_xlnet),
    )
]



In [None]:
# Wrap each model's merged-set confusion matrix in a display object
disp_gpt2_cm, disp_bert_cm, disp_xlnet_cm = [
    ConfusionMatrixDisplay(confusion_matrix=matrix, display_labels=['0','1','2'])
    for matrix in (gpt2_cm, bert_cm, xlnet_cm)
]

# Bar chart comparing the three models on the merged test set
plot_chart(['GPT2','BERT','XLNet'], [gpt2_all_acc, bert_all_acc, xlnet_all_acc], 'Model accuracies on test set')

# Confusion matrices stacked vertically: titles first, then the matrices
fig, axes = plt.subplots(3, 1, figsize=(10, 15))
merged_titles = (
    'GPT-2 Confusion Matrix - All dataset',
    'BERT Confusion Matrix - All dataset',
    'XLNET Confusion Matrix - All dataset',
)
for axis, panel_title in zip(axes, merged_titles):
    axis.set_title(panel_title)
for axis, cm_display in zip(axes, (disp_gpt2_cm, disp_bert_cm, disp_xlnet_cm)):
    cm_display.plot(cmap=plt.cm.Blues, ax=axis)

plt.tight_layout()
plt.show()

In [None]:
# Initializing all the multiagent voting rules
rules = [ProbabilitiesSum(), Plurality(), MaxProb(), WeightedAverage(), Borda()]

# Tokenizer/model pair of every agent, keyed by the name under which its
# probabilities are handed to the voting rules. The insertion order
# (gpt2, xlnet, bert) is kept in case a rule depends on it.
agents = {
    'gpt2': (tokenizer_gpt2, model_gpt2),
    'xlnet': (tokenizer_xlnet, model_xlnet),
    'bert': (tokenizer_bert, model_bert),
}

# Calculating all the multiagent results
probabilities = {}
# Inference only: disabling autograd avoids building a computation graph for
# every forward pass, which wastes memory and time over the whole test set.
with torch.no_grad():
    for index, row in tqdm(df.iterrows(), total=len(df)):
        # Class probabilities of each agent for the current sentence
        for agent_name, (agent_tokenizer, agent_model) in agents.items():
            agent_inputs = agent_tokenizer(row['text'], return_tensors="pt", max_length=128, truncation=True).to(DEVICE)
            probabilities[agent_name] = torch.softmax(agent_model(**agent_inputs).logits, 1)

        # Applying each voting rule; every rule is called with the agents'
        # probabilities and the true label
        for r in rules:
            r(probabilities, row['labels'])

# Plotting the results: each rule's correct_counter divided by the number of sentences
plot_chart([r.__class__.__name__ for r in rules], [r.correct_counter / len(df) for r in rules], 'Multiagent accuracies')
