In [None]:
import pandas as pd, numpy as np
import seaborn as sns
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

### Load predictions from fine-tuned models

In [None]:
# First load the text and gold labels from one of the prediction files.
# The frame is deliberately not named `input`, which would shadow the builtin.
# NOTE(review): this file is read from 'finetuned-models-output/' while the
# per-model loop further down reads from 'fine-tune results/' — confirm that
# both directories are intended.
fbert_in_output = pd.read_csv('finetuned-models-output/fBERT_in_output_predictions.csv', delimiter=';')
# Take an explicit copy: later cells add columns to `full_results`, and writing
# into a slice of another frame triggers pandas' SettingWithCopyWarning.
full_results = fbert_in_output[['text', 'gold']].copy()

In [None]:
# the probabilities were stored as strings, revert them to lists of floats
def fix_prob(val):
    """Parse a stringified pair of probabilities into a list of two floats.

    Square brackets and comma separators are turned into whitespace, the
    remainder is split on whitespace, and the first two numbers are returned.
    Both ``"[0.91 0.09]"`` and ``"[0.91, 0.09]"`` are therefore accepted.

    Args:
        val: The probability cell as a string.

    Returns:
        ``[float, float]`` holding the first two numbers found in ``val``.

    Raises:
        IndexError: If ``val`` contains fewer than two numbers.
        ValueError: If one of the first two tokens is not a valid float.
    """
    # Map '[', ']' and ',' to spaces so every supported layout tokenises alike.
    tokens = val.translate(str.maketrans('[],', '   ')).split()
    return [float(tokens[0]), float(tokens[1])]

In [None]:
# Then add the predictions and probabilities of every model/domain pair as new columns.
# NOTE(review): these files are read from 'fine-tune results/' while the text and
# gold labels above come from 'finetuned-models-output/' — confirm both are intended.
for model in ['bert-base-uncased', 'fBERT', 'hateBERT']:
    for domain in ['in', 'cross']:
        data = pd.read_csv(f'fine-tune results/{model}_{domain}_output_predictions.csv', delimiter=';')
        # Columns are assigned by index alignment, so a file with a different
        # number of rows would silently produce NaNs or drop rows. Fail loudly instead.
        if len(data) != len(full_results):
            raise ValueError(
                f'{model}_{domain}: expected {len(full_results)} rows, found {len(data)}'
            )
        full_results[f'prediction_{model}_{domain}'] = data['prediction']
        # Element-wise parse of the stringified probabilities into [float, float].
        full_results[f'probability_{model}_{domain}'] = data['probability'].map(fix_prob)


### Start ensemble calculations

In [None]:
# set up the functions for calculating the voting

def _ensemble_columns(kind, domain):
    """Return the column names ``{kind}_{model}_{domain}`` for all three ensemble members."""
    return [f'{kind}_{model}_{domain}' for model in ('bert-base-uncased', 'fBERT', 'hateBERT')]


def _soft_vote(row, domain):
    """Soft voting: sum the members' probability pairs and pick the larger side."""
    class_totals = np.sum([row[col] for col in _ensemble_columns('probability', domain)], axis=0)
    # Label 0 wins only with a strictly larger total; a tie goes to label 1.
    return 0 if class_totals[0] > class_totals[1] else 1


def _hard_vote(row, domain):
    """Hard voting: average the members' predicted labels and threshold at 0.5."""
    votes = [row[col] for col in _ensemble_columns('prediction', domain)]
    # A mean below 0.5 means fewer than half of the members predicted label 1.
    return 0 if np.sum(votes, axis=0) / len(votes) < 0.5 else 1


def soft_major_in(row):
    """Soft-voting prediction (0 or 1) for one row, using the ``*_in`` probability columns.

    Args:
        row: Mapping or ``pd.Series`` holding the three ``probability_*_in`` entries,
            each a two-element sequence of floats.

    Returns:
        0 if the summed first probability is strictly larger than the summed second, else 1.
    """
    return _soft_vote(row, 'in')


def hard_major_in(row):
    """Hard-voting prediction (0 or 1) for one row, using the ``*_in`` prediction columns.

    Args:
        row: Mapping or ``pd.Series`` holding the three ``prediction_*_in`` entries.

    Returns:
        0 if the mean of the three predictions is below 0.5, else 1.
    """
    return _hard_vote(row, 'in')


def soft_major_cross(row):
    """Soft-voting prediction (0 or 1) for one row, using the ``*_cross`` probability columns.

    Args:
        row: Mapping or ``pd.Series`` holding the three ``probability_*_cross`` entries,
            each a two-element sequence of floats.

    Returns:
        0 if the summed first probability is strictly larger than the summed second, else 1.
    """
    return _soft_vote(row, 'cross')


def hard_major_cross(row):
    """Hard-voting prediction (0 or 1) for one row, using the ``*_cross`` prediction columns.

    Args:
        row: Mapping or ``pd.Series`` holding the three ``prediction_*_cross`` entries.

    Returns:
        0 if the mean of the three predictions is below 0.5, else 1.
    """
    return _hard_vote(row, 'cross')


In [None]:
# Run each voting function row by row and keep its result as a column of the same name.
for column_name, vote_fn in (
    ('soft_major_in', soft_major_in),
    ('hard_major_in', hard_major_in),
    ('soft_major_cross', soft_major_cross),
    ('hard_major_cross', hard_major_cross),
):
    full_results[column_name] = full_results.apply(vote_fn, axis=1)

In [None]:
# Pull the gold labels and every prediction column out of the frame once,
# so the metric calls further down stay short.
target_names = ['NOT OFF', 'OFF']
gold = full_results['gold']

# ensemble votes
soft_pred_in = full_results['soft_major_in']
hard_pred_in = full_results['hard_major_in']
soft_pred_cross = full_results['soft_major_cross']
hard_pred_cross = full_results['hard_major_cross']

# individual fine-tuned models
bert_in = full_results['prediction_bert-base-uncased_in']
bert_cross = full_results['prediction_bert-base-uncased_cross']
fbert_in = full_results['prediction_fBERT_in']
fbert_cross = full_results['prediction_fBERT_cross']
hatebert_in = full_results['prediction_hateBERT_in']
hatebert_cross = full_results['prediction_hateBERT_cross']

### Save the final results (all fine-tuned models plus the soft- and hard-voting ensembles)

In [None]:
# Collect the text, gold labels and every model/ensemble prediction column-wise.
# Building from a dict of columns avoids materialising one Python tuple per row.
full_results_out = pd.DataFrame({
    'text': full_results.text,
    'gold': gold,
    'bert_in': bert_in,
    'bert_cross': bert_cross,
    'fbert_in': fbert_in,
    'fbert_cross': fbert_cross,
    'hatebert_in': hatebert_in,
    'hatebert_cross': hatebert_cross,
    'soft_pred_in': soft_pred_in,
    'soft_pred_cross': soft_pred_cross,
    'hard_pred_in': hard_pred_in,
    'hard_pred_cross': hard_pred_cross,
}).reset_index(drop=True)  # fresh 0..n-1 index, as the row-wise construction produced

# Semicolon-separated, matching the delimiter used when reading the prediction files.
full_results_out.to_csv('ensemble_output_all_models.csv', sep=';')

# Preview goes last: only the final expression of a cell is displayed.
full_results_out.head()

### Print all classification reports

In [None]:
# bert-base-uncased: report for the `in` predictions first, then the `cross` predictions.
for predictions in (bert_in, bert_cross):
    print(classification_report(gold, predictions, target_names=target_names))

In [None]:
# fBERT: report for the `in` predictions first, then the `cross` predictions.
for predictions in (fbert_in, fbert_cross):
    print(classification_report(gold, predictions, target_names=target_names))

In [None]:
# hateBERT: report for the `in` predictions first, then the `cross` predictions.
for predictions in (hatebert_in, hatebert_cross):
    print(classification_report(gold, predictions, target_names=target_names))

In [None]:
# hateBERT confusion matrices (gold vs. predicted): `in` first, then `cross`.
for predictions in (hatebert_in, hatebert_cross):
    print(confusion_matrix(gold, predictions))

In [None]:
# Soft-voting ensemble: report for the `in` predictions first, then the `cross` predictions.
for predictions in (soft_pred_in, soft_pred_cross):
    print(classification_report(gold, predictions, target_names=target_names))

In [None]:
# Soft-voting ensemble confusion matrices (gold vs. predicted): `in` first, then `cross`.
for predictions in (soft_pred_in, soft_pred_cross):
    print(confusion_matrix(gold, predictions))

In [None]:
# Hard-voting ensemble: report for the `in` predictions first, then the `cross` predictions.
for predictions in (hard_pred_in, hard_pred_cross):
    print(classification_report(gold, predictions, target_names=target_names))

In [None]:
# Hard-voting ensemble confusion matrices (gold vs. predicted): `in` first, then `cross`.
for predictions in (hard_pred_in, hard_pred_cross):
    print(confusion_matrix(gold, predictions))