In [32]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from IPython.display import HTML
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from IPython.display import HTML, display
import tabulate

### Evaluation of models

In [12]:
################################################################
# Reference labels for the main evaluation set
ground_truth = np.load('predictions/ground_truth.npy')

# Predictions from the transfer-learning models
bert_pred = np.load('predictions/bert_predictions.npy')
glove_pred = np.load('predictions/lstm_glove_predictions.npy')
# Each ELMo entry is reduced to the 1-based position of its largest value.
elmo_pred = [
    np.argmax(entry) + 1
    for entry in np.load('predictions/elmo_predict.npy')
]

# Predictions from the remaining models.
# The SVM / logistic-regression / random-forest arrays are converted to
# plain Python ints element by element.
svm_pred = list(map(int, np.load('predictions/svm_predictions.npy')))
log_pred = list(map(int, np.load('predictions/log_predictions.npy')))
rf_pred = list(map(int, np.load('predictions/rf_predictions.npy')))
lstm_pred = np.load('predictions/lstm_predictions.npy')

# Predictions (and matching labels) for the small data subsample
ground_truth_10 = np.load('predictions/ground_truth_10.npy')
bert_pred_10 = np.load('predictions/bert_predictions_10.npy')
svm_pred_10 = list(map(int, np.load('predictions/svm_predictions_10.npy')))
log_pred_10 = list(map(int, np.load('predictions/log_predictions_10.npy')))
################################################################
# Accuracy #
def _rounded_accuracy(y_true, y_pred):
    """Return accuracy_score(y_true, y_pred) rounded to two decimal places."""
    return round(accuracy_score(y_true, y_pred), 2)


# Models scored against the main ground-truth labels
bert_acc = _rounded_accuracy(ground_truth, bert_pred)
glove_acc = _rounded_accuracy(ground_truth, glove_pred)
elmo_acc = _rounded_accuracy(ground_truth, elmo_pred)
svm_acc = _rounded_accuracy(ground_truth, svm_pred)
log_acc = _rounded_accuracy(ground_truth, log_pred)
rf_acc = _rounded_accuracy(ground_truth, rf_pred)
lstm_acc = _rounded_accuracy(ground_truth, lstm_pred)

# Models scored against the small-subsample ground-truth labels
bert_10_acc = _rounded_accuracy(ground_truth_10, bert_pred_10)
svm_10_acc = _rounded_accuracy(ground_truth_10, svm_pred_10)
log_10_acc = _rounded_accuracy(ground_truth_10, log_pred_10)
################################################################

In [24]:
# Accuracy summary (scores computed against `ground_truth`), rendered as an
# HTML table with left-aligned model names.
model_names = [
    "BERT",
    "ELMO",
    "GloVe",
    "SVM TF-IDF",
    "Logistic Regression TF-IDF",
    "Random Forrest TF-IDF",
    "LSTM",
]
model_accuracies = [bert_acc, elmo_acc, glove_acc, svm_acc, log_acc, rf_acc, lstm_acc]

# One [name, accuracy] row per model, in the order listed above.
table = [[name, acc] for name, acc in zip(model_names, model_accuracies)]
accuracy_html = tabulate.tabulate(
    table,
    headers=('Model', 'Accuracy'),
    tablefmt='html',
    stralign='left',
)
display(HTML(accuracy_html))

Model,Accuracy
BERT,0.79
ELMO,0.73
GloVe,0.76
SVM TF-IDF,0.77
Logistic Regression TF-IDF,0.77
Random Forrest TF-IDF,0.73
LSTM,0.66


In [43]:
# Per-class F1 scores: with average=None, f1_score returns one score per label.
#
# When `labels` is omitted, each call derives its own label set from the
# ground truth plus that model's predictions, so the resulting arrays can
# differ in length or column meaning if a model never predicts some class
# (or predicts one that is absent from the ground truth). Passing one shared,
# sorted label list keeps every array aligned: position k always refers to
# the same class for every model.
class_labels = np.unique(np.concatenate([
    np.ravel(labels_or_preds)
    for labels_or_preds in (ground_truth, bert_pred, svm_pred, log_pred, glove_pred)
]))

bert_f1 = f1_score(ground_truth, bert_pred, labels=class_labels, average=None)
svm_f1 = f1_score(ground_truth, svm_pred, labels=class_labels, average=None)
log_f1 = f1_score(ground_truth, log_pred, labels=class_labels, average=None)
glove_f1 = f1_score(ground_truth, glove_pred, labels=class_labels, average=None)

In [67]:
# Interactive heatmap of per-class F1 scores: models along the x-axis,
# classes along the y-axis, cell colour = F1 score.
# The figure is written to class_f1scores.html.

from bokeh.io import output_file, show, save
from bokeh.models import (
    BasicTicker,
    ColorBar,
    ColumnDataSource,
    HoverTool,
    LinearColorMapper,
    PrintfTickFormatter,
)
from bokeh.plotting import figure
from bokeh.transform import transform

# Wide table: one row per model, one column per class.
# The column labels '1'..'23' assume each F1 array holds 23 per-class scores.
f1_wide = pd.DataFrame(
    [bert_f1, svm_f1, log_f1, glove_f1],
    index=['BERT', 'SVM-TFIDF', 'LOG-TFIDF', "GloVe"],
    columns=[str(i) for i in range(1, 24)])
# The index holds the model names and the columns hold the class labels, so
# name them accordingly (these names become the long-format column names).
f1_wide.index.name = 'Models'
f1_wide.columns.name = 'Classes'

# Reshape to long format: one (Models, Classes, value) row per heatmap cell.
df = f1_wide.stack().rename("value").reset_index()

# Output target for save()/show()
output_file("class_f1scores.html")

# Colour palette for the heatmap (swap in your own if desired)
colors = ['#ff6775','#ffb3ba','#ffdfba','#ffffba', '#bae1ff', '#baffc9', '#5d7f64']

# Add a mapper that spreads the palette linearly over the observed F1 range
mapper = LinearColorMapper(
    palette=colors, low=df.value.min(), high=df.value.max())

# Define the figure. `width`/`height` are used instead of the deprecated
# `plot_width`/`plot_height`, which were removed in Bokeh 3.0.
p = figure(
    width=1800,
    height=900,
    title="Class F-1 Scores",
    x_range=list(df.Models.drop_duplicates()),
    y_range=list(df.Classes.drop_duplicates()),
    toolbar_location=None,
    tools="",
    x_axis_location="above")

# One unit-sized rectangle per (model, class) cell, coloured by its F1 value
p.rect(
    x="Models",
    y="Classes",
    width=1,
    height=1,
    source=ColumnDataSource(df),
    line_color='#121914',
    fill_color=transform('value', mapper))

# Colour-bar legend with one tick per palette entry
color_bar = ColorBar(
    color_mapper=mapper,
    location=(0, 0),
    ticker=BasicTicker(desired_num_ticks=len(colors)))

p.add_layout(color_bar, 'right')

# Hover tooltip showing the score plus the class and model of the cell
tooltips = [('F1','@value'), ('Class', '@Classes'), ('Model', '@Models')]
p.add_tools(HoverTool(tooltips=tooltips))
save(p)
show(p)