In [None]:
import pandas as pd
from sklearn import metrics
import matplotlib.pyplot as plt
import numpy as np

In [None]:
def get_all_results(dataset_name, embeddings, classifiers, y_true):
    """Score the saved predictions of every embedding/classifier pair.

    For each combination, predictions are read from
    ``{dataset_name}/predictions/model_{embedding}_{classifier}.csv`` (parsed
    without a header row) and compared against ``y_true``.

    Args:
        dataset_name: Leading path component of the predictions directory.
        embeddings: Embedding names used to build the file names.
        classifiers: Classifier names used to build the file names; iterated
            once per embedding.
        y_true: Ground-truth labels handed to the scikit-learn metrics.

    Returns:
        pandas.DataFrame with one row per (embedding, classifier) pair, in
        iteration order, with columns ``nlp_embedding``, ``nlp_classifier``,
        ``accuracy``, ``balanced_accuracy`` and ``f1_score`` (weighted
        average).
    """
    columns = [
        'nlp_embedding',
        'nlp_classifier',
        'accuracy',
        'balanced_accuracy',
        'f1_score',
    ]
    rows = []
    for emb_name in embeddings:
        for cls_name in classifiers:
            # NOTE(review): the confusion-matrix cell of this notebook reads from
            # 'predictions/{dataset_name}/...' instead; confirm which directory
            # layout is the real one.
            fname = f'{dataset_name}/predictions/model_{emb_name}_{cls_name}.csv'
            y_pred = pd.read_csv(fname, header=None)
            rows.append({
                'nlp_embedding': emb_name,
                'nlp_classifier': cls_name,
                'accuracy': metrics.accuracy_score(y_true=y_true, y_pred=y_pred),
                'balanced_accuracy': metrics.balanced_accuracy_score(
                    y_true=y_true, y_pred=y_pred),
                'f1_score': metrics.f1_score(
                    y_true=y_true, y_pred=y_pred, average='weighted'),
            })
    # Explicit columns keep the schema stable even when no pair was scored.
    # The table must be returned: a bare expression at the end of a function
    # body is discarded, so callers would otherwise receive None.
    return pd.DataFrame(rows, columns=columns)

In [None]:
def draw_conf_matrix(y_true, y_pred):
    """Plot the confusion matrix of ``y_pred`` against ``y_true``.

    The classes shown are the unique values found in ``y_true``. Cell counts
    are rendered with the ``'.0f'`` format on a 14x14 figure, which is then
    shown via ``plt.show()``.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels.
    """
    figure, axes = plt.subplots(figsize=(14, 14))
    class_labels = np.unique(y_true)
    matrix = metrics.confusion_matrix(y_true, y_pred, labels=class_labels)
    metrics.ConfusionMatrixDisplay(
        confusion_matrix=matrix,
        display_labels=class_labels,
    ).plot(ax=axes, values_format='.0f')
    plt.show()

In [None]:
# Dataset whose test split and saved predictions are evaluated below.
dataset_name = 'small_balanced'

# Embedding and classifier names; every combination of the two is scored.
nlp_embeddings = [
    'bert',
    'small-bert',
    'glove',
    'word2vec',
]
nlp_classifiers = [
    'naive-bayes',
    'svm',
    'xgboost',
    'cnn',
]

In [None]:
# Load the test split of the selected dataset; its `genre` column supplies
# the reference labels used by the evaluation cells.
test_data = pd.read_csv(f'data/test/{dataset_name}.csv')
y_true = test_data['genre']

In [None]:
# Score every embedding/classifier combination against the test labels.
get_all_results(
    dataset_name=dataset_name,
    embeddings=nlp_embeddings,
    classifiers=nlp_classifiers,
    y_true=y_true,
)

In [None]:
# Confusion matrix for a single model: small-bert embedding + cnn classifier.
# NOTE(review): this reads from 'predictions/{dataset_name}/...', whereas
# get_all_results reads from '{dataset_name}/predictions/...'; confirm which
# directory layout is the real one.
fname = f'predictions/{dataset_name}/model_small-bert_cnn.csv'
y_pred = pd.read_csv(
    fname,
    header=None,
)
draw_conf_matrix(y_true=y_true, y_pred=y_pred)