In [1]:
import pandas as pd
from sklearn import metrics
import matplotlib.pyplot as plt
import numpy as np

In [6]:
def get_all_results(dataset_name, embeddings, classifiers, y_true, predictions_dir='predictions'):
    '''
    Compute accuracy, balanced accuracy and weighted F1 score for every
    (embedding, classifier) combination, print the resulting table and return it.

    Predictions for each combination are read from
    `{predictions_dir}/{dataset_name}/model_{embedding}_{classifier}.csv`,
    parsed without a header row.

    Parameters:
        dataset_name (str): Dataset name (sub-directory holding the prediction files).
        embeddings (list): Names of embedding methods.
        classifiers (list): Names of classifiers.
        y_true (Series): Real genres of the test data.
        predictions_dir (str): Root directory of the prediction files.
            Defaults to 'predictions', the previously hard-coded location.
    Returns:
        DataFrame: One row per (embedding, classifier) pair with the columns
            nlp_embedding, nlp_classifier, accuracy, balanced_accuracy and f1_score.
    '''
    # Fixed column order; also keeps the columns present when no combination is given.
    columns = ['nlp_embedding', 'nlp_classifier', 'accuracy', 'balanced_accuracy', 'f1_score']
    rows = []
    for emb_name in embeddings:
        for cls_name in classifiers:
            fname = f'{predictions_dir}/{dataset_name}/model_{emb_name}_{cls_name}.csv'
            # A single-column file becomes a 1-D Series, matching the shape of y_true;
            # a frame with several columns is passed on unchanged.
            y_pred = pd.read_csv(fname, header=None).squeeze('columns')
            rows.append({
                'nlp_embedding': emb_name,
                'nlp_classifier': cls_name,
                'accuracy': metrics.accuracy_score(y_true=y_true, y_pred=y_pred),
                'balanced_accuracy': metrics.balanced_accuracy_score(y_true=y_true, y_pred=y_pred),
                'f1_score': metrics.f1_score(y_true=y_true, y_pred=y_pred, average='weighted'),
            })
    results = pd.DataFrame(rows, columns=columns)
    # Printing is kept so existing callers that rely on the text output still get it.
    print(results)
    return results

In [7]:
# Parameters: the dataset to evaluate and the names of the embedding
# methods and classifiers whose predictions are scored.

dataset_name = "dataset_proc"
nlp_embeddings = [
    "smaller-bert",
    "glove",
]
nlp_classifiers = [
    "naive-bayes",
    "svm",
    "xgboost",
    "cnn",
]

In [8]:
# Load the test data of the selected dataset from its CSV file.

test_data = pd.read_csv(filepath_or_buffer=f'data/test/{dataset_name}.csv')

# The `genre` column of the test data serves as the true labels.
y_true = test_data.genre

In [9]:
# Report the metrics table for every embedding/classifier combination.
# The function prints the table itself; the trailing semicolon keeps the
# cell from echoing anything beyond that printed output.

get_all_results(
    dataset_name=dataset_name,
    embeddings=nlp_embeddings,
    classifiers=nlp_classifiers,
    y_true=y_true,
);

  nlp_embedding nlp_classifier  accuracy  balanced_accuracy  f1_score
0  smaller-bert    naive-bayes  0.379479           0.380968  0.328668
1  smaller-bert            svm  0.399837           0.391419  0.353035
2  smaller-bert        xgboost  0.375407           0.372651  0.374183
3  smaller-bert            cnn  0.415309           0.405351  0.401595
4         glove    naive-bayes  0.333876           0.353781  0.259667
5         glove            svm  0.320033           0.314322  0.295821
6         glove        xgboost  0.343648           0.346914  0.336383
7         glove            cnn  0.374593           0.378082  0.382742
