In [None]:
%matplotlib inline

from __future__ import absolute_import, print_function, unicode_literals

import numpy as np
import os
import pandas as pd

from itertools import product
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from tqdm import tqdm_notebook

In [None]:
# Name of the experiment to evaluate; it selects the input directory below.
experiment = 'experiment0'

# Directory holding one '<classifier>_<representation>_<corpus>.csv' file per combination.
results_path = '../../results/%s' % (experiment,)

# Values combined (as a cartesian product) to locate each results file.
classifiers = [
    'baseline',
    'decision_tree',
    'log',
    'mlp_5000',
    'naive_bayes',
    'svm',
]
representations = [
    'handcrafted',
    'hashed',
    'negative_hashed',
]
corpora = [
    'sensem',
    'semeval',
]

In [None]:
# One metrics record (dict) is appended per (classifier, representation, corpus, lemma, split).
results = []
# Labels of the current lemma's 'train' split and how many times each one occurs there.
labels = []
labels_count = []

# Every classifier/representation/corpus combination is tried; combinations whose
# results file cannot be opened are skipped.
combinations = product(classifiers, representations, corpora)
n_combinations = len(classifiers) * len(representations) * len(corpora)

for classifier, representation, corpus in tqdm_notebook(combinations, total=n_combinations):
    path = os.path.join(results_path, '%s.csv' % ('_'.join([classifier, representation, corpus])))
    try:
        df = pd.read_csv(path)
    except (IOError, OSError):
        # IOError is listed explicitly because it is a distinct class on Python 2
        # (on Python 3 it is an alias of OSError).
        continue

    # Collect each lemma's training labels up front, so the label inventory used for a
    # group never depends on the order in which groupby yields the groups (previously a
    # non-train group visited before its lemma's train group reused the labels of
    # whatever lemma had been processed last).
    train_labels = {
        train_lemma: np.unique(train_df.true, return_counts=True)
        for train_lemma, train_df in df[df.corpus == 'train'].groupby('lemma', sort=False)
    }

    for (lemma, corpus_split), lcdf in df.groupby(['lemma', 'corpus'], sort=False):
        if lemma in train_labels:
            labels, labels_count = train_labels[lemma]
        else:
            # Lemma without any 'train' rows: use the labels present in this split
            # rather than labels left over from a different lemma.
            labels, labels_count = np.unique(lcdf.true, return_counts=True)

        rdf = {'lemma': lemma, 'corpus': '%s.%s' % (corpus, corpus_split), 'num_classes': labels.shape[0],
               'classifier': classifier, 'representation': representation}
        rdf['accuracy'] = accuracy_score(lcdf.true, lcdf.prediction)

        # Precision and recall under each averaging scheme (f-score and support are discarded).
        for average in ('macro', 'micro', 'weighted'):
            avg_precision, avg_recall = precision_recall_fscore_support(
                lcdf.true, lcdf.prediction, average=average)[:2]
            rdf['%s_precision' % average] = avg_precision
            rdf['%s_recall' % average] = avg_recall

        if labels.shape[0] > 1:
            # Per-label precision/recall, aligned with `labels`.
            precision, recall = precision_recall_fscore_support(
                lcdf.true, lcdf.prediction, average=None, labels=labels)[:2]
            # NOTE(review): the class singled out here is the one with the highest recall,
            # not the one with the highest count in `labels_count` (which is never used) --
            # confirm this is the intended definition of "most frequent class".
            top_idx = np.argmax(recall)
            # `np.bool` was removed in NumPy 1.24; the builtin `bool` is the same dtype.
            mask = np.ones(recall.shape, dtype=bool)
            mask[top_idx] = False
            rdf['pmfc'] = precision[top_idx]      # precision of the singled-out class
            rdf['rmlfc'] = recall[mask].mean()    # mean recall over all remaining classes
        else:  # Ill defined metrics for such case
            rdf['pmfc'] = np.nan
            rdf['rmlfc'] = np.nan

        results.append(rdf)

# Column order of the exported table: identifying fields first, then the metrics.
columns = [
    'classifier', 'representation', 'lemma', 'num_classes', 'corpus',
    'accuracy',
    'macro_precision', 'macro_recall',
    'pmfc', 'rmlfc',
    'micro_precision', 'micro_recall',
    'weighted_precision', 'weighted_recall',
]

# Convert the collected records into a DataFrame (rebinding `results`), write it to
# ./data/<experiment>.csv without the index and with floats in '%.2e' notation,
# then display the first rows as the cell output.
results = pd.DataFrame(data=results, columns=columns)
results.to_csv('./data/%s.csv' % experiment, float_format='%.2e', index=False)
results.head()