In [6]:
import numpy as np
import json
import os

In [7]:
def print_to_file(to_file, filename='model_selection'):
    """Append the string form of ``to_file`` to the file ``filename``.

    Args:
        to_file: Any object; it is converted with ``str()`` before writing.
        filename: Path of the output file. It is opened in append mode, so
            existing content is kept and the file is created if missing.

    No newline is added automatically; callers embed ``\\n`` themselves.
    """
    # The context manager guarantees the handle is closed even if the
    # conversion or the write raises.
    with open(filename, 'a') as f:
        f.write(str(to_file))

In [8]:
# Configuration: directory holding one sub-directory per experiment, and the
# number of cross-validation folds read from each experiment's log files.
data_dir = 'log/'
num_folds = 5

# os.listdir returns entries in arbitrary order; sorting makes the experiment
# selected by the `break` below reproducible. Entries that are not directories
# are skipped because they cannot contain the per-experiment log files.
experiments = sorted(
    name for name in os.listdir(data_dir)
    if os.path.isdir(os.path.join(data_dir, name))
)
for experiment in experiments:
    experiment_dir = os.path.join(data_dir, experiment)

    # NOTE(review): only the first line of each log file is parsed, and single
    # quotes are swapped for double quotes before JSON decoding -- presumably
    # the files hold a Python dict repr. This breaks on values containing
    # apostrophes or True/False/None; confirm against the code writing the logs.
    with open(os.path.join(experiment_dir, 'infos')) as f:
        infos = json.loads(f.readline().replace("'", '"'))

    with open(os.path.join(experiment_dir, 'folds_performances')) as f:
        data = json.loads(f.readline().replace("'", '"'))

    # Re-key the per-fold performance lists by integer fold index.
    performances = {k: data[str(k)] for k in range(num_folds)}

    print_to_file('Experiment: {}'.format(experiment))
    print_to_file('\nInfos: {}'.format(infos))

    # "Statistical" best epoch: average the performance over the folds for
    # every epoch and take the first epoch with the highest average.
    num_epochs = infos['num_epochs']
    total_epoch_performace = [
        sum(performances[k][epoch] for k in range(num_folds)) / num_folds
        for epoch in range(num_epochs)
    ]
    best_epoch = total_epoch_performace.index(max(total_epoch_performace))
    print_to_file('\nStatistical best epoch: {}'.format(best_epoch))

    # "Mitchell" best epoch: find each fold's own best epoch (first maximum)
    # and report the mean of those epoch indices.
    total_max_performance = [
        performances[k].index(max(performances[k]))
        for k in range(num_folds)
    ]
    print_to_file('\nMitchell best epoch: {}'.format(sum(total_max_performance) / num_folds))

    # NOTE(review): the loop stops after the first experiment. The indented
    # cells that follow read `experiment`, `num_epochs` and `best_epoch` left
    # behind here, so they look like the continuation of this loop body.
    break

In [9]:
    # Read the confusion matrices logged for the current experiment; only the
    # first line of the file is parsed, after swapping single quotes for
    # double quotes so that it decodes as JSON.
    matrices_path = data_dir + experiment + '/confusion_matrices'
    with open(matrices_path) as f:
        data = json.loads(f.readline().replace("'", '"'))

    # Re-key the nested mapping by integer fold and integer epoch.
    conf_matrices = {
        k: {epoch: data[str(k)][str(epoch)] for epoch in range(num_epochs)}
        for k in range(num_folds)
    }

    # For every fold keep only the matrix recorded at the selected best epoch.
    conf_matrix = {k: conf_matrices[k][best_epoch] for k in range(num_folds)}

In [10]:
    # Side length of the (square) confusion-matrix region that is read below.
    num_classes = 10

    # Sum of all num_classes x num_classes entries of each fold's matrix.
    num_samples = [
        sum(
            conf_matrix[k][i][j]
            for i in range(num_classes)
            for j in range(num_classes)
        )
        for k in range(num_folds)
    ]

In [11]:
    # Per-fold accuracy: diagonal sum of the fold's confusion matrix divided
    # by the total count of that matrix.
    accuracy = [
        sum(conf_matrix[k][i][i] for i in range(num_classes)) / num_samples[k]
        for k in range(num_folds)
    ]

    # Report mean and standard deviation across folds.
    accuracy_per_fold = np.asarray(accuracy)
    print_to_file('\nAccuracy: {} +-{}'.format(accuracy_per_fold.mean(), accuracy_per_fold.std()))



In [12]:
    # Per-fold macro precision. For class i the numerator is the diagonal
    # entry conf_matrix[k][i][i] and the denominator is the sum of column i
    # of that fold's matrix (presumably rows are true labels and columns are
    # predictions -- confirm against the code that logs the matrices).
    macro_precision = [0] * num_folds
    precision = [0] * num_folds
    for k in range(num_folds):
        precision[k] = [0] * num_classes
        # The column totals must be rebuilt for every fold; keeping a single
        # accumulator across folds would inflate the denominators of every
        # fold after the first and under-estimate their precision.
        precision_total = [0] * num_classes
        for i in range(num_classes):
            precision_total[i] = sum(conf_matrix[k][j][i] for j in range(num_classes))
            if precision_total[i] != 0:
                precision[k][i] = conf_matrix[k][i][i] / precision_total[i]
            else:
                # Empty column: precision is undefined. The NaN propagates
                # into this fold's macro value and the statistics below.
                precision[k][i] = float('NaN')
            macro_precision[k] += precision[k][i] / num_classes

    print_to_file('\nMacro precision: {} +-{}'.format(np.asarray(macro_precision).mean(), np.asarray(macro_precision).std()))

In [13]:
    # Per-fold macro recall. For class i the numerator is the diagonal entry
    # conf_matrix[k][i][i] and the denominator is the sum of row i of that
    # fold's matrix (presumably rows are true labels -- confirm against the
    # code that logs the matrices).
    macro_recall = [0] * num_folds
    recall = [0] * num_folds
    for k in range(num_folds):
        recall[k] = [0] * num_classes
        # The row totals must be rebuilt for every fold; keeping a single
        # accumulator across folds would inflate the denominators of every
        # fold after the first and under-estimate their recall.
        recall_total = [0] * num_classes
        for i in range(num_classes):
            recall_total[i] = sum(conf_matrix[k][i][j] for j in range(num_classes))
            if recall_total[i] != 0:
                recall[k][i] = conf_matrix[k][i][i] / recall_total[i]
            else:
                # Empty row: recall is undefined. The NaN propagates into
                # this fold's macro value and the statistics below.
                recall[k][i] = float('NaN')
            macro_recall[k] += recall[k][i] / num_classes

    print_to_file('\nMacro recall: {} +-{}'.format(np.asarray(macro_recall).mean(), np.asarray(macro_recall).std()))

In [14]:
    # Per-fold macro F1, built from the per-class precision and recall values
    # computed earlier: F1 = 2 * P * R / (P + R), NaN when P + R equals zero.
    f = [[0] * num_classes for _ in range(num_folds)]
    macro_f = [0] * num_folds
    for k in range(num_folds):
        for i in range(num_classes):
            p_ki = precision[k][i]
            r_ki = recall[k][i]
            f[k][i] = (
                2 * p_ki * r_ki / (p_ki + r_ki)
                if p_ki + r_ki != 0
                else float('NaN')
            )
            # Each class contributes an equal 1/num_classes share.
            macro_f[k] += f[k][i] / num_classes

    # Mean and standard deviation across folds; the trailing blank lines
    # separate this report from whatever is appended to the file next.
    macro_f_per_fold = np.asarray(macro_f)
    print_to_file('\nMacro F1: {} +-{}\n\n'.format(macro_f_per_fold.mean(), macro_f_per_fold.std()))

In [15]:
# Confirmation for the notebook user; the statistics themselves were appended
# to the output file through print_to_file, not printed here.
print('Statistics saved to file.')

Statistics saved to file.
