In [64]:
import sklearn
import sklearn.metrics as metrics
from sklearn.preprocessing import MultiLabelBinarizer

import confusionmatrix as cmm
import numpy as np
from glob import glob
import sys
from time import time
import pickle
import os

In [43]:
def sizeof_fmt(num, suffix='B'):
    for unit in ['','Ki','Mi','Gi','Ti','Pi','Ei','Zi']:
        if abs(num) < 1024.0:
            return "%3.1f%s%s" % (num, unit, suffix)
        num /= 1024.0
    return "%.1f%s%s" % (num, 'Yi', suffix)

In [6]:
def get_label_names(file = 'reference/classes.sorted.real_names.txt'):
    with open(file) as f:
        return [x.strip() for x in f.readlines() if x.strip() != '']

In [None]:
def get_labels_of_file(file, force_reload = False):
    pickle_name = file + '.pickle'
    if os.path.exists(pickle_name) and not force_reload:
        with open(pickle_name, 'rb') as f:
            return pickle.load(f)
    else:
        with open(file) as f:
            next(f)
            els= [[int(y.split(':')[0]) for y in x.split(' ')] for x in f.read().split('\n') if x.strip() != '']
            
            with open(pickle_name, 'wb') as f:
                pickle.dump(els, f)
            return els


In [68]:
def get_scores(y_true, y_pred_filename, labels):
    y_pred = get_labels_of_file(y_pred_filename, force_reload = False)
    assert(len(y_pred) == len(y_true)) #, 'Length of y_pred and y_true must be same! train/test wrong?')
    mlb = MultiLabelBinarizer().fit([range(len(labels))])
    y_pred_t = mlb.transform(y_true)
    y_true_t = mlb.transform(y_pred)
    return (
            metrics.classification_report(y_true_t, y_pred_t),
            metrics.f1_score(y_true_t, y_pred_t, average = 'macro'),
            metrics.coverage_error(y_true_t, y_pred_t)
    )

In [21]:
labels = get_label_names()
y_true_train = get_labels_of_file('reference/classes.real.train.txt', False)
y_true_test = get_labels_of_file('reference/classes.real.test.txt', False)

In [70]:
for file in glob('results/*_results.txt'):
    y_true_used = y_true_test if file.count('test_results') == 1 else y_true_train
    file_size = os.path.getsize(file)
    print("# {} ({})".format(file, sizeof_fmt(file_size)))
    sys.stdout.flush()
    scores = get_scores(y_true_used, file, labels = labels)

    print(scores)
    sys.stdout.flush()

# results/START_TREE__0__NUM_TREE__1__BIAS__1.0__LOG_LOSS_COEFF__1.0__MAX_LEAF__1__LBL_PER_LEAF__1_test_results.txt (51.0KiB)


  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


('             precision    recall  f1-score   support\n\n          0       0.00      0.00      0.00         1\n          1       0.27      0.60      0.37         5\n          2       0.52      0.74      0.61        31\n          3       0.00      0.00      0.00         0\n          4       0.00      0.00      0.00         0\n          5       0.22      0.40      0.29         5\n          6       0.25      1.00      0.40         1\n          7       0.14      0.15      0.15        13\n          8       0.20      0.25      0.22         4\n          9       0.00      0.00      0.00         0\n         10       0.00      0.00      0.00         3\n         11       0.18      0.20      0.19        10\n         12       0.00      0.00      0.00         0\n         13       0.08      0.33      0.13         3\n         14       0.55      0.86      0.67         7\n         15       0.00      0.00      0.00         0\n         16       0.52      0.85      0.65        13\n         17       0.25  