# Metrics evaluation notebook

_Alex Malz (NYU)_
_Renee Hlozek (Toronto)_
_Rahul Biswas (Stockholm University)_
_Tarek Alam (UCL)_
_Rafael Martinez-Galarza (Harvard)_
_Anita Bahmanyar (Toronto)_

This notebook runs tests and makes plots for the real classifiers.

In [None]:
# import string
# import itertools
# import random
# import os
# import csv

import numpy as np
import pandas as pd
from pycm import ConfusionMatrix

import proclam
from proclam import *

In [None]:
import matplotlib as mpl
mpl.use('Agg')
mpl.rcParams['text.usetex'] = False
mpl.rcParams['mathtext.rm'] = 'serif'
mpl.rcParams['font.family'] = 'serif'
mpl.rcParams['font.serif'] = 'Times New Roman'
mpl.rcParams['axes.titlesize'] = 16
mpl.rcParams['axes.labelsize'] = 14
mpl.rcParams['savefig.dpi'] = 250
mpl.rcParams['savefig.format'] = 'pdf'
mpl.rcParams['savefig.bbox'] = 'tight'
import matplotlib.pyplot as plt
%matplotlib inline

This notebook defines the metrics (and the corresponding truth tables) to be evaluated on the input files, and produces plots from the resulting scores.

In [None]:
# Names of the metrics to evaluate; the (currently commented-out) evaluation
# loop looks each name up as an attribute of `proclam.metrics`.
metricslist = ['Brier', 'LogLoss']
# One plot colour per metric, indexed in parallel with `metricslist`.
colors = ['b', 'r']
# Root directory under which each dataset's sub-directory is expected.
dirname = 'examples/'
# Scatter marker shapes, indexed by metric position in `per_metric_helper`.
markerlist = ['o', 's', '*']

## Read in data

Loading in the data from the Mystery Dataset

In [None]:
# Description of the "Mystery" dataset: display label plus classifier names.
mystery = {
    'label': 'Unknown',
    'names': ['RandomForest', 'KNeighbors', 'MLPNeuralNet'],
}

Loading in the data from SNPhotCC

In [None]:
# Description of the SNPhotCC dataset. Classifier names are every
# feature-set prefix combined with every algorithm suffix, prefix-major.
prefixes = ['Templates', 'Wavelets']
suffixes = ['BoostForest', 'KNN', 'NB', 'NeuralNetwork', 'SVM']
snphotcc = {
    'label': 'SNPhotCC',
    'names': [head + tail for head in prefixes for tail in suffixes],
}

Loading in the data from ProClaM

In [None]:
# Description of the ProClaM dataset: display label plus classifier names.
plasticc = {
    'label': 'ProClaM',
    'names': [
        'Idealized',
        'Guess',
        'Tunnel',
        'Broadbrush',
        'Cruise',
        'SubsumedTo',
        'SubsumedFrom',
    ],
}

In [None]:
# old_snphotcc_names = []
# for prefix in ['templates_', 'wavelets_']:
#     for suffix in ['boost_forest', 'knn', 'nb', 'neural_network', 'svm']:
#         old_snphotcc_names.append(prefix+suffix+'.dat')

# for i in range(len(snphotcc_names)):
#     name = old_snphotcc_names[i]
#     fileloc = dirname+'classifications/'+name
#     snphotcc_info = pd.read_csv(fileloc, sep=' ')
#     full = snphotcc_info.set_index('Object').join(truth_snphotcc.set_index('Object'))
#     name = snphotcc_names[i]
    
#     truth = full['Type'] - 1
#     snphotcc_truth_table = proclam.metrics.util.det_to_prob(truth)
#     fileloc = 'examples/'+name+'/truth_table_'+name+'.csv'
#     with open(fileloc, 'wb') as truth_place:
#         np.savetxt(fileloc, snphotcc_truth_table, delimiter=' ')
    
#     probs = full[['1', '2', '3']]
#     fileloc = 'examples/'+name+'/predicted_prob_'+name+'.csv'
#     probs.to_csv(fileloc, sep=' ', index=False, header=True)

In [None]:
# more_names = snphotcc_names
# more_classifications = ['%s/predicted_prob_%s.csv'%(name,name) for name in snphotcc_names]
# more_truth_tables = ['%s/truth_table_%s.csv'%(name,name) for name in snphotcc_names]
# more_class_pairs = zip(more_classifications, more_truth_tables)

In [None]:
def make_class_pairs(data_info_dict):
    """Pair each classification file with its truth-table file.

    Parameters
    ----------
    data_info_dict : dict
        Must contain the keys 'classifications' and 'truth_tables', each an
        iterable of file names in matching order.

    Returns
    -------
    list of tuple
        ``(classification_file, truth_table_file)`` pairs, truncated to the
        shorter of the two inputs (standard ``zip`` behaviour).
    """
    # Materialise the pairs: a bare Python 3 `zip` is a one-shot iterator, so
    # storing it and looping over it a second time (e.g. re-running a cell)
    # would silently iterate over nothing.
    return list(zip(data_info_dict['classifications'], data_info_dict['truth_tables']))

def make_file_locs(data_info_dict, base_dir=None):
    """Add directory and file-name entries to a dataset description, in place.

    Parameters
    ----------
    data_info_dict : dict
        Must contain 'label' (str) and 'names' (list of str). It is modified
        in place: the keys 'dirname', 'classifications' and 'truth_tables'
        are added or overwritten.
    base_dir : str, optional
        Root directory prefix (including trailing separator). When omitted,
        the notebook-level `dirname` variable is used, as before.

    Returns
    -------
    dict
        The same `data_info_dict` object that was passed in.
    """
    if base_dir is None:
        # Backward-compatible default: fall back to the notebook-level setting.
        base_dir = dirname
    names = data_info_dict['names']
    data_info_dict['dirname'] = base_dir + data_info_dict['label'] + '/'
    # File names are relative to data_info_dict['dirname']; each classifier
    # has its own sub-directory named after it.
    data_info_dict['classifications'] = [
        '%s/predicted_prob_%s.csv' % (name, name) for name in names
    ]
    data_info_dict['truth_tables'] = [
        '%s/truth_table_%s.csv' % (name, name) for name in names
    ]
    return data_info_dict

In [None]:
# Attach file locations and (classification, truth table) file-name pairs
# to every dataset description.
for dataset in (mystery, snphotcc, plasticc):
    dataset = make_file_locs(dataset)
    pairs = make_class_pairs(dataset)
    dataset['class_pairs'] = pairs

## Make the plots

In [None]:
def plot_cm(probs, truth, name, loc=''):
    """Draw a confusion matrix and save it as ``<loc><name>_cm.png``.

    Parameters
    ----------
    probs, truth
        Passed straight to ``proclam.metrics.util.prob_to_cm``.
    name : str
        Used as the figure title and as the stem of the output file name.
    loc : str, optional
        Prefix (typically a directory ending in '/') for the output file.
    """
    confusion = proclam.metrics.util.prob_to_cm(probs, truth)
    # The matrix is transposed for display; the colour scale is fixed to [0, 1].
    plt.matshow(confusion.T, vmin=0., vmax=1.)
    plt.xlabel('predicted class')
    plt.ylabel('true class')
    plt.colorbar()
    plt.title(name)
    outfile = loc + name + '_cm.png'
    plt.savefig(outfile)
    # Close the figure so repeated calls do not accumulate open figures.
    plt.close()

In [None]:
def read_class_pairs(pair, dataset, cc):
    """Load one classifier's probabilities and truth labels; save its confusion matrix.

    Parameters
    ----------
    pair : sequence
        ``(classification_file, truth_table_file)``, both relative to
        ``dataset['dirname']``.
    dataset : dict
        Dataset description providing 'dirname', 'label' and 'names'.
    cc : int
        Index of this classifier within ``dataset['names']``.

    Returns
    -------
    pmat : numpy.ndarray
        The values read from the classification file.
    tvec : numpy.ndarray
        Column index of every entry equal to 1 in the truth table.
    """
    loc = dataset['dirname']
    classifier = dataset['names'][cc]
    title = dataset['label'] + ' ' + classifier
    clfile = pair[0]
    truthfile = pair[1]
    # sep=r'\s+' is the documented equivalent of the deprecated
    # delim_whitespace=True option.
    # NOTE(review): read_csv treats the first line of each file as a header by
    # default - confirm the truth-table files actually carry a header row.
    pmat = pd.read_csv(loc + clfile, sep=r'\s+').values
    truth_values = pd.read_csv(loc + truthfile, sep=r'\s+').values
    # assumes each truth-table row contains exactly one entry equal to 1, so
    # that tvec lines up with the rows of pmat - TODO confirm
    tvec = np.where(truth_values == 1)[1]
    # Side effect: the confusion-matrix plot is written into the classifier's
    # own sub-directory.
    plot_cm(pmat, tvec, title, loc=loc + classifier + '/')
    return pmat, tvec

In [None]:
def make_patch_spines_invisible(ax):
    """Turn the frame on for `ax`, then hide its background patch and all spines."""
    ax.set_frame_on(True)
    ax.patch.set_visible(False)
    for spine in ax.spines.values():
        spine.set_visible(False)
        
def per_metric_helper(ax, n, data, metric_names, codes, shapes, colors):
    """Scatter the scores of metric `n` against classifier index.

    The first metric (n == 0) is drawn on `ax` itself. Every later metric
    gets its own twin y-axis; from the third metric on, that axis' right
    spine is shifted outwards so the scales do not overlap.

    Parameters
    ----------
    ax : axes object
        Base axes of the figure.
    n : int
        Index of the metric to draw.
    data : indexable
        ``data[n]`` holds the scores of metric `n`, one per classifier.
    metric_names, shapes, colors : sequences
        Label, marker and colour for each metric, indexed by `n`.
    codes : sequence
        Classifier names; only its length is used here.

    Returns
    -------
    tuple
        ``(ax, ax_n, handle)``: the base axes, the axes actually drawn on,
        and the scatter handle.
    """
    base_x = np.arange(len(codes))
    position = n + 1
    # Small horizontal shift per metric so markers do not sit on top of each other.
    shift = 0.1 * (position - 2)

    if position > 1:
        target = ax.twinx()
        label_rotation, label_pad = 270, 15.
    else:
        target = ax
        label_rotation, label_pad = 90, 0.

    if position > 2:
        # Push the extra y-axis outwards and show only its right spine.
        target.spines["right"].set_position(("axes", 1. + 0.1 * (position - 1)))
        make_patch_spines_invisible(target)
        target.spines["right"].set_visible(True)

    scores = data[n]
    x_positions = base_x + shift * np.ones_like(scores)
    handle = target.scatter(
        x_positions, scores,
        marker=shapes[n], s=10, color=colors[n], label=metric_names[n],
    )
    target.set_ylabel(metric_names[n], rotation=label_rotation, fontsize=14, labelpad=label_pad)
    return (ax, target, handle)

def metric_plot(dataset, metric_names, shapes, colors):
    """Plot every metric's score per classifier and save the figure.

    Classifiers are placed along the x-axis; each metric is drawn by
    `per_metric_helper` on its own y-axis. The figure is written to
    ``<dirname><label>_results.png`` and left open afterwards.

    Parameters
    ----------
    dataset : dict
        Must provide 'names' (classifier names), 'results' (scores indexed
        as ``results[metric_index]``), 'label' and 'dirname'.
    metric_names : sequence of str
        One name per metric; also used as legend entries.
    shapes, colors : sequences
        Marker shape and colour per metric.

    Returns
    -------
    None
    """
    codes = dataset['names']
    data = dataset['results']
    title = dataset['label']
    fileloc = dataset['dirname'] + dataset['label'] + '_results.png'
    xs = np.arange(len(codes))
    fig, ax = plt.subplots()
    fig.subplots_adjust(right=1.)
    handles = []
    # Most recently created axes; the legend goes here, as it did when the
    # pyplot "current axes" was used.
    top_ax = ax
    for n in range(len(metric_names)):
        (ax, top_ax, handle) = per_metric_helper(ax, n, data, metric_names, codes, shapes, colors)
        handles.append(handle)
    # Address the base axes explicitly. After twinx() the pyplot "current
    # axes" is the twin, whose x-axis is hidden, so plt.xlabel / plt.xticks
    # would label an axis that is never drawn.
    ax.set_xticks(xs)
    ax.set_xticklabels(codes)
    for tick in ax.get_xticklabels():
        tick.set_rotation(90)
    ax.set_xlabel('Classifiers', fontsize=14)
    top_ax.legend(handles, metric_names)
    fig.suptitle(title)
    fig.savefig(fileloc)
    return

Make a plot with the classifiers on the x-axis and the metric scores on the y-axis.


In [None]:
# For every dataset, load each classifier's output, save its confusion-matrix
# plot, and print proclam's rates next to pycm's for comparison.
for dataset in [mystery, snphotcc, plasticc]:
    # One row per metric, one column per classifier. np.empty leaves the
    # values uninitialised, and nothing fills them while the metric loop
    # below stays commented out.
    data = np.empty((len(metricslist), len(dataset['names'])))
    for cc, pair in enumerate(dataset['class_pairs']):
        # read_class_pairs also writes the confusion-matrix plot to disk.
        probm, truthv = read_class_pairs(pair, dataset, cc)#loc=dataset['dirname'], title=dataset['label']+' '+dataset['names'][cc])
#         plot_cm(probm, truthv, str(cc), loc='./sandbox/')
        # presumably converts probabilities to deterministic class labels - confirm against proclam
        det = proclam.metrics.util.prob_to_det(probm)
        cm = proclam.metrics.util.prob_to_cm(probm, truthv, per_class_norm=False, vb=False)
        # `rates` is not used afterwards; the call is made with vb=True.
        rates = proclam.metrics.util.cm_to_rate(cm, vb=True)
#         print(rates)
        # Independent cross-check: build the same rates with pycm.
        compare = ConfusionMatrix(truthv, det)
        printout = proclam.metrics.util.RateMatrix(compare.TPR, compare.FPR, compare.FNR, compare.TNR)
        print('for comparison: ' + str(printout))
        # Disabled metric evaluation, which would fill `data` and feed metric_plot:
#         for count, metric in enumerate(metricslist):
#             D = getattr(proclam.metrics, metric)()
#             hm = D.evaluate(probm, truthv)
#             data[count][cc] = hm
#     dataset['results'] = data
    
#     metric_plot(dataset, metricslist, markerlist, colors)

In [None]:
# more_data = np.empty((len(metricslist), len(more_names)))
# for cc, pair in enumerate(more_class_pairs):
#     probm, truthv = read_class_pairs(pair, dirname)
#     for count, metric in enumerate(metricslist):
#         D = getattr(proclam.metrics, metric)()
#         hm = D.evaluate(probm, truthv)
#         more_data[count][cc] = hm

In [None]:
# metric_plot(more_names, metricslist, more_data, markerlist, colors, title='SNPhotCC', fileloc=dirname+'snphotccdata.png')

In [None]:
# data = np.empty((len(metricslist), len(names)))
# for cc, pair in enumerate(class_pairs):
#     probm, truthv = read_class_pairs(pair, dirname)
#     for count, metric in enumerate(metricslist):
#         D = getattr(proclam.metrics, metric)()
#         hm = D.evaluate(probm, truthv)
#         data[count][cc] = hm

In [None]:
# metric_plot(names, metricslist, data, markerlist, colors, title='Mystery Dataset', fileloc=dirname+'mysterydata.png')