In [None]:
import os
import pickle
import pandas as pd
import numpy as np
import itertools

from scipy.stats import pearsonr
import matplotlib.pyplot as plt

In [None]:
DATAFILE = './data/Bayesian_hyperparameter_tuning.pickle'

# Fail fast with an explicit error: without the pickle there is nothing to
# analyse, and continuing would only surface later as an unrelated NameError.
if not os.path.isfile(DATAFILE):
    raise FileNotFoundError('come back another day ! no results found (expected ' + DATAFILE + ')')

# NOTE(security): pickle.load can execute arbitrary code, so only load result
# files that were produced by a trusted run of this project.
# The context manager guarantees the file handle is closed after reading.
with open(DATAFILE,'rb') as pickle_file:
    raw_results = pickle.load(pickle_file)
print('data to be analysed from ' + str(len(raw_results)) + ' hyperparameter combinations\n\n')

#convert to dataframe for easier everything.
results = pd.DataFrame(raw_results)

print('\n'.join(['results dataframe contains the following columns:'] + list(results.columns)))

Each row of `results` represents a model and its performance. However, before even considering the choice of hyperparameters (dropout position, r and N), we need to decide which METRIC we should be selecting on. That is, which of the four candidate predictors (mean pairwise DSC, global DSC, mean pairwise IOU, global IOU) best predicts the TRUE IOU/DSC?

In [None]:
# Names of the columns of `results` that are treated as ground truth.
trues = [
    'true IOU',
    'true DSC',
]

# Names of the columns of `results` that are evaluated as candidate
# predictors of the ground-truth columns above.
predictions = [
    'mean pairwise DSC',
    'global DSC',
    'mean pairwise IOU',
    'global IOU',
]

def clean_r2(predictor,result):
    """Return the squared Pearson correlation (r^2) between two arrays.

    NaN entries in either input are treated as 0 so that every pair of
    arrays is scored in a consistent way. The inputs themselves are never
    modified: the substitution is done on float copies.

    Parameters
    ----------
    predictor, result : array-like of numbers
        Two equal-length sequences to correlate.

    Returns
    -------
    float
        The square of the Pearson correlation coefficient reported by
        scipy.stats.pearsonr.
    """
    predictor = np.asarray(predictor, dtype=float)
    result = np.asarray(result, dtype=float)

    # np.where builds new arrays, so the caller's data keeps its NaNs.
    predictor = np.where(np.isnan(predictor), 0.0, predictor)
    result = np.where(np.isnan(result), 0.0, result)

    #return the actual r2 value
    return pearsonr(predictor,result)[0]**2
    
def get_r2(trueName,predictionName):
    """Compute one r^2 value per row of the global `results` frame.

    For every row, the cells in columns `trueName` and `predictionName` are
    converted to numpy arrays and scored with `clean_r2`.
    Each cell is presumably an array-like of per-sample scores - TODO confirm.

    Returns whatever `DataFrame.apply(..., axis=1)` yields for the two
    selected columns (one r^2 per row).
    """

    def _row_r2(row):
        #FIXME - when something better shows up, remove np.array conversion (should already be done)
        truth = np.array(row[trueName])
        estimate = np.array(row[predictionName])
        return clean_r2(truth, estimate)

    column_pair = results.loc[:,[trueName,predictionName]]
    return column_pair.apply(_row_r2, axis=1)

def mae(predictor,result):
    """Return the mean absolute error between predictions and true values.

    NaN entries in either input are treated as 0 before the error is
    computed. The inputs themselves are never modified: the substitution is
    done on float copies.

    Parameters
    ----------
    predictor, result : array-like of numbers
        Two sequences of matching (or broadcastable) shape.

    Returns
    -------
    float
        Mean of the element-wise absolute differences.
    """
    predictor = np.asarray(predictor, dtype=float)
    result = np.asarray(result, dtype=float)

    # np.where builds new arrays, so the caller's data keeps its NaNs.
    predictor = np.where(np.isnan(predictor), 0.0, predictor)
    result = np.where(np.isnan(result), 0.0, result)

    ae = np.abs(result-predictor)
    return np.mean(ae)

def get_mae(trueName,predictionName):
    """Compute one mean-absolute-error value per row of the global `results` frame.

    For every row, the cells in columns `trueName` and `predictionName` are
    converted to numpy arrays and scored with `mae`.
    Each cell is presumably an array-like of per-sample scores - TODO confirm.

    Returns whatever `DataFrame.apply(..., axis=1)` yields for the two
    selected columns (one MAE per row).
    """

    def _row_mae(row):
        #FIXME - when something better shows up, remove np.array conversion (should already be done)
        truth = np.array(row[trueName])
        estimate = np.array(row[predictionName])
        return mae(truth, estimate)

    column_pair = results.loc[:,[trueName,predictionName]]
    return column_pair.apply(_row_mae, axis=1)

In [None]:
# 3 rows x 8 columns: one column per (true metric, predicted metric) pair.
# Row 0: histogram of R^2, row 1: histogram of MAE, row 2: MAE vs R^2 scatter.
fig, axes = plt.subplots(3, 8, figsize=(24, 12))

# 20 equal-width bins with edges 0, 0.05, ..., 1.0 inclusive.
# np.arange(0, 1, 0.05) stops at 0.95, which silently leaves values in
# (0.95, 1] out of the histograms even though the axis runs up to 1.
bins = np.linspace(0, 1, 21)

for ind,combination in enumerate(itertools.product(trues,predictions)):

    r2_ax, mae_ax, scatter_ax = axes[:, ind]

    #first row, calculate and show the R^2 distribution
    r2s = get_r2(*combination)
    r2_ax.hist(r2s,bins=bins,orientation='horizontal')
    r2_ax.set_ylim([0,1])
    r2_ax.set_title('/'.join(combination))
    if ind==0:
        r2_ax.set_ylabel('R^2')

    #second row, distribution of the mean absolute error
    maes = get_mae(*combination)
    mae_ax.hist(maes,bins=bins,orientation='horizontal')
    mae_ax.set_ylim([0,1])
    if ind==0:
        mae_ax.set_ylabel('MAE')

    #third row, relationship between the two scores for every model
    scatter_ax.scatter(maes,r2s)
    scatter_ax.set_ylim([0,1])
    scatter_ax.set_xlim([0,0.7])
    scatter_ax.set_xlabel('MAE')
    if ind==0:
        scatter_ax.set_ylabel('R^2')