# Results

In [None]:
import os
from pathlib import Path
import time
from datetime import datetime
import itertools

import numpy as np
import pandas as pd
from scipy import stats

import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter
import matplotlib.ticker as mtick
import seaborn as sns
from sklearn.model_selection import KFold, RepeatedStratifiedKFold
from sklearn import preprocessing
from sklearn.metrics import confusion_matrix, roc_curve, roc_auc_score

import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras.models import model_from_json

In [None]:
# Seaborn theme and the colour palettes shared by the plots in this notebook
sns.set(style='whitegrid')
colors = sns.color_palette()

# Palette for the untuned models: 'muted' with its first two colours swapped
clr_model_all = sns.color_palette('muted')
clr_model_all.insert(1, clr_model_all.pop(0))

# Palette for the tuned models: the same swap, after which the leading colour
# is moved to the end of the palette
clr_model_tuned = sns.color_palette('muted')
clr_model_tuned.insert(1, clr_model_tuned.pop(0))
clr_model_tuned.append(clr_model_tuned.pop(0))

clrdogs = sns.color_palette(['red', 'green', 'yellow', 'blue'])

# Change the condition to True to preview both model palettes
if False:
    for palette_name, palette in (('clr_model_all', clr_model_all),
                                  ('clr_model_tuned', clr_model_tuned)):
        print(palette_name)
        sns.palplot(palette)

sns.set_palette(clr_model_all)

In [None]:
def format_plot(ax, plt):
    ''' Apply the common axis formatting of the accuracy plots.
    Parameters
    ax : axes to format (labels and y-axis tick formatter are set on it)
    plt : pyplot-like object providing ylim() and tight_layout()
    '''
    # Show y-axis ticks as whole percentages
    percent_formatter = FuncFormatter('{0:.0%}'.format)
    ax.yaxis.set_major_formatter(percent_formatter)
    ax.set_xlabel('Dataset')
    ax.set_ylabel('Validation accuracy')
    plt.ylim(bottom=0.35, top=1.05)
    plt.tight_layout()
    
def print_stats(data, name):
    ''' Print the quartiles and the mean of the 'val_acc' column of data, headed by name '''
    val_acc = data['val_acc']
    print(name,  ':')
    print('Quantiles:\n', val_acc.quantile([0.25, 0.5, 0.75]) )
    print('Mean:', val_acc.mean())
    
def print_means(data, names):
    ''' Print the mean of the 'val_acc' column of each result set, followed by its name.
    data and names are iterated in lockstep. '''
    print('Mean averages:')
    for result_set, label in zip(data, names):
        mean_acc = result_set['val_acc'].mean()
        print(mean_acc, label)
        

def print_wilcoxon(all_data):
    ''' Run the Wilcoxon signed-rank test on every pair of result sets in all_data.
    A pair is reported (by its two indices) only when its p-value is above 0.05;
    the p-values themselves are not printed.
    Parameters
    all_data : indexable collection of paired result sets; all_data[i] is set i
    '''
    print('Wilcoxon signed rank test pvalues')
    for i, j in itertools.combinations(range(len(all_data)), 2):
        _statistic, pvalue = stats.wilcoxon(all_data[i], all_data[j])
        if pvalue > 0.05:
            print('These two result sets could be drawn from the same distribution:', i, ',', j)
            
            
def plot_confusion_matrix(cm, title='Normalised confusion matrix', name=''):
    ''' Draw a normalised confusion matrix in a new figure and save it as a PNG.
    The axes are labelled with the two classes 'Positive' and 'Negative'.
    Parameters
    cm : array - normalised confusion matrix
    title : str - figure title
    name : str - inserted in the output file name, 'cm_devnet_<name>.png'
    Scikit-learn: Machine Learning in Python, Pedregosa et al., JMLR 12, pp. 2825-2830, 2011.
    'Confusion Matrix' https://scikit-learn.org/stable/auto_examples/model_selection/plot_confusion_matrix.html#sphx-glr-auto-examples-model-selection-plot-confusion-matrix-py
    '''
    class_labels = ['Positive', 'Negative']
    tick_positions = np.arange(len(class_labels))
    percent_formatter = FuncFormatter('{0:.0%}'.format)

    sns.set_style('dark')
    plt.figure()
    plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
    plt.title(title)
    plt.colorbar(format=percent_formatter)
    plt.xticks(tick_positions, class_labels, rotation=45)
    plt.yticks(tick_positions, class_labels)
    plt.clim(0, 1)

    # Annotate each cell; values above half of the matrix maximum are written
    # in white, all others in black
    cutoff = cm.max() / 2.
    for row in range(cm.shape[0]):
        for col in range(cm.shape[1]):
            value = cm[row, col]
            text_colour = "white" if value > cutoff else "black"
            plt.text(col, row, format(value, '.0%'),
                     horizontalalignment="center",
                     color=text_colour)

    plt.ylabel('True class')
    plt.xlabel('Predicted class')
    plt.tight_layout()
    plt.savefig('cm_devnet_'+name+'.png', bbox_inches='tight')
    # Switch back to the 'whitegrid' theme used by the other plots
    sns.set(style = 'whitegrid')
    
    
def append_result_set(file, name, all_data, all_end_test_data, all_names, results):
    ''' Load one headerless results CSV and record it under the given name.
    Appends, in place:
      all_data          <- the 'val_acc' column
      all_end_test_data <- the 'end_test_acc' column
      all_names         <- name
      results           <- [name, val_acc mean, val_acc std, end_test_acc mean, end_test_acc std]
    '''
    column_names = ['run','loss','val_acc','epoch','time', 'end_test_acc']
    data = pd.read_csv(file, header=None, names=column_names)
    # NOTE(review): six names are always supplied above, so this check looks
    # unable to fail for a short file - confirm what it was meant to guard
    assert(data.shape[1]==6)

    val_acc = data['val_acc']
    end_test_acc = data['end_test_acc']
    all_data.append(val_acc)
    all_end_test_data.append(end_test_acc)
    all_names.append(name)
    results.append([name, val_acc.mean(), val_acc.std(),
                    end_test_acc.mean(), end_test_acc.std()])
    
    
def plot_acc(accuracy_results, result_names, title,
             ylabel='Validation accuracy', ylim=(0.3, 1.05)):
    ''' Draw a box plot of the accuracy data with a black swarm plot of the
    individual results on top. The figure is not saved here; callers do that.
    Parameters
    accuracy_results : list of result sets, one box per entry
    result_names : x-axis tick label of each result set
    title : figure title
    ylabel : y-axis label
    ylim : (bottom, top) limits of the y axis
    Returns
    ax, plt : the axes drawn on and the pyplot module
    '''
    ax = sns.boxplot(data=accuracy_results)
    ax = sns.swarmplot(data=accuracy_results, color='black')
    plt.suptitle(title)
    xticks = list(range(len(result_names)))
    plt.xticks(xticks, result_names)
    # Common formatting first; the labels and limits set below override its defaults
    format_plot(ax, plt)
    ax.set_xlabel('Model')
    ax.set_ylabel(ylabel)
    plt.ylim(bottom=ylim[0], top=ylim[1])
    return ax, plt

In [None]:
def readucr(filename):
    ''' Load a whitespace-delimited numeric text file with one sample per row.
    Parameters
    filename : path of the file to read
    Returns
    X : 2-D array - every column after the first
    Y : 1-D array - the first column
    '''
    # ndmin=2 keeps a file holding a single sample two-dimensional, so the
    # column slicing below also works for one-row files
    data = np.loadtxt(Path(filename), ndmin=2)
    Y = data[:, 0]
    X = data[:, 1:]
    return X, Y


def dataset_size(filename):
    ''' Return the total number of samples in the TRAIN and TEST files of a dataset.
    Parameters
    filename : dataset name; also the name of its folder and the prefix of its files
    Raises
    ValueError if the name does not belong to a known data directory
    '''
    if filename in ('private_dog0', 'private_dog1', 'private_dog2'):
        fdir = '../../data/private_data/private_events_dev'
    elif 'private' in filename:
        fdir = '../../data/private_data/private_events_dev2'
    else:
        # Without this branch fdir would be unbound and fail below with an
        # obscure UnboundLocalError
        raise ValueError('Unknown dataset (name must contain "private"): '+repr(filename))

    _, y_train = readucr(fdir+'/'+filename+'/'+filename+'_TRAIN.txt')
    _, y_test = readucr(fdir+'/'+filename+'/'+filename+'_TEST.txt')
    return y_train.shape[0]+y_test.shape[0]


def end_test_dataset_size(filename):
    ''' Return the number of samples in the END_TEST file of the named dataset '''
    end_test_file = f'../../data/private_data/private_events_dev2/{filename}/{filename}_END_TEST.txt'
    labels = readucr(end_test_file)[1]
    return labels.shape[0]


def sample_sizes(num_dataset, k, m, brackets=True):
    ''' Given a dataset size, work out the sample counts behind m iterations
    of k-fold cross validation.
    Parameters
    num_dataset : number of samples in the dataset
    k : number of folds
    m : number of iterations of the k-fold cross validation
    brackets : if True, num_text is wrapped in round brackets
    Returns (all three as strings)
    ND
        num_dataset/k rounded - the number of data samples used to calculate
        the validation accuracy
    NT
        k*m - the number of tests run
    num_text
        text that can be used in plots to write ND and NT with subscripts
    '''
    ND = str(round(num_dataset/k))
    NT = str(k*m)
    num_text = f'$N_D$={ND}, $N_T$={NT}'
    if brackets:
        num_text = f'({num_text})'
    return ND, NT, num_text

# Example confusion matrix and box plot

In [None]:
# Worked example: ten hand-written labels and predictions
y_true = [1] * 4 + [0] * 6
y_pred = [1, 1, 0, 1, 0, 0, 0, 1, 1, 0]
cm = confusion_matrix(y_true, y_pred, labels=[1,0])
# Normalise by row, so each row of the matrix sums to one
row_totals = cm.sum(axis=1)[:, np.newaxis]
cm_norm = cm.astype('float') / row_totals
title = 'Confusion matrix' # ($N_D$=10)'
plot_confusion_matrix(cm_norm, title=title, name='example')

In [None]:
# Worked example: ten made-up results, each expressed as a fraction of 15
example_values = [14, 14, 13, 13, 12, 12, 10, 10, 10, 9]
all_data = pd.DataFrame(example_values) / 15
print(all_data)
example_acc = all_data[0]
print('mean', example_acc.mean(), 'std', example_acc.std())
print('quantiles', example_acc.quantile([0.25, 0.5, 0.75]))

# Box plot with the individual results drawn on top as black points
ax = sns.boxplot(data=all_data, width=0.2)
ax = sns.swarmplot(data=all_data, color='black')
title = '10-fold cross validation result ($N_T$=10)'
plt.suptitle(title)
ax.set_xlabel('Dataset')
ax.set_ylabel('Validation accuracy')
ax.yaxis.set_major_formatter(FuncFormatter('{0:.0%}'.format))
plt.ylim(bottom=0.35, top=1.05)
plt.xticks([0], ['Example ($N_D$=15)'])
plt.savefig('boxplot_example.png', bbox_inches='tight')

# Confirm performance of untuned DNNs : GunPoint

In [None]:
print('Single train and test. Same train:test split as UCR TSC archive. Reporting validation accuracy and error.', '\n')

# 1-NN baseline: hard-coded accuracy, not read from a log file
print('1-NN (1-nearest neighbor)')
print('0.9133333333333333')
print(1-0.9133333333333333, 'error\n')


def _report_single_split(file, label):
    ''' Print the number of results, the mean validation accuracy and the
    matching error (1 - mean accuracy) of one summary CSV; return the data. '''
    data = pd.read_csv(file, header=None, names=['run','loss','val_acc','epoch','time'])
    # Accuracy and error are both taken from the mean over all rows, so the
    # two printed numbers always add up to one
    mean_acc = data['val_acc'].mean()
    print(file, label)
    print('No. results:', data.shape[0])
    print(mean_acc)
    print(1-mean_acc, 'error\n')
    return data


file = '../../logs/2019-05-10T20:18/GunPoint/mlpwang_summary.csv'
data1 = _report_single_split(file, '(MLP Wang)')

file = '../../logs/2019-03-31T18:07/GunPoint/devnet_summary.csv'
data2 = _report_single_split(file, '(FCN Wang)')

file = '../../logs/2019-03-29T15:29/GunPoint/devnet_summary.csv'
#file = '../../logs/2019-05-11T13:42/GunPoint/resnet_summary.csv'
data2 = _report_single_split(file, '(ResNet Wang)')



In [None]:
# 10-fold cross validation
ND, NT, num_text = sample_sizes(200, k=10, m=1)
print(num_text, '\n')

# 1-NN baseline: hard-coded mean and standard deviation, not read from a log file
print('1-NN (1-nearest neighbor)')
print(0.9450000000000001)
print(0.043779751788545644, '\n')


def _report_kfold(file, label):
    ''' Print the mean, standard deviation and count of the validation
    accuracies in one summary CSV; return the loaded data. '''
    frame = pd.read_csv(file, header=None, names=['run','loss','val_acc','epoch','time'])
    val_acc = frame['val_acc']
    print(file, label)
    print(val_acc.mean())
    print(val_acc.std())
    print('Number of folds', val_acc.count(), '\n')
    return frame


file = '../../logs/2019-03-17T16:35/GunPoint/mlpwang_summary.csv'
name1 = 'GunPoint'
data1 = _report_kfold(file, '(MLP Wang)')

file = '../../logs/2019-05-09T09:25/GunPoint/devnet_summary.csv'
data2 = _report_kfold(file, '(FCN Wang)')

file = '../../logs/2019-03-18T17:32/GunPoint/resnet_summary.csv'
data2 = _report_kfold(file, '(ResNet Wang)')

# Untuned models : all dogs data (balanced dataset)

In [None]:
all_data = list()
all_names = list()
results = []
N = dataset_size('private_balanced')
ND, NT, num_text = sample_sizes(N, k=10, m=1)

# TODO new files with 4x 3-fold
# (not read yet; paths use the same '../../logs' root as every other log file)
one_nn = '../../logs/2019-07-14T13:48/private_balanced/nearestneighbours_summary.csv'
mlp = '../../logs/2019-07-14T08:14/private_balanced/devnet_summary.csv'
fcn = '../../logs/2019-07-14T10:57/private_balanced/devnet_summary.csv'

# (model name, summary file, column names of that file). The 1-NN summary
# holds only the validation accuracy; the DNN summaries hold five columns.
summary_columns = ['run','loss','val_acc','epoch','time']
result_sets = [
    ('1-NN', '../../logs/2019-05-10T16:36/private_balanced/nearestneighbours_summary.csv', ['val_acc']),
    ('MLP', '../../logs/2019-03-21T14:23/private_balanced/mlpwang_summary.csv', summary_columns),
    ('FCN', '../../logs/2019-05-09T12:30/private_balanced/devnet_summary.csv', summary_columns),
    ('ResNet', '../../logs/2019-03-20T19:47/private_balanced/resnet_summary.csv', summary_columns),
]
for name, file, columns in result_sets:
    data = pd.read_csv(file, header=None, names=columns)
    all_data.append(data['val_acc'])
    all_names.append(name)
    results.append([name, data['val_acc'].mean(), data['val_acc'].std()])

print(results)
print_wilcoxon(all_data)
sns.set_palette(clr_model_all)
plot_acc(all_data, all_names, 'All dogs dataset '+num_text, ylim=[0.45, 0.9])
plt.savefig('pubmodels_alldogs.png', bbox_inches='tight')

# Untuned models : all dogs correct data (balanced dataset)

In [None]:
all_data = list()
all_end_test_data = list()
all_names = list()
results = []
end_test_results = []
k=3
m=4
# ND, NT and num_text describe the all-dogs correct dataset that every model
# in this cell was run on, so they are computed once here
N = dataset_size('private_correct_plus')
ND, NT, num_text = sample_sizes(N, k, m, brackets=False)


# Get results sets
# The 1-NN summary holds only the two accuracy columns, so it is read directly
file = '../../logs/2019-07-14T16:26/private_correct_plus/nearestneighbours_summary.csv' # 1-NN, instant
data = pd.read_csv(file, header=None, names=['val_acc', 'end_test_acc'])
all_data.append(data['val_acc'])
all_end_test_data.append(data['end_test_acc'])
all_names.append('1-NN')
results.append([all_names[-1], data['val_acc'].mean(), data['val_acc'].std(),
            data['end_test_acc'].mean(), data['end_test_acc'].std()])

file = '../../logs/2019-07-07T17:40/private_correct_plus/devnet_summary.csv' # MLP Wang 1h10m
append_result_set(file, 'MLP', all_data, all_end_test_data, all_names, results)

file = '../../logs/2019-07-08T16:49/private_correct_plus/devnet_summary.csv' # FCN 1h15m
append_result_set(file, 'FCN', all_data, all_end_test_data, all_names, results)

file = '../../logs/2019-07-07T20:52/private_correct_plus/devnet_summary.csv' # ResNet Wang, 12h30m
append_result_set(file, 'ResNet', all_data, all_end_test_data, all_names, results)


# Plot
save_filename = 'pubmodels_alldogs_correct'
sns.set_palette(clr_model_all)
plot_acc(all_data, all_names, '\tDog correct dataset (all dogs, '+num_text+')',
         ylim=[0.45, 0.9])
plt.savefig(save_filename+'.png', bbox_inches='tight')

print_wilcoxon(all_data)

# Print and save results
res = pd.DataFrame(results, columns=['set', 'val_acc', 'val_std', 'end_acc', 'end_std'])
res = res.replace(to_replace=r'\n', value=' ', regex=True)
res.to_csv(save_filename+'.csv')
res

## Test on realistic data
The sample standard deviation is lower on this test set than on the all-dogs correct dataset, most likely because the test set itself is constant, whereas the validation set under k-fold cross validation changes with each iteration of model training.

In [None]:
print(end_test_results)
# Title states the size of the realistic (end test) dataset and the number of tests
n_end_test = end_test_dataset_size('private_correct_plus')
title = f'\tRealistic dataset (all dogs, $N_D$={n_end_test}, $N_T$={NT})'
sns.set_palette(clr_model_all)
ax, plt = plot_acc(all_end_test_data, all_names, title, 'Accuracy', ylim=[0.45,0.9])
ax.set_xlabel('Model (trained on dog correct data)')
plt.savefig(save_filename+'_realistic.png', bbox_inches='tight')
print_wilcoxon(all_end_test_data)

# Untuned MLP : dogs correct  : personal vs impersonal model

In [None]:
all_data = list()
all_end_test_data = list()
all_names = list()
results = []
end_test_results = []
k = 3
m = 4
N = dataset_size('private_correct_plus')
ND, NT, num_text = sample_sizes(N, k, m)

# Get results sets
# (summary file, dataset the model was trained on, tick label)
result_sets = [
    ('../../logs/2019-07-12T14:41/private_dog0_correct_plus/devnet_summary.csv',
     'private_dog0_correct_plus', 'dog0'),
    ('../../logs/2019-07-12T17:23/private_dog1_correct_plus/devnet_summary.csv',
     'private_dog1_correct_plus', 'dog1'),
    ('../../logs/2019-07-12T18:34/private_dog2_correct_plus/devnet_summary.csv',
     'private_dog2_correct_plus', 'dog2'),
    ('../../logs/2019-07-07T17:40/private_correct_plus/devnet_summary.csv',
     'private_correct_plus', 'all dogs'),
]
for file, dataset, label in result_sets:
    # Each tick label carries the N_D of its own dataset
    ND = sample_sizes(dataset_size(dataset), k, m)[0]
    append_result_set(file, label+'\n$N_D$='+str(ND), all_data, all_end_test_data, all_names, results)

# Plot
save_filename = 'MLP_personal'
title = '\tMLP ($N_T$='+NT+')'
with sns.color_palette('Blues',4):
    ax, plt = plot_acc(all_data, all_names, title, ylim=[0.45, 1.05])
ax.set_xlabel('Dog correct dataset')
plt.savefig(save_filename+'.png', bbox_inches='tight')

print_wilcoxon(all_data)

# Print and save results
res = pd.DataFrame(results, columns=['set', 'val_acc', 'val_std', 'end_acc', 'end_std'])
res = res.replace(to_replace=r'\n', value=' ', regex=True)
res.to_csv(save_filename+'.csv')
res

In [None]:
# Rebuild the tick labels so that N_D is the size of each END_TEST file
all_names = []
end_test_sets = [('dog0', 'private_dog0_correct_plus'),
                 ('dog1', 'private_dog1_correct_plus'),
                 ('dog2', 'private_dog2_correct_plus'),
                 ('all dogs', 'private_correct_plus')]
for label, dataset in end_test_sets:
    ND = end_test_dataset_size(dataset)
    all_names.append(f'{label}\n$N_D$={ND}')


title = f'\tMLP (trained on dog correct data) ($N_T$={NT})'
with sns.color_palette('Blues', 4):
    ax, plt = plot_acc(all_end_test_data, all_names, title, 'Accuracy', ylim=[0.45, 1.05])
ax.set_xlabel('Realistic dataset')
plt.savefig(save_filename+'_realistic.png', bbox_inches='tight')
print_wilcoxon(all_end_test_data)

# Dog accuracy

In [None]:
# Load the TRAIN and TEST metadata of the balanced dataset and combine them
datadir = '../../data/private_data/private_events_dev2' 
fname = 'private_balanced'
meta_train = pd.read_csv(datadir+'/'+fname+'/'+fname+'_TRAIN_meta.txt', sep=',', parse_dates=['date'])
meta_test = pd.read_csv(datadir+'/'+fname+'/'+fname+'_TEST_meta.txt', sep=',', parse_dates=['date'])
meta = pd.concat([meta_train, meta_test])

# Stacked bar chart: one bar per dog, one segment per 'dog_result' value.
# NOTE: this rebinds the global 'colors' that was set with the palettes.
# NOTE(review): the colours are applied in column order after unstack() -
# confirm that the order matches the intended result categories.
colors = ['red', 'lightsalmon', 'palegreen', 'lime' ]   
meta.groupby('dog')['dog_result'] \
    .value_counts() \
    .sort_index(ascending=False) \
    .unstack(level=1) \
    .plot.bar(stacked=True, color=colors)

plt.xticks([0, 1, 2], ['0', '1', '2'])
plt.xticks(rotation=0)
plt.ylabel('Number of samples')
plt.suptitle('Bio-detection dogs\' results')
plt.legend(title='result')
plt.savefig('bar_dogAccuracy.png', bbox_inches='tight')

# Count the rows for each (dog, dog_result) pair; margins=True also adds totals
results = meta.pivot_table('run', index='dog', columns='dog_result', aggfunc=len, fill_value=0, margins=True)
print(results)


# Calculate Accuracy and FNR, false negative rate, etc.
# The pivot table is read through its TP, FN, TN and FP columns
# (presumably the values of 'dog_result' - verify against the meta files).
# NOTE: N here rebinds the global N that other cells use for the dataset size.
P = results.TP+results.FN
N = results.TN+results.FP
results['Accuracy'] = (results.TP+results.TN)/(P+N)
results['TPR'] = results.TP/P
results['FPR'] = results.FP/N
results['TNR'] = results.TN/N
results['FNR'] = results.FN/P
print(results[['Accuracy', 'TPR', 'TNR', 'FPR', 'FNR']])

# MLP hyperparameter tuning

In [None]:
ND, NT, num_text = sample_sizes(dataset_size('private_dog0_correct_plus'), k=3, m=4)

# Hard-coded tuning results: one row per 'nodes per layer' setting and one
# column per 'number of hidden layers' setting (see the tick labels below)
acc = [
    [0.85, 0.85, 0.85, 0.82, 0.79, 0.63, 0.53],
    [0.84, 0.85, 0.84, 0.82, 0.86, 0.73, 0.56],
    [0.85, 0.84, 0.85, 0.84, 0.85, 0.72, 0.62],
    [0.85, 0.84, 0.84, 0.84, 0.8, 0.62, 0.52],
    [0.84, 0.84, 0.84, 0.84, 0.84, 0.59, 0.5],
    [0.83, 0.84, 0.84, 0.83, 0.84, 0.73, 0.5],
]

std = [
    [0.045, 0.05, 0.047, 0.051, 0.14, 0.16, 0.1],
    [0.044, 0.042, 0.044, 0.041, 0.053, 0.14, 0.14],
    [0.039, 0.051, 0.059, 0.049, 0.052, 0.17, 0.17],
    [0.053, 0.044, 0.049, 0.042, 0.1, 0.18, np.nan],
    [0.047, 0.046, 0.046, 0.055, 0.039, 0.16, np.nan],
    [0.042, 0.046, 0.051, 0.05, 0.054, 0.17, np.nan],
]

# Two heatmaps side by side: mean on the left, standard deviation on the right
fig = plt.figure(figsize=(10, 4))
ax0 = fig.add_subplot(121)
ax1 = fig.add_subplot(122, sharey=ax0)
sns.heatmap(acc, cmap='RdYlGn', annot=True, fmt = '.0%', cbar=False, ax=ax0)
sns.heatmap(std, cmap='RdYlGn_r', annot=True, fmt = '.1%', cbar=False, ax=ax1)

hidden_layer_counts = [2, 3, 4, 5, 6, 7, 8]
for axis in (ax0, ax1):
    axis.set_xlabel('Number of hidden layers')
    axis.set_xticklabels(hidden_layer_counts)
ax0.set_ylabel('Nodes per layer')
ax0.set_yticklabels([8, 16, 32, 64, 128, 256])
ax0.set_title('Mean')
ax1.set_title('Sample standard deviation')

title = 'Validation accuracy : dog0_correct '+num_text
plt.suptitle(title, y=1)
plt.savefig('heatmap_MLP_dog0Correct.png', bbox_inches='tight')

# Tuned models : trained on all dogs correct dataset

In [None]:
all_data = list()
all_end_test_data = list()
all_names = list()
results = []
end_test_results = []
N = dataset_size('private_correct_plus')
ND, NT, num_text = sample_sizes(N, k=3, m=4)

# Get results sets
# (tick label, summary file) of each tuned model
tuned_runs = [
    ('MLP\ntuned', '../../logs/2019-05-11T19:09/private_correct_plus/devnet_summary.csv'),
    ('FCN\ntuned', '../../logs/2019-07-12T08:47/private_correct_plus/devnet_summary.csv'),  # FCN TODO - tune it more
    ('ResNet\ntuned', '../../logs/2019-07-12T06:35/private_correct_plus/devnet_summary.csv'),  # ResNet
    ('CNN\ntuned', '../../logs/2019-07-12T05:31/private_correct_plus/devnet_summary.csv'),  # CNN
]
for label, file in tuned_runs:
    append_result_set(file, label, all_data, all_end_test_data, all_names, results)

# Plot
save_filename = 'tuned_alldogs_correct'
sns.set_palette(clr_model_tuned)
ax, plt = plot_acc(all_data, all_names, '\tDog correct dataset (all dogs) '+num_text, ylim=[0.6, 1.05])
plt.savefig(save_filename+'.png', bbox_inches='tight')

print_wilcoxon(all_data)

# Print and save results
result_columns = ['set', 'val_acc', 'val_std', 'end_acc', 'end_std']
res = pd.DataFrame(results, columns=result_columns)
# Tick labels contain line breaks; flatten them for the table
res = res.replace(to_replace=r'\n', value=' ', regex=True)
res.to_csv(save_filename+'.csv')
res

In [None]:
# Same models as the previous cell, scored on the realistic (end test) dataset
n_end_test = end_test_dataset_size('private_correct_plus')
title = f'\tRealistic dataset (all dogs, $N_D$={n_end_test}, $N_T$={NT})'
sns.set_palette(clr_model_tuned)
plot_acc(all_end_test_data, all_names, title, 'Accuracy', ylim=[0.6, 1.05])
plt.savefig(save_filename+'_realistic.png', bbox_inches='tight')
print_wilcoxon(all_end_test_data)

# Confusion matrices

In [None]:
''' As per end test on MLP 1000/16/16/16/1
../logs/2019-05-12T16:36/private_correct_plus/devnet_summary.csv
[[39 38]
 [ 7 70]]
Class balance in test set: 77 to 77 i.e. 0.5
'''
# Row-normalised form of the counts quoted above
cm = np.array([[0.50649351, 0.49350649],
               [0.09090909, 0.90909091]])
acc = cm.diagonal().sum() / cm.sum()
plot_confusion_matrix(cm, title='MLP tuned', name='cm_MLPtuned_endTest')


# The dogs' matrix is entered as raw counts and normalised by row for plotting
dog_counts = np.array([[48, 29],
                       [8, 69]])
dog_acc = dog_counts.diagonal().sum() / dog_counts.sum()
ND = dog_counts.sum()
dog_cm = dog_counts.astype('float') / dog_counts.sum(axis=1)[:, np.newaxis]
plot_confusion_matrix(dog_cm, title='Bio-detection dogs', name='cm_dog_endTest')

print('acc', acc, 'dog_acc', dog_acc, 'ND', ND)

In [None]:
''' With probability threshold 0.3
[[47 30]
 [15 62]]
Calculated accuracy: 0.7077922077922078
'''  
# Row-normalised form of the counts quoted above
cm = np.array([[0.61038961, 0.38961039],
               [0.19480519, 0.80519481]])
plot_confusion_matrix(cm, title='MLP tuned', name='cm_MLPtuned_endTest_threshold_0.3')
threshold_acc = cm.diagonal().sum() / cm.sum()
print('acc', threshold_acc)

# Dataset size vs accuracy
MLP 16/16/16

In [None]:
# Hard-coded results: dataset size, mean validation accuracy and its standard deviation
x = np.array([25, 30, 40, 50, 60, 70, 75, 80, 90, 100,
              102, 125, 150, 175, 200, 225, 250, 275, 284])
mean_acc = [0.72, 0.69, 0.7, 0.8, 0.79, 0.82, 0.81, 0.77, 0.79, 0.83,
            0.82, 0.81, 0.81, 0.81, 0.83, 0.82, 0.84, 0.83, 0.83]
acc_std = [0.13, 0.12, 0.1, 0.12, 0.082, 0.059, 0.08, 0.062, 0.07, 0.042,
           0.046, 0.04, 0.036, 0.055, 0.044, 0.03, 0.055, 0.035, 0.024]


# Under k-fold cross validation, (k-1)/k of each dataset is used for training
k = 3
x_train = np.round((x/k*(k-1)))
x_test = x-x_train

print(x_train)
print(x_test)

# Symmetric error bars of one standard deviation either side of the mean
yerr = [acc_std, acc_std]
fig, ax = plt.subplots(figsize=(10, 4))
ax.errorbar(x, mean_acc, yerr=yerr, fmt='-o', capsize=4)

ax.set_xlim(left=0, right=300)
ax.set_ylim(bottom=0.35, top=1.05)
ax.yaxis.set_major_formatter(mtick.PercentFormatter(1.0, 0))
ax.set_xlabel('Dataset size')
ax.set_ylabel('Mean validation accuracy')
plt.suptitle('Effect of dataset size')

In [None]:
# Hard-coded end-test results, plotted against x_train from the previous cell
end_test_acc = [0.64, 0.64, 0.66, 0.68, 0.69, 0.71, 0.69, 0.69, 0.68, 0.7,
                0.7, 0.69, 0.68, 0.69, 0.69, 0.69, 0.7, 0.7, 0.7]
end_test_std = [0.035, 0.042, 0.047, 0.048, 0.025, 0.022, 0.015, 0.035, 0.029, 0.027,
                0.015, 0.02, 0.018, 0.016, 0.014, 0.011, 0.011, 0.008, 0.012]

# Symmetric error bars of one standard deviation either side of the mean
yerr = [end_test_std, end_test_std]
fig, ax = plt.subplots(figsize=(10, 4))
ax.errorbar(x_train, end_test_acc, yerr=yerr, fmt='-o', capsize=4)

ax.set_xlim(left=0, right=200)
ax.set_ylim(bottom=0.5, top=0.8)
ax.yaxis.set_major_formatter(mtick.PercentFormatter(1.0, 0))
num_text = '($N_D$=154, $N_T$=12)'
plt.suptitle('Realistic dataset test\n'+num_text)
ax.set_xlabel('Size of the training dataset')
ax.set_ylabel('Accuracy\nmean +/- one sample standard deviation')

# Report the largest training set (last entry) and the entry at index 10
for idx in (-1, 10):
    print('Train on', x_train[idx], 'acc', end_test_acc[idx], 'std', end_test_std[idx])