In [1]:
import pickle
import glob
import os
import pandas
import numpy as np
import json

# Render every float shown in a DataFrame with exactly three decimal places.
pandas.set_option('display.float_format', '{:.3f}'.format)

In [2]:
# Load the dataset registry. Later cells iterate over its keys as dataset
# names and read each entry's 'n_train' field.
with open('./datasets.json', 'r') as datasets_file:
    datasets = json.load(datasets_file)

In [3]:
# Number of repeated runs parsed for every (model, dataset) pair.
num_experiments = 5

# Model variants to compare; results for each are read from ./results/GP<name>/.
models = ['SigLSTM', 'SigGRU', 'Sig', 'LSTM', 'GRU', 'KConv1D']

# Architecture settings indexed as architectures[model][dataset]; the parsing
# cell reads the 'H' and 'D' entries to build result file names.
with open('./architectures.json', 'r') as architectures_file:
    architectures = json.load(architectures_file)

### Parse results

In [4]:
# results_dict[model][metric][run] is a list holding one value per dataset, in
# the iteration order of `datasets`. Results that cannot be found are stored
# as NaN so that every list keeps the same length.
results_dict = {}
for model_name in models:
    results_dict[model_name] = {'acc' : {}, 'nlpp' : {}}

for model_pos, model in enumerate(models, start=1):
    results_folder = './results/GP{}/'.format(model)
    # Only the recurrent variants encode an architecture spec in the file name.
    needs_spec = 'GRU' in model or 'LSTM' in model

    for dataset_pos, dataset in enumerate(datasets, start=1):
        print('Parsing results for model: {} ({}/{}), dataset: {} ({:2d}/{})...'.format(model, model_pos, len(models), dataset, dataset_pos, len(datasets)))

        for run in range(num_experiments):
            # Default to NaN; overwritten below only when a result file is read.
            test_acc = float('nan')
            test_nlpp = float('nan')

            # File-name suffix; None means the spec is unknown and no file can be located.
            if not needs_spec:
                suffix = ''
            elif dataset in architectures[model]:
                arch = architectures[model][dataset]
                suffix = '_H{}_D{}'.format(arch['H'], arch['D'])
            else:
                print('Warning: missing architecture spec. for dataset: {} and model: {}'.format(dataset, model))
                suffix = None

            if suffix is not None:
                file_path = os.path.join(results_folder, '{}{}_{}'.format(dataset, suffix, run) + '.pkl')
                if os.path.exists(file_path):
                    # NOTE: pickle.load can execute arbitrary code; only parse
                    # result files produced by trusted runs.
                    with open(file_path, 'rb') as f:
                        saved_dict = pickle.load(f)
                    test_acc = saved_dict['results']['test_acc']
                    test_nlpp = saved_dict['results']['test_nlpp']
                else:
                    print('Warning: missing result file {}'.format(file_path))

            results_dict[model]['acc'].setdefault(run, []).append(test_acc)
            results_dict[model]['nlpp'].setdefault(run, []).append(test_nlpp)

Parsing results for model: SigLSTM (1/6), dataset: ArabicDigits ( 1/16)...
Parsing results for model: SigLSTM (1/6), dataset: AUSLAN ( 2/16)...
Parsing results for model: SigLSTM (1/6), dataset: CharacterTrajectories ( 3/16)...
Parsing results for model: SigLSTM (1/6), dataset: CMUsubject16 ( 4/16)...
Parsing results for model: SigLSTM (1/6), dataset: DigitShapes ( 5/16)...
Parsing results for model: SigLSTM (1/6), dataset: ECG ( 6/16)...
Parsing results for model: SigLSTM (1/6), dataset: JapaneseVowels ( 7/16)...
Parsing results for model: SigLSTM (1/6), dataset: KickvsPunch ( 8/16)...
Parsing results for model: SigLSTM (1/6), dataset: LIBRAS ( 9/16)...
Parsing results for model: SigLSTM (1/6), dataset: NetFlow (10/16)...
Parsing results for model: SigLSTM (1/6), dataset: PEMS (11/16)...
Parsing results for model: SigLSTM (1/6), dataset: PenDigits (12/16)...
Parsing results for model: SigLSTM (1/6), dataset: Shapes (13/16)...
Parsing results for model: SigLSTM (1/6), dataset: UWave (1

### Individual results of models over 5 runs

In [5]:
from IPython.display import display


def _per_run_table(per_run_results):
    """Build a datasets-by-runs table with 'mean' and 'std' summary columns.

    Parameters
    ----------
    per_run_results : dict
        Maps a run index to a list with one value per dataset, in the
        iteration order of `datasets`.

    Returns
    -------
    pandas.DataFrame
        One row per dataset (labelled by dataset name), one column per run,
        followed by the row-wise 'mean' and 'std' of the run columns.
    """
    runs = pandas.DataFrame.from_dict(per_run_results).rename(index=dict(enumerate(datasets)))
    table = runs.copy()
    # Both statistics are taken from the run columns only, so the 'mean'
    # column never leaks into the standard deviation. ddof=0 (population
    # std) matches the np.nanstd values used in the comparison tables.
    table['mean'] = runs.mean(axis=1)
    table['std'] = runs.std(axis=1, ddof=0)
    return table


for model in models:
    model_full = '# GP{} #'.format(model)
    banner = '#' * len(model_full)
    print()
    print(banner)
    print(model_full)
    print(banner)
    print()

    print('Accuracy:')
    display(_per_run_table(results_dict[model]['acc']))

    print('\nNegative log-predictive probability:')
    display(_per_run_table(results_dict[model]['nlpp']))


#############
# GPSigLSTM #
#############

Accuracy:


Unnamed: 0,0,1,2,3,4,mean,std
ArabicDigits,0.992,0.995,0.995,0.988,0.99,0.992,0.003
AUSLAN,0.985,0.987,0.978,0.982,0.984,0.983,0.003
CharacterTrajectories,0.991,0.995,0.989,0.994,0.987,0.991,0.003
CMUsubject16,1.0,1.0,1.0,1.0,1.0,1.0,0.0
DigitShapes,1.0,1.0,1.0,1.0,1.0,1.0,0.0
ECG,0.8,0.8,0.78,0.85,0.85,0.816,0.029
JapaneseVowels,0.981,0.986,0.986,0.976,0.976,0.981,0.005
KickvsPunch,0.9,0.8,1.0,0.9,0.9,0.9,0.063
LIBRAS,0.906,0.928,0.933,0.933,0.906,0.921,0.013
NetFlow,0.933,0.929,0.929,0.933,0.931,0.931,0.002



Negative log-predictive probability:


Unnamed: 0,0,1,2,3,4,mean,std
ArabicDigits,0.026,0.02,0.02,0.089,0.078,0.047,0.03
AUSLAN,0.115,0.105,0.107,0.109,0.094,0.106,0.007
CharacterTrajectories,0.031,0.022,0.036,0.023,0.041,0.031,0.007
CMUsubject16,0.056,0.103,0.072,0.105,0.102,0.088,0.02
DigitShapes,0.009,0.007,0.008,0.008,0.009,0.008,0.001
ECG,0.397,0.439,0.405,0.367,0.404,0.402,0.023
JapaneseVowels,0.071,0.055,0.061,0.072,0.141,0.08,0.031
KickvsPunch,0.424,0.393,0.151,0.345,0.192,0.301,0.109
LIBRAS,0.335,0.305,0.362,0.24,0.356,0.32,0.045
NetFlow,0.23,0.216,0.223,0.203,0.218,0.218,0.009



############
# GPSigGRU #
############

Accuracy:


Unnamed: 0,0,1,2,3,4,mean,std
ArabicDigits,0.99,0.994,0.995,0.995,0.994,0.994,0.002
AUSLAN,0.968,0.985,0.985,0.975,0.979,0.978,0.006
CharacterTrajectories,0.991,0.841,0.986,0.819,0.987,0.925,0.078
CMUsubject16,1.0,1.0,1.0,1.0,1.0,1.0,0.0
DigitShapes,1.0,1.0,1.0,0.938,1.0,0.988,0.025
ECG,0.82,0.83,0.84,0.82,0.85,0.832,0.012
JapaneseVowels,0.981,0.989,0.984,0.981,0.989,0.985,0.004
KickvsPunch,0.8,1.0,0.8,0.7,0.8,0.82,0.098
LIBRAS,0.872,0.906,0.939,0.856,0.922,0.899,0.031
NetFlow,0.919,0.899,0.927,0.931,0.931,0.921,0.012



Negative log-predictive probability:


Unnamed: 0,0,1,2,3,4,mean,std
ArabicDigits,0.035,0.02,0.018,0.022,0.019,0.023,0.006
AUSLAN,0.205,0.084,0.085,0.134,0.108,0.123,0.045
CharacterTrajectories,0.041,0.521,0.048,0.638,0.044,0.258,0.265
CMUsubject16,0.042,0.024,0.054,0.042,0.039,0.04,0.009
DigitShapes,0.01,0.01,0.01,0.137,0.01,0.035,0.051
ECG,0.461,0.412,0.423,0.483,0.377,0.431,0.037
JapaneseVowels,0.052,0.058,0.063,0.053,0.038,0.053,0.009
KickvsPunch,0.537,0.261,0.46,0.593,0.615,0.493,0.128
LIBRAS,0.423,0.305,0.242,0.48,0.277,0.346,0.091
NetFlow,0.289,0.399,0.212,0.2,0.195,0.259,0.078



#########
# GPSig #
#########

Accuracy:


Unnamed: 0,0,1,2,3,4,mean,std
ArabicDigits,0.98,0.975,0.979,0.976,0.985,0.979,0.004
AUSLAN,0.947,0.903,0.927,0.93,0.919,0.925,0.014
CharacterTrajectories,0.977,0.98,0.98,0.98,0.977,0.979,0.002
CMUsubject16,1.0,0.966,0.966,1.0,0.966,0.979,0.017
DigitShapes,1.0,1.0,1.0,1.0,1.0,1.0,0.0
ECG,0.85,0.85,0.86,0.85,0.83,0.848,0.01
JapaneseVowels,0.984,0.984,0.978,0.989,0.976,0.982,0.005
KickvsPunch,0.9,0.9,0.9,0.9,0.9,0.9,0.0
LIBRAS,0.922,0.928,0.917,0.922,0.928,0.923,0.004
NetFlow,0.942,0.934,0.94,0.933,0.936,0.937,0.003



Negative log-predictive probability:


Unnamed: 0,0,1,2,3,4,mean,std
ArabicDigits,0.055,0.067,0.062,0.113,0.056,0.071,0.021
AUSLAN,0.358,0.706,0.535,0.537,0.612,0.55,0.114
CharacterTrajectories,0.112,0.104,0.105,0.104,0.117,0.108,0.005
CMUsubject16,0.091,0.109,0.107,0.036,0.104,0.089,0.027
DigitShapes,0.021,0.021,0.02,0.019,0.021,0.021,0.001
ECG,0.352,0.357,0.35,0.352,0.371,0.356,0.008
JapaneseVowels,0.067,0.068,0.066,0.07,0.074,0.069,0.003
KickvsPunch,0.213,0.216,0.216,0.224,0.252,0.224,0.014
LIBRAS,0.301,0.246,0.252,0.243,0.253,0.259,0.021
NetFlow,0.178,0.199,0.177,0.211,0.181,0.189,0.014



##########
# GPLSTM #
##########

Accuracy:


Unnamed: 0,0,1,2,3,4,mean,std
ArabicDigits,0.988,0.981,0.979,0.99,0.988,0.985,0.004
AUSLAN,0.881,0.862,0.889,0.895,0.871,0.88,0.012
CharacterTrajectories,0.895,0.059,0.055,0.049,0.108,0.233,0.331
CMUsubject16,0.966,0.966,0.931,0.931,0.828,0.924,0.051
DigitShapes,1.0,1.0,1.0,1.0,1.0,1.0,0.0
ECG,0.83,0.75,0.76,0.76,0.81,0.782,0.032
JapaneseVowels,0.981,0.989,0.978,0.978,0.984,0.982,0.004
KickvsPunch,0.5,0.7,0.6,0.6,0.7,0.62,0.075
LIBRAS,0.772,0.75,0.8,0.761,0.794,0.776,0.019
NetFlow,0.91,0.931,0.944,0.931,0.925,0.928,0.011



Negative log-predictive probability:


Unnamed: 0,0,1,2,3,4,mean,std
ArabicDigits,0.078,0.082,0.122,0.059,0.07,0.082,0.022
AUSLAN,0.648,0.775,0.58,0.583,0.663,0.65,0.071
CharacterTrajectories,0.502,2.923,2.927,2.999,3.181,2.506,1.007
CMUsubject16,0.178,0.214,0.229,0.359,0.372,0.27,0.08
DigitShapes,0.012,0.015,0.015,0.012,0.01,0.013,0.002
ECG,0.485,0.524,0.491,0.471,0.51,0.496,0.018
JapaneseVowels,0.032,0.035,0.112,0.059,0.065,0.061,0.029
KickvsPunch,0.786,0.662,0.683,0.683,0.665,0.696,0.046
LIBRAS,0.868,0.999,0.87,0.958,0.861,0.911,0.056
NetFlow,0.255,0.225,0.2,0.324,0.251,0.251,0.041



#########
# GPGRU #
#########

Accuracy:


Unnamed: 0,0,1,2,3,4,mean,std
ArabicDigits,0.986,0.982,0.98,0.993,0.991,0.986,0.005
AUSLAN,0.966,0.966,0.945,0.935,0.935,0.949,0.014
CharacterTrajectories,0.077,0.075,0.07,0.161,0.188,0.114,0.05
CMUsubject16,1.0,1.0,1.0,1.0,0.966,0.993,0.014
DigitShapes,0.688,0.688,0.688,1.0,1.0,0.812,0.153
ECG,0.72,0.74,0.79,0.69,0.73,0.734,0.033
JapaneseVowels,0.984,0.992,0.978,0.989,0.986,0.986,0.005
KickvsPunch,0.6,0.6,0.4,0.7,0.7,0.6,0.11
LIBRAS,0.722,0.817,0.772,0.733,0.667,0.742,0.05
NetFlow,0.914,0.938,0.912,0.94,0.927,0.926,0.012



Negative log-predictive probability:


Unnamed: 0,0,1,2,3,4,mean,std
ArabicDigits,0.067,0.072,0.079,0.06,0.051,0.066,0.01
AUSLAN,0.176,0.169,0.278,0.308,0.309,0.248,0.063
CharacterTrajectories,3.002,3.017,3.005,4.413,4.178,3.523,0.635
CMUsubject16,0.061,0.055,0.076,0.093,0.162,0.089,0.039
DigitShapes,1.232,1.132,1.21,0.033,0.03,0.727,0.569
ECG,0.538,0.552,0.431,0.839,0.644,0.601,0.137
JapaneseVowels,0.053,0.05,0.061,0.048,0.05,0.052,0.005
KickvsPunch,0.691,0.66,0.711,0.607,0.698,0.674,0.037
LIBRAS,1.143,0.78,0.937,1.174,1.515,1.11,0.248
NetFlow,0.201,0.181,0.212,0.186,0.189,0.194,0.011



#############
# GPKConv1D #
#############

Accuracy:


Unnamed: 0,0,1,2,3,4,mean,std
ArabicDigits,0.985,0.983,0.985,0.983,0.985,0.984,0.001
AUSLAN,0.778,0.793,0.793,0.763,0.794,0.784,0.012
CharacterTrajectories,0.934,0.934,0.966,0.935,0.935,0.941,0.013
CMUsubject16,0.897,0.897,0.897,0.897,0.897,0.897,0.0
DigitShapes,1.0,1.0,1.0,1.0,1.0,1.0,0.0
ECG,0.75,0.77,0.78,0.73,0.77,0.76,0.018
JapaneseVowels,0.989,0.986,0.986,0.986,0.984,0.986,0.002
KickvsPunch,0.8,0.7,0.8,0.6,0.6,0.7,0.089
LIBRAS,0.661,0.672,0.717,0.717,0.722,0.698,0.026
NetFlow,0.946,0.893,0.968,0.957,0.961,0.945,0.027



Negative log-predictive probability:


Unnamed: 0,0,1,2,3,4,mean,std
ArabicDigits,0.049,0.051,0.047,0.055,0.049,0.05,0.003
AUSLAN,2.015,1.772,1.782,2.114,1.816,1.9,0.139
CharacterTrajectories,0.479,0.481,0.126,0.481,0.48,0.409,0.141
CMUsubject16,0.256,0.257,0.253,0.253,0.257,0.255,0.002
DigitShapes,0.039,0.034,0.031,0.033,0.039,0.035,0.003
ECG,0.548,0.515,0.543,0.573,0.537,0.543,0.019
JapaneseVowels,0.064,0.068,0.067,0.068,0.066,0.067,0.001
KickvsPunch,0.642,0.65,0.666,0.691,0.659,0.662,0.017
LIBRAS,1.793,2.057,1.672,1.252,1.267,1.608,0.311
NetFlow,0.157,0.325,0.103,0.111,0.144,0.168,0.081


### Comparison of GPs

In [6]:
# Aggregate the runs of every (metric, model, dataset) triple. The nan-aware
# reductions skip NaN entries, which is how missing results were recorded.
#   results_mean[metric][model] -> list of per-dataset means over the runs
#   results_std[metric][model]  -> list of per-dataset standard deviations
#   results_str[metric][model]  -> list of 'mean ± std' strings for display
results_mean, results_std, results_str = {}, {}, {}
for metric_name in ['nlpp', 'acc']:
    results_mean[metric_name] = {}
    results_std[metric_name] = {}
    results_str[metric_name] = {}
    for model_name in models:
        per_run = results_dict[model_name][metric_name]
        dataset_means, dataset_stds = [], []
        for dataset_pos in range(len(datasets)):
            run_values = [per_run[run][dataset_pos] for run in range(num_experiments)]
            dataset_means.append(np.nanmean(run_values))
            dataset_stds.append(np.nanstd(run_values))
        results_mean[metric_name][model_name] = dataset_means
        results_std[metric_name][model_name] = dataset_stds
        results_str[metric_name][model_name] = [
            '{:.3f} ± {:.3f}'.format(mu, sigma) for mu, sigma in zip(dataset_means, dataset_stds)
        ]

# Average of the per-dataset means for every model, again skipping NaN entries.
results_mean_mean = {}
for metric_name in ['nlpp', 'acc']:
    results_mean_mean[metric_name] = {}
    for model_name in models:
        results_mean_mean[metric_name][model_name] = np.nanmean(results_mean[metric_name][model_name])

#### Mean and standard deviation over 5 runs

In [7]:
# Row labels: one per dataset, followed by a final 'Mean' row.
idx = {i : name for i, name in enumerate(datasets)}
idx[len(datasets)] = 'Mean'


def _with_mean_row(summary_strings, column_means, row_labels):
    """Build a comparison table with a trailing row of per-model means.

    Parameters
    ----------
    summary_strings : dict
        Maps a model name to a list of 'mean ± std' strings, one per dataset.
    column_means : dict
        Maps a model name to the value placed in the extra last row.
    row_labels : dict
        Maps positional row indices to the labels shown in the table.

    Returns
    -------
    pandas.DataFrame
        One column per model, one row per dataset plus the appended row.
    """
    table = pandas.DataFrame.from_dict(summary_strings)
    # DataFrame.append was removed in pandas 2.0; wrapping the dict in a
    # one-row frame and concatenating produces the same extra row.
    mean_row = pandas.DataFrame([column_means])
    return pandas.concat([table, mean_row], ignore_index=True).rename(index=row_labels)


print('Accuracies:\n')
df = _with_mean_row(results_str['acc'], results_mean_mean['acc'], idx)
display(df)

print('Nlpps:\n')
df = _with_mean_row(results_str['nlpp'], results_mean_mean['nlpp'], idx)
display(df)

Accuracies:



Unnamed: 0,SigLSTM,SigGRU,Sig,LSTM,GRU,KConv1D
ArabicDigits,0.992 ± 0.003,0.994 ± 0.002,0.979 ± 0.004,0.985 ± 0.004,0.986 ± 0.005,0.984 ± 0.001
AUSLAN,0.983 ± 0.003,0.978 ± 0.006,0.925 ± 0.014,0.880 ± 0.012,0.949 ± 0.014,0.784 ± 0.012
CharacterTrajectories,0.991 ± 0.003,0.925 ± 0.078,0.979 ± 0.002,0.233 ± 0.331,0.114 ± 0.050,0.941 ± 0.013
CMUsubject16,1.000 ± 0.000,1.000 ± 0.000,0.979 ± 0.017,0.924 ± 0.051,0.993 ± 0.014,0.897 ± 0.000
DigitShapes,1.000 ± 0.000,0.988 ± 0.025,1.000 ± 0.000,1.000 ± 0.000,0.812 ± 0.153,1.000 ± 0.000
ECG,0.816 ± 0.029,0.832 ± 0.012,0.848 ± 0.010,0.782 ± 0.032,0.734 ± 0.033,0.760 ± 0.018
JapaneseVowels,0.981 ± 0.005,0.985 ± 0.004,0.982 ± 0.005,0.982 ± 0.004,0.986 ± 0.005,0.986 ± 0.002
KickvsPunch,0.900 ± 0.063,0.820 ± 0.098,0.900 ± 0.000,0.620 ± 0.075,0.600 ± 0.110,0.700 ± 0.089
LIBRAS,0.921 ± 0.013,0.899 ± 0.031,0.923 ± 0.004,0.776 ± 0.019,0.742 ± 0.050,0.698 ± 0.026
NetFlow,0.931 ± 0.002,0.921 ± 0.012,0.937 ± 0.003,0.928 ± 0.011,0.926 ± 0.012,0.945 ± 0.027


Nlpps:



Unnamed: 0,SigLSTM,SigGRU,Sig,LSTM,GRU,KConv1D
ArabicDigits,0.047 ± 0.030,0.023 ± 0.006,0.071 ± 0.021,0.082 ± 0.022,0.066 ± 0.010,0.050 ± 0.003
AUSLAN,0.106 ± 0.007,0.123 ± 0.045,0.550 ± 0.114,0.650 ± 0.071,0.248 ± 0.063,1.900 ± 0.139
CharacterTrajectories,0.031 ± 0.007,0.258 ± 0.265,0.108 ± 0.005,2.506 ± 1.007,3.523 ± 0.635,0.409 ± 0.141
CMUsubject16,0.088 ± 0.020,0.040 ± 0.009,0.089 ± 0.027,0.270 ± 0.080,0.089 ± 0.039,0.255 ± 0.002
DigitShapes,0.008 ± 0.001,0.035 ± 0.051,0.021 ± 0.001,0.013 ± 0.002,0.727 ± 0.569,0.035 ± 0.003
ECG,0.402 ± 0.023,0.431 ± 0.037,0.356 ± 0.008,0.496 ± 0.018,0.601 ± 0.137,0.543 ± 0.019
JapaneseVowels,0.080 ± 0.031,0.053 ± 0.009,0.069 ± 0.003,0.061 ± 0.029,0.052 ± 0.005,0.067 ± 0.001
KickvsPunch,0.301 ± 0.109,0.493 ± 0.128,0.224 ± 0.014,0.696 ± 0.046,0.674 ± 0.037,0.662 ± 0.017
LIBRAS,0.320 ± 0.045,0.346 ± 0.091,0.259 ± 0.021,0.911 ± 0.056,1.110 ± 0.248,1.608 ± 0.311
NetFlow,0.218 ± 0.009,0.259 ± 0.078,0.189 ± 0.014,0.251 ± 0.041,0.194 ± 0.011,0.168 ± 0.081


#### Ranks over 5 runs

In [8]:
# Datasets with fewer than `small_n_train` training examples count as "smaller".
small_n_train = 300
small_d_idx = [i for i, info in enumerate(datasets.values()) if info['n_train'] < small_n_train]
large_d_idx = [i for i, info in enumerate(datasets.values()) if info['n_train'] >= small_n_train]

# Row labels: one per dataset, followed by the three average-rank rows.
idx = {i : name for i, name in enumerate(datasets)}
idx[len(datasets)] = 'Avg. Rank (smaller)'
idx[len(datasets)+1] = 'Avg. Rank (larger)'
idx[len(datasets)+2] = 'Avg. Rank (all)'


def _rank_table(mean_scores, ascending):
    """Rank the models on every dataset and append average-rank rows.

    Parameters
    ----------
    mean_scores : dict
        Maps a model name to a list of mean scores, one per dataset.
    ascending : bool
        Passed to DataFrame.rank: True gives rank 1 to the smallest score,
        False gives rank 1 to the largest score.

    Returns
    -------
    pandas.DataFrame
        Per-dataset ranks (one column per model) followed by the average
        rank over the smaller datasets, the larger datasets and all datasets.
    """
    ranks = pandas.DataFrame.from_dict(mean_scores).rank(axis=1, ascending=ascending)
    # All three averages are computed from the per-dataset ranks only, before
    # any summary row is attached.
    summary_rows = pandas.DataFrame([
        ranks.iloc[small_d_idx].mean(axis=0),
        ranks.iloc[large_d_idx].mean(axis=0),
        ranks.mean(axis=0),
    ])
    # DataFrame.append was removed in pandas 2.0; a single concat replaces the
    # chain of three appends.
    return pandas.concat([ranks, summary_rows], ignore_index=True).rename(index=idx)


print('Accuracies:\n')
# Higher accuracy is better, so the largest value receives rank 1.
df = _rank_table(results_mean['acc'], ascending=False)
display(df)

print('Nlpps:\n')
# Lower nlpp is better, so the smallest value receives rank 1.
df = _rank_table(results_mean['nlpp'], ascending=True)
display(df)

Accuracies:



Unnamed: 0,SigLSTM,SigGRU,Sig,LSTM,GRU,KConv1D
ArabicDigits,2.0,1.0,6.0,4.0,3.0,5.0
AUSLAN,1.0,2.0,4.0,5.0,3.0,6.0
CharacterTrajectories,1.0,4.0,2.0,5.0,6.0,3.0
CMUsubject16,1.5,1.5,4.0,5.0,3.0,6.0
DigitShapes,2.5,5.0,2.5,2.5,6.0,2.5
ECG,3.0,2.0,1.0,4.0,6.0,5.0
JapaneseVowels,6.0,3.0,4.5,4.5,2.0,1.0
KickvsPunch,1.5,3.0,1.5,5.0,6.0,4.0
LIBRAS,2.0,3.0,1.0,4.0,5.0,6.0
NetFlow,3.0,6.0,2.0,4.0,5.0,1.0


Nlpps:



Unnamed: 0,SigLSTM,SigGRU,Sig,LSTM,GRU,KConv1D
ArabicDigits,2.0,1.0,5.0,6.0,4.0,3.0
AUSLAN,1.0,2.0,4.0,5.0,3.0,6.0
CharacterTrajectories,1.0,3.0,2.0,5.0,6.0,4.0
CMUsubject16,2.0,1.0,3.0,6.0,4.0,5.0
DigitShapes,1.0,4.0,3.0,2.0,6.0,5.0
ECG,2.0,3.0,1.0,4.0,6.0,5.0
JapaneseVowels,6.0,2.0,5.0,3.0,1.0,4.0
KickvsPunch,2.0,3.0,1.0,6.0,5.0,4.0
LIBRAS,2.0,3.0,1.0,4.0,5.0,6.0
NetFlow,4.0,6.0,2.0,5.0,3.0,1.0
