In [1]:
from mlaut.analyze_results import AnalyseResults
from mlaut.data import Data
import pandas as pd
from mlaut.estimators.estimators import instantiate_default_estimators
from mlaut.analyze_results.scores import ScoreAccuracy

import matplotlib.pyplot as plt
# Let result tables render in full: up to 1000 rows and 50 columns.
pd.set_option('display.max_rows', 1000)
pd.set_option('display.max_columns', 50)

In [2]:
# Open the two HDF5 files this notebook reads from; both are opened with mode='r'.
# NOTE(review): neither handle is closed in the cells visible here -- confirm
# whether they are released further down.
data = Data()
input_io = data.open_hdf5('data/delgado.h5', mode='r')
out_io = data.open_hdf5('data/delgado-classification-deep.h5', mode='r')
# AnalyseResults is pointed at the 'openml/' group of the input file (original
# datasets) and the 'experiments/predictions/' group of the output file.
analyze = AnalyseResults(hdf5_output_io=out_io, 
                        hdf5_input_io=input_io, 
                        input_h5_original_datasets_group='openml/', 
                        output_h5_predictions_group='experiments/predictions/')


### All datasets

In [3]:
from mlaut.estimators.nn_estimators import Deep_NN_Classifier
# Candidate values handed to the estimators that are created with
# `hyperparameters=hyperparameters` below.
# NOTE(review): a batch_size of 0 looks unusual -- confirm how Deep_NN_Classifier
# interprets it.
hyperparameters = dict(
    epochs=[50, 100],
    batch_size=[0, 50, 100],
)
def keras_model1(num_classes, input_dim):
    """Build and compile the thin network: 288 -> 144 -> Dropout(0.5) -> 12 -> softmax.

    Args:
        num_classes: unit count of the final softmax layer.
        input_dim: forwarded as ``input_dim`` to the first Dense layer.

    Returns:
        The compiled OverwrittenSequentialClassifier (Adam with lr=0.001,
        'mean_squared_error' loss, 'accuracy' metric).

    NOTE(review): Dense, Dropout, optimizers and OverwrittenSequentialClassifier
    must already be in scope when this is called; no import for them is visible
    in this notebook -- confirm.
    """
    net = OverwrittenSequentialClassifier()
    layer_stack = (
        Dense(288, input_dim=input_dim, activation='relu'),
        Dense(144, activation='relu'),
        Dropout(0.5),
        Dense(12, activation='relu'),
        Dense(num_classes, activation='softmax'),
    )
    for layer in layer_stack:
        net.add(layer)

    net.compile(
        loss='mean_squared_error',
        optimizer=optimizers.Adam(lr=0.001),
        metrics=['accuracy'],
    )
    return net

# mlaut estimator built from keras_model1. No hyperparameters argument is passed;
# properties['name'] is set to 'NN-4-layer_thin_dropout'.
deep_nn_4_layer_thin_dropout = Deep_NN_Classifier(keras_model=keras_model1, 
                            properties={'name':'NN-4-layer_thin_dropout'})


def keras_model2(num_classes, input_dim):
    """Build and compile the wide network without dropout: 2500 -> 2000 -> 1500 -> softmax.

    Args:
        num_classes: unit count of the final softmax layer.
        input_dim: forwarded as ``input_dim`` to the first Dense layer.

    Returns:
        The compiled OverwrittenSequentialClassifier (Adam with lr=0.001,
        'mean_squared_error' loss, 'accuracy' metric).
    """
    net = OverwrittenSequentialClassifier()
    net.add(Dense(2500, input_dim=input_dim, activation='relu'))
    for units in (2000, 1500):
        net.add(Dense(units, activation='relu'))
    net.add(Dense(num_classes, activation='softmax'))

    net.compile(
        loss='mean_squared_error',
        optimizer=optimizers.Adam(lr=0.001),
        metrics=['accuracy'],
    )
    return net

# mlaut estimator built from keras_model2 and given the notebook-level
# `hyperparameters` dict (epochs / batch_size candidates).
# properties['name'] is set to 'NN-4-layer_wide_no_dropout'.
deep_nn_4_layer_wide_no_dropout = Deep_NN_Classifier(hyperparameters=hyperparameters,
                            keras_model=keras_model2,
                            properties={'name':'NN-4-layer_wide_no_dropout'})


def keras_model3(num_classes, input_dim):
    """Build and compile the wide network with one dropout: 2500 -> 2000 -> Dropout(0.5) -> 1500 -> softmax.

    Args:
        num_classes: unit count of the final softmax layer.
        input_dim: forwarded as ``input_dim`` to the first Dense layer.

    Returns:
        The compiled OverwrittenSequentialClassifier (Adam with lr=0.001,
        'mean_squared_error' loss, 'accuracy' metric).
    """
    net = OverwrittenSequentialClassifier()
    layer_stack = (
        Dense(2500, input_dim=input_dim, activation='relu'),
        Dense(2000, activation='relu'),
        Dropout(0.5),
        Dense(1500, activation='relu'),
        Dense(num_classes, activation='softmax'),
    )
    for layer in layer_stack:
        net.add(layer)

    net.compile(
        loss='mean_squared_error',
        optimizer=optimizers.Adam(lr=0.001),
        metrics=['accuracy'],
    )
    return net

# mlaut estimator built from keras_model3 and given the notebook-level
# `hyperparameters` dict (epochs / batch_size candidates).
# properties['name'] is set to 'NN-4-layer_wide_with_dropout'.
deep_nn_4_layer_wide_with_dropout = Deep_NN_Classifier(hyperparameters=hyperparameters,
                            keras_model=keras_model3,
                            properties={'name':'NN-4-layer_wide_with_dropout'})


def keras_model4(num_classes, input_dim):
    """Build and compile the 12-Dense-layer network with three Dropout(0.5) layers.

    Layer order: 5000 -> 4500 -> 4000 -> Dropout -> 3500 -> 3000 -> 2500 ->
    Dropout -> 2000 -> 1500 -> 1000 -> Dropout -> 500 -> 250 -> softmax.

    Args:
        num_classes: unit count of the final softmax layer.
        input_dim: forwarded as ``input_dim`` to the first Dense layer.

    Returns:
        The compiled OverwrittenSequentialClassifier (Adam with lr=0.001,
        'mean_squared_error' loss, 'accuracy' metric).
    """
    net = OverwrittenSequentialClassifier()
    net.add(Dense(5000, input_dim=input_dim, activation='relu'))

    # Each group of hidden widths is followed by one Dropout(0.5).
    for widths in ((4500, 4000), (3500, 3000, 2500), (2000, 1500, 1000)):
        for units in widths:
            net.add(Dense(units, activation='relu'))
        net.add(Dropout(0.5))

    for units in (500, 250):
        net.add(Dense(units, activation='relu'))
    net.add(Dense(num_classes, activation='softmax'))

    net.compile(
        loss='mean_squared_error',
        optimizer=optimizers.Adam(lr=0.001),
        metrics=['accuracy'],
    )
    return net

# mlaut estimator built from keras_model4 and given the notebook-level
# `hyperparameters` dict (epochs / batch_size candidates).
# properties['name'] is set to 'NN-12-layer_wide_with_dropout'.
deep_nn_12_layer_wide_with_dropout = Deep_NN_Classifier(hyperparameters=hyperparameters,
                            keras_model=keras_model4,
                            properties={'name':'NN-12-layer_wide_with_dropout'})



def keras_model_1_lr01(num_classes, input_dim):
    """Thin network (288 -> 144 -> Dropout(0.5) -> 12 -> softmax) compiled with Adam lr=0.1.

    Args:
        num_classes: unit count of the final softmax layer.
        input_dim: forwarded as ``input_dim`` to the first Dense layer.

    Returns:
        The compiled OverwrittenSequentialClassifier ('mean_squared_error'
        loss, 'accuracy' metric).
    """
    net = OverwrittenSequentialClassifier()
    layer_stack = (
        Dense(288, input_dim=input_dim, activation='relu'),
        Dense(144, activation='relu'),
        Dropout(0.5),
        Dense(12, activation='relu'),
        Dense(num_classes, activation='softmax'),
    )
    for layer in layer_stack:
        net.add(layer)

    net.compile(
        loss='mean_squared_error',
        optimizer=optimizers.Adam(lr=0.1),
        metrics=['accuracy'],
    )
    return net

# mlaut estimator built from keras_model_1_lr01. No hyperparameters argument is
# passed; properties['name'] is set to 'NN-4-layer_thin_dropout_lr01'.
deep_nn_4_layer_thin_dropout_lr01 = Deep_NN_Classifier(keras_model=keras_model_1_lr01, 
                            properties={'name':'NN-4-layer_thin_dropout_lr01'})

def keras_model_1_lr1(num_classes, input_dim):
    """Thin network (288 -> 144 -> Dropout(0.5) -> 12 -> softmax) compiled with Adam lr=1.

    Args:
        num_classes: unit count of the final softmax layer.
        input_dim: forwarded as ``input_dim`` to the first Dense layer.

    Returns:
        The compiled OverwrittenSequentialClassifier ('mean_squared_error'
        loss, 'accuracy' metric).
    """
    net = OverwrittenSequentialClassifier()
    layer_stack = (
        Dense(288, input_dim=input_dim, activation='relu'),
        Dense(144, activation='relu'),
        Dropout(0.5),
        Dense(12, activation='relu'),
        Dense(num_classes, activation='softmax'),
    )
    for layer in layer_stack:
        net.add(layer)

    net.compile(
        loss='mean_squared_error',
        optimizer=optimizers.Adam(lr=1),
        metrics=['accuracy'],
    )
    return net

# mlaut estimator built from keras_model_1_lr1. No hyperparameters argument is
# passed; properties['name'] is set to 'NN-4-layer_thin_dropout_lr1'.
deep_nn_4_layer_thin_dropout_lr1 = Deep_NN_Classifier(keras_model=keras_model_1_lr1, 
                            properties={'name':'NN-4-layer_thin_dropout_lr1'})


def keras_model_2_lr01(num_classes, input_dim):
    """Wide network without dropout (2500 -> 2000 -> 1500 -> softmax) compiled with Adam lr=0.1.

    Args:
        num_classes: unit count of the final softmax layer.
        input_dim: forwarded as ``input_dim`` to the first Dense layer.

    Returns:
        The compiled OverwrittenSequentialClassifier ('mean_squared_error'
        loss, 'accuracy' metric).
    """
    net = OverwrittenSequentialClassifier()
    net.add(Dense(2500, input_dim=input_dim, activation='relu'))
    for units in (2000, 1500):
        net.add(Dense(units, activation='relu'))
    net.add(Dense(num_classes, activation='softmax'))

    net.compile(
        loss='mean_squared_error',
        optimizer=optimizers.Adam(lr=0.1),
        metrics=['accuracy'],
    )
    return net

# mlaut estimator built from keras_model_2_lr01. No hyperparameters argument is
# passed; properties['name'] is set to 'NN-4-layer_wide_no_dropout_lr01'.
deep_nn_4_layer_wide_no_dropout_lr01 = Deep_NN_Classifier(keras_model=keras_model_2_lr01,
                            properties={'name':'NN-4-layer_wide_no_dropout_lr01'})


def keras_model_2_lr1(num_classes, input_dim):
    """Wide network without dropout (2500 -> 2000 -> 1500 -> softmax) compiled with Adam lr=1.

    Args:
        num_classes: unit count of the final softmax layer.
        input_dim: forwarded as ``input_dim`` to the first Dense layer.

    Returns:
        The compiled OverwrittenSequentialClassifier ('mean_squared_error'
        loss, 'accuracy' metric).
    """
    net = OverwrittenSequentialClassifier()
    net.add(Dense(2500, input_dim=input_dim, activation='relu'))
    for units in (2000, 1500):
        net.add(Dense(units, activation='relu'))
    net.add(Dense(num_classes, activation='softmax'))

    net.compile(
        loss='mean_squared_error',
        optimizer=optimizers.Adam(lr=1),
        metrics=['accuracy'],
    )
    return net

# mlaut estimator built from keras_model_2_lr1. No hyperparameters argument is
# passed; properties['name'] is set to 'NN-4-layer_wide_no_dropout_lr1'.
deep_nn_4_layer_wide_no_dropout_lr1 = Deep_NN_Classifier(keras_model=keras_model_2_lr1,
                            properties={'name':'NN-4-layer_wide_no_dropout_lr1'})



def keras_model_3_lr01(num_classes, input_dim):
    """Wide network with one dropout (2500 -> 2000 -> Dropout(0.5) -> 1500 -> softmax), Adam lr=0.1.

    Args:
        num_classes: unit count of the final softmax layer.
        input_dim: forwarded as ``input_dim`` to the first Dense layer.

    Returns:
        The compiled OverwrittenSequentialClassifier ('mean_squared_error'
        loss, 'accuracy' metric).
    """
    net = OverwrittenSequentialClassifier()
    layer_stack = (
        Dense(2500, input_dim=input_dim, activation='relu'),
        Dense(2000, activation='relu'),
        Dropout(0.5),
        Dense(1500, activation='relu'),
        Dense(num_classes, activation='softmax'),
    )
    for layer in layer_stack:
        net.add(layer)

    net.compile(
        loss='mean_squared_error',
        optimizer=optimizers.Adam(lr=0.1),
        metrics=['accuracy'],
    )
    return net

# mlaut estimator built from keras_model_3_lr01. No hyperparameters argument is
# passed; properties['name'] is set to 'NN-4-layer_wide_with_dropout_lr01'.
deep_nn_4_layer_wide_with_dropout_lr01 = Deep_NN_Classifier(keras_model=keras_model_3_lr01,
                            properties={'name':'NN-4-layer_wide_with_dropout_lr01'})


def keras_model_3_lr1(num_classes, input_dim):
    """Wide network with one dropout (2500 -> 2000 -> Dropout(0.5) -> 1500 -> softmax), Adam lr=1.

    Args:
        num_classes: unit count of the final softmax layer.
        input_dim: forwarded as ``input_dim`` to the first Dense layer.

    Returns:
        The compiled OverwrittenSequentialClassifier ('mean_squared_error'
        loss, 'accuracy' metric).
    """
    net = OverwrittenSequentialClassifier()
    layer_stack = (
        Dense(2500, input_dim=input_dim, activation='relu'),
        Dense(2000, activation='relu'),
        Dropout(0.5),
        Dense(1500, activation='relu'),
        Dense(num_classes, activation='softmax'),
    )
    for layer in layer_stack:
        net.add(layer)

    net.compile(
        loss='mean_squared_error',
        optimizer=optimizers.Adam(lr=1),
        metrics=['accuracy'],
    )
    return net

# mlaut estimator built from keras_model_3_lr1. No hyperparameters argument is
# passed; properties['name'] is set to 'NN-4-layer_wide_with_dropout_lr1'.
deep_nn_4_layer_wide_with_dropout_lr1 = Deep_NN_Classifier(keras_model=keras_model_3_lr1,
                            properties={'name':'NN-4-layer_wide_with_dropout_lr1'})



def keras_model_4_lr01(num_classes, input_dim):
    """12-Dense-layer network with three Dropout(0.5) layers, compiled with Adam lr=0.1.

    Layer order: 5000 -> 4500 -> 4000 -> Dropout -> 3500 -> 3000 -> 2500 ->
    Dropout -> 2000 -> 1500 -> 1000 -> Dropout -> 500 -> 250 -> softmax.

    Args:
        num_classes: unit count of the final softmax layer.
        input_dim: forwarded as ``input_dim`` to the first Dense layer.

    Returns:
        The compiled OverwrittenSequentialClassifier ('mean_squared_error'
        loss, 'accuracy' metric).
    """
    net = OverwrittenSequentialClassifier()
    net.add(Dense(5000, input_dim=input_dim, activation='relu'))

    # Each group of hidden widths is followed by one Dropout(0.5).
    for widths in ((4500, 4000), (3500, 3000, 2500), (2000, 1500, 1000)):
        for units in widths:
            net.add(Dense(units, activation='relu'))
        net.add(Dropout(0.5))

    for units in (500, 250):
        net.add(Dense(units, activation='relu'))
    net.add(Dense(num_classes, activation='softmax'))

    net.compile(
        loss='mean_squared_error',
        optimizer=optimizers.Adam(lr=0.1),
        metrics=['accuracy'],
    )
    return net

# mlaut estimator built from keras_model_4_lr01. No hyperparameters argument is
# passed; properties['name'] is set to 'NN-12-layer_wide_with_dropout_lr01'.
deep_nn_12_layer_wide_with_dropout_lr01 = Deep_NN_Classifier(keras_model=keras_model_4_lr01,
                            properties={'name':'NN-12-layer_wide_with_dropout_lr01'})

def keras_model_4_lr1(num_classes, input_dim):
    """12-Dense-layer network with three Dropout(0.5) layers, compiled with Adam lr=1.

    Layer order: 5000 -> 4500 -> 4000 -> Dropout -> 3500 -> 3000 -> 2500 ->
    Dropout -> 2000 -> 1500 -> 1000 -> Dropout -> 500 -> 250 -> softmax.

    Args:
        num_classes: unit count of the final softmax layer.
        input_dim: forwarded as ``input_dim`` to the first Dense layer.

    Returns:
        The compiled OverwrittenSequentialClassifier ('mean_squared_error'
        loss, 'accuracy' metric).
    """
    net = OverwrittenSequentialClassifier()
    net.add(Dense(5000, input_dim=input_dim, activation='relu'))

    # Each group of hidden widths is followed by one Dropout(0.5).
    for widths in ((4500, 4000), (3500, 3000, 2500), (2000, 1500, 1000)):
        for units in widths:
            net.add(Dense(units, activation='relu'))
        net.add(Dropout(0.5))

    for units in (500, 250):
        net.add(Dense(units, activation='relu'))
    net.add(Dense(num_classes, activation='softmax'))

    net.compile(
        loss='mean_squared_error',
        optimizer=optimizers.Adam(lr=1),
        metrics=['accuracy'],
    )
    return net

# mlaut estimator built from keras_model_4_lr1. No hyperparameters argument is
# passed; properties['name'] is set to 'NN-12-layer_wide_with_dropout_lr1'.
deep_nn_12_layer_wide_with_dropout_lr1 = Deep_NN_Classifier(keras_model=keras_model_4_lr1,
                            properties={'name':'NN-12-layer_wide_with_dropout_lr1'})

def keras_model_5_lr0001(num_classes, input_dim):
    """Network with Dropout(0.5) after every hidden layer, compiled with Adam lr=0.001.

    Layer order: 2000 -> Dropout -> 1000 -> Dropout -> 1000 -> Dropout ->
    50 -> Dropout -> softmax.

    Args:
        num_classes: unit count of the final softmax layer.
        input_dim: forwarded as ``input_dim`` to the first Dense layer.

    Returns:
        The compiled OverwrittenSequentialClassifier ('mean_squared_error'
        loss, 'accuracy' metric).

    NOTE(review): the ``lr0001`` suffix goes with lr=0.001 here, while other
    names in this notebook use ``lr001`` for the same rate -- confirm which
    was intended.
    """
    net = OverwrittenSequentialClassifier()
    net.add(Dense(2000, input_dim=input_dim, activation='relu'))
    net.add(Dropout(0.5))
    for units in (1000, 1000, 50):
        net.add(Dense(units, activation='relu'))
        net.add(Dropout(0.5))
    net.add(Dense(num_classes, activation='softmax'))

    net.compile(
        loss='mean_squared_error',
        optimizer=optimizers.Adam(lr=0.001),
        metrics=['accuracy'],
    )
    return net
# mlaut estimator built from keras_model_5_lr0001. No hyperparameters argument is
# passed; properties['name'] is set to 'NN-4-layer-droput-each-layer_lr0001'.
deep_nn_4_layer_droput_each_layer_lr0001 = Deep_NN_Classifier(keras_model=keras_model_5_lr0001,
                                        properties={'name':'NN-4-layer-droput-each-layer_lr0001'})

def keras_model_5_lr01(num_classes, input_dim):
    """Network with Dropout(0.5) after every hidden layer, compiled with Adam lr=0.1.

    Layer order: 2000 -> Dropout -> 1000 -> Dropout -> 1000 -> Dropout ->
    50 -> Dropout -> softmax.

    Args:
        num_classes: unit count of the final softmax layer.
        input_dim: forwarded as ``input_dim`` to the first Dense layer.

    Returns:
        The compiled OverwrittenSequentialClassifier ('mean_squared_error'
        loss, 'accuracy' metric).
    """
    net = OverwrittenSequentialClassifier()
    net.add(Dense(2000, input_dim=input_dim, activation='relu'))
    net.add(Dropout(0.5))
    for units in (1000, 1000, 50):
        net.add(Dense(units, activation='relu'))
        net.add(Dropout(0.5))
    net.add(Dense(num_classes, activation='softmax'))

    net.compile(
        loss='mean_squared_error',
        optimizer=optimizers.Adam(lr=0.1),
        metrics=['accuracy'],
    )
    return net
# mlaut estimator built from keras_model_5_lr01. No hyperparameters argument is
# passed; properties['name'] is set to 'NN-4-layer-droput-each-layer_lr01'.
deep_nn_4_layer_droput_each_layer_lr01 = Deep_NN_Classifier(keras_model=keras_model_5_lr01,
                                        properties={'name':'NN-4-layer-droput-each-layer_lr01'})

def keras_model_5_lr1(num_classes, input_dim):
    """Network with Dropout(0.5) after every hidden layer, compiled with Adam lr=1.

    Layer order: 2000 -> Dropout -> 1000 -> Dropout -> 1000 -> Dropout ->
    50 -> Dropout -> softmax.

    Args:
        num_classes: unit count of the final softmax layer.
        input_dim: forwarded as ``input_dim`` to the first Dense layer.

    Returns:
        The compiled OverwrittenSequentialClassifier ('mean_squared_error'
        loss, 'accuracy' metric).
    """
    net = OverwrittenSequentialClassifier()
    net.add(Dense(2000, input_dim=input_dim, activation='relu'))
    net.add(Dropout(0.5))
    for units in (1000, 1000, 50):
        net.add(Dense(units, activation='relu'))
        net.add(Dropout(0.5))
    net.add(Dense(num_classes, activation='softmax'))

    net.compile(
        loss='mean_squared_error',
        optimizer=optimizers.Adam(lr=1),
        metrics=['accuracy'],
    )
    return net
# mlaut estimator for the lr=1 variant of the dropout-after-every-layer network.
# It must wrap keras_model_5_lr1 (Adam lr=1), not the lr=0.1 builder
# keras_model_5_lr01, so that the model matches the 'lr1' name in properties.
# No hyperparameters argument is passed.
deep_nn_4_layer_droput_each_layer_lr1 = Deep_NN_Classifier(keras_model=keras_model_5_lr1,
                                        properties={'name':'NN-4-layer-droput-each-layer_lr1'})

def keras_model_6_lr001(num_classes, input_dim):
    """Network with dropout on the input: Dropout(0.7) -> 1024 -> Dropout(0.5) -> softmax, Adam lr=0.001.

    Args:
        num_classes: unit count of the final softmax layer.
        input_dim: used as ``input_shape=(input_dim,)`` of the leading Dropout layer.

    Returns:
        The compiled OverwrittenSequentialClassifier ('mean_squared_error'
        loss, 'accuracy' metric).
    """
    net = OverwrittenSequentialClassifier()
    layer_stack = (
        Dropout(0.7, input_shape=(input_dim,)),
        Dense(1024, activation='relu'),
        Dropout(0.5),
        Dense(num_classes, activation='softmax'),
    )
    for layer in layer_stack:
        net.add(layer)

    net.compile(
        loss='mean_squared_error',
        optimizer=optimizers.Adam(lr=0.001),
        metrics=['accuracy'],
    )
    return net
# mlaut estimator built from keras_model_6_lr001. No hyperparameters argument is
# passed; properties['name'] is set to 'NN-2-layer-droput-input-layer_lr001'.
deep_nn_2_layer_droput_input_layer_lr001 = Deep_NN_Classifier(keras_model=keras_model_6_lr001,
                                        properties={'name':'NN-2-layer-droput-input-layer_lr001'})

def keras_model_6_lr01(num_classes, input_dim):
    """Network with dropout on the input: Dropout(0.7) -> 1024 -> Dropout(0.5) -> softmax, Adam lr=0.1.

    Args:
        num_classes: unit count of the final softmax layer.
        input_dim: used as ``input_shape=(input_dim,)`` of the leading Dropout layer.

    Returns:
        The compiled OverwrittenSequentialClassifier ('mean_squared_error'
        loss, 'accuracy' metric).
    """
    net = OverwrittenSequentialClassifier()
    layer_stack = (
        Dropout(0.7, input_shape=(input_dim,)),
        Dense(1024, activation='relu'),
        Dropout(0.5),
        Dense(num_classes, activation='softmax'),
    )
    for layer in layer_stack:
        net.add(layer)

    net.compile(
        loss='mean_squared_error',
        optimizer=optimizers.Adam(lr=0.1),
        metrics=['accuracy'],
    )
    return net
# mlaut estimator built from keras_model_6_lr01. No hyperparameters argument is
# passed; properties['name'] is set to 'NN-2-layer-droput-input-layer_lr01'.
deep_nn_2_layer_droput_input_layer_lr01 = Deep_NN_Classifier(keras_model=keras_model_6_lr01,
                                        properties={'name':'NN-2-layer-droput-input-layer_lr01'})

def keras_model_6_lr1(num_classes, input_dim):
    """Network with dropout on the input: Dropout(0.7) -> 1024 -> Dropout(0.5) -> softmax, Adam lr=1.

    Args:
        num_classes: unit count of the final softmax layer.
        input_dim: used as ``input_shape=(input_dim,)`` of the leading Dropout layer.

    Returns:
        The compiled OverwrittenSequentialClassifier ('mean_squared_error'
        loss, 'accuracy' metric).
    """
    net = OverwrittenSequentialClassifier()
    layer_stack = (
        Dropout(0.7, input_shape=(input_dim,)),
        Dense(1024, activation='relu'),
        Dropout(0.5),
        Dense(num_classes, activation='softmax'),
    )
    for layer in layer_stack:
        net.add(layer)

    net.compile(
        loss='mean_squared_error',
        optimizer=optimizers.Adam(lr=1),
        metrics=['accuracy'],
    )
    return net

# mlaut estimator built from keras_model_6_lr1. No hyperparameters argument is
# passed; properties['name'] is set to 'NN-2-layer-droput-input-layer_lr1'.
deep_nn_2_layer_droput_input_layer_lr1 = Deep_NN_Classifier(keras_model=keras_model_6_lr1,
                                        properties={'name':'NN-2-layer-droput-input-layer_lr1'})

# All deep-network estimators defined in this cell, in the order they are
# handed to the analysis calls.
estimators = [
    deep_nn_4_layer_thin_dropout_lr01,
    deep_nn_4_layer_thin_dropout_lr1,
    deep_nn_4_layer_wide_no_dropout_lr01,
    deep_nn_4_layer_wide_no_dropout_lr1,
    deep_nn_4_layer_wide_with_dropout_lr01,
    deep_nn_4_layer_wide_with_dropout_lr1,
    deep_nn_12_layer_wide_with_dropout_lr01,
    deep_nn_12_layer_wide_with_dropout_lr1,
    deep_nn_4_layer_droput_each_layer_lr0001,
    deep_nn_4_layer_droput_each_layer_lr01,
    deep_nn_4_layer_droput_each_layer_lr1,
    deep_nn_4_layer_thin_dropout,
    deep_nn_4_layer_wide_no_dropout,
    deep_nn_4_layer_wide_with_dropout,
    deep_nn_12_layer_wide_with_dropout,
    deep_nn_2_layer_droput_input_layer_lr001,
    deep_nn_2_layer_droput_input_layer_lr01,
    deep_nn_2_layer_droput_input_layer_lr1,
]

# Append mlaut's default classification estimators, leaving out the one named
# 'NeuralNetworkDeepClassifier'.
estim = instantiate_default_estimators(['Classification'])
for e in estim:
    # Compare by value: `is not` on a string literal tests object identity,
    # which only works when the two strings happen to be interned (and is a
    # SyntaxWarning on Python 3.8+).
    if e.properties['name'] != 'NeuralNetworkDeepClassifier':
        estimators.append(e)

In [4]:
# Accuracy is the metric passed to the error computation below.
score_accuracy = ScoreAccuracy()

# prediction_errors is called with the metric and the estimator list and its
# result is unpacked into three objects. Judging by the names: scores per
# estimator, scores per dataset per estimator, and the latter as a DataFrame
# -- confirm against mlaut's AnalyseResults.
(errors_per_estimator, 
 errors_per_dataset_per_estimator, 
 errors_per_dataset_per_estimator_df) = analyze.prediction_errors(score_accuracy, estimators)



#### Simple average and standard error

In [5]:
# Per-estimator summary computed from errors_per_estimator; the displayed table
# has the columns avg_score and std_error.
avg_and_std_error = analyze.average_and_std_error(errors_per_estimator)
# Show the table rounded to 3 decimals.
avg_and_std_error.round(3)

Unnamed: 0,avg_score,std_error
BaselineClassifier,0.419,0.019
NN-12-layer_wide_with_dropout_lr1,0.458,0.023
NN-4-layer_thin_dropout_lr1,0.462,0.023
NN-4-layer_wide_with_dropout_lr01,0.478,0.022
NN-12-layer_wide_with_dropout_lr01,0.479,0.022
NN-4-layer-droput-each-layer_lr01,0.482,0.022
NN-4-layer_wide_with_dropout_lr1,0.483,0.022
NN-4-layer-droput-each-layer_lr1,0.488,0.022
NN-4-layer_wide_no_dropout_lr1,0.49,0.022
NN-4-layer_wide_no_dropout_lr01,0.494,0.022


#### Average Rank

In [6]:
# Average rank per estimator, computed from errors_per_estimator.
# NOTE(review): ascending=False presumably makes the highest score rank first
# -- confirm against analyze.ranks.
avg_rank = analyze.ranks(errors_per_estimator, ascending=False)
# Show the table rounded to 1 decimal.
avg_rank.round(1)

Unnamed: 0,avg_rank
RandomForestClassifier,4.3
SVC,5.0
K_Neighbours,5.6
BaggingClassifier,5.8
GradientBoostingClassifier,7.6
PassiveAggressiveClassifier,8.5
NN-4-layer_wide_with_dropout,10.0
NN-4-layer_wide_no_dropout,10.5
BernoulliNaiveBayes,10.9
NN-4-layer-droput-each-layer_lr0001,11.2


#### Training time

In [7]:
# average_training_time returns two objects for the given estimators; judging by
# the names, the per-estimator average and a per-dataset breakdown.
avg_training_time, training_time_per_dataset = analyze.average_training_time(estimators)


In [8]:
# Display the average training time table (seconds per estimator).
avg_training_time

Unnamed: 0,avg training time (in sec)
BaselineClassifier,0.001
GaussianNaiveBayes,0.004
BernoulliNaiveBayes,0.005
NN-4-layer_thin_dropout,2.869
NN-4-layer_thin_dropout_lr1,4.299
BaggingClassifier,5.231
NN-2-layer-droput-input-layer_lr01,5.413
NN-2-layer-droput-input-layer_lr001,5.42
NN-2-layer-droput-input-layer_lr1,5.437
NN-4-layer_thin_dropout_lr01,5.559


#### Merge average score, rank and training time

In [9]:
# Join the rank, score and training-time tables on their shared index
# (the estimator names).
avg_metrics = pd.merge(avg_rank, avg_and_std_error, left_index=True, right_index=True)
avg_metrics = pd.merge(avg_metrics, avg_training_time, left_index=True, right_index=True)

# Give the four networks whose names carry no learning-rate suffix an explicit
# '_lr001' suffix in the combined table.
_index_relabels = (
    ('NN-12-layer_wide_with_dropout', 'NN-12-layer_wide_with_dropout_lr001'),
    ('NN-4-layer_wide_with_dropout', 'NN-4-layer_wide_with_dropout_lr001'),
    ('NN-4-layer_wide_no_dropout', 'NN-4-layer_wide_no_dropout_lr001'),
    ('NN-4-layer_thin_dropout', 'NN-4-layer_thin_dropout_lr001'),
)
as_list = avg_metrics.index.tolist()
for _old_label, _new_label in _index_relabels:
    # list.index raises ValueError if the label is missing from the table.
    idx = as_list.index(_old_label)
    as_list[idx] = _new_label
avg_metrics.index = as_list

avg_metrics

Unnamed: 0,avg_rank,avg_score,std_error,avg training time (in sec)
RandomForestClassifier,4.3,0.831,0.013,14.277
SVC,5.0,0.818,0.014,1742.466
K_Neighbours,5.6,0.805,0.014,107.796
BaggingClassifier,5.8,0.82,0.014,5.231
GradientBoostingClassifier,7.6,0.79,0.016,49.509
PassiveAggressiveClassifier,8.5,0.758,0.016,19.352
NN-4-layer_wide_with_dropout_lr001,10.0,0.692,0.021,14.617
NN-4-layer_wide_no_dropout_lr001,10.5,0.694,0.021,14.609
BernoulliNaiveBayes,10.9,0.707,0.015,0.005
NN-4-layer-droput-each-layer_lr0001,11.2,0.662,0.022,6.786


#### Cohen's d

In [10]:
# Pairwise Cohen's d between estimators, displayed to two decimals.
cohens_d = analyze.cohens_d(errors_per_estimator)
cohens_d.round(decimals=2)

estimator_2,BaselineClassifier,BernoulliNaiveBayes,GaussianNaiveBayes,GradientBoostingClassifier,K_Neighbours,NN-12-layer_wide_with_dropout,NN-12-layer_wide_with_dropout_lr01,NN-12-layer_wide_with_dropout_lr1,NN-2-layer-droput-input-layer_lr001,NN-2-layer-droput-input-layer_lr01,NN-2-layer-droput-input-layer_lr1,NN-4-layer-droput-each-layer_lr0001,NN-4-layer-droput-each-layer_lr01,NN-4-layer-droput-each-layer_lr1,NN-4-layer_thin_dropout,NN-4-layer_thin_dropout_lr01,NN-4-layer_thin_dropout_lr1,NN-4-layer_wide_no_dropout,NN-4-layer_wide_no_dropout_lr01,NN-4-layer_wide_no_dropout_lr1,NN-4-layer_wide_with_dropout,NN-4-layer_wide_with_dropout_lr01,NN-4-layer_wide_with_dropout_lr1,PassiveAggressiveClassifier,RandomForestClassifier,SVC
estimator_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1
BaggingClassifier,-2.245,-0.732,-0.824,-0.189,-0.1,-1.431,-1.746,-1.819,-0.87,-1.359,-1.552,-0.815,-1.705,-1.692,-0.861,-1.572,-1.774,-0.668,-1.67,-1.676,-0.679,-1.735,-1.713,-0.388,0.077,-0.01
BaselineClassifier,,1.572,1.249,1.965,2.188,0.52,0.274,0.175,1.103,0.545,0.401,1.117,0.284,0.314,1.063,0.326,0.191,1.29,0.339,0.318,1.285,0.267,0.29,1.8,2.336,2.25
BernoulliNaiveBayes,,,-0.184,0.498,0.648,-0.847,-1.143,-1.226,-0.269,-0.79,-0.969,-0.228,-1.112,-1.093,-0.277,-1.008,-1.191,-0.068,-1.07,-1.081,-0.077,-1.138,-1.115,0.307,0.819,0.72
GaussianNaiveBayes,,,,0.618,0.75,-0.625,-0.89,-0.972,-0.087,-0.578,-0.739,-0.053,-0.866,-0.846,-0.099,-0.783,-0.943,0.096,-0.824,-0.837,0.089,-0.889,-0.867,0.45,0.898,0.82
GradientBoostingClassifier,,,,,0.1,-1.224,-1.519,-1.594,-0.676,-1.16,-1.341,-0.628,-1.484,-1.469,-0.674,-1.369,-1.555,-0.483,-1.447,-1.455,-0.493,-1.512,-1.49,-0.185,0.264,0.18
K_Neighbours,,,,,,-1.371,-1.687,-1.762,-0.801,-1.299,-1.492,-0.747,-1.646,-1.633,-0.794,-1.515,-1.717,-0.597,-1.611,-1.617,-0.608,-1.677,-1.654,-0.301,0.18,0.09
NN-12-layer_wide_with_dropout,,,,,,,-0.236,-0.321,0.519,0.032,-0.109,0.542,-0.222,-0.197,0.496,-0.166,-0.302,0.69,-0.176,-0.192,0.684,-0.24,-0.219,1.073,1.502,1.43
NN-12-layer_wide_with_dropout_lr01,,,,,,,,-0.089,0.772,0.264,0.125,0.791,0.012,0.038,0.742,0.061,-0.072,0.946,0.061,0.043,0.94,-0.005,0.016,1.366,1.822,1.74
NN-12-layer_wide_with_dropout_lr1,,,,,,,,,0.852,0.348,0.211,0.87,0.1,0.127,0.822,0.146,0.016,1.024,0.149,0.132,1.019,0.083,0.105,1.444,1.894,1.82
NN-2-layer-droput-input-layer_lr001,,,,,,,,,,-0.477,-0.629,0.031,-0.751,-0.73,-0.014,-0.675,-0.826,0.175,-0.708,-0.721,0.168,-0.771,-0.751,0.518,0.94,0.86


#### t-test

In [11]:
# Pairwise t-test between estimators; the DataFrame form holds a
# (t_stat, p_val) column pair per estimator and is shown to three decimals.
t_test, t_test_df = analyze.t_test(errors_per_estimator)
t_test_df.round(decimals=3)

Unnamed: 0_level_0,BaggingClassifier,BaggingClassifier,BaselineClassifier,BaselineClassifier,BernoulliNaiveBayes,BernoulliNaiveBayes,GaussianNaiveBayes,GaussianNaiveBayes,GradientBoostingClassifier,GradientBoostingClassifier,K_Neighbours,K_Neighbours,NN-12-layer_wide_with_dropout,NN-12-layer_wide_with_dropout,NN-12-layer_wide_with_dropout_lr01,NN-12-layer_wide_with_dropout_lr01,NN-12-layer_wide_with_dropout_lr1,NN-12-layer_wide_with_dropout_lr1,NN-2-layer-droput-input-layer_lr001,NN-2-layer-droput-input-layer_lr001,NN-2-layer-droput-input-layer_lr01,NN-2-layer-droput-input-layer_lr01,NN-2-layer-droput-input-layer_lr1,NN-2-layer-droput-input-layer_lr1,NN-4-layer-droput-each-layer_lr0001,...,NN-4-layer-droput-each-layer_lr1,NN-4-layer_thin_dropout,NN-4-layer_thin_dropout,NN-4-layer_thin_dropout_lr01,NN-4-layer_thin_dropout_lr01,NN-4-layer_thin_dropout_lr1,NN-4-layer_thin_dropout_lr1,NN-4-layer_wide_no_dropout,NN-4-layer_wide_no_dropout,NN-4-layer_wide_no_dropout_lr01,NN-4-layer_wide_no_dropout_lr01,NN-4-layer_wide_no_dropout_lr1,NN-4-layer_wide_no_dropout_lr1,NN-4-layer_wide_with_dropout,NN-4-layer_wide_with_dropout,NN-4-layer_wide_with_dropout_lr01,NN-4-layer_wide_with_dropout_lr01,NN-4-layer_wide_with_dropout_lr1,NN-4-layer_wide_with_dropout_lr1,PassiveAggressiveClassifier,PassiveAggressiveClassifier,RandomForestClassifier,RandomForestClassifier,SVC,SVC
Unnamed: 0_level_1,t_stat,p_val,t_stat,p_val,t_stat,p_val,t_stat,p_val,t_stat,p_val,t_stat,p_val,t_stat,p_val,t_stat,p_val,t_stat,p_val,t_stat,p_val,t_stat,p_val,t_stat,p_val,t_stat,...,p_val,t_stat,p_val,t_stat,p_val,t_stat,p_val,t_stat,p_val,t_stat,p_val,t_stat,p_val,t_stat,p_val,t_stat,p_val,t_stat,p_val,t_stat,p_val,t_stat,p_val,t_stat,p_val
BaggingClassifier,0.0,1.0,16.724,0.0,5.456,0.0,6.135,0.0,1.411,0.16,0.745,0.457,10.662,0.0,13.005,0.0,13.551,0.0,6.484,0.0,10.123,0.0,11.56,0.0,6.072,...,0.0,6.417,0.0,11.714,0.0,13.219,0.0,4.977,0.0,12.44,0.0,12.487,0.0,5.057,0.0,12.926,0.0,12.761,0.0,2.892,0.004,-0.575,0.566,0.099,0.921
BaselineClassifier,-16.724,0.0,0.0,1.0,-11.709,0.0,-9.308,0.0,-14.64,0.0,-16.301,0.0,-3.876,0.0,-2.044,0.042,-1.307,0.193,-8.218,0.0,-4.057,0.0,-2.984,0.003,-8.321,...,0.02,-7.921,0.0,-2.432,0.016,-1.424,0.156,-9.614,0.0,-2.523,0.012,-2.37,0.019,-9.575,0.0,-1.986,0.048,-2.158,0.032,-13.413,0.0,-17.401,0.0,-16.729,0.0
BernoulliNaiveBayes,-5.456,0.0,11.709,0.0,0.0,1.0,1.373,0.171,-3.712,0.0,-4.825,0.0,6.31,0.0,8.513,0.0,9.134,0.0,2.002,0.046,5.887,0.0,7.22,0.0,1.695,...,0.0,2.066,0.04,7.512,0.0,8.87,0.0,0.508,0.612,7.969,0.0,8.053,0.0,0.575,0.566,8.48,0.0,8.31,0.0,-2.291,0.023,-6.099,0.0,-5.4,0.0
GaussianNaiveBayes,-6.135,0.0,9.308,0.0,-1.373,0.171,0.0,1.0,-4.604,0.0,-5.588,0.0,4.655,0.0,6.631,0.0,7.24,0.0,0.646,0.519,4.308,0.0,5.502,0.0,0.392,...,0.0,0.739,0.46,5.833,0.0,7.028,0.0,-0.718,0.473,6.138,0.0,6.232,0.0,-0.66,0.51,6.622,0.0,6.462,0.0,-3.354,0.001,-6.688,0.0,-6.085,0.0
GradientBoostingClassifier,-1.411,0.16,14.64,0.0,3.712,0.0,4.604,0.0,0.0,1.0,-0.743,0.458,9.116,0.0,11.316,0.0,11.878,0.0,5.04,0.0,8.643,0.0,9.988,0.0,4.682,...,0.0,5.022,0.0,10.197,0.0,11.588,0.0,3.597,0.0,10.78,0.0,10.842,0.0,3.669,0.0,11.261,0.0,11.099,0.0,1.381,0.169,-1.964,0.051,-1.329,0.185
K_Neighbours,-0.745,0.457,16.301,0.0,4.825,0.0,5.588,0.0,0.743,0.458,0.0,1.0,10.21,0.0,12.569,0.0,13.126,0.0,5.971,0.0,9.675,0.0,11.118,0.0,5.565,...,0.0,5.916,0.0,11.285,0.0,12.794,0.0,4.446,0.0,11.999,0.0,12.048,0.0,4.526,0.0,12.492,0.0,12.324,0.0,2.244,0.026,-1.344,0.18,-0.65,0.516
NN-12-layer_wide_with_dropout,-10.662,0.0,3.876,0.0,-6.31,0.0,-4.655,0.0,-9.116,0.0,-10.21,0.0,0.0,1.0,1.758,0.08,2.392,0.018,-3.87,0.0,-0.238,0.812,0.81,0.419,-4.037,...,0.143,-3.693,0.0,1.235,0.218,2.247,0.026,-5.141,0.0,1.308,0.192,1.427,0.155,-5.096,0.0,1.785,0.076,1.631,0.104,-7.995,0.0,-11.191,0.0,-10.634,0.0
NN-12-layer_wide_with_dropout_lr01,-13.005,0.0,2.044,0.042,-8.513,0.0,-6.631,0.0,-11.316,0.0,-12.569,0.0,-1.758,0.08,0.0,1.0,0.666,0.506,-5.748,0.0,-1.969,0.05,-0.928,0.355,-5.889,...,0.775,-5.529,0.0,-0.458,0.648,0.535,0.593,-7.046,0.0,-0.454,0.651,-0.323,0.747,-7.003,0.0,0.04,0.968,-0.119,0.905,-10.178,0.0,-13.57,0.0,-12.987,0.0
NN-12-layer_wide_with_dropout_lr1,-13.551,0.0,1.307,0.193,-9.134,0.0,-7.24,0.0,-11.878,0.0,-13.126,0.0,-2.392,0.018,-0.666,0.506,0.0,1.0,-6.348,0.0,-2.59,0.01,-1.569,0.118,-6.48,...,0.345,-6.122,0.0,-1.091,0.277,-0.123,0.903,-7.63,0.0,-1.113,0.267,-0.98,0.328,-7.589,0.0,-0.622,0.535,-0.78,0.436,-10.759,0.0,-14.108,0.0,-13.535,0.0
NN-2-layer-droput-input-layer_lr001,-6.484,0.0,8.218,0.0,-2.002,0.046,-0.646,0.519,-5.04,0.0,-5.971,0.0,3.87,0.0,5.748,0.0,6.348,0.0,0.0,1.0,3.554,0.0,4.687,0.0,-0.232,...,0.0,0.104,0.917,5.031,0.0,6.157,0.0,-1.306,0.193,5.277,0.0,5.374,0.0,-1.252,0.212,5.747,0.0,5.593,0.0,-3.86,0.0,-7.0,0.0,-6.437,0.0


In [12]:
# First eight columns of the t-test table (four estimators, two columns each).
t_test_df.iloc[:, :8]

Unnamed: 0_level_0,BaggingClassifier,BaggingClassifier,BaselineClassifier,BaselineClassifier,BernoulliNaiveBayes,BernoulliNaiveBayes,GaussianNaiveBayes,GaussianNaiveBayes
Unnamed: 0_level_1,t_stat,p_val,t_stat,p_val,t_stat,p_val,t_stat,p_val
BaggingClassifier,0.0,1.0,16.724,0.0,5.456,0.0,6.135,0.0
BaselineClassifier,-16.724,0.0,0.0,1.0,-11.709,0.0,-9.308,0.0
BernoulliNaiveBayes,-5.456,0.0,11.709,0.0,0.0,1.0,1.373,0.171
GaussianNaiveBayes,-6.135,0.0,9.308,0.0,-1.373,0.171,0.0,1.0
GradientBoostingClassifier,-1.411,0.16,14.64,0.0,3.712,0.0,4.604,0.0
K_Neighbours,-0.745,0.457,16.301,0.0,4.825,0.0,5.588,0.0
NN-12-layer_wide_with_dropout,-10.662,0.0,3.876,0.0,-6.31,0.0,-4.655,0.0
NN-12-layer_wide_with_dropout_lr01,-13.005,0.0,2.044,0.042,-8.513,0.0,-6.631,0.0
NN-12-layer_wide_with_dropout_lr1,-13.551,0.0,1.307,0.193,-9.134,0.0,-7.24,0.0
NN-2-layer-droput-input-layer_lr001,-6.484,0.0,8.218,0.0,-2.002,0.046,-0.646,0.519


#### Sign test

In [13]:
# Pairwise sign test between estimators; the DataFrame form is displayed.
# NOTE(review): the output columns are labelled `t_stat`/`p_val` even though
# this is a sign test — confirm the labelling inside mlaut.
sign_test, sign_test_df = analyze.sign_test(errors_per_estimator)
sign_test_df

Unnamed: 0_level_0,BaggingClassifier,BaggingClassifier,BaselineClassifier,BaselineClassifier,BernoulliNaiveBayes,BernoulliNaiveBayes,GaussianNaiveBayes,GaussianNaiveBayes,GradientBoostingClassifier,GradientBoostingClassifier,K_Neighbours,K_Neighbours,NN-12-layer_wide_with_dropout,NN-12-layer_wide_with_dropout,NN-12-layer_wide_with_dropout_lr01,NN-12-layer_wide_with_dropout_lr01,NN-12-layer_wide_with_dropout_lr1,NN-12-layer_wide_with_dropout_lr1,NN-2-layer-droput-input-layer_lr001,NN-2-layer-droput-input-layer_lr001,NN-2-layer-droput-input-layer_lr01,NN-2-layer-droput-input-layer_lr01,NN-2-layer-droput-input-layer_lr1,NN-2-layer-droput-input-layer_lr1,NN-4-layer-droput-each-layer_lr0001,...,NN-4-layer-droput-each-layer_lr1,NN-4-layer_thin_dropout,NN-4-layer_thin_dropout,NN-4-layer_thin_dropout_lr01,NN-4-layer_thin_dropout_lr01,NN-4-layer_thin_dropout_lr1,NN-4-layer_thin_dropout_lr1,NN-4-layer_wide_no_dropout,NN-4-layer_wide_no_dropout,NN-4-layer_wide_no_dropout_lr01,NN-4-layer_wide_no_dropout_lr01,NN-4-layer_wide_no_dropout_lr1,NN-4-layer_wide_no_dropout_lr1,NN-4-layer_wide_with_dropout,NN-4-layer_wide_with_dropout,NN-4-layer_wide_with_dropout_lr01,NN-4-layer_wide_with_dropout_lr01,NN-4-layer_wide_with_dropout_lr1,NN-4-layer_wide_with_dropout_lr1,PassiveAggressiveClassifier,PassiveAggressiveClassifier,RandomForestClassifier,RandomForestClassifier,SVC,SVC
Unnamed: 0_level_1,t_stat,p_val,t_stat,p_val,t_stat,p_val,t_stat,p_val,t_stat,p_val,t_stat,p_val,t_stat,p_val,t_stat,p_val,t_stat,p_val,t_stat,p_val,t_stat,p_val,t_stat,p_val,t_stat,...,p_val,t_stat,p_val,t_stat,p_val,t_stat,p_val,t_stat,p_val,t_stat,p_val,t_stat,p_val,t_stat,p_val,t_stat,p_val,t_stat,p_val,t_stat,p_val,t_stat,p_val,t_stat,p_val
BaggingClassifier,0.0,1.0,11.47,0.0,5.437,0.0,5.85,0.0,1.228,0.219,0.912,0.362,8.93,0.0,10.15,0.0,10.395,0.0,6.0,0.0,8.729,0.0,9.563,0.0,5.632,...,0.0,6.041,0.0,9.522,0.0,10.247,0.0,4.545,0.0,9.973,0.0,10.032,0.0,4.658,0.0,10.079,0.0,10.086,0.0,2.796,0.005,-0.67,0.503,0.097,0.923
BaselineClassifier,-11.47,0.0,0.0,1.0,-9.616,0.0,-8.235,0.0,-10.712,0.0,-11.375,0.0,-3.728,0.0,-1.915,0.056,-1.283,0.2,-7.566,0.0,-4.222,0.0,-3.217,0.001,-7.788,...,0.017,-7.434,0.0,-2.528,0.011,-1.467,0.142,-8.518,0.0,-2.536,0.011,-2.486,0.013,-8.462,0.0,-1.866,0.062,-2.194,0.028,-10.351,0.0,-11.679,0.0,-11.487,0.0
BernoulliNaiveBayes,-5.437,0.0,9.616,0.0,0.0,1.0,0.842,0.4,-3.912,0.0,-4.76,0.0,5.524,0.0,7.378,0.0,7.772,0.0,1.193,0.233,5.31,0.0,6.341,0.0,0.738,...,0.0,1.355,0.176,6.5,0.0,7.571,0.0,-0.571,0.568,7.006,0.0,7.089,0.0,-0.554,0.58,7.286,0.0,7.233,0.0,-2.681,0.007,-6.062,0.0,-5.515,0.0
GaussianNaiveBayes,-5.85,0.0,8.235,0.0,-0.842,0.4,0.0,1.0,-4.483,0.0,-5.197,0.0,4.483,0.0,6.258,0.0,6.687,0.0,0.364,0.716,4.252,0.0,5.313,0.0,0.027,...,0.0,0.523,0.601,5.455,0.0,6.486,0.0,-1.217,0.224,5.888,0.0,5.957,0.0,-1.156,0.248,6.22,0.0,6.123,0.0,-3.192,0.001,-6.411,0.0,-5.843,0.0
GradientBoostingClassifier,-1.228,0.219,10.712,0.0,3.912,0.0,4.483,0.0,0.0,1.0,-0.306,0.759,7.851,0.0,9.187,0.0,9.491,0.0,4.708,0.0,7.613,0.0,8.496,0.0,4.371,...,0.0,4.699,0.0,8.506,0.0,9.32,0.0,3.276,0.001,8.95,0.0,9.007,0.0,3.361,0.001,9.123,0.0,9.082,0.0,1.546,0.122,-1.71,0.087,-1.047,0.295
K_Neighbours,-0.912,0.362,11.375,0.0,4.76,0.0,5.197,0.0,0.306,0.759,0.0,1.0,8.67,0.0,9.964,0.0,10.245,0.0,5.413,0.0,8.403,0.0,9.335,0.0,5.055,...,0.0,5.41,0.0,9.307,0.0,10.057,0.0,3.851,0.0,9.759,0.0,9.84,0.0,3.946,0.0,9.882,0.0,9.89,0.0,1.966,0.049,-1.536,0.124,-0.809,0.418
NN-12-layer_wide_with_dropout,-8.93,0.0,3.728,0.0,-5.524,0.0,-4.483,0.0,-7.851,0.0,-8.67,0.0,0.0,1.0,1.804,0.071,2.374,0.018,-3.908,0.0,-0.281,0.778,0.76,0.447,-4.19,...,0.129,-3.828,0.0,1.14,0.254,2.239,0.025,-5.215,0.0,1.368,0.171,1.418,0.156,-5.205,0.0,1.804,0.071,1.613,0.107,-7.145,0.0,-9.279,0.0,-8.992,0.0
NN-12-layer_wide_with_dropout_lr01,-10.15,0.0,1.915,0.056,-7.378,0.0,-6.258,0.0,-9.187,0.0,-9.964,0.0,-1.804,0.071,0.0,1.0,0.575,0.565,-5.634,0.0,-2.118,0.034,-1.078,0.281,-5.896,...,0.742,-5.539,0.0,-0.635,0.525,0.48,0.631,-6.783,0.0,-0.477,0.633,-0.443,0.658,-6.839,0.0,0.028,0.978,-0.214,0.83,-8.641,0.0,-10.413,0.0,-10.187,0.0
NN-12-layer_wide_with_dropout_lr1,-10.395,0.0,1.283,0.2,-7.772,0.0,-6.687,0.0,-9.491,0.0,-10.245,0.0,-2.374,0.018,-0.575,0.565,0.0,1.0,-6.076,0.0,-2.615,0.009,-1.623,0.105,-6.322,...,0.363,-5.983,0.0,-1.202,0.229,-0.11,0.912,-7.18,0.0,-1.043,0.297,-0.989,0.323,-7.218,0.0,-0.592,0.554,-0.816,0.415,-8.983,0.0,-10.652,0.0,-10.445,0.0
NN-2-layer-droput-input-layer_lr001,-6.0,0.0,7.566,0.0,-1.193,0.233,-0.364,0.716,-4.708,0.0,-5.413,0.0,3.908,0.0,5.634,0.0,6.076,0.0,0.0,1.0,3.764,0.0,4.708,0.0,-0.401,...,0.0,0.096,0.924,4.938,0.0,5.919,0.0,-1.54,0.123,5.287,0.0,5.382,0.0,-1.542,0.123,5.536,0.0,5.48,0.0,-3.586,0.0,-6.537,0.0,-6.036,0.0


#### t-test with Bonferroni correction

In [14]:
# Pairwise t-test with Bonferroni correction; the result is a boolean
# estimator-by-estimator table.
# NOTE(review): presumably True marks a significant difference after
# correction (the diagonal is False in the output) — confirm against mlaut.
t_test_bonferroni_df = analyze.t_test_with_bonferroni_correction(errors_per_estimator)
t_test_bonferroni_df

Unnamed: 0,BaggingClassifier,BaselineClassifier,BernoulliNaiveBayes,GaussianNaiveBayes,GradientBoostingClassifier,K_Neighbours,NN-12-layer_wide_with_dropout,NN-12-layer_wide_with_dropout_lr01,NN-12-layer_wide_with_dropout_lr1,NN-2-layer-droput-input-layer_lr001,NN-2-layer-droput-input-layer_lr01,NN-2-layer-droput-input-layer_lr1,NN-4-layer-droput-each-layer_lr0001,NN-4-layer-droput-each-layer_lr01,NN-4-layer-droput-each-layer_lr1,NN-4-layer_thin_dropout,NN-4-layer_thin_dropout_lr01,NN-4-layer_thin_dropout_lr1,NN-4-layer_wide_no_dropout,NN-4-layer_wide_no_dropout_lr01,NN-4-layer_wide_no_dropout_lr1,NN-4-layer_wide_with_dropout,NN-4-layer_wide_with_dropout_lr01,NN-4-layer_wide_with_dropout_lr1,PassiveAggressiveClassifier,RandomForestClassifier,SVC
BaggingClassifier,False,True,True,True,False,False,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,False,False,False
BaselineClassifier,True,False,True,True,True,True,True,False,False,True,True,False,True,False,False,True,False,False,True,False,False,True,False,False,True,True,True
BernoulliNaiveBayes,True,True,False,False,True,True,True,True,True,False,True,True,False,True,True,False,True,True,False,True,True,False,True,True,False,True,True
GaussianNaiveBayes,True,True,False,False,True,True,True,True,True,False,True,True,False,True,True,False,True,True,False,True,True,False,True,True,False,True,True
GradientBoostingClassifier,False,True,True,True,False,False,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,False,False,False
K_Neighbours,False,True,True,True,False,False,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,False,False,False
NN-12-layer_wide_with_dropout,True,True,True,True,True,True,False,False,False,True,False,False,True,False,False,True,False,False,True,False,False,True,False,False,True,True,True
NN-12-layer_wide_with_dropout_lr01,True,False,True,True,True,True,False,False,False,True,False,False,True,False,False,True,False,False,True,False,False,True,False,False,True,True,True
NN-12-layer_wide_with_dropout_lr1,True,False,True,True,True,True,False,False,False,True,False,False,True,False,False,True,False,False,True,False,False,True,False,False,True,True,True
NN-2-layer-droput-input-layer_lr001,True,True,False,False,True,True,True,True,True,False,True,True,False,True,True,False,True,True,False,True,True,False,True,True,True,True,True


In [15]:
# Print the configured name of each estimator in `estim`.
# NOTE(review): `estim` is not defined in the cells visible here (the analysis
# above uses `estimators`) — confirm it exists on a fresh kernel.
for estimator in estim:
    print(estimator.properties['name'])

RandomForestClassifier
BaggingClassifier
GradientBoostingClassifier
SVC
GaussianNaiveBayes
BernoulliNaiveBayes
NeuralNetworkDeepClassifier
PassiveAggressiveClassifier
BaselineClassifier
K_Neighbours


#### Wilcoxon test

In [16]:
# Pairwise Wilcoxon test between estimators. The first return value (`a`) is
# not used in the cells visible here; the multi-index DataFrame holds a
# (statistic, p_val) column pair per estimator and is displayed.
a, wilcoxon_df_multiindex = analyze.wilcoxon_test(errors_per_estimator)
wilcoxon_df_multiindex

  z = (T - mn - correction) / se


Unnamed: 0_level_0,BaggingClassifier,BaggingClassifier,BaselineClassifier,BaselineClassifier,BernoulliNaiveBayes,BernoulliNaiveBayes,GaussianNaiveBayes,GaussianNaiveBayes,GradientBoostingClassifier,GradientBoostingClassifier,K_Neighbours,K_Neighbours,NN-12-layer_wide_with_dropout,NN-12-layer_wide_with_dropout,NN-12-layer_wide_with_dropout_lr01,NN-12-layer_wide_with_dropout_lr01,NN-12-layer_wide_with_dropout_lr1,NN-12-layer_wide_with_dropout_lr1,NN-2-layer-droput-input-layer_lr001,NN-2-layer-droput-input-layer_lr001,NN-2-layer-droput-input-layer_lr01,NN-2-layer-droput-input-layer_lr01,NN-2-layer-droput-input-layer_lr1,NN-2-layer-droput-input-layer_lr1,NN-4-layer-droput-each-layer_lr0001,...,NN-4-layer-droput-each-layer_lr1,NN-4-layer_thin_dropout,NN-4-layer_thin_dropout,NN-4-layer_thin_dropout_lr01,NN-4-layer_thin_dropout_lr01,NN-4-layer_thin_dropout_lr1,NN-4-layer_thin_dropout_lr1,NN-4-layer_wide_no_dropout,NN-4-layer_wide_no_dropout,NN-4-layer_wide_no_dropout_lr01,NN-4-layer_wide_no_dropout_lr01,NN-4-layer_wide_no_dropout_lr1,NN-4-layer_wide_no_dropout_lr1,NN-4-layer_wide_with_dropout,NN-4-layer_wide_with_dropout,NN-4-layer_wide_with_dropout_lr01,NN-4-layer_wide_with_dropout_lr01,NN-4-layer_wide_with_dropout_lr1,NN-4-layer_wide_with_dropout_lr1,PassiveAggressiveClassifier,PassiveAggressiveClassifier,RandomForestClassifier,RandomForestClassifier,SVC,SVC
Unnamed: 0_level_1,statistic,p_val,statistic,p_val,statistic,p_val,statistic,p_val,statistic,p_val,statistic,p_val,statistic,p_val,statistic,p_val,statistic,p_val,statistic,p_val,statistic,p_val,statistic,p_val,statistic,...,p_val,statistic,p_val,statistic,p_val,statistic,p_val,statistic,p_val,statistic,p_val,statistic,p_val,statistic,p_val,statistic,p_val,statistic,p_val,statistic,p_val,statistic,p_val,statistic,p_val
BaggingClassifier,0.0,,6.0,0.0,440.5,0.0,405.5,0.0,972.0,0.0,1879.0,0.018,203.0,0.0,148.0,0.0,119.0,0.0,476.0,0.0,143.0,0.0,174.0,0.0,613.0,...,0.0,493.0,0.0,118.0,0.0,103.0,0.0,678.5,0.0,106.0,0.0,112.0,0.0,678.5,0.0,123.0,0.0,114.0,0.0,948.0,0.0,1094.0,0.0,2621.5,0.987
BaselineClassifier,6.0,0.0,0.0,,49.0,0.0,398.0,0.0,23.5,0.0,1.0,0.0,574.0,0.0,1043.0,0.0,1373.0,0.0,49.0,0.0,239.0,0.0,664.0,0.0,53.0,...,0.0,60.0,0.0,1065.5,0.0,1522.0,0.0,27.0,0.0,737.5,0.0,806.0,0.0,40.0,0.0,1082.5,0.0,913.5,0.0,11.5,0.0,0.0,0.0,0.0,0.0
BernoulliNaiveBayes,440.5,0.0,49.0,0.0,0.0,,2177.0,0.053,1062.5,0.0,305.0,0.0,773.5,0.0,361.0,0.0,330.0,0.0,2175.5,0.038,422.0,0.0,415.5,0.0,2512.5,...,0.0,2283.0,0.043,360.5,0.0,324.0,0.0,2531.5,0.422,337.0,0.0,348.0,0.0,2835.0,0.517,335.0,0.0,302.0,0.0,1321.0,0.0,158.5,0.0,249.5,0.0
GaussianNaiveBayes,405.5,0.0,398.0,0.0,2177.0,0.053,0.0,,1039.0,0.0,341.0,0.0,1531.0,0.0,976.0,0.0,751.0,0.0,2857.5,0.793,1245.0,0.0,1179.0,0.0,2433.0,...,0.0,2757.5,0.806,974.5,0.0,940.0,0.0,1873.5,0.008,998.0,0.0,1006.0,0.0,2122.0,0.017,1001.0,0.0,988.0,0.0,949.5,0.0,142.5,0.0,134.0,0.0
GradientBoostingClassifier,972.0,0.0,23.5,0.0,1062.5,0.0,1039.0,0.0,0.0,,2172.5,0.13,300.0,0.0,241.0,0.0,221.0,0.0,850.5,0.0,310.0,0.0,284.0,0.0,923.5,...,0.0,731.0,0.0,205.0,0.0,222.0,0.0,1066.5,0.0,222.0,0.0,220.0,0.0,1079.0,0.0,211.0,0.0,223.0,0.0,2322.5,0.057,509.5,0.0,1562.0,0.0
K_Neighbours,1879.0,0.018,1.0,0.0,305.0,0.0,341.0,0.0,2172.5,0.13,0.0,,115.0,0.0,51.0,0.0,23.0,0.0,230.0,0.0,35.0,0.0,66.0,0.0,360.0,...,0.0,356.0,0.0,24.0,0.0,25.0,0.0,538.0,0.0,23.0,0.0,26.0,0.0,514.0,0.0,41.0,0.0,26.0,0.0,1155.0,0.0,1046.0,0.0,1509.5,0.004
NN-12-layer_wide_with_dropout,203.0,0.0,574.0,0.0,773.5,0.0,1531.0,0.0,300.0,0.0,115.0,0.0,0.0,,271.0,0.0,212.0,0.0,671.0,0.0,1252.5,0.73,669.0,0.031,321.0,...,0.0,609.0,0.0,477.0,0.003,188.5,0.0,365.5,0.0,387.0,0.002,438.5,0.001,343.5,0.0,329.5,0.0,313.0,0.0,414.0,0.0,94.0,0.0,28.0,0.0
NN-12-layer_wide_with_dropout_lr01,148.0,0.0,1043.0,0.0,361.0,0.0,976.0,0.0,241.0,0.0,51.0,0.0,271.0,0.0,0.0,,541.0,0.808,293.0,0.0,460.5,0.0,446.0,0.007,87.0,...,0.399,310.0,0.0,654.0,0.171,425.0,0.296,172.5,0.0,351.5,0.142,306.0,0.241,169.0,0.0,407.0,0.968,346.0,0.39,229.0,0.0,74.0,0.0,17.0,0.0
NN-12-layer_wide_with_dropout_lr1,119.0,0.0,1373.0,0.0,330.0,0.0,751.0,0.0,221.0,0.0,23.0,0.0,212.0,0.0,541.0,0.808,0.0,,197.0,0.0,398.0,0.0,462.0,0.002,91.0,...,0.199,310.0,0.0,542.5,0.037,523.5,0.508,165.0,0.0,329.0,0.053,379.5,0.119,189.0,0.0,460.0,0.516,467.0,0.215,173.0,0.0,55.0,0.0,3.0,0.0
NN-2-layer-droput-input-layer_lr001,476.0,0.0,49.0,0.0,2175.5,0.038,2857.5,0.793,850.5,0.0,230.0,0.0,671.0,0.0,293.0,0.0,197.0,0.0,0.0,,443.0,0.0,331.0,0.0,1741.5,...,0.0,2416.5,0.838,372.5,0.0,220.0,0.0,1313.0,0.0,225.0,0.0,284.0,0.0,1468.5,0.0,263.0,0.0,222.0,0.0,777.0,0.0,207.0,0.0,156.0,0.0


#### Friedman test

In [17]:
# Friedman test across all estimators; the first return value is discarded
# and the DataFrame (statistic, p_value) is displayed.
_, friedman_test_df = analyze.friedman_test(errors_per_estimator)
friedman_test_df

Unnamed: 0,statistic,p_value
0,1542.781,0.0


In [18]:
# Nemenyi test over the estimators; the raw result is wrapped in a DataFrame
# for display and for the LaTeX export further down.
# NOTE(review): the diagonal shows -1.0 in the output — presumably a
# placeholder for self-comparison; confirm against mlaut.
nemeniy_test = analyze.nemenyi(errors_per_estimator)
nemeniy_test_df = pd.DataFrame(nemeniy_test)
nemeniy_test_df

Unnamed: 0,BaggingClassifier,BaselineClassifier,BernoulliNaiveBayes,GaussianNaiveBayes,GradientBoostingClassifier,K_Neighbours,NN-12-layer_wide_with_dropout,NN-12-layer_wide_with_dropout_lr01,NN-12-layer_wide_with_dropout_lr1,NN-2-layer-droput-input-layer_lr001,NN-2-layer-droput-input-layer_lr01,NN-2-layer-droput-input-layer_lr1,NN-4-layer-droput-each-layer_lr0001,NN-4-layer-droput-each-layer_lr01,NN-4-layer-droput-each-layer_lr1,NN-4-layer_thin_dropout,NN-4-layer_thin_dropout_lr01,NN-4-layer_thin_dropout_lr1,NN-4-layer_wide_no_dropout,NN-4-layer_wide_no_dropout_lr01,NN-4-layer_wide_no_dropout_lr1,NN-4-layer_wide_with_dropout,NN-4-layer_wide_with_dropout_lr01,NN-4-layer_wide_with_dropout_lr1,PassiveAggressiveClassifier,RandomForestClassifier,SVC
BaggingClassifier,-1.0,0.0,0.933,0.585,1.0,1.0,0.0,0.0,0.0,0.333,0.0,0.0,0.504,0.0,0.0,0.292,0.0,0.0,0.941,0.0,0.0,0.938,0.0,0.0,1.0,1.0,1.0
BaselineClassifier,0.0,-1.0,0.0,0.0,0.0,0.0,0.989,1.0,1.0,0.002,0.971,1.0,0.0,1.0,1.0,0.002,1.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0
BernoulliNaiveBayes,0.933,0.0,-1.0,1.0,1.0,0.988,0.498,0.012,0.002,1.0,0.635,0.152,1.0,0.019,0.027,1.0,0.084,0.004,1.0,0.039,0.032,1.0,0.013,0.02,1.0,0.798,0.93
GaussianNaiveBayes,0.585,0.0,1.0,-1.0,0.971,0.817,0.897,0.131,0.041,1.0,0.949,0.571,1.0,0.176,0.221,1.0,0.424,0.062,1.0,0.276,0.244,1.0,0.141,0.182,1.0,0.342,0.578
GradientBoostingClassifier,1.0,0.0,1.0,0.971,-1.0,1.0,0.0,0.0,0.0,0.883,0.0,0.0,0.953,0.0,0.0,0.857,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0
K_Neighbours,1.0,0.0,0.988,0.817,1.0,-1.0,0.0,0.0,0.0,0.592,0.0,0.0,0.755,0.0,0.0,0.546,0.0,0.0,0.989,0.0,0.0,0.989,0.0,0.0,1.0,1.0,1.0
NN-12-layer_wide_with_dropout,0.0,0.989,0.498,0.897,0.0,0.0,-1.0,1.0,1.0,0.976,1.0,1.0,0.931,1.0,1.0,0.982,1.0,1.0,0.476,1.0,1.0,0.485,1.0,1.0,0.005,0.0,0.0
NN-12-layer_wide_with_dropout_lr01,0.0,1.0,0.012,0.131,0.0,0.0,1.0,-1.0,1.0,0.312,1.0,1.0,0.178,1.0,1.0,0.354,1.0,1.0,0.01,1.0,1.0,0.011,1.0,1.0,0.0,0.0,0.0
NN-12-layer_wide_with_dropout_lr1,0.0,1.0,0.002,0.041,0.0,0.0,1.0,1.0,-1.0,0.129,1.0,1.0,0.06,1.0,1.0,0.154,1.0,1.0,0.002,1.0,1.0,0.002,1.0,1.0,0.0,0.0,0.0
NN-2-layer-droput-input-layer_lr001,0.333,0.002,1.0,1.0,0.883,0.592,0.976,0.312,0.129,-1.0,0.991,0.801,1.0,0.384,0.448,1.0,0.679,0.18,1.0,0.519,0.479,1.0,0.328,0.392,0.997,0.15,0.326


### Per dataset

In [19]:
# Per-dataset results: one row per (dataset, estimator) pair with `loss` and
# `std_error` columns.
errors_per_dataset_per_estimator_df

Unnamed: 0,Unnamed: 1,loss,std_error
acute_inflammation,BaggingClassifier,0.00000,0.00000
acute_inflammation,BaselineClassifier,0.32500,0.07406
acute_inflammation,BernoulliNaiveBayes,0.12500,0.05229
acute_inflammation,GaussianNaiveBayes,0.17500,0.06008
acute_inflammation,GradientBoostingClassifier,0.00000,0.00000
acute_inflammation,K_Neighbours,0.00000,0.00000
acute_inflammation,NN-12-layer_wide_with_dropout,0.47500,0.07896
acute_inflammation,NN-12-layer_wide_with_dropout_lr01,0.52500,0.07896
acute_inflammation,NN-12-layer_wide_with_dropout_lr1,0.52500,0.07896
acute_inflammation,NN-2-layer-droput-input-layer_lr001,0.12500,0.05229


## Save tables to $\LaTeX$

In [25]:
# t-test
# Write the complete t-test table, then split it column-wise into narrower
# tables (t_test1.tex, t_test2.tex, ...) that fit on a page.
tables_dir = '../mlaut-paper/mlaut/tables/'
with open(tables_dir + 't_test.tex', 'w') as tf:
    tf.write(t_test_df.to_latex())

# Each estimator contributes a (t_stat, p_val) column pair, so 8 columns hold
# 4 estimators. Stepping over the actual column count (instead of hard-coded
# slice bounds) keeps every column covered if the set of estimators changes;
# the last chunk is simply narrower.
cols_per_table = 8
for part_no, first_col in enumerate(range(0, t_test_df.shape[1], cols_per_table), start=1):
    t_test_part = t_test_df.iloc[:, first_col:first_col + cols_per_table]
    with open('{}t_test{}.tex'.format(tables_dir, part_no), 'w') as tf:
        tf.write(t_test_part.to_latex())

In [27]:
# t-test with Bonferroni correction
# Write the complete table, then split it column-wise into narrower tables
# (t_test_bonferroni1.tex, t_test_bonferroni2.tex, ...).
tables_dir = '../mlaut-paper/mlaut/tables/'
with open(tables_dir + 't_test_bonferroni.tex', 'w') as tf:
    tf.write(t_test_bonferroni_df.to_latex())

# One column per estimator, 4 estimators per table. Stepping over the actual
# column count (instead of hard-coded slice bounds) keeps every column covered
# if the set of estimators changes; the last chunk is simply narrower.
cols_per_table = 4
for part_no, first_col in enumerate(range(0, t_test_bonferroni_df.shape[1], cols_per_table), start=1):
    bonferroni_part = t_test_bonferroni_df.iloc[:, first_col:first_col + cols_per_table]
    with open('{}t_test_bonferroni{}.tex'.format(tables_dir, part_no), 'w') as tf:
        tf.write(bonferroni_part.to_latex())

In [26]:
# Sign test
# Write the complete sign-test table, then split it column-wise into narrower
# tables (sign_test1.tex, sign_test2.tex, ...).
tables_dir = '../mlaut-paper/mlaut/tables/'
with open(tables_dir + 'sign_test.tex', 'w') as tf:
    tf.write(sign_test_df.to_latex())

# Each estimator contributes two columns, so 8 columns hold 4 estimators.
# Stepping over the actual column count (instead of hard-coded slice bounds)
# keeps every column covered if the set of estimators changes; the last chunk
# is simply narrower.
cols_per_table = 8
for part_no, first_col in enumerate(range(0, sign_test_df.shape[1], cols_per_table), start=1):
    sign_test_part = sign_test_df.iloc[:, first_col:first_col + cols_per_table]
    with open('{}sign_test{}.tex'.format(tables_dir, part_no), 'w') as tf:
        tf.write(sign_test_part.to_latex())

In [29]:
# Wilcoxon
# Write the complete Wilcoxon table, then split it column-wise into narrower
# tables (wilcoxon_test1.tex, wilcoxon_test2.tex, ...).
tables_dir = '../mlaut-paper/mlaut/tables/'
# NOTE(review): the file name below is spelled "wilxocon" while the split
# tables use "wilcoxon". It is kept as-is because the paper sources may
# reference this exact name — confirm before renaming.
with open(tables_dir + 'wilxocon_test.tex', 'w') as tf:
    tf.write(wilcoxon_df_multiindex.to_latex())

# Each estimator contributes a (statistic, p_val) column pair, so 8 columns
# hold 4 estimators. Stepping over the actual column count (instead of
# hard-coded slice bounds) keeps every column covered if the set of estimators
# changes; the last chunk is simply narrower.
cols_per_table = 8
for part_no, first_col in enumerate(range(0, wilcoxon_df_multiindex.shape[1], cols_per_table), start=1):
    wilcoxon_part = wilcoxon_df_multiindex.iloc[:, first_col:first_col + cols_per_table]
    with open('{}wilcoxon_test{}.tex'.format(tables_dir, part_no), 'w') as tf:
        tf.write(wilcoxon_part.to_latex())

In [31]:
# Nemenyi test (variable and file names keep the existing "nemeniy" spelling)
# Write the complete table, then split it column-wise into narrower tables
# (nemeniy_test1.tex, nemeniy_test2.tex, ...).
tables_dir = '../mlaut-paper/mlaut/tables/'
with open(tables_dir + 'nemeniy_test.tex', 'w') as tf:
    tf.write(nemeniy_test_df.to_latex())

# One column per estimator, 4 estimators per table. Stepping over the actual
# column count (instead of hard-coded slice bounds) keeps every column covered
# if the set of estimators changes; the last chunk is simply narrower.
cols_per_table = 4
for part_no, first_col in enumerate(range(0, nemeniy_test_df.shape[1], cols_per_table), start=1):
    nemeniy_part = nemeniy_test_df.iloc[:, first_col:first_col + cols_per_table]
    with open('{}nemeniy_test{}.tex'.format(tables_dir, part_no), 'w') as tf:
        tf.write(nemeniy_part.to_latex())

In [22]:
# Export the summary tables. Each entry is (output path, frame, to_latex
# options); the files are written one after another in the order listed.
summary_tables = [
    # average and standard error
    ('../mlaut-paper/mlaut/tables/avg_and_st_error.tex', avg_and_std_error, {}),
    # average training time
    ('../mlaut-paper/mlaut/tables/avg_training_time.tex', avg_training_time, {}),
    # average rank
    ('../mlaut-paper/mlaut/tables/avg_rank.tex', avg_rank, {}),
    # average metrics (rank, score and training time combined)
    ('../mlaut-paper/mlaut/tables/avg_metrics.tex', avg_metrics, {}),
    # Cohen's d
    ('../mlaut-paper/mlaut/tables/cohens_d.tex', cohens_d, {}),
    # errors per dataset per estimator: long, so rendered as a longtable
    # (no caption is injected into the generated LaTeX)
    ('../mlaut-paper/mlaut/tables/errors_per_dataset_per_estimator.tex',
     errors_per_dataset_per_estimator_df, {'longtable': True}),
]

for tex_path, frame, latex_options in summary_tables:
    with open(tex_path, 'w') as tf:
        tf.write(frame.to_latex(**latex_options))

In [33]:
# Friedman test result, written without the index column.
friedman_tex_path = '../mlaut-paper/mlaut/tables/friedman_test.tex'
with open(friedman_tex_path, mode='w') as tex_file:
    tex_file.write(friedman_test_df.to_latex(index=False))