In [1]:
from mlaut.analyze_results import AnalyseResults
from mlaut.data import Data
import pandas as pd
from mlaut.estimators.estimators import instantiate_default_estimators
from mlaut.analyze_results.scores import ScoreAccuracy

import matplotlib.pyplot as plt
pd.options.display.max_rows = 1000
pd.options.display.max_columns = 50

In [2]:
# Open the two HDF5 stores in read-only mode: the original datasets and the
# store holding the predictions of the deep-classification experiment.
# NOTE(review): neither handle is closed in the cells visible here.
data = Data()
input_io = data.open_hdf5('data/delgado.h5', mode='r')
out_io = data.open_hdf5('data/delgado-classification-deep.h5', mode='r')
# AnalyseResults is given both handles plus the group paths inside each file
# under which the original datasets and the saved predictions are looked up.
analyze = AnalyseResults(hdf5_output_io=out_io, 
                        hdf5_input_io=input_io, 
                        input_h5_original_datasets_group='openml/', 
                        output_h5_predictions_group='experiments/predictions/')


### All datasets

In [3]:
from mlaut.estimators.nn_estimators import Deep_NN_Classifier
# Hyperparameter grid handed to the wide / 12-layer Deep_NN_Classifier
# instances defined in this cell.
# NOTE(review): a batch_size of 0 is unusual -- confirm how the estimator
# interprets it.
hyperparameters = dict(
    epochs=[50, 100],
    batch_size=[0, 50, 100],
)
def keras_model1(num_classes, input_dim):
    """Build and compile the "thin" network with a single dropout layer.

    Layer stack: Dense(288) -> Dense(144) -> Dropout(0.5) -> Dense(12) ->
    Dense(num_classes, softmax). Hidden layers use ReLU.

    Args:
        num_classes: width of the softmax output layer.
        input_dim: passed as ``input_dim`` to the first Dense layer.

    Returns:
        The compiled ``OverwrittenSequentialClassifier`` instance.

    NOTE(review): ``OverwrittenSequentialClassifier``, ``Dense``, ``Dropout``
    and ``optimizers`` are not imported in the visible cells; they must be in
    scope before this function is called.
    """
    layer_stack = [
        Dense(288, input_dim=input_dim, activation='relu'),
        Dense(144, activation='relu'),
        Dropout(0.5),
        Dense(12, activation='relu'),
        Dense(num_classes, activation='softmax'),
    ]

    network = OverwrittenSequentialClassifier()
    for layer in layer_stack:
        network.add(layer)

    # Adam with a 0.001 learning rate; mean squared error is used as the loss
    # on top of the softmax output.
    network.compile(loss='mean_squared_error',
                    optimizer=optimizers.Adam(lr=0.001),
                    metrics=['accuracy'])
    return network

# Wrap the thin architecture as an estimator named 'NN-4-layer_thin_dropout'.
# NOTE(review): no `hyperparameters` grid is passed here, unlike the other
# Deep_NN_Classifier instances in this cell -- confirm this is intentional.
deep_nn_4_layer_thin_dropout = Deep_NN_Classifier(
    keras_model=keras_model1,
    properties=dict(name='NN-4-layer_thin_dropout'),
)


def keras_model2(num_classes, input_dim):
    """Build and compile the "wide" network without dropout.

    Layer stack: Dense(2500) -> Dense(2000) -> Dense(1500) ->
    Dense(num_classes, softmax). Hidden layers use ReLU.

    Args:
        num_classes: width of the softmax output layer.
        input_dim: passed as ``input_dim`` to the first Dense layer.

    Returns:
        The compiled ``OverwrittenSequentialClassifier`` instance.
    """
    network = OverwrittenSequentialClassifier()

    # Only the first layer declares the input dimension.
    network.add(Dense(2500, input_dim=input_dim, activation='relu'))
    for width in (2000, 1500):
        network.add(Dense(width, activation='relu'))
    network.add(Dense(num_classes, activation='softmax'))

    network.compile(loss='mean_squared_error',
                    optimizer=optimizers.Adam(lr=0.001),
                    metrics=['accuracy'])
    return network

# Wrap the wide, dropout-free architecture as an estimator named
# 'NN-4-layer_wide_no_dropout', tuned over the `hyperparameters` grid.
deep_nn_4_layer_wide_no_dropout = Deep_NN_Classifier(
    hyperparameters=hyperparameters,
    keras_model=keras_model2,
    properties=dict(name='NN-4-layer_wide_no_dropout'),
)


def keras_model3(num_classes, input_dim):
    """Build and compile the "wide" network with one dropout layer.

    Layer stack: Dense(2500) -> Dense(2000) -> Dropout(0.5) -> Dense(1500) ->
    Dense(num_classes, softmax). Hidden layers use ReLU.

    Args:
        num_classes: width of the softmax output layer.
        input_dim: passed as ``input_dim`` to the first Dense layer.

    Returns:
        The compiled ``OverwrittenSequentialClassifier`` instance.
    """
    network = OverwrittenSequentialClassifier()

    # Only the first layer declares the input dimension.
    network.add(Dense(2500, input_dim=input_dim, activation='relu'))
    network.add(Dense(2000, activation='relu'))
    network.add(Dropout(0.5))
    network.add(Dense(1500, activation='relu'))
    network.add(Dense(num_classes, activation='softmax'))

    network.compile(loss='mean_squared_error',
                    optimizer=optimizers.Adam(lr=0.001),
                    metrics=['accuracy'])
    return network

# Wrap the wide architecture with dropout as an estimator named
# 'NN-4-layer_wide_with_dropout', tuned over the `hyperparameters` grid.
deep_nn_4_layer_wide_with_dropout = Deep_NN_Classifier(
    hyperparameters=hyperparameters,
    keras_model=keras_model3,
    properties=dict(name='NN-4-layer_wide_with_dropout'),
)


def keras_model4(num_classes, input_dim):
    """Build and compile the 12-layer "wide" network with three dropout layers.

    Dense widths, in order: 5000, 4500, 4000, [Dropout], 3500, 3000, 2500,
    [Dropout], 2000, 1500, 1000, [Dropout], 500, 250, then a softmax layer of
    ``num_classes`` units. Hidden layers use ReLU and every dropout rate is 0.5.

    Args:
        num_classes: width of the softmax output layer.
        input_dim: passed as ``input_dim`` to the first Dense layer.

    Returns:
        The compiled ``OverwrittenSequentialClassifier`` instance.
    """
    network = OverwrittenSequentialClassifier()

    # First stage: only the very first layer declares the input dimension.
    network.add(Dense(5000, input_dim=input_dim, activation='relu'))
    for width in (4500, 4000):
        network.add(Dense(width, activation='relu'))
    network.add(Dropout(0.5))

    # Two further stages of three Dense layers, each closed by a dropout layer.
    for stage_widths in ((3500, 3000, 2500), (2000, 1500, 1000)):
        for width in stage_widths:
            network.add(Dense(width, activation='relu'))
        network.add(Dropout(0.5))

    # Narrow tail followed by the softmax output layer.
    for width in (500, 250):
        network.add(Dense(width, activation='relu'))
    network.add(Dense(num_classes, activation='softmax'))

    network.compile(loss='mean_squared_error',
                    optimizer=optimizers.Adam(lr=0.001),
                    metrics=['accuracy'])
    return network

# Wrap the 12-layer architecture as an estimator named
# 'NN-12-layer_wide_with_dropout', tuned over the `hyperparameters` grid.
deep_nn_12_layer_wide_with_dropout = Deep_NN_Classifier(
    hyperparameters=hyperparameters,
    keras_model=keras_model4,
    properties=dict(name='NN-12-layer_wide_with_dropout'),
)


# Start from the four custom deep networks defined in this cell ...
estimators = [deep_nn_4_layer_thin_dropout,
            deep_nn_4_layer_wide_no_dropout,
            deep_nn_4_layer_wide_with_dropout,
            deep_nn_12_layer_wide_with_dropout]

# ... then append every default classification estimator except the library's
# own 'NeuralNetworkDeepClassifier'.
estim = instantiate_default_estimators(['Classification'])
for e in estim:
    # Compare by value. The previous `is not` tested object identity, which
    # only works for strings by accident of interning and triggers a
    # SyntaxWarning on Python 3.8+.
    if e.properties['name'] != 'NeuralNetworkDeepClassifier':
        estimators.append(e)

In [4]:
# Metric object passed to the analysis below.
score_accuracy = ScoreAccuracy()

# prediction_errors returns three objects: the per-estimator errors (fed to
# the summary statistics and statistical tests in later cells), the
# per-dataset/per-estimator errors, and a DataFrame form of the latter.
errors_per_estimator, errors_per_dataset_per_estimator, errors_per_dataset_per_estimator_df = (
    analyze.prediction_errors(score_accuracy, estimators)
)



In [5]:
# Display the per-dataset, per-estimator table (columns: loss, std_error).
errors_per_dataset_per_estimator_df

Unnamed: 0,Unnamed: 1,loss,std_error
abalone,BaggingClassifier,0.37708,0.01305
abalone,BaselineClassifier,0.66715,0.01269
abalone,BernoulliNaiveBayes,0.44888,0.01339
abalone,GaussianNaiveBayes,0.44017,0.01337
abalone,GradientBoostingClassifier,0.38869,0.01313
abalone,K_Neighbours,0.36476,0.01296
abalone,NN-12-layer_wide_with_dropout,0.36766,0.01298
abalone,NN-4-layer_thin_dropout,0.38724,0.01312
abalone,NN-4-layer_wide_no_dropout,0.36186,0.01294
abalone,NN-4-layer_wide_with_dropout,0.37563,0.01304


#### Simple average and standard error

In [6]:
# Average score and standard error per estimator, computed from the
# per-estimator errors; displayed rounded to three decimals (the stored
# frame itself is not rounded).
avg_and_std_error = analyze.average_and_std_error(errors_per_estimator)
# avg_and_std_error.index.name='Estimator Name'
avg_and_std_error.round(3)

Unnamed: 0,avg_score,std_error
BaselineClassifier,0.423,0.019
NN-12-layer_wide_with_dropout,0.538,0.022
NN-4-layer_thin_dropout,0.655,0.021
GaussianNaiveBayes,0.674,0.019
NN-4-layer_wide_with_dropout,0.693,0.02
NN-4-layer_wide_no_dropout,0.696,0.02
BernoulliNaiveBayes,0.706,0.015
PassiveAggressiveClassifier,0.757,0.016
GradientBoostingClassifier,0.79,0.016
K_Neighbours,0.804,0.013


#### Average Rank

In [7]:
# Average rank of every estimator.
# NOTE(review): ascending=False presumably gives the best rank to the highest
# score -- confirm against AnalyseResults.ranks.
avg_rank = analyze.ranks(errors_per_estimator, ascending=False)
avg_rank.round(1)

Unnamed: 0,avg_rank
RandomForestClassifier,3.2
SVC,3.8
BaggingClassifier,4.3
K_Neighbours,4.7
GradientBoostingClassifier,5.6
PassiveAggressiveClassifier,6.3
NN-4-layer_wide_no_dropout,7.5
NN-4-layer_wide_with_dropout,7.5
BernoulliNaiveBayes,8.3
NN-4-layer_thin_dropout,8.4


#### Training time

In [8]:
# Training-time summary for the selected estimators. The call returns two
# objects; only avg_training_time is used by the cells visible here.
avg_training_time, training_time_per_dataset = analyze.average_training_time(estimators)


In [9]:
# Display the average training time per estimator (in seconds).
avg_training_time

Unnamed: 0,avg training time (in sec)
BaselineClassifier,0.001
GaussianNaiveBayes,0.004
BernoulliNaiveBayes,0.006
NN-4-layer_thin_dropout,3.307
NN-4-layer_wide_no_dropout,7.31
NN-4-layer_wide_with_dropout,7.423
BaggingClassifier,18.501
PassiveAggressiveClassifier,20.66
RandomForestClassifier,33.507
NN-12-layer_wide_with_dropout,70.118


#### Merged average rank, score and training time

In [10]:
# Join the three per-estimator summaries on their shared index (estimator
# name). merge defaults to an inner join, so only estimators present in all
# three tables are kept.
avg_metrics = (
    avg_rank
    .merge(avg_and_std_error, left_index=True, right_index=True)
    .merge(avg_training_time, left_index=True, right_index=True)
)
avg_metrics


Unnamed: 0,avg_rank,avg_score,std_error,avg training time (in sec)
RandomForestClassifier,3.2,0.829,0.013,33.507
SVC,3.8,0.817,0.013,5807.507
BaggingClassifier,4.3,0.819,0.014,18.501
K_Neighbours,4.7,0.804,0.013,192.382
GradientBoostingClassifier,5.6,0.79,0.016,139.763
PassiveAggressiveClassifier,6.3,0.757,0.016,20.66
NN-4-layer_wide_no_dropout,7.5,0.696,0.02,7.31
NN-4-layer_wide_with_dropout,7.5,0.693,0.02,7.423
BernoulliNaiveBayes,8.3,0.706,0.015,0.006
NN-4-layer_thin_dropout,8.4,0.655,0.021,3.307


#### Cohen's d

In [12]:
# Pairwise Cohen's d effect sizes between estimators; displayed rounded to
# two decimals (the stored frame itself is not rounded).
cohens_d = analyze.cohens_d(errors_per_estimator)
cohens_d.round(2)

estimator_2,BaselineClassifier,BernoulliNaiveBayes,GaussianNaiveBayes,GradientBoostingClassifier,K_Neighbours,NN-12-layer_wide_with_dropout,NN-4-layer_thin_dropout,NN-4-layer_wide_no_dropout,NN-4-layer_wide_with_dropout,PassiveAggressiveClassifier,RandomForestClassifier,SVC
estimator_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
BaggingClassifier,-2.234,-0.735,-0.82,-0.182,-0.1,-1.425,-0.847,-0.659,-0.673,-0.387,0.074,-0.01
BaselineClassifier,,1.551,1.241,1.963,2.176,0.525,1.07,1.294,1.285,1.79,2.321,2.24
BernoulliNaiveBayes,,,-0.176,0.509,0.649,-0.831,-0.257,-0.053,-0.065,0.312,0.817,0.73
GaussianNaiveBayes,,,,0.622,0.746,-0.617,-0.087,0.105,0.094,0.448,0.891,0.82
GradientBoostingClassifier,,,,,0.092,-1.223,-0.666,-0.48,-0.493,-0.192,0.253,0.17
K_Neighbours,,,,,,-1.364,-0.779,-0.587,-0.601,-0.3,0.177,0.09
NN-12-layer_wide_with_dropout,,,,,,,0.5,0.692,0.683,1.066,1.494,1.42
NN-4-layer_thin_dropout,,,,,,,,0.182,0.171,0.506,0.911,0.84
NN-4-layer_wide_no_dropout,,,,,,,,,-0.011,0.314,0.724,0.65
NN-4-layer_wide_with_dropout,,,,,,,,,,0.327,0.738,0.67


#### t-test

In [13]:
# Pairwise t-test between estimators. The call returns two objects; only the
# DataFrame (t_test_df) is displayed here and exported later.
t_test, t_test_df = analyze.t_test(errors_per_estimator)
t_test_df.round(3)

Unnamed: 0_level_0,BaggingClassifier,BaggingClassifier,BaselineClassifier,BaselineClassifier,BernoulliNaiveBayes,BernoulliNaiveBayes,GaussianNaiveBayes,GaussianNaiveBayes,GradientBoostingClassifier,GradientBoostingClassifier,K_Neighbours,K_Neighbours,NN-12-layer_wide_with_dropout,NN-12-layer_wide_with_dropout,NN-4-layer_thin_dropout,NN-4-layer_thin_dropout,NN-4-layer_wide_no_dropout,NN-4-layer_wide_no_dropout,NN-4-layer_wide_with_dropout,NN-4-layer_wide_with_dropout,PassiveAggressiveClassifier,PassiveAggressiveClassifier,RandomForestClassifier,RandomForestClassifier,SVC,SVC
Unnamed: 0_level_1,t_stat,p_val,t_stat,p_val,t_stat,p_val,t_stat,p_val,t_stat,p_val,t_stat,p_val,t_stat,p_val,t_stat,p_val,t_stat,p_val,t_stat,p_val,t_stat,p_val,t_stat,p_val,t_stat,p_val
BaggingClassifier,0.0,1.0,16.939,0.0,5.57,0.0,6.221,0.0,1.381,0.169,0.761,0.448,10.809,0.0,6.421,0.0,4.998,0.0,5.103,0.0,2.938,0.004,-0.561,0.575,0.079,0.937
BaselineClassifier,-16.939,0.0,0.0,1.0,-11.758,0.0,-9.407,0.0,-14.882,0.0,-16.497,0.0,-3.981,0.0,-8.114,0.0,-9.813,0.0,-9.745,0.0,-13.577,0.0,-17.599,0.0,-16.964,0.0
BernoulliNaiveBayes,-5.57,0.0,11.758,0.0,0.0,1.0,1.337,0.183,-3.861,0.0,-4.924,0.0,6.305,0.0,1.946,0.053,0.399,0.69,0.492,0.623,-2.367,0.019,-6.194,0.0,-5.534,0.0
GaussianNaiveBayes,-6.221,0.0,9.407,0.0,-1.337,0.183,0.0,1.0,-4.718,0.0,-5.659,0.0,4.678,0.0,0.657,0.512,-0.793,0.429,-0.71,0.478,-3.398,0.001,-6.759,0.0,-6.189,0.0
GradientBoostingClassifier,-1.381,0.169,14.882,0.0,3.861,0.0,4.718,0.0,0.0,1.0,-0.697,0.487,9.277,0.0,5.051,0.0,3.64,0.0,3.737,0.0,1.455,0.147,-1.92,0.056,-1.317,0.189
K_Neighbours,-0.761,0.448,16.497,0.0,4.924,0.0,5.659,0.0,0.697,0.487,0.0,1.0,10.34,0.0,5.905,0.0,4.45,0.0,4.556,0.0,2.274,0.024,-1.345,0.18,-0.687,0.493
NN-12-layer_wide_with_dropout,-10.809,0.0,3.981,0.0,-6.305,0.0,-4.678,0.0,-9.277,0.0,-10.34,0.0,0.0,1.0,-3.795,0.0,-5.248,0.0,-5.177,0.0,-8.084,0.0,-11.327,0.0,-10.797,0.0
NN-4-layer_thin_dropout,-6.421,0.0,8.114,0.0,-1.946,0.053,-0.657,0.512,-5.051,0.0,-5.905,0.0,3.795,0.0,0.0,1.0,-1.377,0.17,-1.3,0.195,-3.836,0.0,-6.908,0.0,-6.39,0.0
NN-4-layer_wide_no_dropout,-4.998,0.0,9.813,0.0,-0.399,0.69,0.793,0.429,-3.64,0.0,-4.45,0.0,5.248,0.0,1.377,0.17,0.0,1.0,0.081,0.935,-2.382,0.018,-5.491,0.0,-4.96,0.0
NN-4-layer_wide_with_dropout,-5.103,0.0,9.745,0.0,-0.492,0.623,0.71,0.478,-3.737,0.0,-4.556,0.0,5.177,0.0,1.3,0.195,-0.081,0.935,0.0,1.0,-2.478,0.014,-5.598,0.0,-5.066,0.0


#### sign test

In [14]:
# Pairwise sign test between estimators. The call returns two objects; only
# the DataFrame (sign_test_df) is displayed here and exported later.
sign_test, sign_test_df = analyze.sign_test(errors_per_estimator)
sign_test_df

Unnamed: 0_level_0,BaggingClassifier,BaggingClassifier,BaselineClassifier,BaselineClassifier,BernoulliNaiveBayes,BernoulliNaiveBayes,GaussianNaiveBayes,GaussianNaiveBayes,GradientBoostingClassifier,GradientBoostingClassifier,K_Neighbours,K_Neighbours,NN-12-layer_wide_with_dropout,NN-12-layer_wide_with_dropout,NN-4-layer_thin_dropout,NN-4-layer_thin_dropout,NN-4-layer_wide_no_dropout,NN-4-layer_wide_no_dropout,NN-4-layer_wide_with_dropout,NN-4-layer_wide_with_dropout,PassiveAggressiveClassifier,PassiveAggressiveClassifier,RandomForestClassifier,RandomForestClassifier,SVC,SVC
Unnamed: 0_level_1,t_stat,p_val,t_stat,p_val,t_stat,p_val,t_stat,p_val,t_stat,p_val,t_stat,p_val,t_stat,p_val,t_stat,p_val,t_stat,p_val,t_stat,p_val,t_stat,p_val,t_stat,p_val,t_stat,p_val
BaggingClassifier,0.0,1.0,11.667,0.0,5.513,0.0,5.867,0.0,1.202,0.229,0.915,0.36,9.101,0.0,6.034,0.0,4.563,0.0,4.692,0.0,2.814,0.005,-0.672,0.502,0.035,0.972
BaselineClassifier,-11.667,0.0,0.0,1.0,-9.695,0.0,-8.346,0.0,-10.905,0.0,-11.581,0.0,-3.887,0.0,-7.643,0.0,-8.731,0.0,-8.651,0.0,-10.533,0.0,-11.883,0.0,-11.692,0.0
BernoulliNaiveBayes,-5.513,0.0,9.695,0.0,0.0,1.0,0.838,0.402,-4.036,0.0,-4.812,0.0,5.55,0.0,1.25,0.211,-0.624,0.533,-0.574,0.566,-2.703,0.007,-6.096,0.0,-5.594,0.0
GaussianNaiveBayes,-5.867,0.0,8.346,0.0,-0.838,0.402,0.0,1.0,-4.568,0.0,-5.215,0.0,4.513,0.0,0.439,0.661,-1.276,0.202,-1.171,0.242,-3.212,0.001,-6.417,0.0,-5.888,0.0
GradientBoostingClassifier,-1.202,0.229,10.905,0.0,4.036,0.0,4.568,0.0,0.0,1.0,-0.291,0.771,8.008,0.0,4.719,0.0,3.324,0.001,3.433,0.001,1.606,0.108,-1.672,0.095,-1.054,0.292
K_Neighbours,-0.915,0.36,11.581,0.0,4.812,0.0,5.215,0.0,0.291,0.771,0.0,1.0,8.809,0.0,5.387,0.0,3.849,0.0,3.949,0.0,1.995,0.046,-1.521,0.128,-0.864,0.388
NN-12-layer_wide_with_dropout,-9.101,0.0,3.887,0.0,-5.55,0.0,-4.513,0.0,-8.008,0.0,-8.809,0.0,0.0,1.0,-3.943,0.0,-5.334,0.0,-5.316,0.0,-7.226,0.0,-9.422,0.0,-9.175,0.0
NN-4-layer_thin_dropout,-6.034,0.0,7.643,0.0,-1.25,0.211,-0.439,0.661,-4.719,0.0,-5.387,0.0,3.943,0.0,0.0,1.0,-1.623,0.105,-1.556,0.12,-3.494,0.0,-6.494,0.0,-6.011,0.0
NN-4-layer_wide_no_dropout,-4.563,0.0,8.731,0.0,0.624,0.533,1.276,0.202,-3.324,0.001,-3.849,0.0,5.334,0.0,1.623,0.105,0.0,1.0,0.089,0.929,-1.863,0.062,-5.115,0.0,-4.583,0.0
NN-4-layer_wide_with_dropout,-4.692,0.0,8.651,0.0,0.574,0.566,1.171,0.242,-3.433,0.001,-3.949,0.0,5.316,0.0,1.556,0.12,-0.089,0.929,0.0,1.0,-1.97,0.049,-5.285,0.0,-4.728,0.0


#### t-test with Bonferroni correction

In [15]:
# Pairwise t-test with Bonferroni correction; the result is a boolean matrix
# with one row and one column per estimator.
t_test_bonferroni_df = analyze.t_test_with_bonferroni_correction(errors_per_estimator)
t_test_bonferroni_df

Unnamed: 0,BaggingClassifier,BaselineClassifier,BernoulliNaiveBayes,GaussianNaiveBayes,GradientBoostingClassifier,K_Neighbours,NN-12-layer_wide_with_dropout,NN-4-layer_thin_dropout,NN-4-layer_wide_no_dropout,NN-4-layer_wide_with_dropout,PassiveAggressiveClassifier,RandomForestClassifier,SVC
BaggingClassifier,False,True,True,True,False,False,True,True,True,True,False,False,False
BaselineClassifier,True,False,True,True,True,True,True,True,True,True,True,True,True
BernoulliNaiveBayes,True,True,False,False,True,True,True,False,False,False,False,True,True
GaussianNaiveBayes,True,True,False,False,True,True,True,False,False,False,False,True,True
GradientBoostingClassifier,False,True,True,True,False,False,True,True,True,True,False,False,False
K_Neighbours,False,True,True,True,False,False,True,True,True,True,False,False,False
NN-12-layer_wide_with_dropout,True,True,True,True,True,True,False,True,True,True,True,True,True
NN-4-layer_thin_dropout,True,True,False,False,True,True,True,False,False,False,True,True,True
NN-4-layer_wide_no_dropout,True,True,False,False,True,True,True,False,False,False,False,True,True
NN-4-layer_wide_with_dropout,True,True,False,False,True,True,True,False,False,False,False,True,True


In [11]:
# List the names of all default classification estimators, including the one
# that was filtered out of `estimators`.
# NOTE(review): this cell's execution count (11) is out of sequence with its
# neighbours, so it was run out of order; re-run the notebook top to bottom.
for e in estim:
    print(e.properties['name'])

RandomForestClassifier
BaggingClassifier
GradientBoostingClassifier
SVC
GaussianNaiveBayes
BernoulliNaiveBayes
NeuralNetworkDeepClassifier
PassiveAggressiveClassifier
BaselineClassifier
K_Neighbours


#### Wilcoxon test

In [16]:
# Pairwise Wilcoxon test between estimators. The first returned object (`a`)
# is not used by any cell visible here; the multi-index DataFrame is displayed
# and exported later.
a, wilcoxon_df_multiindex = analyze.wilcoxon_test(errors_per_estimator)
wilcoxon_df_multiindex

  z = (T - mn - correction) / se


Unnamed: 0_level_0,BaggingClassifier,BaggingClassifier,BaselineClassifier,BaselineClassifier,BernoulliNaiveBayes,BernoulliNaiveBayes,GaussianNaiveBayes,GaussianNaiveBayes,GradientBoostingClassifier,GradientBoostingClassifier,K_Neighbours,K_Neighbours,NN-12-layer_wide_with_dropout,NN-12-layer_wide_with_dropout,NN-4-layer_thin_dropout,NN-4-layer_thin_dropout,NN-4-layer_wide_no_dropout,NN-4-layer_wide_no_dropout,NN-4-layer_wide_with_dropout,NN-4-layer_wide_with_dropout,PassiveAggressiveClassifier,PassiveAggressiveClassifier,RandomForestClassifier,RandomForestClassifier,SVC,SVC
Unnamed: 0_level_1,statistic,p_val,statistic,p_val,statistic,p_val,statistic,p_val,statistic,p_val,statistic,p_val,statistic,p_val,statistic,p_val,statistic,p_val,statistic,p_val,statistic,p_val,statistic,p_val,statistic,p_val
BaggingClassifier,0.0,,6.0,0.0,456.5,0.0,429.5,0.0,1097.0,0.0,2017.0,0.014,222.0,0.0,536.0,0.0,730.5,0.0,724.5,0.0,1018.0,0.0,1200.0,0.0,2731.5,0.87
BaselineClassifier,6.0,0.0,0.0,,62.5,0.0,439.0,0.0,24.5,0.0,1.0,0.0,604.0,0.0,64.0,0.0,29.0,0.0,44.0,0.0,13.5,0.0,0.0,0.0,0.0,0.0
BernoulliNaiveBayes,456.5,0.0,62.5,0.0,0.0,,2386.0,0.064,1110.5,0.0,314.0,0.0,929.5,0.0,2579.0,0.089,2626.5,0.262,2955.0,0.362,1364.0,0.0,165.5,0.0,253.5,0.0
GaussianNaiveBayes,429.5,0.0,439.0,0.0,2386.0,0.064,0.0,,1093.0,0.0,362.0,0.0,1706.0,0.0,2890.5,0.629,1968.5,0.004,2240.0,0.011,993.0,0.0,151.5,0.0,144.0,0.0
GradientBoostingClassifier,1097.0,0.0,24.5,0.0,1110.5,0.0,1093.0,0.0,0.0,,2399.0,0.169,331.0,0.0,805.0,0.0,1152.5,0.0,1160.0,0.0,2466.5,0.043,604.5,0.0,1709.0,0.0
K_Neighbours,2017.0,0.014,1.0,0.0,314.0,0.0,362.0,0.0,2399.0,0.169,0.0,,123.0,0.0,381.0,0.0,579.0,0.0,543.0,0.0,1230.0,0.0,1102.0,0.0,1594.5,0.002
NN-12-layer_wide_with_dropout,222.0,0.0,604.0,0.0,929.5,0.0,1706.0,0.0,331.0,0.0,123.0,0.0,0.0,,637.0,0.0,395.5,0.0,394.5,0.0,459.0,0.0,106.0,0.0,32.0,0.0
NN-4-layer_thin_dropout,536.0,0.0,64.0,0.0,2579.0,0.089,2890.5,0.629,805.0,0.0,381.0,0.0,637.0,0.0,0.0,,1421.5,0.0,1488.0,0.0,1183.0,0.0,216.0,0.0,149.0,0.0
NN-4-layer_wide_no_dropout,730.5,0.0,29.0,0.0,2626.5,0.262,1968.5,0.004,1152.5,0.0,579.0,0.0,395.5,0.0,1421.5,0.0,0.0,,2105.0,0.415,1778.5,0.0,340.0,0.0,246.0,0.0
NN-4-layer_wide_with_dropout,724.5,0.0,44.0,0.0,2955.0,0.362,2240.0,0.011,1160.0,0.0,543.0,0.0,394.5,0.0,1488.0,0.0,2105.0,0.415,0.0,,1794.5,0.0,433.0,0.0,425.0,0.0


#### Friedman test

In [17]:
# Friedman test across all estimators; the first returned object is discarded
# and the DataFrame holds the test statistic and p-value.
_, friedman_test_df = analyze.friedman_test(errors_per_estimator)
friedman_test_df

Unnamed: 0,statistic,p_value
0,681.302,0.0


In [19]:
# Nemenyi post-hoc test between estimators, wrapped in a DataFrame for
# display and export.
# NOTE(review): the variable names spell the test "nemeniy"; they are kept
# because the export cell refers to nemeniy_test_df.
nemeniy_test = analyze.nemenyi(errors_per_estimator)
nemeniy_test_df = pd.DataFrame(nemeniy_test)
nemeniy_test_df

Unnamed: 0,BaggingClassifier,BaselineClassifier,BernoulliNaiveBayes,GaussianNaiveBayes,GradientBoostingClassifier,K_Neighbours,NN-12-layer_wide_with_dropout,NN-4-layer_thin_dropout,NN-4-layer_wide_no_dropout,NN-4-layer_wide_with_dropout,PassiveAggressiveClassifier,RandomForestClassifier,SVC
BaggingClassifier,-1.0,0.0,0.017,0.002,1.0,1.0,0.0,0.0,0.076,0.06,0.88,1.0,1.0
BaselineClassifier,0.0,-1.0,0.0,0.0,0.0,0.0,0.544,0.0,0.0,0.0,0.0,0.0,0.0
BernoulliNaiveBayes,0.017,0.0,-1.0,1.0,0.284,0.113,0.046,1.0,1.0,1.0,0.932,0.003,0.017
GaussianNaiveBayes,0.002,0.0,1.0,-1.0,0.074,0.02,0.207,1.0,1.0,1.0,0.683,0.0,0.002
GradientBoostingClassifier,1.0,0.0,0.284,0.074,-1.0,1.0,0.0,0.031,0.574,0.519,0.999,0.996,1.0
K_Neighbours,1.0,0.0,0.113,0.02,1.0,-1.0,0.0,0.007,0.314,0.268,0.991,1.0,1.0
NN-12-layer_wide_with_dropout,0.0,0.544,0.046,0.207,0.0,0.0,-1.0,0.357,0.009,0.012,0.0,0.0,0.0
NN-4-layer_thin_dropout,0.0,0.0,1.0,1.0,0.031,0.007,0.357,-1.0,0.999,0.999,0.5,0.0,0.0
NN-4-layer_wide_no_dropout,0.076,0.0,1.0,1.0,0.574,0.314,0.009,0.999,-1.0,1.0,0.992,0.017,0.077
NN-4-layer_wide_with_dropout,0.06,0.0,1.0,1.0,0.519,0.268,0.012,0.999,1.0,-1.0,0.987,0.012,0.06


### Per dataset

In [20]:
# Display the per-dataset breakdown: one row per (dataset, estimator) pair
# with its loss and standard error.
errors_per_dataset_per_estimator_df

Unnamed: 0,Unnamed: 1,loss,std_error
abalone,BaggingClassifier,0.37708,0.01305
abalone,BaselineClassifier,0.66715,0.01269
abalone,BernoulliNaiveBayes,0.44888,0.01339
abalone,GaussianNaiveBayes,0.44017,0.01337
abalone,GradientBoostingClassifier,0.38869,0.01313
abalone,K_Neighbours,0.36476,0.01296
abalone,NN-12-layer_wide_with_dropout,0.36766,0.01298
abalone,NN-4-layer_thin_dropout,0.38724,0.01312
abalone,NN-4-layer_wide_no_dropout,0.36186,0.01294
abalone,NN-4-layer_wide_with_dropout,0.37563,0.01304


## Save tables to $\LaTeX$

In [21]:
#average and standard error
# Export every result table as a LaTeX file for the paper.
# Each entry is (file name, DataFrame, keyword arguments for DataFrame.to_latex).
# The table is listed once per file: the previous version of this cell wrote
# t_test_bonferroni.tex twice with identical content.
# NOTE(review): 'wilxocon_test.tex' is misspelt but kept byte-for-byte because
# the paper sources may include the file under that name -- confirm before
# renaming.
TABLES_DIR = '../mlaut-paper/mlaut/tables/'

latex_tables = [
    ('avg_and_st_error.tex', avg_and_std_error, {}),          # average and standard error
    ('avg_training_time.tex', avg_training_time, {}),         # average training time
    ('avg_rank.tex', avg_rank, {}),                           # average rank
    ('avg_metrics.tex', avg_metrics, {}),                     # merged average metrics
    ('cohens_d.tex', cohens_d, {}),                           # Cohen's d
    ('t_test.tex', t_test_df, {}),                            # t-test
    ('t_test_bonferroni.tex', t_test_bonferroni_df, {}),      # t-test with Bonferroni correction
    ('sign_test.tex', sign_test_df, {}),                      # sign test
    ('wilxocon_test.tex', wilcoxon_df_multiindex, {}),        # Wilcoxon test
    ('friedman_test.tex', friedman_test_df, {}),              # Friedman test
    ('nemeniy_test.tex', nemeniy_test_df, {}),                # Nemenyi test
    # Errors per dataset per estimator: a long table that spans pages.
    # No caption is injected into the generated LaTeX.
    ('errors_per_dataset_per_estimator.tex', errors_per_dataset_per_estimator_df, {'longtable': True}),
]

for file_name, table, latex_kwargs in latex_tables:
    # Render lazily inside the loop so files are written in order, exactly as
    # the one-stanza-per-table version did.
    with open(TABLES_DIR + file_name, 'w') as tf:
        tf.write(table.to_latex(**latex_kwargs))