In [1]:
from mlaut.analyze_results import AnalyseResults
from mlaut.data import Data
import pandas as pd
from mlaut.estimators.estimators import instantiate_default_estimators

import matplotlib.pyplot as plt
# Let pandas render up to 1000 rows so the result tables below are not truncated.
pd.set_option("display.max_rows", 1000)

  from ._conv import register_converters as _register_converters


In [2]:
# Open the two HDF5 stores through the mlaut Data helper: the store holding
# the original datasets with mode='r', the store holding the experiment
# results with mode='a'.
data = Data()
input_io = data.open_hdf5("data/openml.h5", mode="r")
out_io = data.open_hdf5("data/openml-classification.h5", mode="a")

# Results analyser bound to both stores and to the HDF5 groups it is told to
# use for the original datasets and for the saved predictions.
analyze = AnalyseResults(
    hdf5_output_io=out_io,
    hdf5_input_io=input_io,
    input_h5_original_datasets_group="openml/",
    output_h5_predictions_group="experiments/predictions/",
)


### All datasets

In [3]:
# Compute prediction errors with metric='accuracy'. The call returns three
# objects; the per-estimator one feeds every test below and the per-dataset
# DataFrame is displayed in the "Per dataset" section.
(
    errors_per_estimator,
    errors_per_dataset_per_estimator,
    errors_per_dataset_per_estimator_df,
) = analyze.prediction_errors(metric="accuracy")

#### Simple average

In [4]:
# Per-estimator average and standard error, computed from the per-estimator
# errors returned by analyze.prediction_errors above.
res_df = analyze.average_and_std_error(errors_per_estimator)
# Bare expression: the frame is rendered as the cell output.
res_df

Unnamed: 0,avg,std_error
K_Means,0.448391,0.010524
BaselineClassifier,0.541539,0.008353
SVC,0.724837,0.008714
GaussianNaiveBayes,0.725442,0.008779
BernoulliNaiveBayes,0.741032,0.007355
PassiveAggressiveClassifier,0.765447,0.007881
GradientBoostingClassifier,0.795358,0.007449
BaggingClassifier,0.81911,0.007072
RandomForestClassifier,0.82208,0.006987


#### Cohen's d

In [5]:
# Cohen's d effect size for pairs of estimators (the output rows are labelled
# "<estimator A>-<estimator B>").
cohens_d = analyze.cohens_d(errors_per_estimator)
# Bare expression: the result is rendered as the cell output.
cohens_d

Unnamed: 0,Cohen's d
K_Means-RandomForestClassifier,1.820712
BaggingClassifier-K_Means,-1.799504
GradientBoostingClassifier-K_Means,-1.656232
BaselineClassifier-RandomForestClassifier,1.585505
BaggingClassifier-BaselineClassifier,-1.560861
K_Means-PassiveAggressiveClassifier,1.484209
BernoulliNaiveBayes-K_Means,-1.402784
BaselineClassifier-GradientBoostingClassifier,1.395745
K_Means-SVC,1.245264
GaussianNaiveBayes-K_Means,-1.2442


#### t-test

In [6]:
# Pairwise t-test between estimators. The call returns two objects; only the
# second one (t_test_df) is displayed.
t_test, t_test_df = analyze.t_test(errors_per_estimator)
t_test_df

Unnamed: 0,pair,t_statistic,p_value
0,BaggingClassifier - BaselineClassifier,25.336969,5.4376139999999994e-111
1,BaggingClassifier - BernoulliNaiveBayes,7.644637,4.701239e-14
2,BaggingClassifier - GaussianNaiveBayes,8.301049,3.134151e-16
3,BaggingClassifier - GradientBoostingClassifier,2.310175,0.02107092
4,BaggingClassifier - K_Means,29.210771,6.675142e-138
5,BaggingClassifier - PassiveAggressiveClassifier,5.063108,4.865797e-07
6,BaggingClassifier - RandomForestClassifier,-0.298497,0.7653826
7,BaggingClassifier - SVC,8.392401,1.517651e-16
8,BaselineClassifier - BernoulliNaiveBayes,-17.906962,8.191254999999999e-63
9,BaselineClassifier - GaussianNaiveBayes,-15.161676,4.048827e-47


#### Sign test

In [7]:
# Pairwise sign test between estimators. The call returns two objects; only
# the second one (sign_test_df) is displayed.
# NOTE(review): the displayed statistic column is labelled `t_statistic` even
# though this is the sign test - confirm the label against the library.
sign_test, sign_test_df = analyze.sign_test(errors_per_estimator)
sign_test_df

Unnamed: 0,pair,t_statistic,p_value
0,BaggingClassifier - BaselineClassifier,20.471105,3.8968200000000005e-93
1,BaggingClassifier - BernoulliNaiveBayes,8.389696,4.873995000000001e-17
2,BaggingClassifier - GaussianNaiveBayes,8.218672,2.057697e-16
3,BaggingClassifier - GradientBoostingClassifier,2.422194,0.01542709
4,BaggingClassifier - K_Means,21.760702,5.470411e-105
5,BaggingClassifier - PassiveAggressiveClassifier,4.963652,6.917976e-07
6,BaggingClassifier - RandomForestClassifier,-0.302598,0.7621964
7,BaggingClassifier - SVC,7.970459,1.580865e-15
8,BaselineClassifier - BernoulliNaiveBayes,-16.388104,2.3258580000000002e-60
9,BaselineClassifier - GaussianNaiveBayes,-15.017787,5.615084e-51


#### t-test with Bonferroni correction

In [8]:
# Pairwise t-test with Bonferroni correction. The call returns two objects;
# only the second one (t_test_bonferroni_df) is displayed, and its output
# carries a p_value column but no statistic column.
t_test_bonferroni, t_test_bonferroni_df = analyze.t_test_with_bonferroni_correction(errors_per_estimator)
t_test_bonferroni_df

Unnamed: 0,pair,p_value
0,BaggingClassifier - BaselineClassifier,1.957541e-109
1,BaggingClassifier - BernoulliNaiveBayes,1.692446e-12
2,BaggingClassifier - GaussianNaiveBayes,1.128294e-14
3,BaggingClassifier - GradientBoostingClassifier,0.758553
4,BaggingClassifier - K_Means,2.403051e-136
5,BaggingClassifier - PassiveAggressiveClassifier,1.751687e-05
6,BaggingClassifier - RandomForestClassifier,1.0
7,BaggingClassifier - SVC,5.463544e-15
8,BaselineClassifier - BernoulliNaiveBayes,2.9488520000000002e-61
9,BaselineClassifier - GaussianNaiveBayes,1.457578e-45


#### Wilcoxon test

In [9]:
# Pairwise Wilcoxon test between estimators. The call returns two objects;
# only the second one (wilcoxon_test_df) is displayed.
wilcoxon_test, wilcoxon_test_df = analyze.wilcoxon_test(errors_per_estimator)
wilcoxon_test_df

Unnamed: 0,pair,statistic,p_value
0,BaggingClassifier - BaselineClassifier,1568.0,3.591427e-83
1,BaggingClassifier - BernoulliNaiveBayes,14771.5,2.228336e-46
2,BaggingClassifier - GaussianNaiveBayes,18544.5,4.460294e-39
3,BaggingClassifier - GradientBoostingClassifier,20367.5,6.382952e-25
4,BaggingClassifier - K_Means,1643.5,8.649447e-82
5,BaggingClassifier - PassiveAggressiveClassifier,25451.0,7.991567e-25
6,BaggingClassifier - RandomForestClassifier,37331.5,0.00223485
7,BaggingClassifier - SVC,19538.5,1.068305e-37
8,BaselineClassifier - BernoulliNaiveBayes,6968.5,4.175855e-70
9,BaselineClassifier - GaussianNaiveBayes,13502.0,9.747028999999999e-56


#### Friedman test and Nemenyi post-hoc test

In [10]:
# Friedman test across all estimators at once: the displayed result is a
# single row holding one statistic and one p-value.
friedman_test, friedman_test_df = analyze.friedman_test(errors_per_estimator)
friedman_test_df

Unnamed: 0,statistic,p_value
0,1868.675728,0.0


In [11]:
# Nemenyi test on the same per-estimator errors; the displayed result is an
# estimator-by-estimator matrix.
# NOTE(review): the variable is spelled `nemeniy_test` (sic); the name is left
# unchanged in case later cells reference it.
nemeniy_test = analyze.nemenyi(errors_per_estimator)
# Wrapped in a DataFrame so the matrix is rendered as a table; the diagonal
# entries in the output are -1.0.
pd.DataFrame(nemeniy_test)

Unnamed: 0,BaggingClassifier,BaselineClassifier,BernoulliNaiveBayes,GaussianNaiveBayes,GradientBoostingClassifier,K_Means,PassiveAggressiveClassifier,RandomForestClassifier,SVC
BaggingClassifier,-1.0,1.8756769999999997e-91,5.620171e-08,2.93776e-09,0.8355906,2.567262e-122,0.007710652,0.9999999,9.427057e-10
BaselineClassifier,1.8756769999999997e-91,-1.0,9.198627000000001e-39,4.3351889999999995e-36,9.375245e-74,0.2638589,9.686679000000001e-55,6.087868e-94,3.901929e-35
BernoulliNaiveBayes,5.620171e-08,9.198627000000001e-39,-1.0,0.9999954,0.001796664,8.119234e-60,0.6416164,9.923986e-09,0.9999484
GaussianNaiveBayes,2.93776e-09,4.3351889999999995e-36,0.9999954,-1.0,0.0002722613,1.68628e-56,0.3853797,4.523261e-10,1.0
GradientBoostingClassifier,0.8355906,9.375245e-74,0.001796664,0.0002722613,-1.0,9.371874e-102,0.6175779,0.7107906,0.0001284997
K_Means,2.567262e-122,0.2638589,8.119234e-60,1.68628e-56,9.371874e-102,-1.0,3.026019e-79,3.4775080000000004e-125,2.612149e-55
PassiveAggressiveClassifier,0.007710652,9.686679000000001e-55,0.6416164,0.3853797,0.6175779,3.026019e-79,-1.0,0.002931587,0.3008923
RandomForestClassifier,0.9999999,6.087868e-94,9.923986e-09,4.523261e-10,0.7107906,3.4775080000000004e-125,0.002931587,-1.0,1.380977e-10
SVC,9.427057e-10,3.901929e-35,0.9999484,1.0,0.0001284997,2.612149e-55,0.3008923,1.380977e-10,-1.0


### Per dataset

In [12]:
# Display the per-dataset, per-estimator frame returned by
# analyze.prediction_errors (columns: loss, std). display.max_rows was raised
# to 1000 in the first cell, so up to 1000 rows of this frame are rendered.
# NOTE(review): the `loss` values here read like error rates (e.g. the
# baseline is near 0.48 on the first dataset while the other estimators are
# near 0.05), whereas the averaged table in the "Simple average" section
# reads like accuracies - confirm which quantity each table reports.
errors_per_dataset_per_estimator_df

Unnamed: 0,Unnamed: 1,loss,std
AP_Breast_Colon,BaggingClassifier,0.048077,0.014833
AP_Breast_Colon,BaselineClassifier,0.480769,0.034643
AP_Breast_Colon,BernoulliNaiveBayes,0.057692,0.016167
AP_Breast_Colon,GaussianNaiveBayes,0.072115,0.017936
AP_Breast_Colon,GradientBoostingClassifier,0.052885,0.015518
AP_Breast_Colon,K_Means,0.086538,0.019495
AP_Breast_Colon,PassiveAggressiveClassifier,0.052885,0.015518
AP_Breast_Colon,RandomForestClassifier,0.057692,0.016167
AP_Breast_Colon,SVC,0.471154,0.034611
AP_Breast_Kidney,BaggingClassifier,0.035000,0.012995
