In [1]:
from mleap.analyze_results import AnalyseResults
from mleap.data import Data
import pandas as pd
from mleap.estimators.estimators import instantiate_default_estimators

import matplotlib.pyplot as plt
# Raise the pandas display limit so result tables with up to 1000 rows are
# rendered in full rather than truncated.
pd.set_option("display.max_rows", 1000)

  from ._conv import register_converters as _register_converters
  return f(*args, **kwds)


In [2]:
# Open the two HDF5 stores used by the analysis: the original datasets
# (mode='r') and the experiment results (mode='a').
# NOTE(review): presumably 'r' is read-only and 'a' is read/write-append, as in
# h5py -- confirm against Data.open_hdf5. Neither handle is closed in this
# part of the notebook.
data = Data()
input_io = data.open_hdf5('data/delgado.hdf5', mode='r')
out_io = data.open_hdf5('data/classification.hdf5', mode='a')

# AnalyseResults is pointed at the group holding the original datasets in the
# input file and the group holding the predictions in the output file.
analyze = AnalyseResults(
    hdf5_output_io=out_io,
    hdf5_input_io=input_io,
    input_h5_original_datasets_group='delgado_datasets/',
    output_h5_predictions_group='experiments/predictions/',
)


### All datasets

In [3]:
# Compute the scores (metric='accuracy') over all datasets. The result is the
# single input passed to every summary statistic and significance test in the
# cells that follow.
# NOTE(review): despite the `error_` prefix, the metric requested is accuracy.
error_all_datasets = analyze.calculate_error_all_datasets(metric='accuracy')

#### Simple average

In [4]:
# Summarise the scores per estimator; the displayed frame has one row per
# estimator with the columns `avg` and `std`.
res_df = analyze.calculate_average_std(error_all_datasets)
res_df

Unnamed: 0,avg,std
BaselineClassifier,0.415101,0.201903
Lasso,0.566471,0.277306
LassoLars,0.599303,0.289259
RidgeRegression,0.609121,0.278852
GaussianNaiveBayes,0.657472,0.214265
BernoulliNaiveBayes,0.691525,0.179218
SVC,0.723133,0.210781
PassiveAggressiveClassifier,0.72844,0.189196
LogisticRegression,0.752862,0.181393
GradientBoostingClassifier,0.756742,0.196091


#### Cohen's d

In [5]:
# Cohen's d for pairs of estimators; each row of the displayed frame is
# labelled "<estimator A>-<estimator B>".
cohens_d = analyze.cohens_d(error_all_datasets)
cohens_d

Unnamed: 0,Cohen's d
BaselineClassifier-RandomForestClassifier,1.954958
BaggingClassifier-BaselineClassifier,-1.846145
BaselineClassifier-LogisticRegression,1.759885
BaselineClassifier-GradientBoostingClassifier,1.716635
BaselineClassifier-PassiveAggressiveClassifier,1.60151
BaselineClassifier-SVC,1.492479
BaselineClassifier-BernoulliNaiveBayes,1.448019
BaselineClassifier-GaussianNaiveBayes,1.164262
Lasso-RandomForestClassifier,0.959893
BaggingClassifier-Lasso,-0.894276


#### t-test

In [6]:
# Pairwise t-test between estimators. The call returns two objects; only the
# second is displayed here (columns `pair`, `t_statistic`, `p_value`).
t_test, t_test_df = analyze.t_test(error_all_datasets)
t_test_df

Unnamed: 0,pair,t_statistic,p_value
0,BaggingClassifier - BaselineClassifier,14.300181,5.743758999999999e-34
1,BaggingClassifier - BernoulliNaiveBayes,3.688118,0.0002794533
2,BaggingClassifier - GaussianNaiveBayes,4.660971,5.222207e-06
3,BaggingClassifier - GradientBoostingClassifier,0.939337,0.3485022
4,BaggingClassifier - Lasso,6.927031,3.91879e-11
5,BaggingClassifier - LassoLars,5.69763,3.543237e-08
6,BaggingClassifier - LogisticRegression,1.135794,0.2571756
7,BaggingClassifier - PassiveAggressiveClassifier,2.10111,0.03667455
8,BaggingClassifier - RandomForestClassifier,-0.481831,0.630365
9,BaggingClassifier - RidgeRegression,5.525178,8.550309e-08


#### Sign test

In [7]:
# Pairwise sign test between estimators. The call returns two objects; only
# the second is displayed here.
# NOTE(review): the displayed statistic column is labelled `t_statistic`, the
# same as in the t-test output -- confirm whether that label is intended.
sign_test, sign_test_df = analyze.sign_test(error_all_datasets)
sign_test_df

Unnamed: 0,pair,t_statistic,p_value
0,BaggingClassifier - BaselineClassifier,10.908173,1.053528e-27
1,BaggingClassifier - BernoulliNaiveBayes,4.543618,5.529683e-06
2,BaggingClassifier - GaussianNaiveBayes,5.082645,3.722157e-07
3,BaggingClassifier - GradientBoostingClassifier,1.156107,0.2476376
4,BaggingClassifier - Lasso,6.426079,1.309374e-10
5,BaggingClassifier - LassoLars,5.094582,3.495112e-07
6,BaggingClassifier - LogisticRegression,1.624426,0.1042849
7,BaggingClassifier - PassiveAggressiveClassifier,2.648301,0.008089741
8,BaggingClassifier - RandomForestClassifier,-0.490358,0.6238805
9,BaggingClassifier - RidgeRegression,5.024793,5.039748e-07


#### t-test with Bonferroni correction

In [8]:
# Pairwise t-test with Bonferroni correction. The call returns two objects;
# only the second is displayed here (columns `pair`, `p_value`).
t_test_bonferroni, t_test_bonferroni_df = analyze.t_test_with_bonferroni_correction(error_all_datasets)
t_test_bonferroni_df

Unnamed: 0,pair,p_value
0,BaggingClassifier - BaselineClassifier,3.790881e-32
1,BaggingClassifier - BernoulliNaiveBayes,0.01844392
2,BaggingClassifier - GaussianNaiveBayes,0.0003446656
3,BaggingClassifier - GradientBoostingClassifier,1.0
4,BaggingClassifier - Lasso,2.586402e-09
5,BaggingClassifier - LassoLars,2.338536e-06
6,BaggingClassifier - LogisticRegression,1.0
7,BaggingClassifier - PassiveAggressiveClassifier,1.0
8,BaggingClassifier - RandomForestClassifier,1.0
9,BaggingClassifier - RidgeRegression,5.643204e-06


#### Wilcoxon test

In [9]:
# Pairwise Wilcoxon test between estimators. The call returns two objects;
# only the second is displayed here (columns `pair`, `statistic`, `p_value`).
wilcoxon_test, wilcoxon_test_df = analyze.wilcoxon_test(error_all_datasets)
wilcoxon_test_df

Unnamed: 0,pair,statistic,p_value
0,BaggingClassifier - BaselineClassifier,46.0,6.230632e-21
1,BaggingClassifier - BernoulliNaiveBayes,1008.0,1.09112e-11
2,BaggingClassifier - GaussianNaiveBayes,661.0,5.215726e-14
3,BaggingClassifier - GradientBoostingClassifier,1471.0,1.702102e-05
4,BaggingClassifier - Lasso,614.0,3.110134e-14
5,BaggingClassifier - LassoLars,948.0,1.631066e-11
6,BaggingClassifier - LogisticRegression,2271.0,0.006524758
7,BaggingClassifier - PassiveAggressiveClassifier,1507.5,4.072268e-06
8,BaggingClassifier - RandomForestClassifier,1654.0,0.02014512
9,BaggingClassifier - RidgeRegression,886.0,1.363669e-11


#### Friedman test

In [10]:
# Friedman test across all estimators. The displayed frame holds a single row
# with `statistic` and `p_value`.
friedman_test, friedman_test_df = analyze.friedman_test(error_all_datasets)
friedman_test_df

Unnamed: 0,statistic,p_value
0,479.352709,8.082297e-96


In [11]:
# Nemenyi test between estimators. The result is wrapped in a DataFrame for
# display: a square table indexed by estimator name on both axes, with -1.0 on
# the diagonal.
# NOTE(review): the off-diagonal entries are presumably pairwise p-values --
# confirm against AnalyseResults.nemenyi.
# NOTE(review): `nemeniy_test` is spelled differently from the `nemenyi`
# method; left unchanged because other cells may reference the name.
nemeniy_test = analyze.nemenyi(error_all_datasets)
pd.DataFrame(nemeniy_test)

Unnamed: 0,BaggingClassifier,BaselineClassifier,BernoulliNaiveBayes,GaussianNaiveBayes,GradientBoostingClassifier,Lasso,LassoLars,LogisticRegression,PassiveAggressiveClassifier,RandomForestClassifier,RidgeRegression,SVC
BaggingClassifier,-1.0,0.0,0.1574443,0.01811928,0.999853,3.431908e-06,0.00211,0.999037,0.9171606,1.0,0.003658,0.9275771
BaselineClassifier,0.0,-1.0,4.19296e-09,8.240973e-07,0.0,0.007953507,2.4e-05,0.0,1.310063e-14,0.0,1.1e-05,8.881784e-15
BernoulliNaiveBayes,0.157444,4.19296e-09,-1.0,0.9999916,0.710123,0.7153713,0.998016,0.818867,0.9938888,0.0618664,0.999264,0.9923062
GaussianNaiveBayes,0.018119,8.240973e-07,0.9999916,-1.0,0.261405,0.9717815,1.0,0.37683,0.8592395,0.004620135,1.0,0.8432785
GradientBoostingClassifier,0.999853,0.0,0.7101235,0.2614053,-1.0,0.0007558198,0.072367,1.0,0.9997267,0.9970865,0.102376,0.9998123
Lasso,3e-06,0.007953507,0.7153713,0.9717815,0.000756,-1.0,0.998904,0.001964,0.04499707,3.318602e-07,0.997196,0.03986667
LassoLars,0.00211,2.356335e-05,0.998016,0.9999997,0.072367,0.9989041,-1.0,0.125393,0.5614734,0.0003947811,1.0,0.5363421
LogisticRegression,0.999037,0.0,0.8188674,0.3768301,1.0,0.001963545,0.125393,-1.0,0.9999709,0.9895904,0.169787,0.9999822
PassiveAggressiveClassifier,0.917161,1.310063e-14,0.9938888,0.8592395,0.999727,0.04499707,0.561473,0.999971,-1.0,0.7735985,0.642788,1.0
RandomForestClassifier,1.0,0.0,0.0618664,0.004620135,0.997087,3.318602e-07,0.000395,0.98959,0.7735985,-1.0,0.000736,0.7932984


### Per dataset

In [13]:
# Compute the scores (metric='accuracy') separately for each dataset. The call
# returns two objects; only the second is displayed here, with one row per
# (dataset, estimator) pair and the columns `score` and `std`.
# NOTE(review): the displayed abalone / BaselineClassifier score (1.378535) is
# greater than 1 although metric='accuracy' was requested -- confirm what
# calculate_error_per_dataset actually reports.
error_per_dataset, error_per_dataset_df = analyze.calculate_error_per_dataset(metric='accuracy')
error_per_dataset_df

Unnamed: 0,Unnamed: 1,score,std
abalone,BaggingClassifier,0.495286,0.022710
abalone,BaselineClassifier,1.378535,0.041126
abalone,BernoulliNaiveBayes,0.759971,0.032166
abalone,GaussianNaiveBayes,0.612038,0.025898
abalone,GradientBoostingClassifier,0.498912,0.022710
abalone,Lasso,0.413882,0.014548
abalone,LassoLars,0.376236,0.022154
abalone,LogisticRegression,0.511240,0.024704
abalone,PassiveAggressiveClassifier,0.572154,0.018137
abalone,RandomForestClassifier,0.498912,0.023928
