In [1]:
from mlaut.analyze_results import AnalyseResults
from mlaut.data import Data
import pandas as pd
from mlaut.estimators.estimators import instantiate_default_estimators
from mlaut.analyze_results.scores import ScoreAccuracy

import matplotlib.pyplot as plt
# Raise pandas' display limit to 1000 rows before it truncates a rendered DataFrame.
pd.options.display.max_rows = 1000

  from ._conv import register_converters as _register_converters


In [2]:
# Open the two HDF5 stores used throughout the notebook: the original datasets
# (read-only) and the experiment store (append mode).
data = Data()
input_io = data.open_hdf5('data/openml.h5', mode='r')
out_io = data.open_hdf5('data/openml-classification.h5', mode='a')

# Analysis entry point, wired to both stores and to the HDF5 groups to use for
# the original datasets and for the saved predictions.
analyze = AnalyseResults(
    hdf5_output_io=out_io,
    hdf5_input_io=input_io,
    input_h5_original_datasets_group='openml/',
    output_h5_predictions_group='experiments/predictions/',
)


### All datasets

In [3]:
# Scoring object handed to the analysis to evaluate the saved predictions.
score_accuracy = ScoreAccuracy()

# prediction_errors yields three results: the per-estimator errors, the
# per-dataset-per-estimator errors, and the latter again as a DataFrame.
errors_per_estimator, errors_per_dataset_per_estimator, errors_per_dataset_per_estimator_df = (
    analyze.prediction_errors(score_accuracy)
)

#### Simple average and standard error

In [24]:
# Summarise errors_per_estimator into one row per estimator (columns: avg, std_error).
avg_and_std_error = analyze.average_and_std_error(errors_per_estimator)
# Bare expression so the notebook renders the table.
avg_and_std_error

Unnamed: 0,avg,std_error
K_Means,0.412214,0.010614
BaselineClassifier,0.536248,0.008504
PassiveAggressiveClassifier,0.559298,0.010893
GaussianNaiveBayes,0.567017,0.011205
GradientBoostingClassifier,0.57134,0.011215
BaggingClassifier,0.575503,0.011307
RandomForestClassifier,0.581803,0.011308
BernoulliNaiveBayes,0.586203,0.010751
NeuralNetworkDeepClassifier,0.594388,0.010582
SVC,0.610982,0.009809


#### Average Rank

In [27]:
# Average rank of each estimator across datasets.
# NOTE(review): ascending=False presumably gives rank 1 to the highest score — confirm
# against AnalyseResults.ranks before interpreting the table.
avg_rank = analyze.ranks(errors_per_estimator, ascending=False)
# Bare expression so the notebook renders the table.
avg_rank

Unnamed: 0,avg_rank
RandomForestClassifier,4.829848
SVC,4.852662
BernoulliNaiveBayes,4.912548
BaggingClassifier,4.973384
NeuralNetworkDeepClassifier,5.127376
GaussianNaiveBayes,5.284221
PassiveAggressiveClassifier,5.342205
GradientBoostingClassifier,5.472433
BaselineClassifier,6.944867
K_Means,7.260456


#### Cohen's d

In [6]:
# Cohen's d for estimator pairs; the rendered table is indexed by "A-B" pair labels.
cohens_d = analyze.cohens_d(errors_per_estimator)
# Bare expression so the notebook renders the table.
cohens_d

Unnamed: 0,Cohen's d
K_Means-SVC,0.848041
K_Means-NeuralNetworkDeepClassifier,0.749476
BernoulliNaiveBayes-K_Means,-0.710127
K_Means-RandomForestClassifier,0.67426
BaggingClassifier-K_Means,-0.649237
GradientBoostingClassifier-K_Means,-0.635441
GaussianNaiveBayes-K_Means,-0.618456
K_Means-PassiveAggressiveClassifier,0.596329
BaselineClassifier-K_Means,-0.56233
BaselineClassifier-SVC,0.354973


#### t-test

In [7]:
# Pairwise t-test between estimators. Two results are returned; the second is the
# DataFrame view (columns: pair, t_statistic, p_value) that is displayed and exported.
t_test, t_test_df = analyze.t_test(errors_per_estimator)
# Bare expression so the notebook renders the table.
t_test_df

Unnamed: 0,pair,t_statistic,p_value
0,BaggingClassifier - BaselineClassifier,2.771919,0.005671213
1,BaggingClassifier - BernoulliNaiveBayes,-0.685128,0.4934147
2,BaggingClassifier - GaussianNaiveBayes,0.532581,0.5944367
3,BaggingClassifier - GradientBoostingClassifier,0.261157,0.7940229
4,BaggingClassifier - K_Means,10.518846,1.134398e-24
5,BaggingClassifier - NeuralNetworkDeepClassifier,-1.218284,0.2233896
6,BaggingClassifier - PassiveAggressiveClassifier,1.031179,0.3026946
7,BaggingClassifier - RandomForestClassifier,-0.393585,0.6939671
8,BaggingClassifier - SVC,-2.367937,0.01806796
9,BaselineClassifier - BernoulliNaiveBayes,-3.640793,0.0002850537


#### Sign test

In [8]:
# Pairwise sign test between estimators. Two results are returned; the second is the
# DataFrame view that is displayed and exported.
# NOTE(review): the rendered table labels its statistic column "t_statistic" even
# though this is a sign test — check the column naming in AnalyseResults.sign_test.
sign_test, sign_test_df = analyze.sign_test(errors_per_estimator)
# Bare expression so the notebook renders the table.
sign_test_df

Unnamed: 0,pair,t_statistic,p_value
0,BaggingClassifier - BaselineClassifier,3.27867,0.001042977
1,BaggingClassifier - BernoulliNaiveBayes,-0.578004,0.5632616
2,BaggingClassifier - GaussianNaiveBayes,0.503013,0.6149548
3,BaggingClassifier - GradientBoostingClassifier,0.283319,0.7769321
4,BaggingClassifier - K_Means,9.894966,4.3775900000000006e-23
5,BaggingClassifier - NeuralNetworkDeepClassifier,-1.119071,0.2631101
6,BaggingClassifier - PassiveAggressiveClassifier,1.111764,0.2662395
7,BaggingClassifier - RandomForestClassifier,-0.382156,0.7023454
8,BaggingClassifier - SVC,-1.989831,0.04660958
9,BaselineClassifier - BernoulliNaiveBayes,-4.668882,3.028427e-06


#### t-test with Bonferroni correction

In [9]:
# Pairwise t-test with Bonferroni correction; the DataFrame view holds one
# p_value per estimator pair.
t_test_bonferroni, t_test_bonferroni_df = analyze.t_test_with_bonferroni_correction(errors_per_estimator)
# Bare expression so the notebook renders the table.
t_test_bonferroni_df

Unnamed: 0,pair,p_value
0,BaggingClassifier - BaselineClassifier,0.2552046
1,BaggingClassifier - BernoulliNaiveBayes,1.0
2,BaggingClassifier - GaussianNaiveBayes,1.0
3,BaggingClassifier - GradientBoostingClassifier,1.0
4,BaggingClassifier - K_Means,5.104792e-23
5,BaggingClassifier - NeuralNetworkDeepClassifier,1.0
6,BaggingClassifier - PassiveAggressiveClassifier,1.0
7,BaggingClassifier - RandomForestClassifier,1.0
8,BaggingClassifier - SVC,0.8130582
9,BaselineClassifier - BernoulliNaiveBayes,0.01282742


#### Wilcoxon test

In [10]:
# Pairwise Wilcoxon test between estimators; the DataFrame view has columns
# pair, statistic and p_value.
wilcoxon_test, wilcoxon_test_df = analyze.wilcoxon_test(errors_per_estimator)
# Bare expression so the notebook renders the table.
wilcoxon_test_df

Unnamed: 0,pair,statistic,p_value
0,BaggingClassifier - BaselineClassifier,48916.5,7.981088e-08
1,BaggingClassifier - BernoulliNaiveBayes,36078.0,0.7502815
2,BaggingClassifier - GaussianNaiveBayes,37150.0,0.01722447
3,BaggingClassifier - GradientBoostingClassifier,19724.5,0.0003236654
4,BaggingClassifier - K_Means,21593.5,2.684341e-29
5,BaggingClassifier - NeuralNetworkDeepClassifier,42132.0,0.7362613
6,BaggingClassifier - PassiveAggressiveClassifier,38599.0,0.06211165
7,BaggingClassifier - RandomForestClassifier,16322.5,0.1030145
8,BaggingClassifier - SVC,29475.5,0.04269111
9,BaselineClassifier - BernoulliNaiveBayes,40632.0,3.382746e-14


#### Friedman test

In [11]:
# Friedman test over all estimators at once; the DataFrame view is a single row
# with the test statistic and its p_value.
friedman_test, friedman_test_df = analyze.friedman_test(errors_per_estimator)
# Bare expression so the notebook renders the table.
friedman_test_df

Unnamed: 0,statistic,p_value
0,447.92709,7.955255999999999e-91


In [30]:
# Nemenyi post-hoc test. The result is wrapped in a DataFrame for display/export;
# it renders as a square estimator-by-estimator matrix with -1.0 on the diagonal.
# NOTE(review): the variable names spell the test "nemeniy"; they are left as-is
# because the export cell refers to nemeniy_test_df.
nemeniy_test = analyze.nemenyi(errors_per_estimator)
nemeniy_test_df = pd.DataFrame(nemeniy_test)
# Bare expression so the notebook renders the table.
nemeniy_test_df

Unnamed: 0,BaggingClassifier,BaselineClassifier,BernoulliNaiveBayes,GaussianNaiveBayes,GradientBoostingClassifier,K_Means,NeuralNetworkDeepClassifier,PassiveAggressiveClassifier,RandomForestClassifier,SVC
BaggingClassifier,-1.0,0.2717684,0.9998654,0.999998,1.0,2.5300629999999997e-19,0.9978357,0.9989503,0.9999997,0.8816357
BaselineClassifier,0.2717684,-1.0,0.04327028,0.5448475,0.3638425,8.617162e-08,0.01604297,0.830585,0.1243701,0.0005416324
BernoulliNaiveBayes,0.9998654,0.04327028,-1.0,0.9937089,0.9992784,4.6326990000000003e-23,1.0,0.9304125,0.9999996,0.996434
GaussianNaiveBayes,0.999998,0.5448475,0.9937089,-1.0,1.0,3.394648e-17,0.9687459,0.9999957,0.9997061,0.6522507
GradientBoostingClassifier,1.0,0.3638425,0.9992784,1.0,-1.0,1.573628e-18,0.9932215,0.999784,0.9999929,0.8119497
K_Means,2.5300629999999997e-19,8.617162e-08,4.6326990000000003e-23,3.394648e-17,1.573628e-18,-1.0,1.157278e-24,5.213585e-15,4.263889e-21,2.92828e-29
NeuralNetworkDeepClassifier,0.9978357,0.01604297,1.0,0.9687459,0.9932215,1.157278e-24,-1.0,0.8239855,0.9999343,0.9997171
PassiveAggressiveClassifier,0.9989503,0.830585,0.9304125,0.9999957,0.999784,5.213585e-15,0.8239855,-1.0,0.9875828,0.3403343
RandomForestClassifier,0.9999997,0.1243701,0.9999996,0.9997061,0.9999929,4.263889e-21,0.9999343,0.9875828,-1.0,0.9692684
SVC,0.8816357,0.0005416324,0.996434,0.6522507,0.8119497,2.92828e-29,0.9997171,0.3403343,0.9692684,-1.0


### Per dataset

In [13]:
# Display the per-dataset breakdown: one row per (dataset, estimator) with columns loss and std.
errors_per_dataset_per_estimator_df

Unnamed: 0,Unnamed: 1,loss,std
AP_Breast_Colon,BaggingClassifier,0.528846,0.034611
AP_Breast_Colon,BaselineClassifier,0.442308,0.034437
AP_Breast_Colon,BernoulliNaiveBayes,0.528846,0.034611
AP_Breast_Colon,GaussianNaiveBayes,0.076923,0.018476
AP_Breast_Colon,GradientBoostingClassifier,0.528846,0.034611
AP_Breast_Colon,K_Means,0.509615,0.034662
AP_Breast_Colon,NeuralNetworkDeepClassifier,0.471154,0.034611
AP_Breast_Colon,PassiveAggressiveClassifier,0.528846,0.034611
AP_Breast_Colon,RandomForestClassifier,0.471154,0.034611
AP_Breast_Colon,SVC,0.471154,0.034611


### Check why the loss is zero on the `zoo` dataset

In [14]:
# Load the 'zoo' dataset split; only the fourth returned element (the test
# labels) is kept, the first three are discarded.
_, _, _, y_test = data.load_test_train_dts(
    hdf5_out=out_io,
    hdf5_in=input_io,
    dts_name='zoo',
    dts_grp_path='/openml',
)

# Saved predictions of every estimator for the same dataset.
predictions = out_io.load_predictions_for_dataset('zoo')

In [15]:
import numpy as np

# Each entry of `predictions` carries the estimator name at position 0 and its
# predicted labels at position 1. A non-zero difference to y_test is one error.
for entry in predictions:
    estimator_name = entry[0]
    predicted_labels = entry[1]
    n_errors = np.count_nonzero(y_test - predicted_labels)
    print(f'{estimator_name} - Number of errors: {n_errors}')

BaggingClassifier - Number of errors: 0
BaselineClassifier - Number of errors: 14
BernoulliNaiveBayes - Number of errors: 0
GaussianNaiveBayes - Number of errors: 0
GradientBoostingClassifier - Number of errors: 0
K_Means - Number of errors: 11
NeuralNetworkDeepClassifier - Number of errors: 3
PassiveAggressiveClassifier - Number of errors: 0
RandomForestClassifier - Number of errors: 0
SVC - Number of errors: 0


#### Bagging

In [16]:
# Index 0 is BaggingClassifier in the estimator order printed by the error-count loop;
# element 1 of the entry holds the predicted labels.
bag_predictions = predictions[0][1]
# Element-wise difference to the test labels; zeros mark correct predictions.
bag_predictions - y_test

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

#### BernoulliNaiveBayes

In [17]:
# Index 2 is BernoulliNaiveBayes in the estimator order printed by the error-count loop;
# element 1 of the entry holds the predicted labels.
bnb_predictions = predictions[2][1]
# Element-wise difference to the test labels; zeros mark correct predictions.
bnb_predictions - y_test

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

#### GaussianNaiveBayes

In [18]:
# GaussianNaiveBayes is the fourth entry (index 3) in the estimator order printed
# by the error-count loop; index 2 is BernoulliNaiveBayes, which this cell
# previously re-read by mistake. Element 1 of the entry holds the predicted labels.
gnb_predictions = predictions[3][1]
# Element-wise difference to the test labels; zeros mark correct predictions.
gnb_predictions - y_test

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

#### Random Forest

In [19]:
# Index 8 is RandomForestClassifier in the estimator order printed by the error-count loop;
# element 1 of the entry holds the predicted labels.
rf_predictions = predictions[8][1]
# Element-wise difference to the test labels; zeros mark correct predictions.
rf_predictions - y_test

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

#### SVM

In [20]:
# Index 9 is SVC in the estimator order printed by the error-count loop;
# element 1 of the entry holds the predicted labels.
svc_predictions = predictions[9][1]
# Element-wise difference to the test labels; zeros mark correct predictions.
svc_predictions-y_test

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

#### Passive Aggressive Classifier

In [21]:
# PassiveAggressiveClassifier is the eighth entry (index 7) in the estimator order
# printed by the error-count loop; index 8 is RandomForestClassifier, which this
# cell previously re-read by mistake. Element 1 of the entry holds the predicted labels.
pac_predictions = predictions[7][1]
# Element-wise difference to the test labels; zeros mark correct predictions.
pac_predictions - y_test

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

## Save tables to $\LaTeX$

In [33]:
# Export every result table to the paper's tables directory as LaTeX.
# Each entry pairs an output file name with the DataFrame rendered into it, and
# the files are written in the listed order. The Bonferroni table used to be
# written twice with identical content; it is now written once.
# NOTE(review): 'wilxocon_test.tex' looks like a misspelling of "wilcoxon". The
# name is kept because the paper sources may include this exact file — rename
# both sides together if desired.
tables_dir = '../mlaut_paper/tables/'
latex_tables = [
    ('avg_and_st_error.tex', avg_and_std_error),                      # average and standard error
    ('avg_rank.tex', avg_rank),                                       # average rank
    ('cohens_d.tex', cohens_d),                                       # Cohen's d
    ('t_test.tex', t_test_df),                                        # t-test
    ('sign_test.tex', sign_test_df),                                  # sign test
    ('t_test_bonferroni.tex', t_test_bonferroni_df),                  # t-test with Bonferroni correction
    ('wilxocon_test.tex', wilcoxon_test_df),                          # Wilcoxon test
    ('friedman_test.tex', friedman_test_df),                          # Friedman test
    ('nemeniy_test.tex', nemeniy_test_df),                            # Nemenyi test
    ('errors_per_dataset_per_estimator.tex',
     errors_per_dataset_per_estimator_df),                            # errors per dataset per estimator
]

for file_name, table_df in latex_tables:
    # 'w' truncates any previous export so each run leaves a fresh table.
    with open(tables_dir + file_name, 'w') as tf:
        tf.write(table_df.to_latex())