In [1]:
from itertools import combinations

import pandas as pd
from scipy import stats
from statsmodels.sandbox.stats.multicomp import multipletests

### 2-4

In [10]:
# Load the tab-separated AUC table and give the column that pandas labels
# 'Unnamed: 0' the name 'data'.
AUCs = (
    pd.read_csv('AUCs.txt', sep='\t')
    .rename(columns={'Unnamed: 0': 'data'})
)
AUCs

Unnamed: 0,data,C4.5,C4.5+m,C4.5+cf,C4.5+m+cf
0,adult (sample),0.763,0.768,0.771,0.798
1,breast cancer,0.599,0.591,0.59,0.569
2,breast cancer wisconsin,0.954,0.971,0.968,0.967
3,cmc,0.628,0.661,0.654,0.657
4,ionosphere,0.882,0.888,0.886,0.898
5,iris,0.936,0.931,0.916,0.931
6,liver disorders,0.661,0.668,0.609,0.685
7,lung cancer,0.583,0.583,0.563,0.625
8,lymphography,0.775,0.838,0.866,0.875
9,mushroom,1.0,1.0,1.0,1.0


In [6]:
# Show every column label after the first one; these are the columns the
# pairwise comparison below iterates over.
AUCs.columns[1:]

Index(['C4.5', 'C4.5+m', 'C4.5+cf', 'C4.5+m+cf'], dtype='object')

In [20]:
# Run a Wilcoxon signed-rank test on the AUC columns of every unordered pair
# of classifiers (all columns after the first).
# combinations(..., 2) yields each pair exactly once, in column order, which
# replaces the nested loops with the manual `i >= j` skip.
# Each row is [classifier_1, classifier_2, w, p], the layout the next cell
# labels when it builds the DataFrame.
comparison = []
for classifier_1, classifier_2 in combinations(AUCs.columns[1:], 2):
    w, p = stats.wilcoxon(AUCs[classifier_1], AUCs[classifier_2])
    comparison.append([classifier_1, classifier_2, w, p])

In [23]:
# Turn the collected [classifier_1, classifier_2, w, p] rows into a labelled
# frame, then display it ordered by p-value (the stored frame keeps its
# original row order).
comparison = pd.DataFrame.from_records(
    comparison,
    columns=['classifier_1', 'classifier_2', 'w', 'p'],
)
comparison.sort_values('p')

Unnamed: 0,classifier_1,classifier_2,w,p
0,C4.5,C4.5+m,6.5,0.010757
2,C4.5,C4.5+m+cf,11.0,0.015906
5,C4.5+cf,C4.5+m+cf,10.0,0.022909
3,C4.5+m,C4.5+cf,17.0,0.046333
4,C4.5+m,C4.5+m+cf,22.0,0.327826
1,C4.5,C4.5+cf,43.0,0.861262


### 5

In [26]:
# Holm correction of the pairwise p-values at alpha = 0.05.
# Only `reject` and `p_corrected` are used below; the other two returned
# values are unpacked but not needed here.
reject, p_corrected, alphacSidak, alphacBonf = multipletests(
    comparison['p'], alpha=0.05, method='holm'
)
# assign() returns a new frame, so `comparison` itself is left untouched.
comparison_holm = comparison.assign(p_corrected=p_corrected, reject=reject)
comparison_holm.sort_values('p')

Unnamed: 0,classifier_1,classifier_2,w,p,p_corrected,reject
0,C4.5,C4.5+m,6.5,0.010757,0.064543,False
2,C4.5,C4.5+m+cf,11.0,0.015906,0.079532,False
5,C4.5+cf,C4.5+m+cf,10.0,0.022909,0.091636,False
3,C4.5+m,C4.5+cf,17.0,0.046333,0.138998,False
4,C4.5+m,C4.5+m+cf,22.0,0.327826,0.655651,False
1,C4.5,C4.5+cf,43.0,0.861262,0.861262,False


### 6

In [27]:
# Benjamini-Hochberg (method='fdr_bh') correction of the same pairwise
# p-values at alpha = 0.05.
# Only `reject` and `p_corrected` are used below; the other two returned
# values are unpacked but not needed here.
reject, p_corrected, alphacSidak, alphacBonf = multipletests(
    comparison['p'], alpha=0.05, method='fdr_bh'
)
# assign() returns a new frame, so `comparison` itself is left untouched.
comparison_fdr_bh = comparison.assign(p_corrected=p_corrected, reject=reject)
comparison_fdr_bh.sort_values('p')

Unnamed: 0,classifier_1,classifier_2,w,p,p_corrected,reject
0,C4.5,C4.5+m,6.5,0.010757,0.045818,True
2,C4.5,C4.5+m+cf,11.0,0.015906,0.045818,True
5,C4.5+cf,C4.5+m+cf,10.0,0.022909,0.045818,True
3,C4.5+m,C4.5+cf,17.0,0.046333,0.069499,False
4,C4.5+m,C4.5+m+cf,22.0,0.327826,0.393391,False
1,C4.5,C4.5+cf,43.0,0.861262,0.861262,False
