In [7]:
import numpy as np
import pandas as pd

from scipy import stats
from statsmodels.sandbox.stats.multicomp import multipletests

In [3]:
aucs = pd.read_csv('AUCs.txt', sep='\t')
aucs.head()

Unnamed: 0.1,Unnamed: 0,C4.5,C4.5+m,C4.5+cf,C4.5+m+cf
0,adult (sample),0.763,0.768,0.771,0.798
1,breast cancer,0.599,0.591,0.59,0.569
2,breast cancer wisconsin,0.954,0.971,0.968,0.967
3,cmc,0.628,0.661,0.654,0.657
4,ionosphere,0.882,0.888,0.886,0.898


In [6]:
print('C4.5 + C4.5+m:', stats.wilcoxon(aucs['C4.5'], aucs['C4.5+m']))
print('C4.5 + C4.5+cf:', stats.wilcoxon(aucs['C4.5'], aucs['C4.5+cf']))
print('C4.5 + C4.5+m+cf:', stats.wilcoxon(aucs['C4.5'], aucs['C4.5+m+cf']))
print('C4.5+m + C4.5+cf:', stats.wilcoxon(aucs['C4.5+m'], aucs['C4.5+cf']))
print('C4.5+m + C4.5+m+cf:', stats.wilcoxon(aucs['C4.5+m'], aucs['C4.5+m+cf']))
print('C4.5+cf + C4.5+m+cf:', stats.wilcoxon(aucs['C4.5+cf'], aucs['C4.5+m+cf']))

C4.5 + C4.5+m: WilcoxonResult(statistic=6.5, pvalue=0.01075713311978963)
C4.5 + C4.5+cf: WilcoxonResult(statistic=43.0, pvalue=0.861262330095348)
C4.5 + C4.5+m+cf: WilcoxonResult(statistic=11.0, pvalue=0.015906444101703374)
C4.5+m + C4.5+cf: WilcoxonResult(statistic=17.0, pvalue=0.046332729793395394)
C4.5+m + C4.5+m+cf: WilcoxonResult(statistic=22.0, pvalue=0.3278256758446406)
C4.5+cf + C4.5+m+cf: WilcoxonResult(statistic=10.0, pvalue=0.022909099354356588)


In [26]:
wilcoxon_matrix = np.array([['C4.5 + C4.5+m', stats.wilcoxon(aucs['C4.5'], aucs['C4.5+m']).pvalue],
                            ['C4.5 + C4.5+cf', stats.wilcoxon(aucs['C4.5'], aucs['C4.5+cf']).pvalue],
                            ['C4.5 + C4.5+m+cf', stats.wilcoxon(aucs['C4.5'], aucs['C4.5+m+cf']).pvalue],
                            ['C4.5+m + C4.5+cf', stats.wilcoxon(aucs['C4.5+m'], aucs['C4.5+cf']).pvalue],
                            ['C4.5+m + C4.5+m+cf', stats.wilcoxon(aucs['C4.5+m'], aucs['C4.5+m+cf']).pvalue],
                            ['C4.5+cf + C4.5+m+cf', stats.wilcoxon(aucs['C4.5+cf'], aucs['C4.5+m+cf']).pvalue]])

In [27]:
wilcoxon_matrix

array([['C4.5 + C4.5+m', '0.01075713311978963'],
       ['C4.5 + C4.5+cf', '0.861262330095348'],
       ['C4.5 + C4.5+m+cf', '0.015906444101703374'],
       ['C4.5+m + C4.5+cf', '0.046332729793395394'],
       ['C4.5+m + C4.5+m+cf', '0.3278256758446406'],
       ['C4.5+cf + C4.5+m+cf', '0.022909099354356588']], dtype='<U20')

In [28]:
wilcoxon_matrix[:, 1].astype('float64')

array([0.01075713, 0.86126233, 0.01590644, 0.04633273, 0.32782568,
       0.0229091 ])

In [24]:
reject, p_corrected, a1, a2 = multipletests(wilcoxon_matrix[:, 1].astype('float64'), 
                                            alpha = 0.05, 
                                            method = 'holm')

In [30]:
reject

array([False, False, False, False, False, False])

In [31]:
reject, p_corrected, a1, a2 = multipletests(wilcoxon_matrix[:, 1].astype('float64'), 
                                            alpha = 0.05, 
                                            method = 'fdr_bh')

In [32]:
reject

array([ True, False,  True, False, False,  True])