In [1]:
import os

import numpy as np
import pandas as pd
from statsmodels.stats.multicomp import pairwise_tukeyhsd

In [2]:
# Experiment folder names, grouped by their "hom_" / "sin_" prefix.
hom_exps = [
    "hom_relieff",
    "hom_geode",
    "hom_gr",
    "hom_su",
    "hom_wx",
]
sin_exps = [
    "sin_relieff",
    "sin_geode",
    "sin_gr",
    "sin_su",
    "sin_wx",
]

# Every experiment whose results get loaded, in loading order.
exps = [
    "hyb3",
    "hyb_stbw_borda",
    "hyb_borda_borda",
    "het",
    "het2",
    *hom_exps,
    *sin_exps,
]

# Experiment folder name -> label stored in the "Type" column of the results.
exp_translation = {
    # "het" / "hyb" experiments
    "het": "Het",
    "hyb3": "Hyb Wx-GR-SU",
    "hyb_stbw_borda": "Hyb Stb",
    "hyb_borda_borda": "Hyb Borda",
    "het2": "Het Wx-GR-SU",
    # "hom_" experiments
    "hom_geode": "Hom GeoDE",
    "hom_gr": "Hom GR",
    "hom_su": "Hom SU",
    "hom_relieff": "Hom ReliefF",
    "hom_wx": "Hom Wx",
    # "sin_" experiments (labelled with the bare method name)
    "sin_relieff": "ReliefF",
    "sin_geode": "GeoDE",
    "sin_gr": "GR",
    "sin_su": "SU",
    "sin_wx": "Wx",
}

In [3]:
# Raw CSV column name -> column name used throughout the analysis.
renamed_cols = {
    "th_int": "Threshold",
    "stabilities": "Stability",
    "mean_ACC": "Accuracy",
    "mean_ROC_AUC": "ROC AUC",
    "mean_PR_AUC": "PR AUC",
}

# "Type" labels: the three Hyb variants together with the five Hom variants.
hy_hom_items = [
    "Hyb Wx-GR-SU",
    "Hyb Stb",
    "Hyb Borda",
    "Hom GeoDE",
    "Hom GR",
    "Hom SU",
    "Hom ReliefF",
    "Hom Wx",
]

# "Type" labels: the three Hyb variants together with the five bare-method labels.
hy_sin_items = [
    "Hyb Wx-GR-SU",
    "Hyb Stb",
    "Hyb Borda",
    "GeoDE",
    "GR",
    "SU",
    "ReliefF",
    "Wx",
]

# "Type" labels: the two Het variants together with the three Hyb variants.
hy_het_items = [
    "Het",
    "Het Wx-GR-SU",
    "Hyb Wx-GR-SU",
    "Hyb Stb",
    "Hyb Borda",
]

def add_experiment(experiment, results, base_path=None):
    """Load one experiment's ``final_results.csv`` and store it in ``results``.

    Parameters
    ----------
    experiment : str
        Experiment folder name; must be a key of ``exp_translation``.
    results : dict
        Mutated in place: ``results[experiment]`` is set to the loaded frame,
        which gains a ``Type`` column holding the experiment's display label.
    base_path : str, optional
        Directory that contains one sub-folder per experiment. When omitted,
        the notebook-level ``path`` variable is used, which is what this
        function always did before the parameter existed.

    Returns
    -------
    None

    Raises
    ------
    KeyError
        If ``experiment`` has no entry in ``exp_translation``.
    """
    if base_path is None:
        # Legacy behaviour: fall back to the notebook-level variable.
        base_path = path

    # os.path.join tolerates a base path with or without a trailing separator.
    csv_file = os.path.join(base_path, experiment, "final_results.csv")
    df = pd.read_csv(csv_file, index_col=0)

    # A scalar is broadcast to every row; no per-row list is needed.
    df['Type'] = exp_translation[experiment]
    results[experiment] = df


def load_results(path, threshold_range=None):
    """Load every experiment in ``exps`` from ``path`` into one DataFrame.

    The per-experiment frames are concatenated, the raw columns are renamed
    according to ``renamed_cols`` and a ``Trade-off`` column is added.

    Parameters
    ----------
    path : str
        Directory that contains one sub-folder per experiment.
    threshold_range : str, optional
        ``"1_100"`` drops the rows whose ``Threshold`` is 150, 200 or 500;
        any other value keeps every row. When omitted, the notebook-level
        ``thresholds`` variable is used, matching the previous behaviour.

    Returns
    -------
    pandas.DataFrame
        All experiments stacked row-wise, one ``Type`` label per experiment.
    """
    if threshold_range is None:
        # Legacy behaviour: fall back to the notebook-level variable.
        threshold_range = thresholds

    results = {}
    for e in exps:
        # Forward the caller's path explicitly instead of relying on a global.
        add_experiment(e, results, path)

    concat_df = pd.concat(list(results.values()))
    concat_df = concat_df.rename(columns=renamed_cols, index={'ONE': 'one'})
    # Accuracy is weighted twice as heavily as stability in the trade-off score.
    concat_df['Trade-off'] = concat_df['Stability'] + 2*concat_df['Accuracy']

    if threshold_range == "1_100":
        concat_df = concat_df[~concat_df['Threshold'].isin([150, 200, 500])]
    return concat_df

## Breast

#### Annotations

* Top Stability: (Hyb Wx-GR-SU; Hom SU; Wx; Het Wx-GR-SU; Hom Wx), (SU; ...) <br>
* Worst PR AUC (significantly different from the top-1 method): Het, Hom ReliefF, ReliefF <br>

#### Hyb Wx-GR-SU   VS   Het Wx-GR-SU
* Stability: No difference <br>
* PR AUC: No difference <br>

#### Hyb   VS   Het
* Stability: (Hyb Stb, Hyb Borda), (Het) <br>
* PR AUC: No difference <br>

In [4]:
# Results root for the breast dataset. Set the ARRAYEXPRESS_DIR environment
# variable to run the notebook on another machine; the default is the original
# location.
path = os.path.join(
    os.environ.get("ARRAYEXPRESS_DIR", "/home/colombelli/Documents/arrayexpress"),
    "breast",
    "",  # keeps the trailing separator the original string had
)
# load_results only filters rows when this is "1_100"; "1_500" keeps every threshold.
thresholds = "1_500"
concat_df = load_results(path)

In [5]:
# Inspect the combined results table; pandas elides the middle rows of a long frame when rendering.
concat_df

Unnamed: 0_level_0,Threshold,Stability,Accuracy,std_ACC,ROC AUC,std_ROC_AUC,PR AUC,std_PR_AUC,Type,Trade-off
th_frac,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0.004867,1,0.299966,0.717608,0.042121,0.807296,0.023213,0.974556,0.004450,Hyb Wx-GR-SU,1.735182
0.009734,2,0.349937,0.831863,0.034454,0.907453,0.023355,0.989128,0.002996,Hyb Wx-GR-SU,2.013662
0.014601,3,0.266560,0.849658,0.037342,0.933446,0.015468,0.992035,0.002277,Hyb Wx-GR-SU,1.965875
0.019469,4,0.474898,0.875309,0.024838,0.941318,0.020777,0.991505,0.005451,Hyb Wx-GR-SU,2.225516
0.024336,5,0.439864,0.876177,0.037375,0.950233,0.011857,0.993729,0.001924,Hyb Wx-GR-SU,2.192217
...,...,...,...,...,...,...,...,...,...,...
0.365035,75,0.530285,0.930925,0.017264,0.978000,0.008952,0.997115,0.001373,Wx,2.392136
0.486713,100,0.513633,0.937874,0.007267,0.984862,0.006781,0.998236,0.000893,Wx,2.389381
0.730069,150,0.535274,0.941788,0.008737,0.982067,0.012136,0.997239,0.002506,Wx,2.418850
0.973425,200,0.576881,0.942216,0.017171,0.984350,0.008771,0.998150,0.001068,Wx,2.461314


In [95]:
# Mean Stability per "Type" label, lowest first.
(
    concat_df
    .groupby("Type")["Stability"]
    .mean()
    .sort_values()
)

Type
GeoDE           0.156131
Hom ReliefF     0.177512
Hom GeoDE       0.185184
Het             0.214128
ReliefF         0.249164
GR              0.394085
Hyb Borda       0.398199
Hom GR          0.402221
Hyb Stb         0.439712
SU              0.534855
Hom Wx          0.538378
Het Wx-GR-SU    0.540202
Wx              0.547328
Hom SU          0.558686
Hyb Wx-GR-SU    0.567700
Name: Stability, dtype: float64

In [96]:
# Tukey HSD on Stability between every pair of "Type" groups (FWER 0.05).
# print() is needed to render the result as a summary table.
tukey = pairwise_tukeyhsd(
    concat_df['Stability'],
    concat_df['Type'],
    alpha=0.05,
)
print(tukey)

      Multiple Comparison of Means - Tukey HSD, FWER=0.05       
   group1       group2    meandiff p-adj   lower   upper  reject
----------------------------------------------------------------
          GR        GeoDE   -0.238  0.001 -0.2928 -0.1831   True
          GR          Het    -0.18  0.001 -0.2348 -0.1251   True
          GR Het Wx-GR-SU   0.1461  0.001  0.0913   0.201   True
          GR       Hom GR   0.0081    0.9 -0.0467   0.063  False
          GR    Hom GeoDE  -0.2089  0.001 -0.2637 -0.1541   True
          GR  Hom ReliefF  -0.2166  0.001 -0.2714 -0.1617   True
          GR       Hom SU   0.1646  0.001  0.1098  0.2194   True
          GR       Hom Wx   0.1443  0.001  0.0894  0.1991   True
          GR    Hyb Borda   0.0041    0.9 -0.0507   0.059  False
          GR      Hyb Stb   0.0456 0.2332 -0.0092  0.1005  False
          GR Hyb Wx-GR-SU   0.1736  0.001  0.1188  0.2285   True
          GR      ReliefF  -0.1449  0.001 -0.1998 -0.0901   True
          GR           SU

In [97]:
# Mean PR AUC per "Type" label, lowest first.
(
    concat_df
    .groupby("Type")["PR AUC"]
    .mean()
    .sort_values()
)

Type
ReliefF         0.952954
Hom ReliefF     0.954110
Het             0.992626
GeoDE           0.994916
Hom Wx          0.994978
Wx              0.995065
GR              0.995855
Hom GeoDE       0.996004
Hyb Wx-GR-SU    0.996021
Het Wx-GR-SU    0.996124
Hyb Borda       0.996353
Hyb Stb         0.996781
Hom GR          0.996971
SU              0.997689
Hom SU          0.997896
Name: PR AUC, dtype: float64

In [98]:
# Tukey HSD on PR AUC between every pair of "Type" groups (FWER 0.05).
# print() is needed to render the result as a summary table.
tukey = pairwise_tukeyhsd(
    concat_df['PR AUC'],
    concat_df['Type'],
    alpha=0.05,
)
print(tukey)

      Multiple Comparison of Means - Tukey HSD, FWER=0.05       
   group1       group2    meandiff p-adj   lower   upper  reject
----------------------------------------------------------------
          GR        GeoDE  -0.0009    0.9 -0.0055  0.0036  False
          GR          Het  -0.0032 0.5105 -0.0078  0.0013  False
          GR Het Wx-GR-SU   0.0003    0.9 -0.0043  0.0048  False
          GR       Hom GR   0.0011    0.9 -0.0035  0.0057  False
          GR    Hom GeoDE   0.0001    0.9 -0.0044  0.0047  False
          GR  Hom ReliefF  -0.0417  0.001 -0.0463 -0.0372   True
          GR       Hom SU    0.002    0.9 -0.0025  0.0066  False
          GR       Hom Wx  -0.0009    0.9 -0.0054  0.0037  False
          GR    Hyb Borda   0.0005    0.9 -0.0041  0.0051  False
          GR      Hyb Stb   0.0009    0.9 -0.0036  0.0055  False
          GR Hyb Wx-GR-SU   0.0002    0.9 -0.0044  0.0047  False
          GR      ReliefF  -0.0429  0.001 -0.0475 -0.0383   True
          GR           SU

## Lung

#### Annotations

* Top Stability: (Hom Wx; Wx), (Hyb Wx-GR-SU), (Het Wx-GR-SU; ...) <br>
* Worst PR AUC (significantly different from the top-1 method): Hom ReliefF, ReliefF <br>

#### Hyb Wx-GR-SU   VS   Het Wx-GR-SU
* Stability: Hyb Wx-GR-SU <br>
* PR AUC: No difference <br>

#### Hyb   VS   Het
* Stability: No difference <br>
* PR AUC: No difference <br>

In [116]:
# Results root for the lung dataset. Set the ARRAYEXPRESS_DIR environment
# variable to run the notebook on another machine; the default is the original
# location.
path = os.path.join(
    os.environ.get("ARRAYEXPRESS_DIR", "/home/colombelli/Documents/arrayexpress"),
    "lung",
    "",  # keeps the trailing separator the original string had
)
# load_results only filters rows when this is "1_100"; "1_500" keeps every threshold.
thresholds = "1_500"
concat_df = load_results(path)

In [101]:
# Mean Stability per "Type" label, lowest first.
(
    concat_df
    .groupby("Type")["Stability"]
    .mean()
    .sort_values()
)

Type
Hom GeoDE       0.125560
GeoDE           0.146628
Het             0.217902
Hyb Borda       0.235197
Hom ReliefF     0.239828
Hyb Stb         0.263572
ReliefF         0.284026
Hom GR          0.289843
GR              0.317076
SU              0.324827
Hom SU          0.347898
Het Wx-GR-SU    0.391748
Hyb Wx-GR-SU    0.518850
Wx              0.575066
Hom Wx          0.592791
Name: Stability, dtype: float64

In [102]:
# Tukey HSD on Stability between every pair of "Type" groups (FWER 0.05).
# print() is needed to render the result as a summary table.
tukey = pairwise_tukeyhsd(
    concat_df['Stability'],
    concat_df['Type'],
    alpha=0.05,
)
print(tukey)

      Multiple Comparison of Means - Tukey HSD, FWER=0.05       
   group1       group2    meandiff p-adj   lower   upper  reject
----------------------------------------------------------------
          GR        GeoDE  -0.1704  0.001 -0.2181 -0.1228   True
          GR          Het  -0.0992  0.001 -0.1468 -0.0516   True
          GR Het Wx-GR-SU   0.0747  0.001  0.0271  0.1223   True
          GR       Hom GR  -0.0272 0.8044 -0.0749  0.0204  False
          GR    Hom GeoDE  -0.1915  0.001 -0.2391 -0.1439   True
          GR  Hom ReliefF  -0.0772  0.001 -0.1249 -0.0296   True
          GR       Hom SU   0.0308 0.6402 -0.0168  0.0784  False
          GR       Hom Wx   0.2757  0.001  0.2281  0.3233   True
          GR    Hyb Borda  -0.0819  0.001 -0.1295 -0.0343   True
          GR      Hyb Stb  -0.0535 0.0119 -0.1011 -0.0059   True
          GR Hyb Wx-GR-SU   0.2018  0.001  0.1542  0.2494   True
          GR      ReliefF   -0.033 0.5383 -0.0807  0.0146  False
          GR           SU

In [103]:
# Mean PR AUC per "Type" label, lowest first.
(
    concat_df
    .groupby("Type")["PR AUC"]
    .mean()
    .sort_values()
)

Type
ReliefF         0.975923
Hom ReliefF     0.991988
Hyb Borda       0.996265
GeoDE           0.996302
Hom GeoDE       0.996311
Hyb Stb         0.997033
GR              0.997063
Het Wx-GR-SU    0.997165
Hyb Wx-GR-SU    0.997192
SU              0.997214
Hom SU          0.997222
Het             0.997267
Hom GR          0.997433
Hom Wx          0.997861
Wx              0.997923
Name: PR AUC, dtype: float64

In [104]:
# Tukey HSD on PR AUC between every pair of "Type" groups (FWER 0.05).
# print() is needed to render the result as a summary table.
tukey = pairwise_tukeyhsd(
    concat_df['PR AUC'],
    concat_df['Type'],
    alpha=0.05,
)
print(tukey)

      Multiple Comparison of Means - Tukey HSD, FWER=0.05       
   group1       group2    meandiff p-adj   lower   upper  reject
----------------------------------------------------------------
          GR        GeoDE  -0.0008    0.9 -0.0046  0.0031  False
          GR          Het   0.0002    0.9 -0.0036   0.004  False
          GR Het Wx-GR-SU   0.0001    0.9 -0.0037  0.0039  False
          GR       Hom GR   0.0004    0.9 -0.0035  0.0042  False
          GR    Hom GeoDE  -0.0008    0.9 -0.0046  0.0031  False
          GR  Hom ReliefF  -0.0051  0.001 -0.0089 -0.0012   True
          GR       Hom SU   0.0002    0.9 -0.0037   0.004  False
          GR       Hom Wx   0.0008    0.9  -0.003  0.0046  False
          GR    Hyb Borda  -0.0008    0.9 -0.0046   0.003  False
          GR      Hyb Stb     -0.0    0.9 -0.0039  0.0038  False
          GR Hyb Wx-GR-SU   0.0001    0.9 -0.0037   0.004  False
          GR      ReliefF  -0.0211  0.001  -0.025 -0.0173   True
          GR           SU

## Liver

#### Annotations

* Top Stability: (Hyb Wx-GR-SU; Hom SU), (SU; Hom GR; Het Wx-GR-SU), (GR; ...) <br>
* Worst PR AUC (significantly different from the top-1 method): Hom GeoDE, Hom ReliefF, ReliefF <br>

#### Hyb Wx-GR-SU   VS   Het Wx-GR-SU
* Stability: Hyb Wx-GR-SU <br>
* PR AUC: No difference <br>

#### Hyb   VS   Het
* Stability: (Hyb Stb), (Hyb Borda), (Het) <br>
* PR AUC: No difference <br>

In [117]:
# Results root for the liver dataset. Set the ARRAYEXPRESS_DIR environment
# variable to run the notebook on another machine; the default is the original
# location.
path = os.path.join(
    os.environ.get("ARRAYEXPRESS_DIR", "/home/colombelli/Documents/arrayexpress"),
    "liver",
    "",  # keeps the trailing separator the original string had
)
# load_results only filters rows when this is "1_100"; "1_500" keeps every threshold.
thresholds = "1_500"
concat_df = load_results(path)

In [106]:
# Mean Stability per "Type" label, lowest first.
(
    concat_df
    .groupby("Type")["Stability"]
    .mean()
    .sort_values()
)

Type
Hom GeoDE       0.049031
Hom ReliefF     0.145870
GeoDE           0.185802
Het             0.213812
ReliefF         0.250041
Hyb Borda       0.311035
Hyb Stb         0.362000
Wx              0.438023
Hom Wx          0.463216
GR              0.484545
Het Wx-GR-SU    0.529082
Hom GR          0.545951
SU              0.565216
Hyb Wx-GR-SU    0.630051
Hom SU          0.639466
Name: Stability, dtype: float64

In [107]:
# Tukey HSD on Stability between every pair of "Type" groups (FWER 0.05).
# print() is needed to render the result as a summary table.
tukey = pairwise_tukeyhsd(
    concat_df['Stability'],
    concat_df['Type'],
    alpha=0.05,
)
print(tukey)

      Multiple Comparison of Means - Tukey HSD, FWER=0.05       
   group1       group2    meandiff p-adj   lower   upper  reject
----------------------------------------------------------------
          GR        GeoDE  -0.2987  0.001 -0.3472 -0.2503   True
          GR          Het  -0.2707  0.001 -0.3192 -0.2223   True
          GR Het Wx-GR-SU   0.0445 0.1125 -0.0039   0.093  False
          GR       Hom GR   0.0614 0.0017  0.0129  0.1099   True
          GR    Hom GeoDE  -0.4355  0.001  -0.484  -0.387   True
          GR  Hom ReliefF  -0.3387  0.001 -0.3872 -0.2902   True
          GR       Hom SU   0.1549  0.001  0.1064  0.2034   True
          GR       Hom Wx  -0.0213    0.9 -0.0698  0.0271  False
          GR    Hyb Borda  -0.1735  0.001  -0.222  -0.125   True
          GR      Hyb Stb  -0.1225  0.001  -0.171 -0.0741   True
          GR Hyb Wx-GR-SU   0.1455  0.001   0.097   0.194   True
          GR      ReliefF  -0.2345  0.001  -0.283  -0.186   True
          GR           SU

In [108]:
# Mean PR AUC per "Type" label, lowest first.
(
    concat_df
    .groupby("Type")["PR AUC"]
    .mean()
    .sort_values()
)

Type
ReliefF         0.806208
Hom ReliefF     0.820183
Hom GeoDE       0.929998
GeoDE           0.942491
Hyb Borda       0.942630
Het Wx-GR-SU    0.943895
Hyb Wx-GR-SU    0.944421
Hyb Stb         0.945472
Het             0.945481
Wx              0.948324
Hom SU          0.949587
GR              0.949717
SU              0.949936
Hom GR          0.950159
Hom Wx          0.954799
Name: PR AUC, dtype: float64

In [109]:
# Tukey HSD on PR AUC between every pair of "Type" groups (FWER 0.05).
# print() is needed to render the result as a summary table.
tukey = pairwise_tukeyhsd(
    concat_df['PR AUC'],
    concat_df['Type'],
    alpha=0.05,
)
print(tukey)

      Multiple Comparison of Means - Tukey HSD, FWER=0.05       
   group1       group2    meandiff p-adj   lower   upper  reject
----------------------------------------------------------------
          GR        GeoDE  -0.0072 0.8635 -0.0205   0.006  False
          GR          Het  -0.0042    0.9 -0.0175   0.009  False
          GR Het Wx-GR-SU  -0.0058    0.9 -0.0191  0.0074  False
          GR       Hom GR   0.0004    0.9 -0.0128  0.0137  False
          GR    Hom GeoDE  -0.0197  0.001  -0.033 -0.0065   True
          GR  Hom ReliefF  -0.1295  0.001 -0.1428 -0.1163   True
          GR       Hom SU  -0.0001    0.9 -0.0134  0.0131  False
          GR       Hom Wx   0.0051    0.9 -0.0082  0.0183  False
          GR    Hyb Borda  -0.0071 0.8863 -0.0203  0.0062  False
          GR      Hyb Stb  -0.0042    0.9 -0.0175   0.009  False
          GR Hyb Wx-GR-SU  -0.0053    0.9 -0.0186   0.008  False
          GR      ReliefF  -0.1435  0.001 -0.1568 -0.1302   True
          GR           SU

## Pancreas

#### Annotations

* Top Stability: (Hom Wx), (Wx; Hyb Borda; Hyb Wx-GR-SU; Het Wx-GR-SU; Hyb Stb), (SU; ...) <br>
* Worst PR AUC (significantly different from the top-1 method): ReliefF, Hom ReliefF, Hyb Stb, Hom GeoDE, GeoDE <br>

#### Hyb Wx-GR-SU   VS   Het Wx-GR-SU
* Stability: Hyb Wx-GR-SU <br>
* PR AUC: No difference <br>

#### Hyb   VS   Het
* Stability: (Hyb Stb), (Hyb Borda), (Het) <br>
* PR AUC: No difference <br>

In [118]:
# Results root for the pancreas dataset. Set the ARRAYEXPRESS_DIR environment
# variable to run the notebook on another machine; the default is the original
# location.
path = os.path.join(
    os.environ.get("ARRAYEXPRESS_DIR", "/home/colombelli/Documents/arrayexpress"),
    "pancreas",
    "",  # keeps the trailing separator the original string had
)
# load_results only filters rows when this is "1_100"; "1_500" keeps every threshold.
thresholds = "1_500"
concat_df = load_results(path)

In [111]:
# Mean Stability per "Type" label, lowest first.
(
    concat_df
    .groupby("Type")["Stability"]
    .mean()
    .sort_values()
)

Type
ReliefF         0.112292
Hom GeoDE       0.307846
GeoDE           0.341144
Hom GR          0.387594
GR              0.411920
Het             0.445008
Hom ReliefF     0.448990
Hom SU          0.463162
SU              0.475228
Hyb Stb         0.536275
Het Wx-GR-SU    0.557340
Hyb Wx-GR-SU    0.560182
Hyb Borda       0.564467
Wx              0.584341
Hom Wx          0.804263
Name: Stability, dtype: float64

In [112]:
# Tukey HSD on Stability between every pair of "Type" groups (FWER 0.05).
# print() is needed to render the result as a summary table.
tukey = pairwise_tukeyhsd(
    concat_df['Stability'],
    concat_df['Type'],
    alpha=0.05,
)
print(tukey)

      Multiple Comparison of Means - Tukey HSD, FWER=0.05       
   group1       group2    meandiff p-adj   lower   upper  reject
----------------------------------------------------------------
          GR        GeoDE  -0.0708 0.0014 -0.1261 -0.0154   True
          GR          Het   0.0331 0.7475 -0.0222  0.0884  False
          GR Het Wx-GR-SU   0.1454  0.001  0.0901  0.2007   True
          GR       Hom GR  -0.0243    0.9 -0.0797   0.031  False
          GR    Hom GeoDE  -0.1041  0.001 -0.1594 -0.0487   True
          GR  Hom ReliefF   0.0371 0.5908 -0.0183  0.0924  False
          GR       Hom SU   0.0512 0.1048 -0.0041  0.1066  False
          GR       Hom Wx   0.3923  0.001   0.337  0.4477   True
          GR    Hyb Borda   0.1525  0.001  0.0972  0.2079   True
          GR      Hyb Stb   0.1244  0.001   0.069  0.1797   True
          GR Hyb Wx-GR-SU   0.1483  0.001  0.0929  0.2036   True
          GR      ReliefF  -0.2996  0.001  -0.355 -0.2443   True
          GR           SU

In [113]:
# Mean PR AUC per "Type" label, lowest first.
(
    concat_df
    .groupby("Type")["PR AUC"]
    .mean()
    .sort_values()
)

Type
ReliefF         0.798875
Hom ReliefF     0.871381
Hyb Stb         0.910138
Hom GeoDE       0.914994
GeoDE           0.918056
Hyb Borda       0.927134
Het             0.930610
Hyb Wx-GR-SU    0.931815
Wx              0.932104
Hom Wx          0.932275
SU              0.933536
Hom SU          0.934194
Het Wx-GR-SU    0.935533
GR              0.936917
Hom GR          0.942657
Name: PR AUC, dtype: float64

In [114]:
# Tukey HSD on PR AUC between every pair of "Type" groups (FWER 0.05).
# print() is needed to render the result as a summary table.
tukey = pairwise_tukeyhsd(
    concat_df['PR AUC'],
    concat_df['Type'],
    alpha=0.05,
)
print(tukey)

      Multiple Comparison of Means - Tukey HSD, FWER=0.05       
   group1       group2    meandiff p-adj   lower   upper  reject
----------------------------------------------------------------
          GR        GeoDE  -0.0189 0.0067  -0.035 -0.0027   True
          GR          Het  -0.0063    0.9 -0.0225  0.0098  False
          GR Het Wx-GR-SU  -0.0014    0.9 -0.0175  0.0148  False
          GR       Hom GR   0.0057    0.9 -0.0104  0.0219  False
          GR    Hom GeoDE  -0.0219  0.001 -0.0381 -0.0058   True
          GR  Hom ReliefF  -0.0655  0.001 -0.0817 -0.0494   True
          GR       Hom SU  -0.0027    0.9 -0.0189  0.0134  False
          GR       Hom Wx  -0.0046    0.9 -0.0208  0.0115  False
          GR    Hyb Borda  -0.0098 0.7302 -0.0259  0.0064  False
          GR      Hyb Stb  -0.0268  0.001 -0.0429 -0.0106   True
          GR Hyb Wx-GR-SU  -0.0051    0.9 -0.0212   0.011  False
          GR      ReliefF   -0.138  0.001 -0.1542 -0.1219   True
          GR           SU