In [15]:
import numpy as np
import pandas as pd

from scipy.stats import wilcoxon

In [16]:
# Scenario prefix: selects which evaluation CSV is loaded and is used as the
# prefix of the pivot label looked up in evaluate_one.
scenario = 'dtc'

In [17]:
# Load the evaluation results of the selected scenario; the path is relative,
# so the file is resolved against the current working directory.
data = pd.read_csv(f'evaluation_{scenario}.csv')

In [18]:
# Pick the metric column: 'auc' when the results contain it, otherwise 'r2'.
if 'auc' in data.columns:
    score = 'auc'
else:
    score = 'r2'

In [19]:
# Preview the first rows to check which columns were loaded.
data.head()

Unnamed: 0.1,Unnamed: 0,name,fold,auc,params,estimator,label
0,0,appendicitis,0,0.823529,"{'operator': '<=', 'random_state': 5, 'min_sam...",OperatorDecisionTreeClassifier,dtc_leq
1,1,appendicitis,1,0.786765,"{'operator': '<=', 'random_state': 5, 'min_sam...",OperatorDecisionTreeClassifier,dtc_leq
2,2,appendicitis,2,0.794118,"{'operator': '<=', 'random_state': 5, 'min_sam...",OperatorDecisionTreeClassifier,dtc_leq
3,3,appendicitis,3,0.772059,"{'operator': '<=', 'random_state': 5, 'min_sam...",OperatorDecisionTreeClassifier,dtc_leq
4,4,appendicitis,4,0.632353,"{'operator': '<=', 'random_state': 5, 'min_sam...",OperatorDecisionTreeClassifier,dtc_leq


In [20]:
def do_testing(pdf, metric):
    """Run a two-sided Wilcoxon signed-rank test between two parameter sets.

    The first two distinct values of the 'params' column (in order of
    appearance) define the two samples; any further values are ignored.
    Each sample is ordered by 'fold' before testing so that observations are
    paired position by position.
    NOTE(review): this pairing assumes both samples hold the same folds the
    same number of times -- confirm against the data.

    Parameters
    ----------
    pdf : pandas.DataFrame
        Frame with 'params' and 'fold' columns plus the `metric` column.
    metric : str
        Name of the column holding the values to compare.

    Returns
    -------
    pandas.Series
        Single entry 'wilcoxon' holding the p-value.

    Raises
    ------
    IndexError
        If 'params' has fewer than two distinct values.
    """
    distinct_params = pdf['params'].drop_duplicates().values
    first_params, second_params = distinct_params[0], distinct_params[1]

    def metric_by_fold(param_value):
        # Rows of one parameter set, ordered by fold, reduced to the metric.
        subset = pdf[pdf['params'] == param_value]
        return subset.sort_values('fold')[metric]

    test_result = wilcoxon(
        metric_by_fold(first_params),
        metric_by_fold(second_params),
        zero_method='zsplit',
    )
    return pd.Series({'wilcoxon': test_result.pvalue})

In [21]:
# List the distinct values of the 'label' column (first occurrence of each).
data['label'].drop_duplicates()

0           dtc_leq
20000         dtc_l
40000     adtc_full
60000     adtc_rand
80000          fdtc
100000         sdtc
Name: label, dtype: object

In [22]:
# Collect, for every (name, label) pair, the list of metric values in their
# original row order. Selecting the metric column before aggregating avoids
# applying a function over the grouping columns and yields a column already
# named after the metric, so no positional rename of column 0 is needed.
# NOTE(review): evaluate_one pairs these lists element by element across
# labels, which presumes every label of a dataset has its rows in the same
# fold order -- confirm against how the CSV was written.
grouped = (
    data
    .groupby(['name', 'label'])[score]
    .apply(list)
    .reset_index(drop=False)
)

In [23]:
# Inspect the per-(name, label) lists of metric values.
grouped

Unnamed: 0,name,label,auc
0,abalone9_18,adtc_full,"[0.788647342995169, 0.6467391304347826, 0.6865..."
1,abalone9_18,adtc_rand,"[0.788647342995169, 0.6467391304347826, 0.6865..."
2,abalone9_18,dtc_l,"[0.788647342995169, 0.6467391304347826, 0.6865..."
3,abalone9_18,dtc_leq,"[0.788647342995169, 0.6467391304347826, 0.6865..."
4,abalone9_18,fdtc,"[0.7910628019323671, 0.6467391304347826, 0.686..."
...,...,...,...
115,yeast-0-3-5-9_vs_7-8,adtc_rand,"[0.6983695652173914, 0.796153846153846, 0.6978..."
116,yeast-0-3-5-9_vs_7-8,dtc_l,"[0.6983695652173914, 0.796153846153846, 0.6978..."
117,yeast-0-3-5-9_vs_7-8,dtc_leq,"[0.6983695652173914, 0.796153846153846, 0.6978..."
118,yeast-0-3-5-9_vs_7-8,fdtc,"[0.696195652173913, 0.7999999999999999, 0.6879..."


In [44]:
def evaluate_one(grouped, pivot_postfix='leq', metric=None, pivot_prefix=None):
    """Compare the pivot label of one dataset against every other label.

    Parameters
    ----------
    grouped : pandas.DataFrame
        Rows with a 'label' column and a column named after the metric whose
        cells hold a sequence of scores. Sequences are paired element by
        element with the pivot's sequence.
        NOTE(review): assumes all labels list their scores in the same fold
        order -- confirm against how `grouped` is built.
    pivot_postfix : str, default 'leq'
        Suffix of the pivot label; the label looked up is
        '<pivot_prefix>_<pivot_postfix>'.
    metric : str, optional
        Name of the score column. Defaults to the notebook-level `score`.
    pivot_prefix : str, optional
        Prefix of the pivot label. Defaults to the notebook-level `scenario`.

    Returns
    -------
    pandas.Series
        '<metric>_<label>' with the mean score of every label (pivot first)
        and 'p_<label>_l' with the one-sided Wilcoxon signed-rank p-value of
        pivot versus that label.

    Raises
    ------
    IndexError
        If no row carries the pivot label.
    """
    # Fall back to the notebook globals only when no explicit value is given,
    # so existing calls (including groupby(...).apply(evaluate_one)) behave
    # as before.
    metric = score if metric is None else metric
    pivot_prefix = scenario if pivot_prefix is None else pivot_prefix
    pivot_label = f"{pivot_prefix}_{pivot_postfix}"

    pivot_rows = grouped[grouped['label'] == pivot_label]
    if pivot_rows.empty:
        raise IndexError(f"pivot label {pivot_label!r} not found in 'label' column")
    pivot_scores = pivot_rows.iloc[0][metric]

    # Keys are prefixed with the metric name so the columns stay correctly
    # labelled when the metric is not 'auc'.
    result = {f'{metric}_{pivot_label}': np.mean(pivot_scores)}

    other_rows = grouped[grouped['label'] != pivot_label]
    for _, row in other_rows.iterrows():
        label = row['label']
        label_scores = row[metric]
        result[f'{metric}_{label}'] = np.mean(label_scores)
        # alternative="less" tests whether the paired differences
        # (pivot - label) are shifted below zero; zero differences are split
        # between the positive and negative ranks ("zsplit"). The opposite
        # direction (alternative="greater", key 'p_<label>_g') is not computed.
        result[f'p_{label}_l'] = wilcoxon(
            pivot_scores, label_scores, zero_method="zsplit", alternative="less"
        ).pvalue

    return pd.Series(result)

In [45]:
def evaluate_all(data, pivot_postfix='leq'):
    """Apply evaluate_one to every dataset and stack the results.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with a 'name' column identifying the dataset, plus the columns
        evaluate_one reads ('label' and the metric column).
    pivot_postfix : str, default 'leq'
        Forwarded to evaluate_one to choose the pivot label; the default
        matches evaluate_one's own default.

    Returns
    -------
    Whatever ``groupby("name").apply`` assembles from the per-dataset Series
    returned by evaluate_one (a frame indexed by 'name' in the run shown in
    this notebook).
    """
    per_dataset = data.groupby("name")
    return per_dataset.apply(evaluate_one, pivot_postfix=pivot_postfix)

In [46]:
# Per-dataset summary: mean score of every label plus the one-sided Wilcoxon
# p-value of the pivot label against each other label.
evaluate_all(grouped)

Unnamed: 0_level_0,auc_dtc_leq,auc_adtc_full,p_adtc_full_l,auc_adtc_rand,p_adtc_rand_l,auc_dtc_l,p_dtc_l_l,auc_fdtc,p_fdtc_l,auc_sdtc,p_sdtc_l
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
abalone9_18,0.761769,0.761769,0.5,0.761769,0.5,0.761769,0.5,0.761932,0.0001599006,0.761769,0.5
appendicitis,0.774753,0.774753,0.5,0.774753,0.5,0.774753,0.5,0.7748,0.5249264,0.774753,0.5
bupa,0.692042,0.692249,0.810041,0.692222,0.8491318,0.691741,0.9725139,0.692855,0.003126914,0.691812,0.9175362
cleveland-0_vs_4,0.888081,0.891356,0.1320785,0.891492,0.1320785,0.891727,0.1320383,0.891402,0.1078043,0.891727,0.1320383
ecoli1,0.954363,0.954363,0.5,0.954363,0.5,0.954363,0.5,0.955318,1.616892e-13,0.954363,0.5
glass0,0.832642,0.832642,0.5,0.832642,0.5,0.832642,0.5,0.833845,5.232257e-06,0.832642,0.5
haberman,0.656121,0.656287,0.06025456,0.656258,0.0758364,0.656338,0.06205116,0.656386,0.06066495,0.656338,0.06205116
hepatitis,0.746074,0.746297,0.3124241,0.746288,0.3132339,0.746375,0.3749779,0.74923,0.09353505,0.746062,0.6457443
lymphography,0.90007,0.903323,0.2864238,0.903323,0.2864238,0.903251,0.2864238,0.906929,0.008088386,0.903251,0.2864238
mammographic,0.899691,0.899677,0.9620445,0.899677,0.9620445,0.899665,0.9668458,0.899683,0.8416329,0.899665,0.9668458
