In [976]:
import numpy as np
import pandas as pd

from scipy.stats import wilcoxon

In [977]:
splits = {
    'dtc': pd.read_csv('splits_dtc.csv'),
    'dtr': pd.read_csv('splits_dtr.csv'),
    'rfc': pd.read_csv('splits_rfc.csv'),
    'rfr': pd.read_csv('splits_rfr.csv')
}
evaluations = {
    'dtc': pd.read_csv('evaluation_dtc.csv'),
    'dtr': pd.read_csv('evaluation_dtr.csv'),
    'rfc': pd.read_csv('evaluation_rfc.csv'),
    'rfr': pd.read_csv('evaluation_rfr.csv')
}

In [978]:
splits['dtc'] = splits['dtc'].rename(columns={'Unnamed: 0': 'name'})
splits['dtr'] = splits['dtr'].rename(columns={'Unnamed: 0': 'name'})
splits['rfc'] = splits['rfc'].rename(columns={'Unnamed: 0': 'name'})
splits['rfr'] = splits['rfr'].rename(columns={'Unnamed: 0': 'name'})

In [979]:
splits['dtc']['rate'] = splits['dtc']['n_lattice_splits'] / splits['dtc']['n_splits']
splits['dtr']['rate'] = splits['dtr']['n_lattice_splits'] / splits['dtr']['n_splits']
splits['rfc']['rate'] = splits['rfc']['n_lattice_splits'] / splits['rfc']['n_splits']
splits['rfr']['rate'] = splits['rfr']['n_lattice_splits'] / splits['rfr']['n_splits']

splits['dtc']['rate_kfold'] = splits['dtc']['n_lattice_splits_kfold'] / splits['dtc']['n_splits_kfold']
splits['dtr']['rate_kfold'] = splits['dtr']['n_lattice_splits_kfold'] / splits['dtr']['n_splits_kfold']
splits['rfc']['rate_kfold'] = splits['rfc']['n_lattice_splits_kfold'] / splits['rfc']['n_splits_kfold']
splits['rfr']['rate_kfold'] = splits['rfr']['n_lattice_splits_kfold'] / splits['rfr']['n_splits_kfold']

In [980]:
for scenario in ['dtc', 'dtr', 'rfc', 'rfr']:
    evaluations[scenario] = evaluations[scenario][evaluations[scenario]['mode'].isin(['<', '<='])]

In [981]:
for scenario in ['dtc', 'rfc']:
    evaluations[scenario] = evaluations[scenario]\
        .groupby(['name', 'mode'])\
        .apply(lambda pdf: pdf.sort_values('fold')['auc'].values.tolist())\
        .reset_index(drop=False)\
        .rename(columns={0: 'auc'})
for scenario in ['dtr', 'rfr']:
    evaluations[scenario] = evaluations[scenario]\
        .groupby(['name', 'mode'])\
        .apply(lambda pdf: pdf.sort_values('fold')['r2'].values.tolist())\
        .reset_index(drop=False)\
        .rename(columns={0: 'r2'})

In [982]:
def evaluate(pdf, score):
    leq = pdf[pdf['mode'] == '<='].iloc[0]
    l = pdf[pdf['mode'] == '<'].iloc[0]
    return pd.Series({
        'mean_leq': np.mean(leq[score]),
        'mean_l': np.mean(l[score]),
        'median_leq': np.median(leq[score]),
        'median_l': np.median(l[score]),
        'p': min(wilcoxon(leq[score], l[score], zero_method='zsplit', alternative='less').pvalue,
                 wilcoxon(leq[score], l[score], zero_method='zsplit', alternative='greater').pvalue)
    })

def evaluate2(pdf, score):
    leq = pdf[pdf['mode'] == '<='].iloc[0]
    l = pdf[pdf['mode'] == '<'].iloc[0]
    return pd.Series({
        'diff': np.mean(leq[score]) - np.mean(l[score]),
        #'mean_l': np.mean(l[score]),
        #'median_leq': np.median(leq[score]),
        #'median_l': np.median(l[score]),
        'p': min(wilcoxon(leq[score], l[score], zero_method='zsplit', alternative='less').pvalue,
                 wilcoxon(leq[score], l[score], zero_method='zsplit', alternative='greater').pvalue)
    })


In [983]:
dtc = evaluations['dtc'].groupby('name').apply(lambda x: evaluate2(x, 'auc'))
rfc = evaluations['rfc'].groupby('name').apply(lambda x: evaluate2(x, 'auc'))
dtr = evaluations['dtr'].groupby('name').apply(lambda x: evaluate2(x, 'r2'))
rfr = evaluations['rfr'].groupby('name').apply(lambda x: evaluate2(x, 'r2'))

In [984]:
dtc = pd.merge(dtc.reset_index(drop=False), splits['dtc'][['name', 'rate', 'rate_kfold']], on=['name']).set_index('name')
dtr = pd.merge(dtr.reset_index(drop=False), splits['dtr'][['name', 'rate', 'rate_kfold']], on=['name']).set_index('name')
rfc = pd.merge(rfc.reset_index(drop=False), splits['rfc'][['name', 'rate', 'rate_kfold']], on=['name']).set_index('name')
rfr = pd.merge(rfr.reset_index(drop=False), splits['rfr'][['name', 'rate', 'rate_kfold']], on=['name']).set_index('name')

In [985]:
dtc.columns = pd.MultiIndex.from_tuples(('Decision Tree', col) for col in dtc.columns)
rfc.columns = pd.MultiIndex.from_tuples(('Random Forest', col) for col in rfc.columns)

dtr.columns = pd.MultiIndex.from_tuples(('Decision Tree', col) for col in dtr.columns)
rfr.columns = pd.MultiIndex.from_tuples(('Random Forest', col) for col in rfr.columns)

In [986]:
binclas = pd.merge(dtc, rfc, left_index=True, right_index=True).reset_index(drop=False)
regr = pd.merge(dtr, rfr, left_index=True, right_index=True).reset_index(drop=False)

In [987]:
binclas.columns = pd.MultiIndex.from_tuples(('Classification', *col) for col in binclas.columns)
regr.columns = pd.MultiIndex.from_tuples(('Regression', *col) for col in regr.columns)

In [988]:
binclas

Unnamed: 0_level_0,Classification,Classification,Classification,Classification,Classification,Classification,Classification,Classification,Classification
Unnamed: 0_level_1,name,Decision Tree,Decision Tree,Decision Tree,Decision Tree,Random Forest,Random Forest,Random Forest,Random Forest
Unnamed: 0_level_2,Unnamed: 1_level_2,diff,p,rate,rate_kfold,diff,p,rate,rate_kfold
0,abalone9_18,0.0,0.5,0.0,0.0,-1.271275e-06,0.4486313,0.082214,0.076214
1,appendicitis,0.0,0.5,0.0,0.003534,-4.411765e-06,0.3965131,0.022556,0.031279
2,bupa,0.000105,0.007051001,0.117647,0.095642,0.002388147,6.398291e-56,0.249033,0.243139
3,cleveland-0_vs_4,-0.003139,0.07374231,0.0,0.014337,-0.0007637311,3.434543e-06,0.094907,0.106972
4,ecoli1,0.0,0.5,0.0,0.056667,-6.760699e-05,0.0001832211,0.117832,0.122796
5,glass0,0.0,0.5,0.0,0.032316,-1.999032e-05,0.2266212,0.049146,0.045386
6,haberman,-0.000335,0.0004971797,0.0,0.020378,5.61683e-06,0.4816973,0.039381,0.052
7,hepatitis,-0.000244,0.4734861,0.0,0.044077,-0.0002828571,0.002330632,0.034364,0.04239
8,mammographic,2.6e-05,0.003769766,0.0,0.001086,5.507317e-07,0.003510752,0.018786,0.036491
9,monk-2,0.0,0.5,0.0,0.0,0.0,0.5,0.000551,0.002608


In [989]:
result = pd.concat([binclas, regr], axis=1)

In [990]:
result

Unnamed: 0_level_0,Classification,Classification,Classification,Classification,Classification,Classification,Classification,Classification,Classification,Regression,Regression,Regression,Regression,Regression,Regression,Regression,Regression,Regression
Unnamed: 0_level_1,name,Decision Tree,Decision Tree,Decision Tree,Decision Tree,Random Forest,Random Forest,Random Forest,Random Forest,name,Decision Tree,Decision Tree,Decision Tree,Decision Tree,Random Forest,Random Forest,Random Forest,Random Forest
Unnamed: 0_level_2,Unnamed: 1_level_2,diff,p,rate,rate_kfold,diff,p,rate,rate_kfold,Unnamed: 10_level_2,diff,p,rate,rate_kfold,diff,p,rate,rate_kfold
0,abalone9_18,0.0,0.5,0.0,0.0,-1.271275e-06,0.4486313,0.082214,0.076214,airfoil,8.85171e-06,8.171774e-18,0.004765,0.007478,-9.730602e-06,3.652306e-12,0.008229,0.009761
1,appendicitis,0.0,0.5,0.0,0.003534,-4.411765e-06,0.3965131,0.022556,0.031279,autoMPG6,-0.0003084192,0.0008534206,0.086957,0.049645,3.079508e-05,0.3223636,0.178254,0.170748
2,bupa,0.000105,0.007051001,0.117647,0.095642,0.002388147,6.398291e-56,0.249033,0.243139,baseball,-0.001287322,0.3215768,0.086957,0.141608,0.0006889246,1.693644e-14,0.23198,0.236646
3,cleveland-0_vs_4,-0.003139,0.07374231,0.0,0.014337,-0.0007637311,3.434543e-06,0.094907,0.106972,cpu_performance,0.00213593,1.7747159999999998e-21,0.233333,0.206069,0.001852589,2.2143580000000003e-17,0.237956,0.222382
4,ecoli1,0.0,0.5,0.0,0.056667,-6.760699e-05,0.0001832211,0.117832,0.122796,daily-demand,-0.0003387756,0.01569446,0.0,0.008483,-0.0007188248,5.375951e-46,0.030622,0.028669
5,glass0,0.0,0.5,0.0,0.032316,-1.999032e-05,0.2266212,0.049146,0.045386,diabetes,-6.555686e-05,0.4821781,0.0,0.016287,0.000749265,5.618773e-16,0.0,0.002313
6,haberman,-0.000335,0.0004971797,0.0,0.020378,5.61683e-06,0.4816973,0.039381,0.052,excitation_current,-4.020561e-08,3.191441e-06,0.102041,0.09613,2.113701e-07,1.15457e-18,0.12549,0.12246
7,hepatitis,-0.000244,0.4734861,0.0,0.044077,-0.0002828571,0.002330632,0.034364,0.04239,laser,-0.0009442027,0.03058846,0.254777,0.283509,-0.0002017708,0.006584353,0.382239,0.383465
8,mammographic,2.6e-05,0.003769766,0.0,0.001086,5.507317e-07,0.003510752,0.018786,0.036491,maternal_health_risk,0.001374641,1.131266e-11,0.216867,0.228852,-0.000728661,1.893013e-33,0.238309,0.234374
9,monk-2,0.0,0.5,0.0,0.0,0.0,0.5,0.000551,0.002608,medical_cost,0.0,0.5,0.0,0.001429,2.283849e-06,0.04529445,0.001429,0.001829


In [991]:
def boldface(row):
    res = pd.Series()
    if row['p'] < -1:
        for label, _ in row.items():
            if label != 'p':
                value = row[label]
                value = f'{value:.3f}'
                if value[0] == '0':
                    value = value[1:]
                else:
                    value = '1'
                res[label] = f'{{ \\bfseries {value}}}'
    else:
        for label, _ in row.items():
            if label != 'p' and not label.startswith('rate'):
                value = row[label]
                if abs(value) > 1e-9:
                    value = f'{value:.0e}'
                else:
                    value = '0'
                #if value[0] == '0' or value[0] == '-':
                #    value = value[1:]
                #else:
                #    value = '1'
                if value[0] != '-':
                    value = ' ' + value
                res[label] = f'{value}'
            elif label.startswith('rate'):
                value = row[label]
                res[label] = f'{value:.2f}'
                res[label] = res[label][1:]
    res['p'] = row['p']
    return res

In [992]:
result[('Classification', 'Decision Tree')] = result[('Classification', 'Decision Tree')].apply(boldface, axis=1)
result[('Classification', 'Random Forest')] = result[('Classification', 'Random Forest')].apply(boldface, axis=1)
result[('Regression', 'Decision Tree')] = result[('Regression', 'Decision Tree')].apply(boldface, axis=1)
result[('Regression', 'Random Forest')] = result[('Regression', 'Random Forest')].apply(boldface, axis=1)

  result[('Classification', 'Decision Tree')] = result[('Classification', 'Decision Tree')].apply(boldface, axis=1)
  result[('Classification', 'Decision Tree')] = result[('Classification', 'Decision Tree')].apply(boldface, axis=1)
  result[('Classification', 'Random Forest')] = result[('Classification', 'Random Forest')].apply(boldface, axis=1)
  result[('Classification', 'Random Forest')] = result[('Classification', 'Random Forest')].apply(boldface, axis=1)
  result[('Regression', 'Decision Tree')] = result[('Regression', 'Decision Tree')].apply(boldface, axis=1)
  result[('Regression', 'Decision Tree')] = result[('Regression', 'Decision Tree')].apply(boldface, axis=1)
  result[('Regression', 'Random Forest')] = result[('Regression', 'Random Forest')].apply(boldface, axis=1)
  result[('Regression', 'Random Forest')] = result[('Regression', 'Random Forest')].apply(boldface, axis=1)


In [993]:
result[('Classification', 'Decision Tree', 'p')] = result[('Classification', 'Decision Tree', 'p')].apply(lambda x: '$\\neq$' if x < 0.05 else '')
result[('Classification', 'Random Forest', 'p')] = result[('Classification', 'Random Forest', 'p')].apply(lambda x: '$\\neq$' if x < 0.05 else '')
result[('Regression', 'Decision Tree', 'p')] = result[('Regression', 'Decision Tree', 'p')].apply(lambda x: '$\\neq$' if x < 0.05 else '')
result[('Regression', 'Random Forest', 'p')] = result[('Regression', 'Random Forest', 'p')].apply(lambda x: '$\\neq$' if x < 0.05 else '')

In [994]:
result

Unnamed: 0_level_0,Classification,Classification,Classification,Classification,Classification,Classification,Classification,Classification,Classification,Regression,Regression,Regression,Regression,Regression,Regression,Regression,Regression,Regression
Unnamed: 0_level_1,name,Decision Tree,Decision Tree,Decision Tree,Decision Tree,Random Forest,Random Forest,Random Forest,Random Forest,name,Decision Tree,Decision Tree,Decision Tree,Decision Tree,Random Forest,Random Forest,Random Forest,Random Forest
Unnamed: 0_level_2,Unnamed: 1_level_2,diff,p,rate,rate_kfold,diff,p,rate,rate_kfold,Unnamed: 10_level_2,diff,p,rate,rate_kfold,diff,p,rate,rate_kfold
0,abalone9_18,0.0,,0.0,0.0,-1e-06,,0.08,0.08,airfoil,9e-06,$\neq$,0.0,0.01,-1e-05,$\neq$,0.01,0.01
1,appendicitis,0.0,,0.0,0.0,-4e-06,,0.02,0.03,autoMPG6,-0.0003,$\neq$,0.09,0.05,3e-05,,0.18,0.17
2,bupa,0.0001,$\neq$,0.12,0.1,0.002,$\neq$,0.25,0.24,baseball,-0.001,,0.09,0.14,0.0007,$\neq$,0.23,0.24
3,cleveland-0_vs_4,-0.003,,0.0,0.01,-0.0008,$\neq$,0.09,0.11,cpu_performance,0.002,$\neq$,0.23,0.21,0.002,$\neq$,0.24,0.22
4,ecoli1,0.0,,0.0,0.06,-7e-05,$\neq$,0.12,0.12,daily-demand,-0.0003,$\neq$,0.0,0.01,-0.0007,$\neq$,0.03,0.03
5,glass0,0.0,,0.0,0.03,-2e-05,,0.05,0.05,diabetes,-7e-05,,0.0,0.02,0.0007,$\neq$,0.0,0.0
6,haberman,-0.0003,$\neq$,0.0,0.02,6e-06,,0.04,0.05,excitation_current,-4e-08,$\neq$,0.1,0.1,2e-07,$\neq$,0.13,0.12
7,hepatitis,-0.0002,,0.0,0.04,-0.0003,$\neq$,0.03,0.04,laser,-0.0009,$\neq$,0.25,0.28,-0.0002,$\neq$,0.38,0.38
8,mammographic,3e-05,$\neq$,0.0,0.0,6e-07,$\neq$,0.02,0.04,maternal_health_risk,0.001,$\neq$,0.22,0.23,-0.0007,$\neq$,0.24,0.23
9,monk-2,0.0,,0.0,0.0,0.0,,0.0,0.0,medical_cost,0.0,,0.0,0.0,2e-06,$\neq$,0.0,0.0


In [995]:
#final = result.drop(columns=[('Classification', 'Decision Tree', 'p'), ('Classification', 'Random Forest', 'p'),
#                                ('Regression', 'Decision Tree', 'p'), ('Regression', 'Random Forest', 'p')])
final = result

In [996]:
final

Unnamed: 0_level_0,Classification,Classification,Classification,Classification,Classification,Classification,Classification,Classification,Classification,Regression,Regression,Regression,Regression,Regression,Regression,Regression,Regression,Regression
Unnamed: 0_level_1,name,Decision Tree,Decision Tree,Decision Tree,Decision Tree,Random Forest,Random Forest,Random Forest,Random Forest,name,Decision Tree,Decision Tree,Decision Tree,Decision Tree,Random Forest,Random Forest,Random Forest,Random Forest
Unnamed: 0_level_2,Unnamed: 1_level_2,diff,p,rate,rate_kfold,diff,p,rate,rate_kfold,Unnamed: 10_level_2,diff,p,rate,rate_kfold,diff,p,rate,rate_kfold
0,abalone9_18,0.0,,0.0,0.0,-1e-06,,0.08,0.08,airfoil,9e-06,$\neq$,0.0,0.01,-1e-05,$\neq$,0.01,0.01
1,appendicitis,0.0,,0.0,0.0,-4e-06,,0.02,0.03,autoMPG6,-0.0003,$\neq$,0.09,0.05,3e-05,,0.18,0.17
2,bupa,0.0001,$\neq$,0.12,0.1,0.002,$\neq$,0.25,0.24,baseball,-0.001,,0.09,0.14,0.0007,$\neq$,0.23,0.24
3,cleveland-0_vs_4,-0.003,,0.0,0.01,-0.0008,$\neq$,0.09,0.11,cpu_performance,0.002,$\neq$,0.23,0.21,0.002,$\neq$,0.24,0.22
4,ecoli1,0.0,,0.0,0.06,-7e-05,$\neq$,0.12,0.12,daily-demand,-0.0003,$\neq$,0.0,0.01,-0.0007,$\neq$,0.03,0.03
5,glass0,0.0,,0.0,0.03,-2e-05,,0.05,0.05,diabetes,-7e-05,,0.0,0.02,0.0007,$\neq$,0.0,0.0
6,haberman,-0.0003,$\neq$,0.0,0.02,6e-06,,0.04,0.05,excitation_current,-4e-08,$\neq$,0.1,0.1,2e-07,$\neq$,0.13,0.12
7,hepatitis,-0.0002,,0.0,0.04,-0.0003,$\neq$,0.03,0.04,laser,-0.0009,$\neq$,0.25,0.28,-0.0002,$\neq$,0.38,0.38
8,mammographic,3e-05,$\neq$,0.0,0.0,6e-07,$\neq$,0.02,0.04,maternal_health_risk,0.001,$\neq$,0.22,0.23,-0.0007,$\neq$,0.24,0.23
9,monk-2,0.0,,0.0,0.0,0.0,,0.0,0.0,medical_cost,0.0,,0.0,0.0,2e-06,$\neq$,0.0,0.0


In [997]:
final.columns[0]

('Classification', 'name', '')

In [998]:
final[('Classification', 'name', '')] = final[('Classification', 'name', '')].apply(lambda x: x.replace('_', '-'))
final[('Regression', 'name', '')] = final[('Regression', 'name', '')].apply(lambda x: x.replace('_', '-'))

In [999]:
def mode(string):
    if not string.startswith('rate'):
        first, second = string.split('_')
        first = 'a' if first == 'mean' else 'm'
        second = '<' if second == 'l' else '\leq'
        return first + '$_{' + second + '}$'
    else:
        if string == 'rate':
            return '$\\rho$'
        else:
            return '$\\rho_{k}$'

In [1000]:
final.columns = pd.MultiIndex.from_tuples([col if col[-1] in ['', 'p', 'diff'] else (col[0], col[1], mode(col[2])) for col in final.columns])

In [1001]:
final

Unnamed: 0_level_0,Classification,Classification,Classification,Classification,Classification,Classification,Classification,Classification,Classification,Regression,Regression,Regression,Regression,Regression,Regression,Regression,Regression,Regression
Unnamed: 0_level_1,name,Decision Tree,Decision Tree,Decision Tree,Decision Tree,Random Forest,Random Forest,Random Forest,Random Forest,name,Decision Tree,Decision Tree,Decision Tree,Decision Tree,Random Forest,Random Forest,Random Forest,Random Forest
Unnamed: 0_level_2,Unnamed: 1_level_2,diff,p,$\rho$,$\rho_{k}$,diff,p,$\rho$,$\rho_{k}$,Unnamed: 10_level_2,diff,p,$\rho$,$\rho_{k}$,diff,p,$\rho$,$\rho_{k}$
0,abalone9-18,0.0,,0.0,0.0,-1e-06,,0.08,0.08,airfoil,9e-06,$\neq$,0.0,0.01,-1e-05,$\neq$,0.01,0.01
1,appendicitis,0.0,,0.0,0.0,-4e-06,,0.02,0.03,autoMPG6,-0.0003,$\neq$,0.09,0.05,3e-05,,0.18,0.17
2,bupa,0.0001,$\neq$,0.12,0.1,0.002,$\neq$,0.25,0.24,baseball,-0.001,,0.09,0.14,0.0007,$\neq$,0.23,0.24
3,cleveland-0-vs-4,-0.003,,0.0,0.01,-0.0008,$\neq$,0.09,0.11,cpu-performance,0.002,$\neq$,0.23,0.21,0.002,$\neq$,0.24,0.22
4,ecoli1,0.0,,0.0,0.06,-7e-05,$\neq$,0.12,0.12,daily-demand,-0.0003,$\neq$,0.0,0.01,-0.0007,$\neq$,0.03,0.03
5,glass0,0.0,,0.0,0.03,-2e-05,,0.05,0.05,diabetes,-7e-05,,0.0,0.02,0.0007,$\neq$,0.0,0.0
6,haberman,-0.0003,$\neq$,0.0,0.02,6e-06,,0.04,0.05,excitation-current,-4e-08,$\neq$,0.1,0.1,2e-07,$\neq$,0.13,0.12
7,hepatitis,-0.0002,,0.0,0.04,-0.0003,$\neq$,0.03,0.04,laser,-0.0009,$\neq$,0.25,0.28,-0.0002,$\neq$,0.38,0.38
8,mammographic,3e-05,$\neq$,0.0,0.0,6e-07,$\neq$,0.02,0.04,maternal-health-risk,0.001,$\neq$,0.22,0.23,-0.0007,$\neq$,0.24,0.23
9,monk-2,0.0,,0.0,0.0,0.0,,0.0,0.0,medical-cost,0.0,,0.0,0.0,2e-06,$\neq$,0.0,0.0


In [1002]:
latex = final.to_latex(
    index=False,
    multicolumn_format='c'
)

In [1003]:
tabular_string = latex[len('\\begin{tabular}{'): len('\\begin{tabular}{') + len(final.columns)]
print(tabular_string)
tabular_string_new = 'lrlrrrlrrlrrrlrrrl'
updated = '@{\hspace{4pt}}'.join(tabular_string_new)
latex = latex.replace(tabular_string, updated)

llllllllllllllllll


In [1004]:
print(latex)

\begin{tabular}{l@{\hspace{4pt}}r@{\hspace{4pt}}l@{\hspace{4pt}}r@{\hspace{4pt}}r@{\hspace{4pt}}r@{\hspace{4pt}}l@{\hspace{4pt}}r@{\hspace{4pt}}r@{\hspace{4pt}}l@{\hspace{4pt}}r@{\hspace{4pt}}r@{\hspace{4pt}}r@{\hspace{4pt}}l@{\hspace{4pt}}r@{\hspace{4pt}}r@{\hspace{4pt}}r@{\hspace{4pt}}l}
\toprule
\multicolumn{9}{c}{Classification} & \multicolumn{9}{c}{Regression} \\
name & \multicolumn{4}{c}{Decision Tree} & \multicolumn{4}{c}{Random Forest} & name & \multicolumn{4}{c}{Decision Tree} & \multicolumn{4}{c}{Random Forest} \\
 & diff & p & $\rho$ & $\rho_{k}$ & diff & p & $\rho$ & $\rho_{k}$ &  & diff & p & $\rho$ & $\rho_{k}$ & diff & p & $\rho$ & $\rho_{k}$ \\
\midrule
abalone9-18 &  0 &  & .00 & .00 & -1e-06 &  & .08 & .08 & airfoil &  9e-06 & $\neq$ & .00 & .01 & -1e-05 & $\neq$ & .01 & .01 \\
appendicitis &  0 &  & .00 & .00 & -4e-06 &  & .02 & .03 & autoMPG6 & -3e-04 & $\neq$ & .09 & .05 &  3e-05 &  & .18 & .17 \\
bupa &  1e-04 & $\neq$ & .12 & .10 &  2e-03 & $\neq$ & .25 & .24 & b

In [1005]:
with open('tab_presence.tex', 'wt') as file:
    file.write(latex)

In [1006]:
binclas['Decision Tree']

KeyError: 'Decision Tree'

In [None]:
scenario = 'rfc'

In [None]:
data = pd.read_csv(f'evaluation_{scenario}.csv')

In [None]:
score = 'auc' if 'auc' in data.columns else 'r2'

In [None]:
def do_testing(pdf, metric):
    params = pdf['mode'].drop_duplicates().values
    pdf0 = pdf[pdf['mode'] == params[0]].sort_values('fold')
    pdf1 = pdf[pdf['mode'] == params[1]].sort_values('fold')

    return pd.Series({
        'wilcoxon': wilcoxon(pdf0[metric], pdf1[metric], zero_method='zsplit').pvalue
    })

In [None]:
grouped = data\
    .groupby(['name', 'mode'])\
    .apply(lambda pdf: pdf.sort_values('fold')[score].values.tolist())\
    .reset_index(drop=False)\
    .rename(columns={0: score})

In [None]:
def evaluate_one(grouped, pivot_postfix='<='):
    pivot_row = grouped[grouped['mode'] == f"{pivot_postfix}"].iloc[0]
    pivot_label = pivot_row['mode']

    other_rows = grouped[grouped['mode'] != pivot_label]

    result = {f'{score}_{pivot_label}': np.mean(pivot_row[score])}

    for idx, row in other_rows.iterrows():
        result[f'{score}_{row["mode"]}'] = np.mean(row[score])
        if row["mode"] in ['<', '<=']:
            result[f'p_{row["mode"]}'] = wilcoxon(pivot_row[score], row[score], zero_method="zsplit").pvalue
        else:
            result[f'p_{row["mode"]}_l'] = wilcoxon(pivot_row[score], row[score], zero_method="zsplit", alternative="less").pvalue
            result[f'p_{row["mode"]}_g'] = wilcoxon(pivot_row[score], row[score], zero_method="zsplit", alternative="greater").pvalue

    return pd.Series(result)

def evaluate_min_max(grouped):
    pivot_leq = grouped[grouped['mode'] == "<="].iloc[0]
    pivot_l = grouped[grouped['mode'] == "<"].iloc[0]

    score_leq = np.mean(pivot_leq[score])
    score_l = np.mean(pivot_l[score])

    if score_leq < score_l:
        min_row = pivot_leq
        max_row = pivot_l
    else:
        min_row = pivot_l
        max_row = pivot_leq

    other_rows = grouped[~grouped['mode'].isin(['<=', '<'])]

    result = {f'{score}_min': np.mean(min_row[score]),
                f'{score}_max': np.mean(max_row[score])}

    for idx, row in other_rows.iterrows():
        result[f'{score}_{row["mode"]}'] = np.mean(row[score])

        result[f'p_{row["mode"]}_min'] = wilcoxon(row[score], min_row[score], zero_method="zsplit", alternative="greater").pvalue
        result[f'p_{row["mode"]}_max'] = wilcoxon(row[score], max_row[score], zero_method="zsplit", alternative="less").pvalue

    return pd.Series(result)

In [None]:
def evaluate_all(data, pivot_postfix='<='):
    return data.groupby("name").apply(lambda x: evaluate_one(x, pivot_postfix))


In [None]:
grouped.groupby("name").apply(evaluate_min_max)

          name      mode                                                auc
0  abalone9_18         <  [0.8679549114331723, 0.7581521739130435, 0.667...
1  abalone9_18        <=  [0.8679549114331723, 0.7581521739130435, 0.667...
2  abalone9_18   avg_all  [0.8679549114331723, 0.7581521739130435, 0.667...
3  abalone9_18  avg_half  [0.8679549114331723, 0.7581521739130435, 0.667...
4  abalone9_18  specific  [0.8679549114331723, 0.7581521739130435, 0.667...
           name      mode                                                auc
5  appendicitis         <  [0.8588235294117648, 0.7647058823529411, 0.911...
6  appendicitis        <=  [0.8588235294117648, 0.7647058823529411, 0.897...
7  appendicitis   avg_all  [0.8588235294117648, 0.7647058823529411, 0.897...
8  appendicitis  avg_half  [0.8588235294117648, 0.7647058823529411, 0.897...
9  appendicitis  specific  [0.8588235294117648, 0.7647058823529411, 0.897...
    name      mode                                                auc
10  bupa    

Unnamed: 0_level_0,auc_min,auc_max,auc_avg_all,p_avg_all_min,p_avg_all_max,auc_avg_half,p_avg_half_min,p_avg_half_max,auc_specific,p_specific_min,p_specific_max
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
abalone9_18,0.8504,0.850401,0.850404,0.4298141,0.5181608,0.850399,0.1061404,0.8676687,0.850401,0.4486313,0.5
appendicitis,0.854646,0.854651,0.854668,0.5348933,0.5705234,0.854638,0.5531214,0.5522187,0.854646,0.5,0.6034869
bupa,0.768082,0.77047,0.769375,1.325996e-46,1.526726e-35,0.769292,1.486021e-33,1.121001e-30,0.769884,8.603254e-37,5.725887e-09
cleveland-0_vs_4,0.975759,0.976522,0.976118,0.00524609,0.004020133,0.976105,0.01072252,0.005042984,0.976522,3.434543e-06,0.5
ecoli1,0.958258,0.958325,0.958301,0.006066381,0.04039756,0.958288,0.04709558,0.005118013,0.958325,0.0001832211,0.5
glass0,0.930402,0.930422,0.930414,0.3445875,0.3779421,0.93041,0.4050638,0.3487645,0.93041,0.3776664,0.3288417
haberman,0.720607,0.720613,0.720585,0.7653497,0.2682971,0.720602,0.4698231,0.3223207,0.720607,0.5,0.5183027
hepatitis,0.871735,0.872018,0.871873,0.06593984,0.02887862,0.871888,0.03527014,0.06633195,0.87178,0.2706473,0.009879414
lymphography,0.994233,0.995412,0.995306,0.001308777,0.4129667,0.995306,0.003220631,0.3495543,0.995133,0.02424079,0.297379
mammographic,0.911992,0.911992,0.911992,0.9841486,0.891747,0.912007,0.8892974,0.9995737,0.911992,0.5,0.9964892


In [None]:
final = evaluate_all(grouped, pivot_postfix='<=')
final

Unnamed: 0_level_0,auc_<=,auc_<,p_<,auc_avg_all,p_avg_all_l,p_avg_all_g,auc_avg_half,p_avg_half_l,p_avg_half_g,auc_specific,p_specific_l,p_specific_g
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
abalone9_18,0.8504,0.850401,0.8972626,0.850404,0.4298141,0.5701859,0.850399,0.1061404,0.8938596,0.850401,0.4486313,0.5513687
appendicitis,0.854646,0.854651,0.7930262,0.854668,0.5348933,0.4651067,0.854638,0.5531214,0.4468786,0.854646,0.5,0.5
bupa,0.77047,0.768082,1.2796580000000001e-55,0.769375,1.0,1.526726e-35,0.769292,1.0,1.121001e-30,0.769884,1.0,5.725887e-09
cleveland-0_vs_4,0.975759,0.976522,6.869086e-06,0.976118,0.00524609,0.9947539,0.976105,0.01072252,0.9892775,0.976522,3.434543e-06,0.9999966
ecoli1,0.958258,0.958325,0.0003664422,0.958301,0.006066381,0.9939336,0.958288,0.04709558,0.9529044,0.958325,0.0001832211,0.9998168
glass0,0.930402,0.930422,0.4532423,0.930414,0.3445875,0.6554125,0.93041,0.4050638,0.5949362,0.93041,0.3776664,0.6223336
haberman,0.720613,0.720607,0.9633946,0.720585,0.7317029,0.2682971,0.720602,0.6776793,0.3223207,0.720607,0.4816973,0.5183027
hepatitis,0.871735,0.872018,0.004661264,0.871873,0.06593984,0.9340602,0.871888,0.03527014,0.9647299,0.87178,0.2706473,0.7293527
lymphography,0.994233,0.995412,0.01399264,0.995306,0.001308777,0.9986912,0.995306,0.003220631,0.9967794,0.995133,0.02424079,0.9757592
mammographic,0.911992,0.911992,0.007021504,0.911992,0.108253,0.891747,0.912007,0.0004263038,0.9995737,0.911992,0.003510752,0.9964892


In [None]:
(final <= 0.05).sum()

auc_<=          0
auc_<           0
p_<             9
auc_avg_all     0
p_avg_all_l     6
p_avg_all_g     1
auc_avg_half    0
p_avg_half_l    8
p_avg_half_g    2
auc_specific    0
p_specific_l    6
p_specific_g    3
dtype: int64

In [None]:
final = evaluate_all(grouped, pivot_postfix='<')
final

Unnamed: 0_level_0,auc_<,auc_<=,p_<=,auc_avg_all,p_avg_all_l,p_avg_all_g,auc_avg_half,p_avg_half_l,p_avg_half_g,auc_specific,p_specific_l,p_specific_g
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
abalone9_18,0.850401,0.8504,0.8972626,0.850404,0.4818392,0.518161,0.850399,0.1323313,0.867669,0.850401,0.5,0.5
appendicitis,0.854651,0.854646,0.7930262,0.854668,0.4294766,0.570523,0.854638,0.4477813,0.552219,0.854646,0.3965131,0.6034869
bupa,0.768082,0.77047,1.2796580000000001e-55,0.769375,1.325996e-46,1.0,0.769292,1.486021e-33,1.0,0.769884,8.603254e-37,1.0
cleveland-0_vs_4,0.976522,0.975759,6.869086e-06,0.976118,0.9959799,0.00402,0.976105,0.994957,0.005043,0.976522,0.5,0.5
ecoli1,0.958325,0.958258,0.0003664422,0.958301,0.9596024,0.040398,0.958288,0.994882,0.005118,0.958325,0.5,0.5
glass0,0.930422,0.930402,0.4532423,0.930414,0.6220579,0.377942,0.93041,0.6512355,0.348765,0.93041,0.6711583,0.3288417
haberman,0.720607,0.720613,0.9633946,0.720585,0.7653497,0.23465,0.720602,0.4698231,0.530177,0.720607,0.5,0.5
hepatitis,0.872018,0.871735,0.004661264,0.871873,0.9711214,0.028879,0.871888,0.933668,0.066332,0.87178,0.9901206,0.009879414
lymphography,0.995412,0.994233,0.01399264,0.995306,0.5870333,0.412967,0.995306,0.6504457,0.349554,0.995133,0.702621,0.297379
mammographic,0.911992,0.911992,0.007021504,0.911992,0.9841486,0.015851,0.912007,0.8892974,0.110703,0.911992,0.5,0.5


In [None]:
(final <= 0.05).sum()

auc_<           0
auc_<=          0
p_<=            9
auc_avg_all     0
p_avg_all_l     2
p_avg_all_g     7
auc_avg_half    0
p_avg_half_l    1
p_avg_half_g    5
auc_specific    0
p_specific_l    1
p_specific_g    2
dtype: int64