# The analysis of the proposed method

In [56]:
import os
import numpy as np
import pandas as pd

from scipy.stats import wilcoxon, ttest_rel, ranksums

from datasets import binclas_datasets, regr_datasets
from config import dataset_map, data_dir, tab_dir

In [57]:
# Suffix inserted before '.csv' in the evaluation file names read in the next cell;
# the empty string selects the unsuffixed files.
postfix = ''

In [58]:
# Evaluation results, one CSV per scenario key
# (file name pattern: evaluation_<key><postfix>.csv inside data_dir).
evaluations = {
    key: pd.read_csv(os.path.join(data_dir, f'evaluation_{key}{postfix}.csv'))
    for key in ('dtr', 'dtc', 'rfr', 'rfc')
}

# Name of the score column analysed for each scenario.
scores = {
    'dtr': 'r2',
    'dtc': 'auc',
    'rfr': 'r2',
    'rfc': 'auc'
}

# Single-column ('name') frames holding the dataset names after applying dataset_map
# (names without a mapping are kept as they are). They serve as the left side of the
# merges further down. A vectorised Series.map replaces the row-wise DataFrame.apply.
binclas_order = binclas_datasets['name'].map(lambda name: dataset_map.get(name, name)).to_frame('name')
regr_order = regr_datasets['name'].map(lambda name: dataset_map.get(name, name)).to_frame('name')

In [59]:
# Per scenario, keep only the '<=' and '<' modes plus one averaging mode:
# 'avg_full' for the dt* scenarios, 'avg_half' for the rf* scenarios.
modes_by_scenario = {
    'dtc': ['<=', '<', 'avg_full'],
    'dtr': ['<=', '<', 'avg_full'],
    'rfc': ['<=', '<', 'avg_half'],
    'rfr': ['<=', '<', 'avg_half'],
}
for scenario, kept_modes in modes_by_scenario.items():
    frame = evaluations[scenario]
    evaluations[scenario] = frame[frame['mode'].isin(kept_modes)]

In [60]:
# For every scenario, collapse the rows of each (name, mode) pair into one list of
# scores ordered by 'fold'; the resulting list column is named after the scenario's score.
grouped = {}
for key, frame in evaluations.items():
    metric = scores[key]
    fold_scores = frame.groupby(['name', 'mode']).apply(
        lambda pdf: pdf.sort_values('fold')[metric].values.tolist()
    )
    grouped[key] = fold_scores.reset_index(drop=False).rename(columns={0: metric})

In [61]:
def evaluate(grouped, alpha=0.05):
    """
    Compare the '<=', '<' and averaged modes of one dataset with paired Wilcoxon tests.

    All tests are scipy.stats.wilcoxon signed-rank tests with zero_method='zsplit',
    applied to the per-fold score lists of two modes.

    Args:
        grouped (pd.DataFrame): the rows of a single dataset. Needs a 'mode' column
            with a '<=' row, a '<' row and at least one row with any other mode
            (the first such row is used as the averaged variant). The last column
            must hold the list of scores of each row; its name is used as the
            score label in the output.
        alpha (float): significance level used for every decision (default 0.05).

    Returns:
        pd.Series: with the entries
            '<score>_leq', '<score>_l', '<score>_avg': mean score of each mode;
            'issue': whether the two-sided '<' vs '<=' p-value exceeds alpha while
                at least one of the two modes is significantly below the averaged variant;
            'p_neq': the LaTeX asterisk marker if either one-sided '<=' vs '<' test
                is significant, otherwise '';
            'p_leq', 'p_l': '+' if the mode scores significantly lower than the
                averaged variant, '-' if significantly higher, '' otherwise;
            '<score>_diff': averaged mean minus the mean of the significantly lower
                mode; the larger of the two gaps when both modes are lower or when
                only '-' flags are set; '' when no flag is set.
    """
    score = grouped.columns[-1]
    modes = grouped['mode']

    leq = grouped[modes == '<='].iloc[0][score]
    lt = grouped[modes == '<'].iloc[0][score]
    avg = grouped[~modes.isin(['<=', '<'])].iloc[0][score]

    def pvalue(first, second, **kwargs):
        # Paired signed-rank test on first - second; zero differences are split between the signs.
        return wilcoxon(first, second, zero_method='zsplit', **kwargs).pvalue

    # Smaller of the two one-sided p-values for '<=' vs '<'.
    p_neq = min(pvalue(leq, lt, alternative='less'),
                pvalue(leq, lt, alternative='greater'))

    # Is the mode lower ('less') or higher ('greater') than the averaged variant?
    p_leq = pvalue(leq, avg, alternative='less')
    p_l = pvalue(lt, avg, alternative='less')
    p_leq_gr = pvalue(leq, avg, alternative='greater')
    p_l_gr = pvalue(lt, avg, alternative='greater')

    # Two-sided comparison of the two original modes.
    p_orig = pvalue(lt, leq)

    mean_leq, mean_l, mean_avg = np.mean(leq), np.mean(lt), np.mean(avg)
    gap_leq = mean_avg - mean_leq
    gap_l = mean_avg - mean_l

    # A significant 'greater' result overrides '+', matching the order of the checks.
    leq_flag = ''
    if p_leq < alpha:
        leq_flag = '+'
    if p_leq_gr < alpha:
        leq_flag = '-'

    l_flag = ''
    if p_l < alpha:
        l_flag = '+'
    if p_l_gr < alpha:
        l_flag = '-'

    leq_lower = p_leq < alpha
    l_lower = p_l < alpha
    if leq_lower and l_lower:
        diff = max([gap_leq, gap_l])
    elif leq_lower:
        diff = gap_leq
    elif l_lower:
        diff = gap_l
    elif leq_flag != '' or l_flag != '':
        # Only '-' flags are set: still report the larger of the two gaps.
        diff = max([gap_leq, gap_l])
    else:
        diff = ''

    result = {
        f'{score}_leq': mean_leq,
        f'{score}_l': mean_l,
        f'{score}_avg': mean_avg,
        'issue': p_orig > alpha and min(p_leq, p_l) < alpha,
        'p_neq': '$\\ast$' if p_neq < alpha else '',
        'p_leq': leq_flag,
        'p_l': l_flag,
        f'{score}_diff': diff,
        }

    return pd.Series(result)

In [62]:
# Apply evaluate() to the rows of every dataset name within each scenario.
processed = {}
for key, frame in grouped.items():
    processed[key] = frame.groupby('name').apply(evaluate)

In [63]:
# Inspect the evaluate() summary for the 'rfr' scenario (score column: r2).
processed['rfr']

Unnamed: 0_level_0,r2_leq,r2_l,r2_avg,issue,p_neq,p_leq,p_l,r2_diff
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
airfoil,0.934119,0.934128,0.934124,False,$\ast$,+,-,5e-06
autoMPG6,0.871902,0.871871,0.87194,True,,+,+,6.9e-05
baseball,0.671777,0.671088,0.671616,False,$\ast$,-,+,0.000528
cpu_performance,0.861611,0.859758,0.861343,False,$\ast$,-,+,0.001584
daily-demand,0.819386,0.820104,0.81974,False,$\ast$,+,-,0.000354
diabetes,0.039666,0.038917,0.03925,False,$\ast$,-,+,0.000333
excitation_current,0.999908,0.999907,0.999908,False,$\ast$,,+,0.0
laser,0.962984,0.963186,0.963204,False,$\ast$,+,+,0.00022
maternal_health_risk,0.752626,0.753355,0.753176,False,$\ast$,+,-,0.00055
medical_cost,0.765803,0.765801,0.765802,False,$\ast$,-,,1e-06


In [64]:
# Keep only the last four columns of every summary
# (p_neq, p_leq, p_l and the <score>_diff column returned by evaluate()).
selected = {key: frame.iloc[:, -4:] for key, frame in processed.items()}

In [65]:
# Merge each summary onto its name-order frame (left side), then index the result by name.
dtc = binclas_order.merge(selected['dtc'].reset_index(), on='name').set_index('name')
dtr = regr_order.merge(selected['dtr'].reset_index(), on='name').set_index('name')
rfc = binclas_order.merge(selected['rfc'].reset_index(), on='name').set_index('name')
rfr = regr_order.merge(selected['rfr'].reset_index(), on='name').set_index('name')

In [66]:
# Put the model name above the existing column labels of each table.
for frame, model in (
    (dtc, 'Decision Tree'),
    (dtr, 'Decision Tree'),
    (rfc, 'Random Forest'),
    (rfr, 'Random Forest'),
):
    frame.columns = pd.MultiIndex.from_product([[model], frame.columns])

In [67]:
# Join the decision-tree and random-forest tables on their name index,
# then turn the index back into a regular column.
binclas = dtc.merge(rfc, left_index=True, right_index=True).reset_index()
regr = dtr.merge(rfr, left_index=True, right_index=True).reset_index()

In [68]:
# Prepend the task name as a new top level of the column index.
binclas.columns = pd.MultiIndex.from_tuples(
    [('Classification',) + tuple(col) for col in binclas.columns]
)
regr.columns = pd.MultiIndex.from_tuples(
    [('Regression',) + tuple(col) for col in regr.columns]
)

In [69]:
# Place the classification and regression tables side by side. Rows are aligned on the
# index both frames received from reset_index().
# NOTE(review): if the two tables differ in length, the shorter side gets NaN rows — confirm this cannot happen.
result = pd.concat([binclas, regr], axis=1)

In [70]:
# Dataset names: show underscores as hyphens.
for task in ('Classification', 'Regression'):
    name_col = (task, 'name', '')
    result[name_col] = result[name_col].map(lambda label: label.replace('_', '-'))

# Difference columns: one-decimal scientific notation, blanks stay blank.
diff_cols = [col for col in result.columns if col[-1].endswith('diff')]
for col in diff_cols:
    result[col] = result[col].map(lambda value: '' if value == '' else f'{value:.1e}')

In [71]:
# Display the combined table after name and difference formatting.
result

Unnamed: 0_level_0,Classification,Classification,Classification,Classification,Classification,Classification,Classification,Classification,Classification,Regression,Regression,Regression,Regression,Regression,Regression,Regression,Regression,Regression
Unnamed: 0_level_1,name,Decision Tree,Decision Tree,Decision Tree,Decision Tree,Random Forest,Random Forest,Random Forest,Random Forest,name,Decision Tree,Decision Tree,Decision Tree,Decision Tree,Random Forest,Random Forest,Random Forest,Random Forest
Unnamed: 0_level_2,Unnamed: 1_level_2,p_neq,p_leq,p_l,auc_diff,p_neq,p_leq,p_l,auc_diff,Unnamed: 10_level_2,p_neq,p_leq,p_l,r2_diff,p_neq,p_leq,p_l,r2_diff
0,appendicitis,,,,,,,,,diabetes,,,,,$\ast$,-,+,0.00033
1,haberman,$\ast$,+,-,0.00022,,,,,o-ring,$\ast$,+,-,0.0022,$\ast$,+,-,0.015
2,new-thyroid1,,,,,$\ast$,,,,stock-portfolio,,,,,$\ast$,-,+,1.7e-05
3,glass0,,,,,,,,,wsn-ale,,,,,$\ast$,+,-,0.00058
4,shuttle-6-vs-2-3,,,,,,,,,daily-demand,$\ast$,+,-,0.00023,$\ast$,+,-,0.00035
5,bupa,$\ast$,,+,0.00041,$\ast$,-,+,0.0012,slump-test,$\ast$,+,-,0.001,$\ast$,+,-,0.0003
6,cleveland-0-vs-4,,,,,$\ast$,+,-,0.00035,servo,,,,,$\ast$,+,-,4.3e-06
7,ecoli1,,,,,$\ast$,+,-,3e-05,yacht-hydrodynamics,,,,,,,,
8,poker-9-vs-7,,,,,$\ast$,+,-,0.00098,autoMPG6,$\ast$,+,,0.00029,,+,+,6.9e-05
9,monk-2,,,,,,,,,excitation-current,$\ast$,+,+,2.6e-07,$\ast$,,+,2.1e-07


In [72]:
# Per-task views of the combined table.
# NOTE(review): the following cell assigns both names again from the same expressions,
# so this cell appears to be redundant.
classif = result['Classification']
regr = result['Regression']

In [73]:
# Split the combined table per task and put the task name back as the top column level
# (selecting result[<task>] leaves only the two lower levels).
classif = result['Classification']
classif.columns = pd.MultiIndex.from_tuples(
    [('Classification', model, stat) for model, stat in classif.columns]
)

regr = result['Regression']
regr.columns = pd.MultiIndex.from_tuples(
    [('Regression', model, stat) for model, stat in regr.columns]
)

In [74]:
# Render all three tables with the same options: no index column, centred group headers.
latex_options = {'index': False, 'multicolumn_format': 'c'}

latex = result.to_latex(**latex_options)
classif_latex = classif.to_latex(**latex_options)
regr_latex = regr.to_latex(**latex_options)

In [75]:
# Ordered (plain label, LaTeX label) pairs. 'p_l' has to come after 'p_leq',
# because it is a prefix of that label.
header_replacements = (
    ('p_neq', 'p$_{\\neq}$'),
    ('p_leq', 'p$_{\\leq}$'),
    ('p_l', 'p$_{<}$'),
    ('auc_diff', 'auc$_{d}$'),
    ('r2_diff', 'r$^2_{d}$'),
)
for plain, fancy in header_replacements:
    latex = latex.replace(plain, fancy)
    classif_latex = classif_latex.replace(plain, fancy)
    regr_latex = regr_latex.replace(plain, fancy)

In [76]:
# The column spec follows the opening '\begin{tabular}{' and has one letter per table column.
spec_start = len('\\begin{tabular}{')

tabular_string = latex[spec_start:spec_start + len(result.columns)]
print(tabular_string)

classif_string = classif_latex[spec_start:spec_start + len(classif.columns)]
print(classif_string)

regr_string = regr_latex[spec_start:spec_start + len(regr.columns)]
print(regr_string)

llllllllllllllllll
lllllllll
lllllllll


In [77]:
# Separator widths between adjacent columns, as digit strings (the 'pt' unit is appended
# when the column spec is built). Each per-task table has eight gaps; the combined table
# chains both with one extra '8' in between.
classif_spaces = ['4'] * 4 + ['8'] + ['4'] * 3
regr_spaces = ['4'] * 4 + ['8'] + ['4'] * 3
spaces = classif_spaces + ['8'] + regr_spaces

In [78]:
# Sanity check: joining N column letters consumes N - 1 separator widths.
len(spaces), len(tabular_string)

(17, 18)

In [79]:
# Preview of the column spec with explicit separator widths (no unit here).
# The backslash is escaped because '\h' is not a valid Python escape sequence.
'@{\\hspace{%s}}'.join(tabular_string) % tuple(spaces)

'l@{\\hspace{4}}l@{\\hspace{4}}l@{\\hspace{4}}l@{\\hspace{4}}l@{\\hspace{8}}l@{\\hspace{4}}l@{\\hspace{4}}l@{\\hspace{4}}l@{\\hspace{8}}l@{\\hspace{4}}l@{\\hspace{4}}l@{\\hspace{4}}l@{\\hspace{4}}l@{\\hspace{8}}l@{\\hspace{4}}l@{\\hspace{4}}l@{\\hspace{4}}l'

In [80]:
# Separator template placed between column letters; each %s receives one width in pt.
# The backslash is escaped because '\h' is not a valid Python escape sequence.
separator = '@{\\hspace{%spt}}'

# Swap the plain column spec for the spaced one and relabel the 'name' header as 'dataset'.
updated = separator.join(tabular_string) % tuple(spaces)
latex = latex.replace(tabular_string, updated).replace('name', 'dataset')

updated = separator.join(classif_string) % tuple(classif_spaces)
classif_latex = classif_latex.replace(classif_string, updated).replace('name', 'dataset')

updated = separator.join(regr_string) % tuple(regr_spaces)
regr_latex = regr_latex.replace(regr_string, updated).replace('name', 'dataset')

In [81]:
# Show the final LaTeX source of the combined table (print keeps the backslashes unescaped).
print(latex)

\begin{tabular}{l@{\hspace{4pt}}l@{\hspace{4pt}}l@{\hspace{4pt}}l@{\hspace{4pt}}l@{\hspace{8pt}}l@{\hspace{4pt}}l@{\hspace{4pt}}l@{\hspace{4pt}}l@{\hspace{8pt}}l@{\hspace{4pt}}l@{\hspace{4pt}}l@{\hspace{4pt}}l@{\hspace{4pt}}l@{\hspace{8pt}}l@{\hspace{4pt}}l@{\hspace{4pt}}l@{\hspace{4pt}}l}
\toprule
\multicolumn{9}{c}{Classification} & \multicolumn{9}{c}{Regression} \\
dataset & \multicolumn{4}{c}{Decision Tree} & \multicolumn{4}{c}{Random Forest} & dataset & \multicolumn{4}{c}{Decision Tree} & \multicolumn{4}{c}{Random Forest} \\
 & p$_{\neq}$ & p$_{\leq}$ & p$_{<}$ & auc$_{d}$ & p$_{\neq}$ & p$_{\leq}$ & p$_{<}$ & auc$_{d}$ &  & p$_{\neq}$ & p$_{\leq}$ & p$_{<}$ & r$^2_{d}$ & p$_{\neq}$ & p$_{\leq}$ & p$_{<}$ & r$^2_{d}$ \\
\midrule
appendicitis &  &  &  &  &  &  &  &  & diabetes &  &  &  &  & $\ast$ & - & + & 3.3e-04 \\
haberman & $\ast$ & + & - & 2.2e-04 &  &  &  &  & o-ring & $\ast$ & + & - & 2.2e-03 & $\ast$ & + & - & 1.5e-02 \\
new-thyroid1 &  &  &  &  & $\ast$ &  &  &  & stock-p

In [82]:
# Save the combined table in tab_dir.
output_path = os.path.join(tab_dir, 'tab_results.tex')
with open(output_path, 'wt') as handle:
    handle.write(latex)

In [83]:
# Save the classification-only table in tab_dir.
output_path = os.path.join(tab_dir, 'tab_results_classification.tex')
with open(output_path, 'wt') as handle:
    handle.write(classif_latex)

In [84]:
# Save the regression-only table in tab_dir.
output_path = os.path.join(tab_dir, 'tab_results_regression.tex')
with open(output_path, 'wt') as handle:
    handle.write(regr_latex)