In [1]:
import numpy as np
import pandas as pd
import pickle
import xgboost
import os
from scipy.stats import ttest_1samp, ttest_ind, ttest_rel
from config import path_prefix_results
import warnings

# Silence every warning for the rest of the session (no category/module filter is given).
# NOTE(review): this also hides pandas warnings such as SettingWithCopyWarning and
# deprecation notices -- consider narrowing the filter.
warnings.filterwarnings('ignore')


In [2]:
# LaTeX output setup: skeleton of a standalone landscape A4 document.
# Literal braces are doubled because the text is filled in with str.format();
# the single '{}' line marks where the generated table is inserted.
latex_template = '\n'.join([
    r'\documentclass[a4paper]{{article}}',
    r'\usepackage{{booktabs}}',
    r'\usepackage{{pdflscape}}',
    r'\begin{{document}}',
    r'\thispagestyle{{empty}}',
    r'\begin{{landscape}}',
    r'{}',
    r'\end{{landscape}}',
    r'\end{{document}}',
    '',  # keeps the trailing newline at the end of the document
])

# Output locations of the per-dataset LaTeX documents inside the results directory.
path_breast_latex, path_thigh_latex = (
    os.path.join(path_prefix_results, tex_name)
    for tex_name in ('breast.tex', 'thigh.tex')
)

In [3]:
def test_diff_cross(df, row='multi-atlas'):
    tmp= pd.DataFrame(index=df.columns, columns=df.columns)
    for i in df.columns:
        for j in df.columns:
            tmp.loc[i,j]= ttest_rel(np.abs(df.loc[row, i]), np.abs(df.loc[row, j]))[1]
    return tmp

def test_diff(df, col='multi-atlas'):
    tmp= pd.DataFrame(index=df.index, columns=df.columns)
    for i in df.index:
        for j in df.columns:
            tmp.loc[i,j]= ttest_rel(np.abs(df.loc[i, j]), np.abs(df.loc[col, j]))[1]
    tmp= tmp.drop(col, axis='rows')
    return tmp

def test_compare(df0, df1):
    tmp= pd.DataFrame(index=df0.index, columns=df0.columns)
    for i in df0.index:
        for j in df0.columns:
            tmp.loc[i,j]= ttest_rel(np.abs(df0.loc[i, j]), np.abs(df1.loc[i, j]))[1]
    return tmp

In [4]:
# Load the stored experiment results: data_fs feeds the "feature selection"
# tables below, data_no_fs the "no feature selection" ones.
# NOTE: unpickling can execute arbitrary code -- only load result files that
# were produced by this project.
# The context managers close the file handles deterministically instead of
# leaving them open until garbage collection.
with open(os.path.join(path_prefix_results, 'results.pickle'), 'rb') as results_file:
    data_fs = pickle.load(results_file)
with open(os.path.join(path_prefix_results, 'results_no_fs.pickle'), 'rb') as results_file:
    data_no_fs = pickle.load(results_file)

In [5]:
# Split both result sets by dataset. The explicit .copy() makes every subset an
# independent frame, so adding a column to it later (the 'diff' column) is not
# a write to a slice of data_fs / data_no_fs and no longer raises pandas'
# SettingWithCopyWarning.
thigh_fs = data_fs[data_fs['dataset'] == 'thigh'].copy()
breast_fs = data_fs[data_fs['dataset'] == 'breast'].copy()

thigh_no_fs = data_no_fs[data_no_fs['dataset'] == 'thigh'].copy()
breast_no_fs = data_no_fs[data_no_fs['dataset'] == 'breast'].copy()

In [6]:
# Show the distinct 'type' labels present in the thigh results, in sorted order.
thigh_type_labels = sorted(thigh_fs['type'].unique())
print(thigh_type_labels)

['074k', '075a', '075k', '077k', '079k', '080k', '082k', '083a', '083k', '084a', '087f', '088f', '088k', '090a', '091f', '092k', 'all', 'mean_mask']


In [7]:
# Short display names for the regression models (used as table column labels).
rename_dict_regr = {
    'KNNR_Objective': 'kNN',
    'LinearRegression_Objective': 'linear',
    'PLSRegression_Objective': 'PLS',
    'RidgeRegression_Objective': 'ridge',
    'LassoRegression_Objective': 'lasso',
}

# Display names for the 'type' values: every single-atlas id becomes
# "atlas <n>" (numbered in sorted order), while the two aggregate entries get
# fixed names. The aggregates are kept out of the numbering so the atlas
# counter does not depend on where 'all' / 'mean_mask' happen to sort.
aggregate_type_names = {'all': "multi-atlas", 'mean_mask': "mean"}
atlas_ids = [
    type_id
    for type_id in sorted(thigh_fs['type'].unique())
    if type_id not in aggregate_type_names
]
rename_dict_type = {
    atlas_id: f"atlas {number}"
    for number, atlas_id in enumerate(atlas_ids, start=1)
}
rename_dict_type.update(aggregate_type_names)
print(rename_dict_type)

# Row / column order shared by all tables below.
row_order = list(rename_dict_type.values())
col_order = ['linear', 'PLS', 'lasso', 'ridge', 'kNN']

# One row per type, one column per model; the cells come from the 'r2_1'
# column (presumably an R^2 score -- confirm), rounded to 4 decimals.
thigh_fs_results = (
    thigh_fs.pivot(index='type', columns='model', values='r2_1')
    .rename(rename_dict_regr, axis='columns')
    .rename(rename_dict_type)
    .apply(lambda x: np.round(x, 4))
    .loc[row_order, col_order]
)

thigh_no_fs_results = (
    thigh_no_fs.pivot(index='type', columns='model', values='r2_1')
    .rename(rename_dict_regr, axis='columns')
    .rename(rename_dict_type)
    .apply(lambda x: np.round(x, 4))
    .loc[row_order, col_order]
)

# Put both variants side by side under a two-level column header.
thigh_all = pd.concat([thigh_fs_results, thigh_no_fs_results], axis='columns')
columns = pd.MultiIndex.from_product([['feature selection', 'no feature selection'], col_order])
thigh_all.columns = columns

print(thigh_all.to_latex())

{'074k': 'atlas 1', '075a': 'atlas 2', '075k': 'atlas 3', '077k': 'atlas 4', '079k': 'atlas 5', '080k': 'atlas 6', '082k': 'atlas 7', '083a': 'atlas 8', '083k': 'atlas 9', '084a': 'atlas 10', '087f': 'atlas 11', '088f': 'atlas 12', '088k': 'atlas 13', '090a': 'atlas 14', '091f': 'atlas 15', '092k': 'atlas 16', 'all': 'multi-atlas', 'mean_mask': 'mean'}
\begin{tabular}{lrrrrrrrrrr}
\toprule
 & \multicolumn{5}{r}{feature selection} & \multicolumn{5}{r}{no feature selection} \\
 & linear & PLS & lasso & ridge & kNN & linear & PLS & lasso & ridge & kNN \\
type &  &  &  &  &  &  &  &  &  &  \\
\midrule
atlas 1 & 0.518700 & 0.566200 & 0.556100 & 0.568600 & 0.572500 & 0.042900 & 0.472300 & 0.484900 & 0.477300 & 0.354800 \\
atlas 2 & 0.484500 & 0.510400 & 0.501000 & 0.518800 & 0.442600 & -0.174600 & 0.446000 & 0.434300 & 0.399700 & 0.360000 \\
atlas 3 & 0.329100 & 0.332600 & 0.328600 & 0.340400 & 0.323400 & -1.876700 & 0.223700 & 0.273400 & 0.233900 & 0.257900 \\
atlas 4 & 0.576000 & 0.583300 

In [8]:
# Embed the thigh table in the standalone LaTeX document and save it.
with open(path_thigh_latex, 'w') as thigh_tex_file:
    thigh_table_latex = thigh_all.to_latex()
    thigh_tex_file.write(latex_template.format(thigh_table_latex))

In [9]:
# Squared prediction error per result row. assign() returns a new frame
# instead of writing a column into a filtered subset, which avoids pandas'
# SettingWithCopyWarning; the names are rebound so thigh_fs / thigh_no_fs
# still carry the 'diff' column afterwards.
# NOTE(review): 'y_pred_1' / 'y_test_1' presumably hold per-sample arrays
# (the paired t-tests below need array-valued cells) -- confirm.
thigh_fs = thigh_fs.assign(diff=(thigh_fs['y_pred_1'] - thigh_fs['y_test_1'])**2)
thigh_fs_test = (
    thigh_fs.pivot(index='type', columns='model', values='diff')
    .rename(rename_dict_regr, axis='columns')
    .rename(rename_dict_type)
    .loc[row_order, col_order]
)

thigh_no_fs = thigh_no_fs.assign(diff=(thigh_no_fs['y_pred_1'] - thigh_no_fs['y_test_1'])**2)
thigh_no_fs_test = (
    thigh_no_fs.pivot(index='type', columns='model', values='diff')
    .rename(rename_dict_regr, axis='columns')
    .rename(rename_dict_type)
    .loc[row_order, col_order]
)

In [10]:
# Paired t-test p-values per type and model, comparing the squared errors with
# and without feature selection; printed as a LaTeX table in 1-digit
# scientific notation.
thigh_fs_vs_no_fs_pvalues = test_compare(thigh_fs_test, thigh_no_fs_test)
print(thigh_fs_vs_no_fs_pvalues.applymap(lambda p: '%.0e' % p).to_latex())

\begin{tabular}{llllll}
\toprule
model & linear & PLS & lasso & ridge & kNN \\
type &  &  &  &  &  \\
\midrule
atlas 1 & 7e-28 & 4e-14 & 7e-21 & 8e-23 & 2e-25 \\
atlas 2 & 4e-29 & 8e-22 & 9e-14 & 9e-19 & 9e-14 \\
atlas 3 & 9e-08 & 3e-30 & 6e-15 & 5e-31 & 2e-14 \\
atlas 4 & 3e-47 & 4e-23 & 5e-34 & 1e-15 & 1e-26 \\
atlas 5 & 2e-14 & 2e-16 & 1e-27 & 9e-01 & 1e-33 \\
atlas 6 & 3e-16 & 2e-10 & 9e-05 & 1e-13 & 9e-22 \\
atlas 7 & 8e-37 & 7e-25 & 4e-13 & 7e-13 & 4e-21 \\
atlas 8 & 1e-37 & 7e-12 & 7e-14 & 1e-19 & 4e-15 \\
atlas 9 & 1e-01 & 2e-12 & 7e-13 & 1e-15 & 2e-21 \\
atlas 10 & 3e-41 & 5e-27 & 4e-25 & 1e-12 & 1e-29 \\
atlas 11 & 6e-33 & 1e-13 & 6e-12 & 3e-06 & 2e-31 \\
atlas 12 & 2e-34 & 6e-11 & 5e-18 & 1e-12 & 4e-35 \\
atlas 13 & 1e-37 & 2e-15 & 1e-21 & 6e-32 & 1e-29 \\
atlas 14 & 3e-60 & 2e-15 & 6e-19 & 5e-15 & 6e-32 \\
atlas 15 & 2e-49 & 2e-18 & 4e-22 & 3e-31 & 2e-29 \\
atlas 16 & 8e-30 & 5e-19 & 2e-14 & 4e-19 & 2e-19 \\
multi-atlas & 5e-64 & 9e-57 & 6e-99 & 8e-82 & 2e-53 \\
mean & 4e-2

In [11]:
# Paired t-test p-values of every row against the 'multi-atlas' row (the
# default reference of test_diff), per model, for the feature-selection run.
thigh_vs_multi_atlas_pvalues = test_diff(thigh_fs_test)
print(thigh_vs_multi_atlas_pvalues.applymap(lambda p: '%.0e' % p).to_latex())

\begin{tabular}{llllll}
\toprule
model & linear & PLS & lasso & ridge & kNN \\
type &  &  &  &  &  \\
\midrule
atlas 1 & 3e-76 & 8e-18 & 1e-64 & 5e-67 & 3e-16 \\
atlas 2 & 6e-143 & 2e-58 & 8e-147 & 2e-156 & 2e-47 \\
atlas 3 & 6e-99 & 3e-52 & 5e-100 & 4e-105 & 8e-48 \\
atlas 4 & 3e-108 & 2e-47 & 2e-135 & 4e-117 & 1e-04 \\
atlas 5 & 1e-105 & 5e-23 & 2e-125 & 6e-113 & 5e-34 \\
atlas 6 & 2e-129 & 4e-55 & 2e-160 & 9e-121 & 3e-24 \\
atlas 7 & 4e-112 & 1e-55 & 1e-142 & 1e-120 & 7e-10 \\
atlas 8 & 7e-92 & 3e-62 & 1e-100 & 5e-99 & 8e-48 \\
atlas 9 & 1e-73 & 1e-42 & 7e-77 & 6e-94 & 5e-37 \\
atlas 10 & 1e-98 & 3e-28 & 3e-91 & 2e-86 & 7e-12 \\
atlas 11 & 6e-131 & 1e-55 & 2e-129 & 3e-138 & 6e-20 \\
atlas 12 & 3e-113 & 4e-50 & 2e-128 & 5e-143 & 5e-17 \\
atlas 13 & 6e-126 & 3e-48 & 4e-127 & 5e-128 & 7e-20 \\
atlas 14 & 9e-104 & 5e-28 & 2e-107 & 2e-115 & 3e-15 \\
atlas 15 & 8e-76 & 3e-19 & 2e-122 & 8e-133 & 3e-08 \\
atlas 16 & 2e-128 & 3e-76 & 1e-121 & 7e-141 & 2e-58 \\
mean & 1e-98 & 8e-45 & 1e-85 & 

In [12]:
# Pairwise paired t-test p-values between the models, using only the
# 'multi-atlas' row of the feature-selection run.
thigh_multi_atlas_row = thigh_fs_test.loc[['multi-atlas']]
thigh_model_cross_pvalues = test_diff_cross(thigh_multi_atlas_row)
print(thigh_model_cross_pvalues.applymap(lambda p: '%.0e' % p).to_latex())

\begin{tabular}{llllll}
\toprule
model & linear & PLS & lasso & ridge & kNN \\
model &  &  &  &  &  \\
\midrule
linear & nan & 1e-86 & 4e-01 & 3e-43 & 3e-111 \\
PLS & 1e-86 & nan & 7e-90 & 3e-100 & 4e-05 \\
lasso & 4e-01 & 7e-90 & nan & 2e-39 & 1e-113 \\
ridge & 3e-43 & 3e-100 & 2e-39 & nan & 2e-123 \\
kNN & 3e-111 & 4e-05 & 1e-113 & 2e-123 & nan \\
\bottomrule
\end{tabular}



In [13]:
# Breast counterpart of the thigh table: one row per type, one column per
# model, cells taken from 'r2_1' and rounded to 4 decimals.
# NOTE(review): rename_dict_type / row_order were derived from the thigh
# results; this assumes the breast results use the same 'type' ids -- confirm.
breast_fs_results = (
    breast_fs.pivot(index='type', columns='model', values='r2_1')
    .rename(rename_dict_regr, axis='columns')
    .rename(rename_dict_type)
    .apply(lambda x: np.round(x, 4))
    .loc[row_order, col_order]
)

breast_no_fs_results = (
    breast_no_fs.pivot(index='type', columns='model', values='r2_1')
    .rename(rename_dict_regr, axis='columns')
    .rename(rename_dict_type)
    .apply(lambda x: np.round(x, 4))
    .loc[row_order, col_order]
)

# Put both variants side by side under a two-level column header.
breast_all = pd.concat([breast_fs_results, breast_no_fs_results], axis='columns')
columns = pd.MultiIndex.from_product([['feature selection', 'no feature selection'], col_order])
breast_all.columns = columns

print(breast_all.to_latex())

\begin{tabular}{lrrrrrrrrrr}
\toprule
 & \multicolumn{5}{r}{feature selection} & \multicolumn{5}{r}{no feature selection} \\
 & linear & PLS & lasso & ridge & kNN & linear & PLS & lasso & ridge & kNN \\
type &  &  &  &  &  &  &  &  &  &  \\
\midrule
atlas 1 & 0.830500 & 0.837100 & 0.838500 & 0.844100 & 0.768900 & 0.566500 & 0.789500 & 0.801800 & 0.790500 & 0.695600 \\
atlas 2 & 0.564300 & 0.564700 & 0.547600 & 0.569600 & 0.540900 & 0.084600 & 0.403300 & 0.375200 & 0.409600 & 0.369500 \\
atlas 3 & 0.754100 & 0.706600 & 0.743400 & 0.755400 & 0.504000 & 0.417300 & 0.648200 & 0.652200 & 0.650000 & 0.286100 \\
atlas 4 & 0.883000 & 0.901700 & 0.880200 & 0.911100 & 0.760900 & 0.817700 & 0.860800 & 0.878600 & 0.880500 & 0.635800 \\
atlas 5 & 0.916400 & 0.916000 & 0.904100 & 0.916900 & 0.828300 & 0.850200 & 0.875600 & 0.878600 & 0.882900 & 0.643600 \\
atlas 6 & 0.894200 & 0.895800 & 0.893600 & 0.895800 & 0.835000 & 0.779000 & 0.878700 & 0.878500 & 0.879200 & 0.712300 \\
atlas 7 & 0.910400 & 0.9

In [14]:
# Embed the breast table in the standalone LaTeX document and save it.
with open(path_breast_latex, 'w') as breast_tex_file:
    breast_table_latex = breast_all.to_latex()
    breast_tex_file.write(latex_template.format(breast_table_latex))

In [15]:
# Squared prediction error per result row. assign() returns a new frame
# instead of writing a column into a filtered subset, which avoids pandas'
# SettingWithCopyWarning; the names are rebound so breast_fs / breast_no_fs
# still carry the 'diff' column afterwards.
# NOTE(review): 'y_pred_1' / 'y_test_1' presumably hold per-sample arrays
# (the paired t-tests below need array-valued cells) -- confirm.
breast_fs = breast_fs.assign(diff=(breast_fs['y_pred_1'] - breast_fs['y_test_1'])**2)
breast_fs_test = (
    breast_fs.pivot(index='type', columns='model', values='diff')
    .rename(rename_dict_regr, axis='columns')
    .rename(rename_dict_type)
    .loc[row_order, col_order]
)

breast_no_fs = breast_no_fs.assign(diff=(breast_no_fs['y_pred_1'] - breast_no_fs['y_test_1'])**2)
breast_no_fs_test = (
    breast_no_fs.pivot(index='type', columns='model', values='diff')
    .rename(rename_dict_regr, axis='columns')
    .rename(rename_dict_type)
    .loc[row_order, col_order]
)

In [16]:
# Paired t-test p-values per type and model, comparing the squared errors with
# and without feature selection; printed as a LaTeX table in 1-digit
# scientific notation.
breast_fs_vs_no_fs_pvalues = test_compare(breast_fs_test, breast_no_fs_test)
print(breast_fs_vs_no_fs_pvalues.applymap(lambda p: '%.0e' % p).to_latex())

\begin{tabular}{llllll}
\toprule
model & linear & PLS & lasso & ridge & kNN \\
type &  &  &  &  &  \\
\midrule
atlas 1 & 8e-20 & 2e-25 & 1e-15 & 2e-23 & 6e-22 \\
atlas 2 & 6e-19 & 7e-19 & 9e-15 & 6e-31 & 4e-17 \\
atlas 3 & 1e-23 & 9e-25 & 5e-23 & 3e-24 & 2e-43 \\
atlas 4 & 7e-10 & 6e-22 & 7e-01 & 7e-19 & 5e-30 \\
atlas 5 & 5e-24 & 2e-20 & 5e-12 & 1e-18 & 9e-41 \\
atlas 6 & 2e-38 & 2e-16 & 2e-09 & 2e-23 & 3e-30 \\
atlas 7 & 2e-43 & 7e-18 & 3e-24 & 8e-19 & 3e-62 \\
atlas 8 & 2e-33 & 1e-06 & 2e-18 & 2e-24 & 1e-63 \\
atlas 9 & 1e-31 & 4e-11 & 3e-22 & 8e-17 & 2e-35 \\
atlas 10 & 1e-18 & 2e-16 & 2e-30 & 3e-20 & 2e-32 \\
atlas 11 & 5e-16 & 6e-12 & 5e-10 & 2e-23 & 5e-43 \\
atlas 12 & 4e-16 & 5e-15 & 5e-08 & 2e-13 & 2e-26 \\
atlas 13 & 5e-34 & 2e-19 & 2e-12 & 2e-24 & 3e-45 \\
atlas 14 & 4e-36 & 1e-14 & 8e-24 & 2e-32 & 6e-41 \\
atlas 15 & 7e-12 & 8e-16 & 3e-29 & 3e-29 & 3e-55 \\
atlas 16 & 1e-56 & 7e-04 & 7e-13 & 4e-46 & 2e-17 \\
multi-atlas & 3e-93 & 6e-63 & 1e-80 & 1e-104 & 6e-73 \\
mean & 6e-

In [17]:
# Paired t-test p-values of every row against the 'multi-atlas' row (the
# default reference of test_diff), per model, for the feature-selection run.
breast_vs_multi_atlas_pvalues = test_diff(breast_fs_test)
print(breast_vs_multi_atlas_pvalues.applymap(lambda p: '%.0e' % p).to_latex())

\begin{tabular}{llllll}
\toprule
model & linear & PLS & lasso & ridge & kNN \\
type &  &  &  &  &  \\
\midrule
atlas 1 & 2e-99 & 6e-63 & 3e-80 & 2e-95 & 6e-16 \\
atlas 2 & 2e-128 & 1e-89 & 2e-100 & 2e-111 & 4e-65 \\
atlas 3 & 2e-116 & 4e-127 & 1e-139 & 4e-130 & 3e-90 \\
atlas 4 & 2e-115 & 2e-57 & 4e-77 & 3e-132 & 7e-29 \\
atlas 5 & 2e-110 & 1e-48 & 8e-125 & 2e-153 & 5e-02 \\
atlas 6 & 6e-108 & 2e-54 & 6e-115 & 5e-112 & 8e-02 \\
atlas 7 & 3e-105 & 1e-51 & 2e-91 & 4e-102 & 7e-01 \\
atlas 8 & 5e-122 & 5e-70 & 1e-120 & 5e-99 & 3e-25 \\
atlas 9 & 6e-108 & 4e-76 & 1e-102 & 6e-117 & 6e-33 \\
atlas 10 & 1e-62 & 2e-40 & 2e-76 & 5e-86 & 7e-22 \\
atlas 11 & 1e-119 & 6e-65 & 1e-116 & 1e-113 & 7e-23 \\
atlas 12 & 3e-85 & 2e-37 & 6e-73 & 6e-103 & 2e-05 \\
atlas 13 & 2e-94 & 1e-64 & 4e-125 & 2e-100 & 1e-22 \\
atlas 14 & 7e-89 & 5e-52 & 4e-96 & 5e-96 & 7e-05 \\
atlas 15 & 6e-90 & 4e-41 & 4e-81 & 2e-99 & 9e-06 \\
atlas 16 & 6e-121 & 4e-144 & 4e-118 & 5e-116 & 2e-49 \\
mean & 2e-104 & 3e-55 & 7e-108 & 9

In [18]:
# Pairwise paired t-test p-values between the models, using only the
# 'multi-atlas' row of the feature-selection run.
breast_multi_atlas_row = breast_fs_test.loc[['multi-atlas']]
breast_model_cross_pvalues = test_diff_cross(breast_multi_atlas_row)
print(breast_model_cross_pvalues.applymap(lambda p: '%.0e' % p).to_latex())

\begin{tabular}{llllll}
\toprule
model & linear & PLS & lasso & ridge & kNN \\
model &  &  &  &  &  \\
\midrule
linear & nan & 7e-80 & 1e-01 & 2e-19 & 4e-133 \\
PLS & 7e-80 & nan & 2e-80 & 4e-99 & 6e-90 \\
lasso & 1e-01 & 2e-80 & nan & 4e-19 & 1e-129 \\
ridge & 2e-19 & 4e-99 & 4e-19 & nan & 4e-137 \\
kNN & 4e-133 & 6e-90 & 1e-129 & 4e-137 & nan \\
\bottomrule
\end{tabular}

