In [112]:
import numpy as np
import pandas as pd
import pickle
import xgboost
from scipy.stats import ttest_1samp, ttest_ind, ttest_rel

In [113]:
def test_diff_cross(df):
    tmp= pd.DataFrame(index=df.index, columns=df.index)
    for i in df.index:
        for j in df.index:
            tmp.loc[i,j]= ttest_rel(np.abs(np.hstack(df.loc[i].values)), np.abs(np.hstack(df.loc[j].values)))[1]
    return tmp

def test_diff(df, col='multi-atlas'):
    tmp= pd.DataFrame(index=df.index, columns=df.columns)
    for i in df.index:
        for j in df.columns:
            tmp.loc[i,j]= ttest_rel(np.abs(np.hstack(df.loc[i, j])), np.abs(np.hstack(df.loc[col, j])))[1]
    tmp= tmp.drop(col, axis='rows')
    return tmp

def test_compare(df0, df1):
    tmp= pd.DataFrame(index=df0.index, columns=df0.columns)
    for i in df0.index:
        for j in df0.columns:
            tmp.loc[i,j]= ttest_rel(np.abs(np.hstack(df0.loc[i, j])), np.abs(np.hstack(df1.loc[i, j])))[1]
    return tmp

In [114]:
data_fs= pickle.load(open('results.pickle', 'rb'))
data_no_fs= pickle.load(open('results_no_fs.pickle', 'rb'))



In [115]:
mld_fs= data_fs[data_fs['dataset'] == 'mld']
hinds_fs= data_fs[data_fs['dataset'] == 'hinds']

mld_no_fs= data_no_fs[data_no_fs['dataset'] == 'mld']
hinds_no_fs= data_no_fs[data_no_fs['dataset'] == 'hinds']

In [116]:
mld.columns

Index(['model_selection_score', 'features', 'parameters', 'model', 'r2_1',
       'rmse_1', 'y_test_1', 'y_pred_1', 'y_indices_1', 'dataset', 'type'],
      dtype='object')

In [117]:
rename_dict_regr= {'KNNR_Objective': 'kNN', 'LinearRegression_Objective': 'linear', 'PLSRegression_Objective': 'PLS', 'RidgeRegression_Objective': 'ridge', 'LassoRegression_Objective': 'lasso'}
rename_dict_type= {'203a': 'atlas 1', '203k': 'atlas 2', '204f': 'atlas 3', '206k': 'atlas 4', '208f': 'atlas 5', 'all': 'multi-atlas', 'mean_mask': 'mean'}
row_order= ['atlas 1', 'atlas 2', 'atlas 3', 'atlas 4', 'atlas 5', 'mean', 'multi-atlas']
col_order= ['linear', 'PLS', 'lasso', 'ridge', 'kNN']

mld_fs_results= mld_fs.pivot(index='type', columns='model', values='r2_1')
mld_fs_results= mld_fs_results.rename(rename_dict_regr, axis='columns')
mld_fs_results= mld_fs_results.rename(rename_dict_type)
mld_fs_results= mld_fs_results.apply(lambda x: np.round(x, 4))
mld_fs_results= mld_fs_results.loc[row_order, col_order]

mld_no_fs_results= mld_no_fs.pivot(index='type', columns='model', values='r2_1')
mld_no_fs_results= mld_no_fs_results.rename(rename_dict_regr, axis='columns')
mld_no_fs_results= mld_no_fs_results.rename(rename_dict_type)
mld_no_fs_results= mld_no_fs_results.apply(lambda x: np.round(x, 4))
mld_no_fs_results= mld_no_fs_results.loc[row_order, col_order]

mld_all= pd.concat([mld_fs_results, mld_no_fs_results], axis='columns')
columns= pd.MultiIndex.from_product([['feature selection', 'no feature selection'], col_order])
mld_all.columns= columns

print(mld_all.to_latex())

\begin{tabular}{lrrrrrrrrrr}
\toprule
{} & \multicolumn{5}{l}{feature selection} & \multicolumn{5}{l}{no feature selection} \\
{} &            linear &     PLS &   lasso &   ridge &     kNN &               linear &     PLS &   lasso &   ridge &     kNN \\
type        &                   &         &         &         &         &                      &         &         &         &         \\
\midrule
atlas 1     &            0.7876 &  0.7905 &  0.7871 &  0.7873 &  0.7343 &               0.7434 &  0.7625 &  0.7694 &  0.7674 &  0.5867 \\
atlas 2     &            0.7735 &  0.7755 &  0.7753 &  0.7733 &  0.7119 &               0.7202 &  0.7504 &  0.7659 &  0.7512 &  0.5647 \\
atlas 3     &            0.7658 &  0.7650 &  0.7612 &  0.7645 &  0.7242 &               0.7233 &  0.7457 &  0.7582 &  0.7485 &  0.5197 \\
atlas 4     &            0.7499 &  0.7582 &  0.7493 &  0.7736 &  0.6388 &               0.7096 &  0.7326 &  0.7433 &  0.7364 &  0.5223 \\
atlas 5     &            0.7529 &  0.7618 &  

In [118]:
mld_fs['diff']= (mld_fs['y_pred_1'] - mld_fs['y_test_1'])**2
mld_fs_test= mld_fs.pivot(index='type', columns='model', values='diff')
mld_fs_test= mld_fs_test.rename(rename_dict_regr, axis='columns')
mld_fs_test= mld_fs_test.rename(rename_dict_type)
mld_fs_test= mld_fs_test.loc[row_order, col_order]

mld_no_fs['diff']= (mld_no_fs['y_pred_1'] - mld_no_fs['y_test_1'])**2
mld_no_fs_test= mld_no_fs.pivot(index='type', columns='model', values='diff')
mld_no_fs_test= mld_no_fs_test.rename(rename_dict_regr, axis='columns')
mld_no_fs_test= mld_no_fs_test.rename(rename_dict_type)
mld_no_fs_test= mld_no_fs_test.loc[row_order, col_order]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  import sys


In [130]:
print(test_compare(mld_fs_test, mld_no_fs_test).applymap(lambda x: '%.0e' % x).to_latex())

\begin{tabular}{llllll}
\toprule
model & linear &    PLS &  lasso &  ridge &     kNN \\
type        &        &        &        &        &         \\
\midrule
atlas 1     &  6e-32 &  9e-35 &  1e-51 &  3e-41 &   1e-59 \\
atlas 2     &  3e-45 &  4e-29 &  7e-12 &  7e-35 &   4e-57 \\
atlas 3     &  2e-25 &  5e-11 &  2e-01 &  7e-12 &  3e-111 \\
atlas 4     &  4e-13 &  3e-27 &  1e-02 &  7e-53 &   8e-69 \\
atlas 5     &  7e-16 &  2e-24 &  5e-24 &  9e-09 &   1e-62 \\
mean        &  3e-22 &  1e-26 &  8e-49 &  7e-31 &   9e-47 \\
multi-atlas &  1e-81 &  7e-54 &  2e-53 &  4e-79 &   8e-95 \\
\bottomrule
\end{tabular}



In [131]:
print(test_diff(mld_fs_test).applymap(lambda x: '%.0e' % x).to_latex())

\begin{tabular}{llllll}
\toprule
model & linear &    PLS &  lasso &  ridge &    kNN \\
type    &        &        &        &        &        \\
\midrule
atlas 1 &  4e-12 &  5e-13 &  7e-12 &  2e-19 &  7e-01 \\
atlas 2 &  7e-19 &  6e-29 &  5e-15 &  1e-34 &  1e-03 \\
atlas 3 &  1e-20 &  1e-31 &  7e-29 &  1e-36 &  2e-01 \\
atlas 4 &  1e-42 &  1e-41 &  3e-47 &  2e-23 &  1e-25 \\
atlas 5 &  5e-30 &  2e-38 &  1e-33 &  1e-49 &  1e-15 \\
mean    &  1e-13 &  1e-15 &  4e-10 &  1e-25 &  3e-01 \\
\bottomrule
\end{tabular}



In [121]:
hinds_fs_results= hinds_fs.pivot(index='type', columns='model', values='r2_1')
hinds_fs_results= hinds_fs_results.rename(rename_dict_regr, axis='columns')
hinds_fs_results= hinds_fs_results.rename(rename_dict_type)
hinds_fs_results= hinds_fs_results.apply(lambda x: np.round(x, 4))
hinds_fs_results= hinds_fs_results.loc[row_order, col_order]

hinds_no_fs_results= hinds_no_fs.pivot(index='type', columns='model', values='r2_1')
hinds_no_fs_results= hinds_no_fs_results.rename(rename_dict_regr, axis='columns')
hinds_no_fs_results= hinds_no_fs_results.rename(rename_dict_type)
hinds_no_fs_results= hinds_no_fs_results.apply(lambda x: np.round(x, 4))
hinds_no_fs_results= hinds_no_fs_results.loc[row_order, col_order]

hinds_all= pd.concat([hinds_fs_results, hinds_no_fs_results], axis='columns')
columns= pd.MultiIndex.from_product([['feature selection', 'no feature selection'], col_order])
hinds_all.columns= columns

print(hinds_all.to_latex())

\begin{tabular}{lrrrrrrrrrr}
\toprule
{} & \multicolumn{5}{l}{feature selection} & \multicolumn{5}{l}{no feature selection} \\
{} &            linear &     PLS &   lasso &   ridge &     kNN &               linear &     PLS &   lasso &   ridge &     kNN \\
type        &                   &         &         &         &         &                      &         &         &         &         \\
\midrule
atlas 1     &            0.8768 &  0.8825 &  0.8772 &  0.8843 &  0.8238 &               0.8545 &  0.8688 &  0.8735 &  0.8690 &  0.6573 \\
atlas 2     &            0.8822 &  0.8799 &  0.8827 &  0.8824 &  0.8355 &               0.8545 &  0.8701 &  0.8763 &  0.8684 &  0.6517 \\
atlas 3     &            0.8677 &  0.8782 &  0.8770 &  0.8791 &  0.8212 &               0.8502 &  0.8671 &  0.8703 &  0.8651 &  0.6906 \\
atlas 4     &            0.8825 &  0.8828 &  0.8826 &  0.8862 &  0.8164 &               0.8639 &  0.8740 &  0.8763 &  0.8765 &  0.6557 \\
atlas 5     &            0.8704 &  0.8690 &  

In [122]:
hinds_fs['diff']= (hinds_fs['y_pred_1'] - hinds_fs['y_test_1'])**2
hinds_fs_test= hinds_fs.pivot(index='type', columns='model', values='diff')
hinds_fs_test= hinds_fs_test.rename(rename_dict_regr, axis='columns')
hinds_fs_test= hinds_fs_test.rename(rename_dict_type)
hinds_fs_test= hinds_fs_test.loc[row_order, col_order]

hinds_no_fs['diff']= (hinds_no_fs['y_pred_1'] - hinds_no_fs['y_test_1'])**2
hinds_no_fs_test= hinds_no_fs.pivot(index='type', columns='model', values='diff')
hinds_no_fs_test= hinds_no_fs_test.rename(rename_dict_regr, axis='columns')
hinds_no_fs_test= hinds_no_fs_test.rename(rename_dict_type)
hinds_no_fs_test= hinds_no_fs_test.loc[row_order, col_order]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  import sys


In [132]:
print(test_compare(hinds_fs_test, hinds_no_fs_test).applymap(lambda x: '%.0e' % x).to_latex())

\begin{tabular}{llllll}
\toprule
model &  linear &    PLS &  lasso &  ridge &     kNN \\
type        &         &        &        &        &         \\
\midrule
atlas 1     &   5e-33 &  8e-26 &  5e-06 &  5e-34 &  9e-105 \\
atlas 2     &   2e-74 &  1e-24 &  5e-42 &  7e-46 &  6e-101 \\
atlas 3     &   1e-17 &  2e-14 &  2e-31 &  5e-38 &   4e-74 \\
atlas 4     &   8e-21 &  3e-15 &  2e-17 &  5e-33 &   4e-81 \\
atlas 5     &   6e-31 &  8e-04 &  3e-27 &  1e-49 &  2e-105 \\
mean        &   8e-30 &  4e-22 &  5e-43 &  1e-24 &   3e-76 \\
multi-atlas &  1e-124 &  1e-21 &  3e-40 &  1e-60 &  8e-135 \\
\bottomrule
\end{tabular}



In [133]:
print(test_diff(hinds_fs_test).applymap(lambda x: '%.0e' % x).to_latex())

\begin{tabular}{llllll}
\toprule
model & linear &    PLS &  lasso &  ridge &    kNN \\
type    &        &        &        &        &        \\
\midrule
atlas 1 &  1e-11 &  4e-04 &  7e-16 &  7e-12 &  3e-15 \\
atlas 2 &  1e-04 &  2e-06 &  4e-04 &  3e-13 &  3e-07 \\
atlas 3 &  2e-19 &  6e-07 &  9e-14 &  1e-20 &  2e-14 \\
atlas 4 &  5e-04 &  2e-03 &  2e-05 &  4e-10 &  9e-15 \\
atlas 5 &  6e-20 &  9e-22 &  9e-26 &  3e-24 &  9e-22 \\
mean    &  1e-09 &  7e-05 &  9e-07 &  5e-14 &  2e-09 \\
\bottomrule
\end{tabular}

