In [57]:
import numpy as np
import pandas as pd
import pickle
import xgboost
from scipy.stats import ttest_1samp, ttest_ind, ttest_rel

In [58]:
#data= pd.read_csv('results.csv')
data= pickle.load(open('results.pickle', 'rb'))

In [59]:
mld= data[data['dataset'] == 'mld']
hinds= data[data['dataset'] == 'hinds']

In [60]:
mld.columns

Index(['model_selection_score', 'features', 'parameters', 'model', 'r2_1',
       'rmse_1', 'y_test_1', 'y_pred_1', 'y_indices_1', 'dataset', 'type'],
      dtype='object')

In [61]:
mld_results= mld.pivot(index='type', columns='model', values='r2_1')
mld_results= mld_results.rename({'KNNR_Objective': 'kNN', 'LinearRegression_Objective': 'linear', 'PLSRegression_Objective': 'PLS', 'SVR_RBF_Objective': 'SVR', 'XGBR_Objective': 'XGBR', 'RidgeRegression_Objective': 'ridge', 'LassoRegression_Objective': 'lasso'}, axis='columns')
mld_results= mld_results.rename({'203a': 'atlas 1', '203k': 'atlas 2', '204f': 'atlas 3', '206k': 'atlas 4', '208f': 'atlas 5', 'all': 'multi-atlas', 'mean_mask': 'mean'})
mld_results= mld_results.apply(lambda x: np.round(x, 4))
print(mld_results.to_latex())

\begin{tabular}{lrrrrr}
\toprule
model &     kNN &   lasso &  linear &     PLS &   ridge \\
type        &         &         &         &         &         \\
\midrule
atlas 1     &  0.7423 &  0.7871 &  0.7876 &  0.7905 &  0.7876 \\
atlas 2     &  0.7119 &  0.7753 &  0.7735 &  0.7755 &  0.7757 \\
atlas 3     &  0.7242 &  0.7612 &  0.7658 &  0.7650 &  0.7731 \\
atlas 4     &  0.6388 &  0.7483 &  0.7499 &  0.7582 &  0.7526 \\
atlas 5     &  0.6896 &  0.7524 &  0.7529 &  0.7618 &  0.7526 \\
multi-atlas &  0.7339 &  0.8090 &  0.8113 &  0.8130 &  0.8143 \\
mean        &  0.7255 &  0.7836 &  0.7837 &  0.7877 &  0.7836 \\
\bottomrule
\end{tabular}



In [62]:
hinds_results= hinds.pivot(index='type', columns='model', values='r2_1')
hinds_results= hinds_results.rename({'KNNR_Objective': 'kNN', 'LinearRegression_Objective': 'linear', 'PLSRegression_Objective': 'PLS', 'SVR_RBF_Objective': 'SVR', 'XGBR_Objective': 'XGBR', 'RidgeRegression_Objective': 'ridge', 'LassoRegression_Objective': 'lasso'}, axis='columns')
hinds_results= hinds_results.rename({'203a': 'atlas 1', '203k': 'atlas 2', '204f': 'atlas 3', '206k': 'atlas 4', '208f': 'atlas 5', 'all': 'multi-atlas', 'mean_mask': 'mean'})
hinds_results= hinds_results.apply(lambda x: np.round(x, 4))
print(hinds_results.to_latex())

\begin{tabular}{lrrrrr}
\toprule
model &     kNN &   lasso &  linear &     PLS &   ridge \\
type        &         &         &         &         &         \\
\midrule
atlas 1     &  0.8238 &  0.8763 &  0.8768 &  0.8825 &  0.8843 \\
atlas 2     &  0.8355 &  0.8827 &  0.8822 &  0.8799 &  0.8824 \\
atlas 3     &  0.8212 &  0.8770 &  0.8677 &  0.8782 &  0.8791 \\
atlas 4     &  0.8164 &  0.8818 &  0.8825 &  0.8828 &  0.8839 \\
atlas 5     &  0.8164 &  0.8749 &  0.8704 &  0.8690 &  0.8770 \\
multi-atlas &  0.8527 &  0.8873 &  0.8891 &  0.8890 &  0.8962 \\
mean        &  0.8284 &  0.8810 &  0.8773 &  0.8816 &  0.8824 \\
\bottomrule
\end{tabular}



In [63]:
mld['diff']= mld['y_test_1'].apply(np.array) - mld['y_pred_1'].apply(np.array)
mld_test= mld.pivot(index='type', columns='model', values='diff')
mld_test= mld_test.rename({'KNNR_Objective': 'kNN', 'LinearRegression_Objective': 'linear', 'PLSRegression_Objective': 'PLS', 'SVR_RBF_Objective': 'SVR', 'XGBR_Objective': 'XGBR', 'RidgeRegression_Objective': 'ridge', 'LassoRegression_Objective': 'lasso'}, axis='columns')
mld_test= mld_test.rename({'203a': 'atlas 1', '203k': 'atlas 2', '204f': 'atlas 3', '206k': 'atlas 4', '208f': 'atlas 5', 'all': 'multi-atlas', 'mean_mask': 'mean'})

hinds['diff']= hinds['y_test_1'].apply(np.array) - hinds['y_pred_1'].apply(np.array)
hinds_test= hinds.pivot(index='type', columns='model', values='diff')
hinds_test= hinds_test.rename({'KNNR_Objective': 'kNN', 'LinearRegression_Objective': 'linear', 'PLSRegression_Objective': 'PLS', 'SVR_RBF_Objective': 'SVR', 'XGBR_Objective': 'XGBR', 'RidgeRegression_Objective': 'ridge', 'LassoRegression_Objective': 'lasso'}, axis='columns')
hinds_test= hinds_test.rename({'203a': 'atlas 1', '203k': 'atlas 2', '204f': 'atlas 3', '206k': 'atlas 4', '208f': 'atlas 5', 'all': 'multi-atlas', 'mean_mask': 'mean'})

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [64]:
to_drop= []
mld_test_reduced= mld_test.drop(to_drop, axis='columns')
hinds_test_reduced= hinds_test.drop(to_drop, axis='columns')

In [65]:
mld_test_reduced.columns

Index(['kNN', 'lasso', 'linear', 'PLS', 'ridge'], dtype='object', name='model')

In [66]:
def test_diff_cross(df):
    tmp= pd.DataFrame(index=df.index, columns=df.index)
    for i in df.index:
        for j in df.index:
            tmp.loc[i,j]= ttest_rel(np.abs(np.hstack(df.loc[i].values)), np.abs(np.hstack(df.loc[j].values)))[1]
    return tmp

def test_diff(df):
    tmp= pd.DataFrame(index=df.index, columns=df.columns)
    for i in df.index:
        for j in df.columns:
            tmp.loc[i,j]= ttest_rel(np.abs(np.hstack(df.loc[i, j])), np.abs(np.hstack(df.loc['multi-atlas', j])))[1]
    return tmp

In [67]:
mld_test_pivot= test_diff(mld_test_reduced)
hinds_test_pivot= test_diff(hinds_test_reduced)

In [68]:
print(mld_test_pivot.drop('multi-atlas', axis='rows').astype(float).applymap(lambda x: '%.1e' % x).to_latex())
print(hinds_test_pivot.drop('multi-atlas', axis='rows').astype(float).applymap(lambda x: '%.1e' % x).to_latex())

\begin{tabular}{llllll}
\toprule
model &      kNN &    lasso &   linear &      PLS &    ridge \\
type    &          &          &          &          &          \\
\midrule
atlas 1 &  3.2e-01 &  1.1e-21 &  7.0e-18 &  1.7e-16 &  5.1e-22 \\
atlas 2 &  1.1e-06 &  6.0e-17 &  4.8e-13 &  1.5e-17 &  9.2e-23 \\
atlas 3 &  5.2e-04 &  7.5e-17 &  3.4e-06 &  1.0e-11 &  5.8e-12 \\
atlas 4 &  4.9e-38 &  4.3e-64 &  2.2e-45 &  1.1e-42 &  5.4e-58 \\
atlas 5 &  1.8e-22 &  3.2e-23 &  9.6e-19 &  1.8e-18 &  6.4e-27 \\
mean    &  1.4e-01 &  3.8e-07 &  1.7e-09 &  1.0e-06 &  3.7e-10 \\
\bottomrule
\end{tabular}

\begin{tabular}{llllll}
\toprule
model &      kNN &    lasso &   linear &      PLS &    ridge \\
type    &          &          &          &          &          \\
\midrule
atlas 1 &  6.8e-11 &  1.5e-22 &  9.7e-15 &  2.9e-09 &  7.6e-07 \\
atlas 2 &  4.2e-10 &  4.7e-02 &  5.6e-02 &  2.8e-10 &  8.7e-08 \\
atlas 3 &  1.1e-07 &  2.0e-08 &  1.2e-19 &  9.5e-11 &  2.9e-07 \\
atlas 4 &  5.0e-13 &  6.2e-02 &  1.

In [69]:

hinds_results.applymap(lambda x: ttest_1samp(x, 0.0))

  **kwargs)
  ret = ret.dtype.type(ret / rcount)


model,kNN,lasso,linear,PLS,ridge
type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
atlas 1,"(nan, nan)","(nan, nan)","(nan, nan)","(nan, nan)","(nan, nan)"
atlas 2,"(nan, nan)","(nan, nan)","(nan, nan)","(nan, nan)","(nan, nan)"
atlas 3,"(nan, nan)","(nan, nan)","(nan, nan)","(nan, nan)","(nan, nan)"
atlas 4,"(nan, nan)","(nan, nan)","(nan, nan)","(nan, nan)","(nan, nan)"
atlas 5,"(nan, nan)","(nan, nan)","(nan, nan)","(nan, nan)","(nan, nan)"
multi-atlas,"(nan, nan)","(nan, nan)","(nan, nan)","(nan, nan)","(nan, nan)"
mean,"(nan, nan)","(nan, nan)","(nan, nan)","(nan, nan)","(nan, nan)"


In [70]:
ttest_ind(np.hstack(hinds_results.loc['multi-atlas'].values[0])**2, np.hstack(hinds_results.loc['atlas 5'].values[0])**2)

TypeError: dispatcher for __array_function__ did not return an iterable

In [31]:
tmp

type,atlas 1,atlas 2,atlas 3,atlas 4,atlas 5,multi-atlas,mean
type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
atlas 1,1.0,9.81087e-05,0.00504041,0.00886265,2.9291e-06,0.438448,0.133248
atlas 2,9.81087e-05,1.0,0.211484,0.182906,0.396567,0.00127357,0.0145622
atlas 3,0.00504041,0.211484,1.0,0.908227,0.0353795,0.0380065,0.202854
atlas 4,0.00886265,0.182906,0.908227,1.0,0.0301756,0.0569263,0.259228
atlas 5,2.9291e-06,0.396567,0.0353795,0.0301756,1.0,5.56381e-05,0.00111875
multi-atlas,0.438448,0.00127357,0.0380065,0.0569263,5.56381e-05,1.0,0.447448
mean,0.133248,0.0145622,0.202854,0.259228,0.00111875,0.447448,1.0


In [24]:
np.mean(np.abs(np.hstack(hinds_results.loc['multi-atlas'].values[0])))

4.22539461649508

In [25]:
np.mean(np.abs(np.hstack(hinds_results.loc['atlas 5'].values[0])))

5.243137254901961

In [None]:
for 

In [10]:
from scipy.stats import ttest_ind

In [11]:
ttest_ind(hinds_results.loc['multi-atlas'].values, hinds_results.loc['mean'].values)

Ttest_indResult(statistic=0.2438534443779241, pvalue=0.8134814939711938)