### Generate the post hoc dataset for analysis

In [1]:
from imagen_posthocloader import *

In [2]:
# Root directory of the IMAGEN data.
# NOTE(review): IMAGEN_posthoc() below is constructed without this value -- confirm
# that the loader resolves the same location on its own.
DATA_DIR = "/ritter/share/data/IMAGEN"
# Loader object; its set_*/to_*/read_* methods are called in the cells that follow.
posthoc = IMAGEN_posthoc()

### 1. Load the [INSTRUMENT](https://imagen-europe.com/resources/imagen-dataset/documentation/) data

#### collect the selected instrument files from IMAGEN_RAW and store in posthoc

Please refer to <i>set_INSTRUMENT()</i> in <i>imagen_posthocloader.py</i>, and load the file and save it as <b> all_*.csv</b> 

In [3]:
# Instrument
# Every selected instrument is loaded through posthoc.set_INSTRUMENT, one call per
# name, in the order listed here. To use the save=True variant, add it to the call
# inside the comprehension.
instrument_names = (
    'NEO', 'SURPS', 'CTQ', 'CTS', 'CTQ_MD', 'LEQ', 'PBQ',
    'GEN', 'FTND', 'BSI', 'MAST', 'PANAS', 'TCI',
)
instrument_tables = [posthoc.set_INSTRUMENT(name) for name in instrument_names]
# Bind each loaded table to the notebook-level name that later cells rely on.
(NEO, SURPS, CTQ, CTS, CTQ_MD, LEQ, PBQ,
 GEN, FTND, BSI, MAST, PANAS, TCI) = instrument_tables

In [4]:
# general information of the instrument
# selected ROI
def _roi_columns(table):
    """Return the column labels of `table` from the third column onward, as a list."""
    return list(table.columns[2:].values)


col_NEO = _roi_columns(NEO)
col_SURPS = _roi_columns(SURPS)
col_CTQ = _roi_columns(CTQ)
col_CTQ_MD = _roi_columns(CTQ_MD)
col_CTS = _roi_columns(CTS)
col_LEQ = _roi_columns(LEQ)
col_PBQ = _roi_columns(PBQ)
col_GEN = _roi_columns(GEN)
col_FTND = _roi_columns(FTND)
col_BSI = _roi_columns(BSI)
col_MAST = _roi_columns(MAST)
col_PANAS = _roi_columns(PANAS)
col_TCI = _roi_columns(TCI)

# NOTE(review): col_CTQ and col_CTS are computed above but are not part of roi_list
# (col_CTQ_MD is used instead of col_CTQ) -- confirm this is intended.
roi_groups = [
    col_NEO, col_SURPS, col_CTQ_MD, col_LEQ,
    col_PBQ, col_GEN, col_FTND, col_BSI, col_MAST, col_PANAS, col_TCI,
]
# Flatten the per-instrument lists, keeping the group order given above.
roi_list = [column for group in roi_groups for column in group]
print(roi_list)

['Openness mean', 'Conscientiousness mean', 'Extroversion mean', 'Agreeableness mean', 'Neuroticism mean', 'Anxiety Sensitivity mean', 'Hopelessness mean', 'Impulsivity mean', 'Sensation seeking mean', 'Emotional abuse sum', 'Physical abuse sum', 'Sexual abuse sum', 'Emotional neglect sum', 'Physical neglect sum', 'Denial sum', 'MD 1', 'MD 2', 'MD 3', 'Family valence', 'Accident valence', 'Sexuality valence', 'Autonomy valence', 'Devience valence', 'Relocation valence', 'Distress valence', 'Noscale valence', 'Overall valence', 'Family mean frequency', 'Accident mean frequency', 'Sexuality mean frequency', 'Autonomy mean frequency', 'Devience mean frequency', 'Relocation mean frequency', 'Distress mean frequency', 'Noscale mean frequency', 'Overall mean frequency', 'pbq_03', 'pbq_03a', 'pbq_03b', 'pbq_03c', 'pbq_05', 'pbq_05a', 'pbq_05b', 'pbq_05c', 'pbq_06', 'pbq_06a', 'pbq_12', 'pbq_13', 'pbq_13a', 'pbq_13b', 'pbq_13g', 'Paternal_disorder', 'Maternal_disorder', 'Pd_list', 'Md_list', '

### 2. Load the HDF5 data

#### collect the HDF5 files from h5files and save in posthoc

Please refer to <i>set_HDF5()</i> in <i>imagen_posthocloader.py</i>, and load the file and save it as <b> all_*.csv</b>

In [None]:
# Hdf5
# Load the 'Binge' data through posthoc.set_HDF5 (the save=True variant is left commented out).
BINGE = posthoc.set_HDF5('Binge')#, save=True)

In [None]:
# general information of the hdf5: column labels of BINGE
BINGE.columns

### 3. Load the RUN data

#### collect the RUN file from results and save in posthoc

Please refer to <i>set_RUN()</i> in <i>imagen_posthocloader.py</i>, and load the file and save it as <b> all_*.csv</b>

In [None]:
# RUN
# Load the run results from a relative CSV path (the save=True variant is left commented out).
RUN = posthoc.set_RUN('../../results/holdout_all-tp-clean_run.csv')#, save=True)

In [None]:
# general information of the RUN table: column labels
RUN.columns

### 4. Save the INSTRUMENT data

#### collect the instrument files from posthoc into one file

Please refer to <i>to_INSTRUMENT()</i> in <i>imagen_posthocloader.py</i>, and load the file

In [None]:
# collect the instrument tables to combine
# NOTE(review): CTQ_MD, BSI, MAST, PANAS and TCI are loaded earlier in this notebook
# but are not in this list -- confirm the omission is intended.
inst_list = [NEO, SURPS,CTQ, CTS, LEQ, PBQ, GEN,FTND]
# pass the list to posthoc.to_INSTRUMENT and keep the result as INST
# (the save=True variant is left commented out)
INST = posthoc.to_INSTRUMENT(inst_list)#, save=True)

In [None]:
# general information of the instrument
# selected ROI: every column label of INST from the third column onward
col_INST = list(INST.columns[2:].values)
print(col_INST)

### 5. Read the INSTRUMENT data

#### read the instrument files from posthoc into one file

Please refer to <i>read_INSTRUMENT()</i> in <i>imagen_posthocloader.py</i>, and load the file

In [None]:
# Read 'IMAGEN_INSTRUMENT.csv' via posthoc.read_INSTRUMENT; this rebinds INST.
INST = posthoc.read_INSTRUMENT('IMAGEN_INSTRUMENT.csv')

In [None]:
# general information of the instrument: column labels of INST as read back from the CSV
INST.columns

### 6. Read the HDF5 data

#### collect the hdf5 files from posthoc into one file

Please refer to <i>to_HDF5()</i> in <i>imagen_posthocloader.py</i>, and load the file and save it as <b> all_*.csv</b>

In [None]:
# Pass 'all_Binge.csv' to posthoc.to_HDF5 and keep the result as HDF5
# (the save=True variant is left commented out).
HDF5 = posthoc.to_HDF5('all_Binge.csv')#, save=True)

#### read the HDF5 files from posthoc into one file

Please refer to <i>read_HDF5()</i> in <i>imagen_posthocloader.py</i>, and load the file

In [None]:
# Read 'IMAGEN_HDF5.csv' via posthoc.read_HDF5; this rebinds HDF5.
HDF5 = posthoc.read_HDF5('IMAGEN_HDF5.csv')

In [None]:
# general information of the hdf5: column labels of HDF5
HDF5.columns

### 7. Read the RUN data

#### select the ROI of the RUN file from posthoc into one file

Please refer to <i>to_RUN()</i> in <i>imagen_posthocloader.py</i>, and load the file and save it as <b> all_*.csv</b>

In [None]:
# Column labels currently in RUN, shown before the subset COL is defined in the next cell.
RUN.columns

In [None]:
# Column names passed to posthoc.to_RUN in the next cell
# (presumably the columns to keep from the RUN file -- confirm against to_RUN).
COL = ['ID','Session','Trial','dataset','io','technique','Model',
       'TP prob','TN prob','FP prob','FN prob','T prob','F prob','Prob',
       'Predict TF','Model PN','Label PN','true_label','prediction']

In [None]:
# Pass 'all_RUN.csv' and the COL list to posthoc.to_RUN; this rebinds RUN
# (the save = True variant is left commented out).
RUN = posthoc.to_RUN('all_RUN.csv', COL)#, save = True)

In [None]:
# general information of the run: column labels of RUN after to_RUN
RUN.columns

#### read the RUN files from posthoc into one file

Please refer to <i>read_RUN()</i> in <i>imagen_posthocloader.py</i>, and load the file

In [None]:
# Read 'IMAGEN_RUN.csv' via posthoc.read_RUN; this rebinds RUN.
RUN = posthoc.read_RUN('IMAGEN_RUN.csv')

In [None]:
# general information of the run: column labels of RUN as read back from the CSV
RUN.columns

### 8. Save the post hoc dataset

#### set the dataset for analysis of diagnosis (X:FU3 == y:FU3)

Please refer to <i>to_posthoc()</i> in <i>imagen_posthocloader.py</i>, and load the file and save it as <b> all_*.csv</b>

In [None]:
# The three CSV file names (HDF5, INSTRUMENT, RUN) handed to posthoc.to_posthoc.
DATA = ['IMAGEN_HDF5.csv', 'IMAGEN_INSTRUMENT.csv', 'IMAGEN_RUN.csv']
# Keep the result as FU3 (the save=True variant is left commented out).
FU3 = posthoc.to_posthoc(DATA)#, save=True)

In [None]:
# general information of the post hoc dataset: column labels of FU3
FU3.columns

In [None]:
# Spot check: show the row at integer position 1000 of FU3.
FU3.iloc[1000]

#### set the dataset for analysis of prognosis (X:FU3 != y:FU3)

Please refer to <i>read_posthoc()</i> in <i>imagen_posthocloader.py</i>, and load the file

In [None]:
# Read 'IMAGEN_posthoc.csv' via posthoc.read_posthoc; this rebinds FU3.
FU3 = posthoc.read_posthoc('IMAGEN_posthoc.csv')

In [None]:
# Show the rows of FU3 whose 'Dataset' value is 'Holdout'.
FU3.groupby('Dataset').get_group('Holdout')

In [None]:
# Print the pandas summary of FU3 (columns, non-null counts, dtypes).
FU3.info()

In [None]:
# general information of the post hoc dataset: full list of FU3 column labels
print(list(FU3.columns))

In [None]:
# Spot check: print the row at integer position 1000 of the re-read FU3.
print(FU3.iloc[1000])

### 9. Save the mean|SHAP| value

CAUTION: The SHAP values must be generated in advance.

#### load the SHAP

Please refer to <i>load_SHAP()</i> in <i>imagen_posthocloader.py</i>, and load the file

In [1]:
from plot_results_posthoc import *

In [2]:
# Root directory of the IMAGEN data.
# NOTE(review): SHAP_visualization() below is constructed without this value -- confirm
# that it resolves the same location on its own.
DATA_DIR = "/ritter/share/data/IMAGEN"
# Helper object; its load_SHAP / get_holdout_data / read_SHAP methods are used below.
SHAP = SHAP_visualization()

In [4]:
# load the SHAP value
# One file per model family and index 0-6, named "<family><index>_multi.sav".
# Files are read in the order GB, LR, SVM-lin, SVM-rbf, each with ascending index.
shap_runs = range(7)
# GB
(mean_GB0, mean_GB1, mean_GB2, mean_GB3,
 mean_GB4, mean_GB5, mean_GB6) = [
    SHAP.load_SHAP(f"GB{run}_multi.sav") for run in shap_runs
]
# LR
(mean_LR0, mean_LR1, mean_LR2, mean_LR3,
 mean_LR4, mean_LR5, mean_LR6) = [
    SHAP.load_SHAP(f"LR{run}_multi.sav") for run in shap_runs
]
# SVM-lin
(mean_SVM_lin0, mean_SVM_lin1, mean_SVM_lin2, mean_SVM_lin3,
 mean_SVM_lin4, mean_SVM_lin5, mean_SVM_lin6) = [
    SHAP.load_SHAP(f"SVM-lin{run}_multi.sav") for run in shap_runs
]
# SVM-rbf
(mean_SVM_rbf0, mean_SVM_rbf1, mean_SVM_rbf2, mean_SVM_rbf3,
 mean_SVM_rbf4, mean_SVM_rbf5, mean_SVM_rbf6) = [
    SHAP.load_SHAP(f"SVM-rbf{run}_multi.sav") for run in shap_runs
]

In [7]:
# load the holdout data
# NOTE(review): despite its name, holdout_dir holds an .h5 file name, not a directory.
holdout_dir = "newholdout-clean-fu3-espad-fu3-19a-binge-n102.h5"
# Of the three returned values, only ho_X_col_names is used in the cells shown below
# (as the 'Col names' column of DF_SHAP).
ho_X, ho_X_col_names, ho_list = SHAP.get_holdout_data(holdout_dir, group=False)
# Optional shape check, left disabled:
# print(f"Holdout dataset: {ho_X.shape}, {len(ho_X_col_names)}, "
#       f"{ho_list[0].shape}, {ho_list[1].shape}")

In [8]:
# Save the DF_SHAP
# One column per loaded SHAP result, in insertion order, followed by the holdout
# column names under the key 'Col names'.
shap_columns = dict(
    GB0=mean_GB0, GB1=mean_GB1, GB2=mean_GB2, GB3=mean_GB3,
    GB4=mean_GB4, GB5=mean_GB5, GB6=mean_GB6,
    LR0=mean_LR0, LR1=mean_LR1, LR2=mean_LR2, LR3=mean_LR3,
    LR4=mean_LR4, LR5=mean_LR5, LR6=mean_LR6,
    SVM_lin0=mean_SVM_lin0, SVM_lin1=mean_SVM_lin1, SVM_lin2=mean_SVM_lin2,
    SVM_lin3=mean_SVM_lin3, SVM_lin4=mean_SVM_lin4, SVM_lin5=mean_SVM_lin5,
    SVM_lin6=mean_SVM_lin6,
    SVM_rbf0=mean_SVM_rbf0, SVM_rbf1=mean_SVM_rbf1, SVM_rbf2=mean_SVM_rbf2,
    SVM_rbf3=mean_SVM_rbf3, SVM_rbf4=mean_SVM_rbf4, SVM_rbf5=mean_SVM_rbf5,
    SVM_rbf6=mean_SVM_rbf6,
)
# This key contains a space, so it cannot be given as a keyword argument above.
shap_columns['Col names'] = ho_X_col_names
DF_SHAP = pd.DataFrame(shap_columns)

In [13]:
# Write the SHAP table as CSV under DATA_DIR (defined in the config cell of this
# section) instead of repeating the absolute path; the resulting path is unchanged.
# index=False is the documented boolean form of the previous index=None: the row
# index is not written to the file.
DF_SHAP.to_csv(f"{DATA_DIR}/posthoc/explainers/all_mean_SHAP.csv", index=False)

In [14]:
# Display the table that was just written to CSV.
DF_SHAP

Unnamed: 0,GB0,GB1,GB2,GB3,GB4,GB5,GB6,LR0,LR1,LR2,...,SVM_lin5,SVM_lin6,SVM_rbf0,SVM_rbf1,SVM_rbf2,SVM_rbf3,SVM_rbf4,SVM_rbf5,SVM_rbf6,Col names
0,0.000000,0.000000,0.000000,0.000284,0.000000,0.002814,0.000000,0.000588,0.002971,0.006118,...,0.005039,0.008539,0.003686,0.001245,0.002275,0.003853,0.003078,0.001088,0.004667,T1w_cor_bankssts-lh-volume
1,0.000029,0.005127,0.023265,0.001578,0.009637,0.001892,0.005137,0.009167,0.000284,0.009284,...,0.000696,0.003990,0.010510,0.006716,0.008676,0.009784,0.009873,0.009265,0.004333,T1w_cor_caudalanteriorcingulate-lh-volume
2,0.000804,0.012402,0.002216,0.008206,0.000118,0.003980,0.000824,0.009745,0.015833,0.004412,...,0.012431,0.020882,0.005235,0.003284,0.001461,0.002667,0.002902,0.002196,0.001706,T1w_cor_caudalmiddlefrontal-lh-volume
3,0.000000,0.000000,0.000000,0.000000,0.000500,0.002882,0.000794,0.002010,0.001804,0.006029,...,0.001735,0.007392,0.003627,0.001618,0.001657,0.001118,0.001843,0.001265,0.001588,T1w_cor_cuneus-lh-volume
4,0.000667,0.009020,0.001539,0.011284,0.003735,0.000971,0.000000,0.011000,0.011304,0.009333,...,0.001559,0.023402,0.006284,0.008735,0.007157,0.006980,0.006275,0.010402,0.010078,T1w_cor_entorhinal-lh-volume
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
714,0.003461,0.003157,0.004039,0.014971,0.002598,0.006971,0.000000,0.015814,0.002333,0.003147,...,0.012618,0.016029,0.004892,0.007706,0.008108,0.009324,0.007137,0.007196,0.004657,DTI_SS-L_Average
715,0.000755,0.000000,0.002667,0.001314,0.000000,0.000000,0.001373,0.019206,0.001814,0.002157,...,0.010176,0.014039,0.002029,0.001637,0.002775,0.001304,0.001382,0.001863,0.001588,DTI_SS-R_Average
716,0.000000,0.004010,0.000000,0.005167,0.003402,0.001657,0.000000,0.003324,0.010235,0.008578,...,0.019235,0.003941,0.004843,0.001441,0.002657,0.001637,0.008108,0.002137,0.002961,DTI_UNC_Average
717,0.000000,0.002971,0.001686,0.002206,0.000000,0.000627,0.000000,0.001265,0.003088,0.015833,...,0.004020,0.009304,0.006441,0.006549,0.004598,0.004255,0.002657,0.005990,0.003265,DTI_UNC-L_Average


#### read the SHAP value

In [3]:
# Read 'all_mean_SHAP.csv' via SHAP.read_SHAP; this rebinds DF_SHAP.
DF_SHAP = SHAP.read_SHAP('all_mean_SHAP.csv')

In [4]:
# Display the table as read back from the CSV.
DF_SHAP

Unnamed: 0,GB0,GB1,GB2,GB3,GB4,GB5,GB6,LR0,LR1,LR2,...,SVM_lin5,SVM_lin6,SVM_rbf0,SVM_rbf1,SVM_rbf2,SVM_rbf3,SVM_rbf4,SVM_rbf5,SVM_rbf6,Col names
0,0.000000,0.000000,0.000000,0.000284,0.000000,0.002814,0.000000,0.000588,0.002971,0.006118,...,0.005039,0.008539,0.003686,0.001245,0.002275,0.003853,0.003078,0.001088,0.004667,T1w_cor_bankssts-lh-volume
1,0.000029,0.005127,0.023265,0.001578,0.009637,0.001892,0.005137,0.009167,0.000284,0.009284,...,0.000696,0.003990,0.010510,0.006716,0.008676,0.009784,0.009873,0.009265,0.004333,T1w_cor_caudalanteriorcingulate-lh-volume
2,0.000804,0.012402,0.002216,0.008206,0.000118,0.003980,0.000824,0.009745,0.015833,0.004412,...,0.012431,0.020882,0.005235,0.003284,0.001461,0.002667,0.002902,0.002196,0.001706,T1w_cor_caudalmiddlefrontal-lh-volume
3,0.000000,0.000000,0.000000,0.000000,0.000500,0.002882,0.000794,0.002010,0.001804,0.006029,...,0.001735,0.007392,0.003627,0.001618,0.001657,0.001118,0.001843,0.001265,0.001588,T1w_cor_cuneus-lh-volume
4,0.000667,0.009020,0.001539,0.011284,0.003735,0.000971,0.000000,0.011000,0.011304,0.009333,...,0.001559,0.023402,0.006284,0.008735,0.007157,0.006980,0.006275,0.010402,0.010078,T1w_cor_entorhinal-lh-volume
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
714,0.003461,0.003157,0.004039,0.014971,0.002598,0.006971,0.000000,0.015814,0.002333,0.003147,...,0.012618,0.016029,0.004892,0.007706,0.008108,0.009324,0.007137,0.007196,0.004657,DTI_SS-L_Average
715,0.000755,0.000000,0.002667,0.001314,0.000000,0.000000,0.001373,0.019206,0.001814,0.002157,...,0.010176,0.014039,0.002029,0.001637,0.002775,0.001304,0.001382,0.001863,0.001588,DTI_SS-R_Average
716,0.000000,0.004010,0.000000,0.005167,0.003402,0.001657,0.000000,0.003324,0.010235,0.008578,...,0.019235,0.003941,0.004843,0.001441,0.002657,0.001637,0.008108,0.002137,0.002961,DTI_UNC_Average
717,0.000000,0.002971,0.001686,0.002206,0.000000,0.000627,0.000000,0.001265,0.003088,0.015833,...,0.004020,0.009304,0.006441,0.006549,0.004598,0.004255,0.002657,0.005990,0.003265,DTI_UNC-L_Average
