### Generate the post hoc dataset for analysis

In [1]:
import math
import time
import parmap
import pickle
import multiprocessing
from imagen_posthocloader import *
import matplotlib.pyplot as plt
import seaborn as sns
from plot_results_posthoc import *
%matplotlib inline

In [2]:
# Use roughly one third of the machine's logical CPUs for the multiprocessing
# steps further down (num_cores is what the parmap calls pass as pm_processes).
available_cores = multiprocessing.cpu_count()
print(f'Available CPU cores: {available_cores}')
# Clamp to at least one worker: on a host with fewer than three cores the
# plain floor division would give 0, which is not a usable worker count.
num_cores = max(1, math.floor(available_cores/3))
print(f'Set CPU cores: {num_cores}')

Available CPU cores: 48
Set CPU cores: 16


In [3]:
# Root directory of the IMAGEN data (absolute path).
DATA_DIR = "/ritter/share/data/IMAGEN"
# Loader object whose set_*/to_*/read_*/get_* methods are used throughout this
# notebook; IMAGEN_posthoc is presumably provided by the star import of
# imagen_posthocloader (TODO confirm).
posthoc = IMAGEN_posthoc()

### 1. Load the [INSTRUMENT](https://imagen-europe.com/resources/imagen-dataset/documentation/) data

#### collect the selected instrument files from IMAGEN_RAW and store in posthoc

Please refer to <i>set_INSTRUMENT()</i> in <i>imagen_posthocloader.py</i>, and load the file and save it as <b> all_*.csv</b> 

In [None]:
# Instrument
# Each set_INSTRUMENT() call below loads one instrument by its short name.
# The trailing "#, save=True)" keeps the saving variant of the call at hand
# but disabled. Comment-only entries (e.g. "# DAWBA") name instruments that
# are not loaded in this cell.
# NOTE(review): PBQ and GEN are loaded here but are not part of inst_list
# in section 4.
## Demographic
PBQ   = posthoc.set_INSTRUMENT('PBQ')#, save=True)
GEN   = posthoc.set_INSTRUMENT('GEN')#, save=True)
LEQ   = posthoc.set_INSTRUMENT('LEQ')#, save=True)
# DAWBA
# CANTAB
NEO   = posthoc.set_INSTRUMENT('NEO')#, save=True)
SURPS = posthoc.set_INSTRUMENT('SURPS')#, save=True)
TCI = posthoc.set_INSTRUMENT('TCI')#, save=True)
BSI = posthoc.set_INSTRUMENT('BSI')#, save=True)
# KIRBY
# BIS-11
# CSI
# PHQ
# CES-D
# ANXDX
# CAPE
# SDQ
# IRI
# RRS
# PALP
## Social
# CTQ   = posthoc.set_INSTRUMENT('CTQ')#, save=True)
CTQ_MD = posthoc.set_INSTRUMENT('CTQ_MD')#, save=True)
CTS   = posthoc.set_INSTRUMENT('CTS')#, save=True)
PANAS = posthoc.set_INSTRUMENT('PANAS')#, save=True)
# MINI5
## Substance Use
MAST = posthoc.set_INSTRUMENT('MAST')#, save=True)
FTND  = posthoc.set_INSTRUMENT('FTND')#, save=True)
# DAST
# SCID
# RAPI
# DMQ
# Bully Questionnaire
# ESPAD
# TLFB
# AUDIT

### 2. Load the HDF5 data

#### collect the HDF5 files from h5files and save in posthoc

Please refer to <i>set_HDF5()</i> in <i>imagen_posthocloader.py</i>, and load the file and save it as <b> all_*.csv</b>

In [None]:
# Hdf5
# Load the 'Binge' HDF5 data through set_HDF5(); saving is disabled
# (see the commented-out save=True).
BINGE = posthoc.set_HDF5('Binge')#, save=True)

In [None]:
# general information of the hdf5
BINGE.columns

### 3. Load the RUN data

#### collect the RUN file from results and save in posthoc

Please refer to <i>set_RUN()</i> in <i>imagen_posthocloader.py</i>, and load the file and save it as <b> all_*.csv</b>

In [None]:
# RUN
# Load the run results CSV through set_RUN(); a relative path is passed, and
# saving is disabled (see the commented-out save=True).
RUN = posthoc.set_RUN('../../results/holdout_all-tp-clean_run.csv')#, save=True)

In [None]:
# general information of the RUN data: its column labels
RUN.columns

### 4. Save the INSTRUMENT data

#### collect the instrument files from posthoc into one file

Please refer to <i>to_INSTRUMENT()</i> in <i>imagen_posthocloader.py</i>, and load the file

In [None]:
# collect the instrument tables loaded in section 1, grouped by category
inst_list = [LEQ,                      # Demographic
             NEO, SURPS, TCI, BSI,     # Psychological
             CTQ_MD, CTS, PANAS,       # Social
             MAST, FTND]               # Substance use
# combine them through to_INSTRUMENT(); saving is disabled
# (see the commented-out save=True)
INST = posthoc.to_INSTRUMENT(inst_list)#, save=True)

In [None]:
# general information of the instrument
# Column labels from the third column onward; the first two are skipped
# (presumably the ID and Session keys - TODO confirm). The count is printed
# together with the labels.
col_INST = list(INST.columns[2:].values)
print(len(col_INST), col_INST)

### 5. Read the INSTRUMENT data

#### read the instrument files from posthoc into one file

Please refer to <i>read_INSTRUMENT()</i> in <i>imagen_posthocloader.py</i>, and load the file

In [None]:
# Read 'IMAGEN_INSTRUMENT.csv' through read_INSTRUMENT(); this rebinds INST,
# replacing the object returned by to_INSTRUMENT() in section 4.
INST = posthoc.read_INSTRUMENT('IMAGEN_INSTRUMENT.csv')

In [None]:
# general information of the instrument
INST.columns

### 6. Read the HDF5 data

#### collect the hdf5 files from posthoc into one file

Please refer to <i>to_HDF5()</i> in <i>imagen_posthocloader.py</i>, and load the file and save it as <b> all_*.csv</b>

In [None]:
# Build the HDF5 table from 'all_Binge.csv' through to_HDF5(); saving is
# disabled (see the commented-out save=True).
HDF5 = posthoc.to_HDF5('all_Binge.csv')#, save=True)

#### read the HDF5 files from posthoc into one file

Please refer to <i>read_HDF5()</i> in <i>imagen_posthocloader.py</i>, and load the file

In [None]:
# Read 'IMAGEN_HDF5.csv' through read_HDF5(); this rebinds HDF5.
HDF5 = posthoc.read_HDF5('IMAGEN_HDF5.csv')

In [None]:
# general information of the hdf5
HDF5.columns

### 7. Read the RUN data

#### select the ROI of the RUN file from posthoc into one file

Please refer to <i>to_RUN()</i> in <i>imagen_posthocloader.py</i>, and load the file and save it as <b> all_*.csv</b>

In [None]:
RUN.columns

In [None]:
# Columns of the RUN table that are handed to to_RUN(), assembled from three
# groups so each group can be read on its own.
run_id_cols = ['ID','Session','Trial','dataset','io','technique','Model']
run_prob_cols = ['TP prob','TN prob','FP prob','FN prob','T prob','F prob','Prob']
run_label_cols = ['Predict TF','Model PN','Label PN','true_label','prediction']
COL = run_id_cols + run_prob_cols + run_label_cols

In [None]:
# Build the RUN table from 'all_RUN.csv' restricted to the columns in COL,
# through to_RUN(); saving is disabled (see the commented-out save = True).
RUN = posthoc.to_RUN('all_RUN.csv', COL)#, save = True)

In [None]:
# general information of the run
RUN.columns

#### read the RUN files from posthoc into one file

Please refer to <i>read_RUN()</i> in <i>imagen_posthocloader.py</i>, and load the file

In [None]:
# Read 'IMAGEN_RUN.csv' through read_RUN(); this rebinds RUN.
RUN = posthoc.read_RUN('IMAGEN_RUN.csv')

In [None]:
# general information of the run
RUN.columns

### 8. Save the post hoc dataset

#### set the dataset for analysis of diagnosis (X:FU3 == y:FU3)

Please refer to <i>to_posthoc()</i> in <i>imagen_posthocloader.py</i>, and load the file and save it as <b> all_*.csv</b>

In [None]:
# File names of the three tables (HDF5, instrument, run) passed to to_posthoc().
DATA = ['IMAGEN_HDF5.csv', 'IMAGEN_INSTRUMENT.csv', 'IMAGEN_RUN.csv']
# The result is kept as FU3; saving is disabled (see the commented-out save=True).
FU3 = posthoc.to_posthoc(DATA)#, save=True)

In [None]:
# general information of the post hoc dataset: its column labels
FU3.columns

In [None]:
# Inspect one example row, taken by position (1000); this fails with an
# IndexError if the table has fewer than 1001 rows.
FU3.iloc[1000]

#### set the dataset for analysis of prognosis (X:FU3 != y:FU3)

Please refer to <i>read_posthoc()</i> in <i>imagen_posthocloader.py</i>, and load the file

In [None]:
# Read 'IMAGEN_posthoc.csv' through read_posthoc(); this rebinds FU3.
FU3 = posthoc.read_posthoc('IMAGEN_posthoc.csv')

In [None]:
# Show only the rows whose 'Dataset' value is 'Holdout'.
FU3.groupby('Dataset').get_group('Holdout')

In [None]:
FU3.info()

In [None]:
# general information of the post hoc dataset: all column labels as a plain list
print(list(FU3.columns))

In [None]:
# Print one example row, taken by position (1000), as plain text.
print(FU3.iloc[1000])

### 9. Get the SHAP value

(To do) Merge the commands below into a single method, <i>to_SHAP()</i>, in <i>imagen_posthocloader.py</i>

#### Diagnosis: X:FU3 to y:FU3 in holdout set

<b> Load the data and the model </b>

In [None]:
# Models for the FU3 -> FU3 (diagnosis) setting. The argument is a wildcard
# pattern over the results directories; how it is expanded is up to get_model().
MODELS = posthoc.get_model("../../results/newlbls-clean-fu3-espad-fu3-19a-binge-*/*/")

In [None]:
# Despite its name, holdout_dir holds the file name of the FU3 holdout .h5 file.
holdout_dir = "newholdout-clean-fu3-espad-fu3-19a-binge-n102.h5"
# load the holdout data
ho_X, ho_X_col_names, ho_list = posthoc.get_holdout_data(holdout_dir, group=True)
# optional shape check, kept disabled:
# print(f"Holdout dataset: {ho_X.shape}, {len(ho_X_col_names)}, "
#       f"{ho_list[0].shape}, {ho_list[1].shape}")

In [None]:
# generate the SHAP input list of the holdout ONLY SVM-rbf
# (the model selection is passed to get_list() as the string "SVM-RBF")
ho_INPUT = posthoc.get_list(MODELS, ho_X, "SVM-RBF")
# disabled: tr_INPUT is not defined by any active cell of this notebook
# print(f"Number of training set: {len(tr_INPUT)}\n\n" # , One example: {tr_INPUT[0:1]}\n\n"
print(f"Number of holdout set: {len(ho_INPUT)}")#, {ho_INPUT}")

<b> Compute the SHAP value </b>

In [None]:
# # One by one
# INPUT = tr_INPUT[0]
# start_time = time.time()
# _ = posthoc.get_SHAP(INPUT, 'FU3')
# print("--- %s seconds ---" % (time.time() - start_time))

In [None]:
# Multi processing
INPUT = ho_INPUT
start_time = time.time()
# NOTE: the parmap call below is commented out, so no SHAP values are computed
# in this cell and the elapsed time printed afterwards covers no work.
# Un-comment it to run get_SHAP over INPUT with num_cores processes.
# _ = parmap.map(posthoc.get_SHAP, INPUT, 'FU3', pm_pbar=True, pm_processes=num_cores)
print("--- %s seconds ---" % (time.time() - start_time))

#### Prognosis: X:FU2 to y:FU3 in holdout set

<b> Load the data and the model </b>

In [None]:
# Models for the FU2 -> FU3 (prognosis) setting; this rebinds MODELS. The
# argument is a wildcard pattern whose expansion is up to get_model().
MODELS = posthoc.get_model("../../results/newlbls-clean-fu2-espad-fu3-19a-binge-*/*/")

In [None]:
# Despite its name, holdout_dir holds the file name of the FU2 holdout .h5 file.
holdout_dir = "newholdout-clean-fu2-espad-fu3-19a-binge-n102.h5"
# load the holdout data (rebinds ho_X, ho_X_col_names and ho_list)
ho_X, ho_X_col_names, ho_list = posthoc.get_holdout_data(holdout_dir, group=True)
# optional shape check, kept disabled:
# print(f"Holdout dataset: {ho_X.shape}, {len(ho_X_col_names)}, "
#       f"{ho_list[0].shape}, {ho_list[1].shape}")

In [None]:
# generate the SHAP input list of the holdout ONLY SVM-rbf
# (the model selection is passed to get_list() as the string "SVM-RBF")
ho_INPUT = posthoc.get_list(MODELS, ho_X, "SVM-RBF")
# disabled: tr_INPUT is not defined by any active cell of this notebook
# print(f"Number of training set: {len(tr_INPUT)}\n\n" # , One example: {tr_INPUT[0:1]}\n\n"
print(f"Number of holdout set: {len(ho_INPUT)}")#, {ho_INPUT}")

<b> Compute the SHAP value </b>

In [None]:
# # One by one
# INPUT = tr_INPUT[0]
# start_time = time.time()
# _ = posthoc.get_SHAP(INPUT, 'FU2')
# print("--- %s seconds ---" % (time.time() - start_time))

In [None]:
# Multi processing
INPUT = ho_INPUT
start_time = time.time()
# NOTE: the parmap call below is commented out, so no SHAP values are computed
# in this cell and the elapsed time printed afterwards covers no work.
# Un-comment it to run get_SHAP over INPUT with num_cores processes.
# _ = parmap.map(posthoc.get_SHAP, INPUT, 'FU2', pm_pbar=True, pm_processes=num_cores)
print("--- %s seconds ---" % (time.time() - start_time))

#### Prognosis: X:BL to y:FU3 in holdout set

<b> Load the data and the model </b>

In [None]:
# Models for the BL -> FU3 (prognosis) setting; this rebinds MODELS. The
# argument is a wildcard pattern whose expansion is up to get_model().
MODELS = posthoc.get_model("../../results/newlbls-clean-bl-espad-fu3-19a-binge-*/*/")

In [None]:
# Despite its name, holdout_dir holds the file name of the BL holdout .h5 file.
holdout_dir = "newholdout-clean-bl-espad-fu3-19a-binge-n102.h5"
# load the holdout data (rebinds ho_X, ho_X_col_names and ho_list)
ho_X, ho_X_col_names, ho_list = posthoc.get_holdout_data(holdout_dir, group=True)
# optional shape check, kept disabled:
# print(f"Holdout dataset: {ho_X.shape}, {len(ho_X_col_names)}, "
#       f"{ho_list[0].shape}, {ho_list[1].shape}")

In [None]:
# generate the SHAP input list of the holdout ONLY SVM-rbf
# (the model selection is passed to get_list() as the string "SVM-RBF")
ho_INPUT = posthoc.get_list(MODELS, ho_X, "SVM-RBF")
# disabled: tr_INPUT is not defined by any active cell of this notebook
# print(f"Number of training set: {len(tr_INPUT)}\n\n" # , One example: {tr_INPUT[0:1]}\n\n"
print(f"Number of holdout set: {len(ho_INPUT)}")#, {ho_INPUT}")

<b> Compute the SHAP value </b>

In [None]:
# # One by one
# INPUT = tr_INPUT[0]
# start_time = time.time()
# _ = posthoc.get_SHAP(INPUT, 'BL')
# print("--- %s seconds ---" % (time.time() - start_time))

In [None]:
# Multi processing
INPUT = ho_INPUT
start_time = time.time()
# NOTE: the parmap call below is commented out, so no SHAP values are computed
# in this cell and the elapsed time printed afterwards covers no work.
# Un-comment it to run get_SHAP over INPUT with num_cores processes.
# _ = parmap.map(posthoc.get_SHAP, INPUT, 'BL', pm_pbar=True, pm_processes=num_cores)
print("--- %s seconds ---" % (time.time() - start_time))

#### Diagnosis: X:FU3 to y:FU3 in training set

(to do)

In [None]:
# MODELS = posthoc.get_model("../../results/newlbls-clean-fu3-espad-fu3-19a-binge-*/*/")
# train_dir = "newlbls-clean-fu3-espad-fu3-19a-binge-n650.h5"
# # load the training data
# tr_X, tr_X_col_names, tr_list = SHAP.get_train_data(train_dir, group=True)
# print(f"Training dataset: {tr_X.shape}, {len(tr_X_col_names)}, {tr_list[0].shape}")
# # generate the SHAP input list of the training
# tr_INPUT = SHAP.get_list(MODELS, tr_X)

### 10. Save the mean|SHAP| value

CAUTION: The SHAP values must be generated in advance (see section 9)

#### load the feature derivatives and mean, std |SHAP value|

Please refer to <i>to_abs_SHAP()</i> in <i>imagen_posthocloader.py</i>, and load the file

In [5]:
def _sav_files(model, session, count=7):
    """Return the file names '<model><i>_<session>.sav' for i = 0 .. count-1."""
    return [f"{model}{i}_{session}.sav" for i in range(count)]

# FU3: holdout file plus the seven .sav names of each model type
H5_FU3 = "newholdout-clean-fu3-espad-fu3-19a-binge-n102.h5"
GB_FU3 = _sav_files("GB", "FU3")
LR_FU3 = _sav_files("LR", "FU3")
SVM_lin_FU3 = _sav_files("SVM-lin", "FU3")
SVM_rbf_FU3 = _sav_files("SVM-rbf", "FU3")

# FU2: same layout
H5_FU2 = "newholdout-clean-fu2-espad-fu3-19a-binge-n102.h5"
GB_FU2 = _sav_files("GB", "FU2")
LR_FU2 = _sav_files("LR", "FU2")
SVM_lin_FU2 = _sav_files("SVM-lin", "FU2")
SVM_rbf_FU2 = _sav_files("SVM-rbf", "FU2")

# BL: same layout
H5_BL = "newholdout-clean-bl-espad-fu3-19a-binge-n102.h5"
GB_BL = _sav_files("GB", "BL")
LR_BL = _sav_files("LR", "BL")
SVM_lin_BL = _sav_files("SVM-lin", "BL")
SVM_rbf_BL = _sav_files("SVM-rbf", "BL")

In [6]:
# All FU3 .sav file names in one flat list, in the order GB, LR, SVM-lin, SVM-rbf.
SHAP = [*GB_FU3, *LR_FU3, *SVM_lin_FU3, *SVM_rbf_FU3]

In [7]:
# Build the per-feature table for the FU3 holdout file and the .sav files
# listed in SHAP, through to_abs_SHAP(). In the output below it carries one
# '<model> mean' and one '<model> std' column per .sav file.
DF = posthoc.to_abs_SHAP(H5_FU3, SHAP)

In [8]:
DF

Unnamed: 0,Feature name,Modality,Type,Lobe Region,Value,GB0_FU3 mean,GB1_FU3 mean,GB2_FU3 mean,GB3_FU3 mean,GB4_FU3 mean,...,SVM-lin4_FU3 std,SVM-lin5_FU3 std,SVM-lin6_FU3 std,SVM-rbf0_FU3 std,SVM-rbf1_FU3 std,SVM-rbf2_FU3 std,SVM-rbf3_FU3 std,SVM-rbf4_FU3 std,SVM-rbf5_FU3 std,SVM-rbf6_FU3 std
0,T1w_cor_bankssts-lh-volume,T1w,Cortical region,Temporal lobe,volume,0.000000,0.000000,0.000000,0.000324,0.000000,...,0.005628,0.005180,0.011352,0.004150,0.002087,0.003004,0.007656,0.003248,0.001739,0.005015
1,T1w_cor_caudalanteriorcingulate-lh-volume,T1w,Cortical region,Cingulate cortex,volume,0.000029,0.004804,0.022804,0.000912,0.009578,...,0.004877,0.000929,0.003446,0.017341,0.008092,0.011282,0.009883,0.016453,0.012913,0.007152
2,T1w_cor_caudalmiddlefrontal-lh-volume,T1w,Cortical region,Frontal lobe,volume,0.000990,0.013353,0.001863,0.008039,0.000069,...,0.005979,0.011220,0.018571,0.005539,0.003332,0.002516,0.003159,0.003832,0.003984,0.002678
3,T1w_cor_cuneus-lh-volume,T1w,Cortical region,Occipital lobe,volume,0.000000,0.000000,0.000000,0.000000,0.000431,...,0.003008,0.002010,0.007099,0.004388,0.002405,0.001897,0.001729,0.003144,0.001694,0.002033
4,T1w_cor_entorhinal-lh-volume,T1w,Cortical region,Temporal lobe,volume,0.000431,0.011343,0.001127,0.012353,0.002863,...,0.012446,0.002098,0.023233,0.006756,0.011312,0.007273,0.007034,0.008840,0.015530,0.011715
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
714,DTI_SS-L_Average,DTI,DTI region,DTI region,Average,0.002892,0.003020,0.003392,0.015284,0.002078,...,0.015199,0.010344,0.014969,0.004702,0.006825,0.007736,0.010568,0.007395,0.007973,0.004931
715,DTI_SS-R_Average,DTI,DTI region,DTI region,Average,0.000833,0.000000,0.001676,0.001402,0.000000,...,0.002606,0.008416,0.012971,0.004373,0.002351,0.003618,0.001771,0.001953,0.002019,0.001964
716,DTI_UNC_Average,DTI,DTI region,DTI region,Average,0.000000,0.003735,0.000000,0.006078,0.003843,...,0.013635,0.015566,0.003089,0.004765,0.001930,0.002788,0.002526,0.007740,0.002295,0.003344
717,DTI_UNC-L_Average,DTI,DTI region,DTI region,Average,0.000000,0.002657,0.001569,0.001931,0.000000,...,0.025696,0.004271,0.009431,0.007154,0.007571,0.005788,0.005348,0.003953,0.007728,0.003741


#### load the mean of mean, std |SHAP value|

Please refer to <i>to_mofm_SHAP()</i> in <i>imagen_posthocloader.py</i>, and load the file

In [9]:
# One sub-list of FU3 .sav file names per model type (GB, LR, SVM-lin, SVM-rbf),
# passed to to_mofm_SHAP() in the next cell.
LIST = [GB_FU3, LR_FU3, SVM_lin_FU3, SVM_rbf_FU3]

In [10]:
# Extend DF through to_mofm_SHAP(); in the output below the result carries
# additional '<model>0_FU3_All mean' / '..._All std' columns, one pair per
# model type. Saving is disabled (see the commented-out save=True).
DF2 = posthoc.to_mofm_SHAP(DF, LIST)#, save=True)

In [11]:
DF2

Unnamed: 0,Feature name,Modality,Type,Lobe Region,Value,GB0_FU3 mean,GB1_FU3 mean,GB2_FU3 mean,GB3_FU3 mean,GB4_FU3 mean,...,SVM-rbf5_FU3 std,SVM-rbf6_FU3 std,GB0_FU3_All mean,LR0_FU3_All mean,SVM-lin0_FU3_All mean,SVM-rbf0_FU3_All mean,GB0_FU3_All std,LR0_FU3_All std,SVM-lin0_FU3_All std,SVM-rbf0_FU3_All std
0,T1w_cor_bankssts-lh-volume,T1w,Cortical region,Temporal lobe,volume,0.000000,0.000000,0.000000,0.000324,0.000000,...,0.001739,0.005015,0.000469,0.003018,0.004971,0.002863,0.001553,0.001801,0.003335,0.002024
1,T1w_cor_caudalanteriorcingulate-lh-volume,T1w,Cortical region,Cingulate cortex,volume,0.000029,0.004804,0.022804,0.000912,0.009578,...,0.012913,0.007152,0.006301,0.006811,0.005057,0.009056,0.009501,0.005390,0.004223,0.003934
2,T1w_cor_caudalmiddlefrontal-lh-volume,T1w,Cortical region,Frontal lobe,volume,0.000990,0.013353,0.001863,0.008039,0.000069,...,0.003984,0.002678,0.004195,0.009511,0.014706,0.002846,0.004996,0.005132,0.007293,0.001021
3,T1w_cor_cuneus-lh-volume,T1w,Cortical region,Occipital lobe,volume,0.000000,0.000000,0.000000,0.000000,0.000431,...,0.001694,0.002033,0.000709,0.003406,0.003602,0.001993,0.001791,0.002395,0.002469,0.000984
4,T1w_cor_entorhinal-lh-volume,T1w,Cortical region,Temporal lobe,volume,0.000431,0.011343,0.001127,0.012353,0.002863,...,0.015530,0.011715,0.004127,0.010252,0.012169,0.008310,0.004948,0.003315,0.006938,0.003240
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
714,DTI_SS-L_Average,DTI,DTI region,DTI region,Average,0.002892,0.003020,0.003392,0.015284,0.002078,...,0.007973,0.004931,0.004831,0.007436,0.012197,0.006751,0.004420,0.005659,0.005040,0.001991
715,DTI_SS-R_Average,DTI,DTI region,DTI region,Average,0.000833,0.000000,0.001676,0.001402,0.000000,...,0.002019,0.001964,0.000789,0.006779,0.006982,0.001930,0.002247,0.005205,0.003849,0.001007
716,DTI_UNC_Average,DTI,DTI region,DTI region,Average,0.000000,0.003735,0.000000,0.006078,0.003843,...,0.002295,0.003344,0.002343,0.009113,0.011517,0.003301,0.004763,0.005155,0.004529,0.002035
717,DTI_UNC-L_Average,DTI,DTI region,DTI region,Average,0.000000,0.002657,0.001569,0.001931,0.000000,...,0.007728,0.003741,0.000966,0.007592,0.010877,0.004700,0.001693,0.005433,0.007632,0.001657


#### read the SHAP value

In [12]:
# Read 'all_mofm_abs_SHAP.csv' through read_SHAP().
DF_SHAP = posthoc.read_SHAP('all_mofm_abs_SHAP.csv')

In [13]:
DF_SHAP.columns

Index(['Feature name', 'Modality', 'Type', 'Lobe Region', 'Value',
       'GB0_FU3 mean', 'GB1_FU3 mean', 'GB2_FU3 mean', 'GB3_FU3 mean',
       'GB4_FU3 mean', 'GB5_FU3 mean', 'GB6_FU3 mean', 'LR0_FU3 mean',
       'LR1_FU3 mean', 'LR2_FU3 mean', 'LR3_FU3 mean', 'LR4_FU3 mean',
       'LR5_FU3 mean', 'LR6_FU3 mean', 'SVM-lin0_FU3 mean',
       'SVM-lin1_FU3 mean', 'SVM-lin2_FU3 mean', 'SVM-lin3_FU3 mean',
       'SVM-lin4_FU3 mean', 'SVM-lin5_FU3 mean', 'SVM-lin6_FU3 mean',
       'SVM-rbf0_FU3 mean', 'SVM-rbf1_FU3 mean', 'SVM-rbf2_FU3 mean',
       'SVM-rbf3_FU3 mean', 'SVM-rbf4_FU3 mean', 'SVM-rbf5_FU3 mean',
       'SVM-rbf6_FU3 mean', 'GB0_FU3 std', 'GB1_FU3 std', 'GB2_FU3 std',
       'GB3_FU3 std', 'GB4_FU3 std', 'GB5_FU3 std', 'GB6_FU3 std',
       'LR0_FU3 std', 'LR1_FU3 std', 'LR2_FU3 std', 'LR3_FU3 std',
       'LR4_FU3 std', 'LR5_FU3 std', 'LR6_FU3 std', 'SVM-lin0_FU3 std',
       'SVM-lin1_FU3 std', 'SVM-lin2_FU3 std', 'SVM-lin3_FU3 std',
       'SVM-lin4_FU3 std', 'SVM-li

In [14]:
DF_SHAP

Unnamed: 0,Feature name,Modality,Type,Lobe Region,Value,GB0_FU3 mean,GB1_FU3 mean,GB2_FU3 mean,GB3_FU3 mean,GB4_FU3 mean,...,SVM-rbf5_FU3 std,SVM-rbf6_FU3 std,GB0_FU3_All mean,LR0_FU3_All mean,SVM-lin0_FU3_All mean,SVM-rbf0_FU3_All mean,GB0_FU3_All std,LR0_FU3_All std,SVM-lin0_FU3_All std,SVM-rbf0_FU3_All std
0,T1w_cor_bankssts-lh-volume,T1w,Cortical region,Temporal lobe,volume,0.000000,0.000000,0.000000,0.000324,0.000000,...,0.001739,0.005015,0.000469,0.003018,0.004971,0.002863,0.001553,0.001801,0.003335,0.002024
1,T1w_cor_caudalanteriorcingulate-lh-volume,T1w,Cortical region,Cingulate cortex,volume,0.000029,0.004804,0.022804,0.000912,0.009578,...,0.012913,0.007152,0.006301,0.006811,0.005057,0.009056,0.009501,0.005390,0.004223,0.003934
2,T1w_cor_caudalmiddlefrontal-lh-volume,T1w,Cortical region,Frontal lobe,volume,0.000990,0.013353,0.001863,0.008039,0.000069,...,0.003984,0.002678,0.004195,0.009511,0.014706,0.002846,0.004996,0.005132,0.007293,0.001021
3,T1w_cor_cuneus-lh-volume,T1w,Cortical region,Occipital lobe,volume,0.000000,0.000000,0.000000,0.000000,0.000431,...,0.001694,0.002033,0.000709,0.003406,0.003602,0.001993,0.001791,0.002395,0.002469,0.000984
4,T1w_cor_entorhinal-lh-volume,T1w,Cortical region,Temporal lobe,volume,0.000431,0.011343,0.001127,0.012353,0.002863,...,0.015530,0.011715,0.004127,0.010252,0.012169,0.008310,0.004948,0.003315,0.006938,0.003240
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
714,DTI_SS-L_Average,DTI,DTI region,DTI region,Average,0.002892,0.003020,0.003392,0.015284,0.002078,...,0.007973,0.004931,0.004831,0.007436,0.012197,0.006751,0.004420,0.005659,0.005040,0.001991
715,DTI_SS-R_Average,DTI,DTI region,DTI region,Average,0.000833,0.000000,0.001676,0.001402,0.000000,...,0.002019,0.001964,0.000789,0.006779,0.006982,0.001930,0.002247,0.005205,0.003849,0.001007
716,DTI_UNC_Average,DTI,DTI region,DTI region,Average,0.000000,0.003735,0.000000,0.006078,0.003843,...,0.002295,0.003344,0.002343,0.009113,0.011517,0.003301,0.004763,0.005155,0.004529,0.002035
717,DTI_UNC-L_Average,DTI,DTI region,DTI region,Average,0.000000,0.002657,0.001569,0.001931,0.000000,...,0.007728,0.003741,0.000966,0.007592,0.010877,0.004700,0.001693,0.005433,0.007632,0.001657


#### sorted SHAP in SVM-rbf

(to do) Turn into methods

In [17]:
# SVM rbf
# Order the features by their grouping columns (ascending) and, within each
# group, by the SVM-rbf '_All mean' column (descending).
group_keys = ['Modality','Type','Lobe Region','Value']
rank_key = 'SVM-rbf0_FU3_All mean'
DF_SHAP2 = DF_SHAP.sort_values(
    by=group_keys + [rank_key],
    ascending=[True] * len(group_keys) + [False],
)

In [19]:
# Work on a copy of the sorted table so DF_SHAP2 stays unchanged.
# NOTE: this rebinds DF, which earlier held the result of to_abs_SHAP().
DF = DF_SHAP2.copy()

In [21]:
def _ranked_by_mean(frame, model):
    """Return [feature name, mean, std] rows for `model`, largest mean first.

    `model` is the column prefix (e.g. 'SVM-rbf0_FU3'); the values are read
    from the '<model> mean' and '<model> std' columns of `frame`.
    """
    triples = zip(frame['Feature name'], frame[f'{model} mean'], frame[f'{model} std'])
    # sorted() is stable, so rows with equal means keep the order they have in frame
    return sorted((list(triple) for triple in triples), key=lambda row: -row[1])

# one ranking per SVM-rbf model (0..6)
rbf0, rbf1, rbf2, rbf3, rbf4, rbf5, rbf6 = (
    _ranked_by_mean(DF, f'SVM-rbf{model_idx}_FU3') for model_idx in range(7)
)

In [22]:
def _field(rows, position):
    """Return the element at `position` of every row, in row order."""
    return [row[position] for row in rows]

# Split each ranking into parallel lists: position 0 = name, 1 = mean, 2 = std.
rbf0_name, rbf0_mean, rbf0_std = (_field(rbf0, pos) for pos in range(3))
rbf1_name, rbf1_mean, rbf1_std = (_field(rbf1, pos) for pos in range(3))
rbf2_name, rbf2_mean, rbf2_std = (_field(rbf2, pos) for pos in range(3))
rbf3_name, rbf3_mean, rbf3_std = (_field(rbf3, pos) for pos in range(3))
rbf4_name, rbf4_mean, rbf4_std = (_field(rbf4, pos) for pos in range(3))
rbf5_name, rbf5_mean, rbf5_std = (_field(rbf5, pos) for pos in range(3))
rbf6_name, rbf6_mean, rbf6_std = (_field(rbf6, pos) for pos in range(3))

In [23]:
# Append the per-model rankings to DF: first every 'name' column (rbf0..rbf6),
# then every 'mean' column, then every 'std' column. This is the same column
# order as before.
# The lists are assigned by position, so row i of a 'sorted ...' column holds
# the i-th ranked feature of that model, not the feature named in that row's
# 'Feature name' column.
# Fix: 'sorted SVM rbf5 name' and 'sorted SVM rbf6 name' used to be filled from
# rbf3_name and rbf4_name (copy-paste slip). They now use rbf5_name and
# rbf6_name, matching their mean/std columns. Tables exported from DF before
# this fix carry the wrong names in those two columns.
sorted_rbf_columns = (
    ('name', [rbf0_name, rbf1_name, rbf2_name, rbf3_name, rbf4_name, rbf5_name, rbf6_name]),
    ('mean', [rbf0_mean, rbf1_mean, rbf2_mean, rbf3_mean, rbf4_mean, rbf5_mean, rbf6_mean]),
    ('std', [rbf0_std, rbf1_std, rbf2_std, rbf3_std, rbf4_std, rbf5_std, rbf6_std]),
)
for stat, per_model_values in sorted_rbf_columns:
    # the enumerate index is the model number, so a column can only receive
    # the list that belongs to its own model
    for model_idx, values in enumerate(per_model_values):
        DF[f'sorted SVM rbf{model_idx} {stat}'] = values

In [25]:
DF

Unnamed: 0,Feature name,Modality,Type,Lobe Region,Value,GB0_FU3 mean,GB1_FU3 mean,GB2_FU3 mean,GB3_FU3 mean,GB4_FU3 mean,...,sorted SVM rbf4 mean,sorted SVM rbf5 mean,sorted SVM rbf6 mean,sorted SVM rbf0 std,sorted SVM rbf1 std,sorted SVM rbf2 std,sorted SVM rbf3 std,sorted SVM rbf4 std,sorted SVM rbf5 std,sorted SVM rbf6 std
703,DTI_SCC_Average,DTI,DTI region,DTI region,Average,0.057137,0.029000,0.057843,0.000608,0.023892,...,0.018559,0.017451,0.018010,0.016507,0.016984,0.018709,0.016280,0.019121,0.016651,0.018240
702,DTI_RLIC-R_Average,DTI,DTI region,DTI region,Average,0.011225,0.010294,0.023490,0.033892,0.008029,...,0.017569,0.017304,0.015461,0.013869,0.015391,0.014480,0.015381,0.022063,0.015799,0.014638
700,DTI_RLIC_Average,DTI,DTI region,DTI region,Average,0.012275,0.000000,0.012676,0.004353,0.001461,...,0.017127,0.017284,0.013480,0.011296,0.013540,0.013947,0.014691,0.016430,0.018523,0.013981
669,DTI_CGH-L_Average,DTI,DTI region,DTI region,Average,0.011471,0.023020,0.005010,0.026363,0.001716,...,0.017118,0.016137,0.012539,0.017646,0.011178,0.013604,0.015000,0.014036,0.016305,0.011272
701,DTI_RLIC-L_Average,DTI,DTI region,DTI region,Average,0.022284,0.002088,0.028637,0.053147,0.015431,...,0.016892,0.015745,0.012490,0.013309,0.013978,0.013177,0.015385,0.014214,0.019404,0.012489
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
100,T1w_subcor_5th-Ventricle_volume,T1w,Subcortical region,Subcortical region,volume,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
102,T1w_subcor_Left-WM-hypointensities_volume,T1w,Subcortical region,Subcortical region,volume,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
103,T1w_subcor_Right-WM-hypointensities_volume,T1w,Subcortical region,Subcortical region,volume,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
105,T1w_subcor_Left-non-WM-hypointensities_volume,T1w,Subcortical region,Subcortical region,volume,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000


In [27]:
DF.columns

Index(['Feature name', 'Modality', 'Type', 'Lobe Region', 'Value',
       'GB0_FU3 mean', 'GB1_FU3 mean', 'GB2_FU3 mean', 'GB3_FU3 mean',
       'GB4_FU3 mean', 'GB5_FU3 mean', 'GB6_FU3 mean', 'LR0_FU3 mean',
       'LR1_FU3 mean', 'LR2_FU3 mean', 'LR3_FU3 mean', 'LR4_FU3 mean',
       'LR5_FU3 mean', 'LR6_FU3 mean', 'SVM-lin0_FU3 mean',
       'SVM-lin1_FU3 mean', 'SVM-lin2_FU3 mean', 'SVM-lin3_FU3 mean',
       'SVM-lin4_FU3 mean', 'SVM-lin5_FU3 mean', 'SVM-lin6_FU3 mean',
       'SVM-rbf0_FU3 mean', 'SVM-rbf1_FU3 mean', 'SVM-rbf2_FU3 mean',
       'SVM-rbf3_FU3 mean', 'SVM-rbf4_FU3 mean', 'SVM-rbf5_FU3 mean',
       'SVM-rbf6_FU3 mean', 'GB0_FU3 std', 'GB1_FU3 std', 'GB2_FU3 std',
       'GB3_FU3 std', 'GB4_FU3 std', 'GB5_FU3 std', 'GB6_FU3 std',
       'LR0_FU3 std', 'LR1_FU3 std', 'LR2_FU3 std', 'LR3_FU3 std',
       'LR4_FU3 std', 'LR5_FU3 std', 'LR6_FU3 std', 'SVM-lin0_FU3 std',
       'SVM-lin1_FU3 std', 'SVM-lin2_FU3 std', 'SVM-lin3_FU3 std',
       'SVM-lin4_FU3 std', 'SVM-li

In [26]:
# Write the table with the sorted SVM-rbf columns to the posthoc folder.
# The path is built from DATA_DIR (defined near the top of the notebook)
# rather than repeating the absolute path; with the current DATA_DIR it is
# the same file as before. index=False is the documented boolean form for
# leaving the row index out of the CSV.
DF.to_csv(f"{DATA_DIR}/posthoc/IMAGEN_SHAP_SVM_rbf.csv", index=False)

### 11. Save the Summary Statistics

#### load the dataset

In [None]:
# HDF5
# NOTE(review): section 6 reads 'IMAGEN_HDF5.csv' with read_HDF5() and passes
# 'all_Binge.csv' to to_HDF5(); confirm that reading 'all_Binge.csv' here is
# intended.
HDF5 = posthoc.read_HDF5('all_Binge.csv')
# INSTRUMENT
INST = posthoc.read_INSTRUMENT('IMAGEN_INSTRUMENT.csv')

In [None]:
# FU3
# Keep only the rows whose 'Session' value is 'FU3' in each table.
HDF5_FU3 = HDF5.groupby('Session').get_group('FU3')
INST_FU3 = INST.groupby('Session').get_group('FU3')
# Left join on ID and Session: every HDF5_FU3 row is kept, with the matching
# instrument columns attached where a match exists.
# NOTE(review): pd is not imported explicitly in this notebook; it is
# presumably provided by one of the star imports - TODO confirm.
SS_FU3 = pd.merge(HDF5_FU3,INST_FU3, on=['ID','Session'], how='left')

In [None]:
SS_FU3.info()

In [None]:
# Keep every column except the ones at positions 66 and 70.
# NOTE(review): the positions are hard-coded and depend on the current column
# order of SS_FU3; which columns they refer to is not visible here - TODO confirm.
SS_FU3_Col = list(SS_FU3.columns[:66])+list(SS_FU3.columns[67:70])+list(SS_FU3.columns[71:])

In [None]:
# Summary-statistics table: SS_FU3 restricted to the selected columns.
SS = SS_FU3[SS_FU3_Col]
# display the table
SS

In [None]:
SS.describe()

In [None]:
# save_path = f"{DATA_DIR}/posthoc/IMAGEN_Binge_FU3_SS_ver02.csv"
# if not os.path.isdir(os.path.dirname(save_path)):
#     os.makedirs(os.path.dirname(save_path))
# SS.to_csv(save_path, index=None)