In [76]:
import copy
import numpy as np 
import pandas as pd

import glob 
import matplotlib.pyplot as plt
import numpy as np 
import os 

import nibabel as nib
import json

from monai.transforms import Compose, AddChannel, Resize 
from tqdm import tqdm
%matplotlib inline



def extracting_score(mask_img_dir, subject_list, subject_file_list): 
    """Compute the mean standardized attribution score of every atlas ROI for every subject.

    Each subject map is z-scored over all of its voxels, multiplied by a
    binary ROI mask resized to (128, 128, 128), summed, and divided by the
    sum of that resized mask. ROIs whose resized mask sums to zero get a
    score of 0.

    Every subject file is loaded and standardized exactly once; the resized
    ROI masks are prepared up front and kept in memory for the duration of
    the call. Two tqdm progress bars are shown (mask preparation, subjects).

    Parameters
    ----------
    mask_img_dir : str
        Path of the atlas label image, read with ``nib.load``. Label 0 is
        skipped.
    subject_list : sequence
        Values stored in the ``'subjectkey'`` column; must have one entry per
        element of ``subject_file_list``, in the same order.
    subject_file_list : sequence
        Paths of per-subject attribution maps readable by ``np.load``.
        Assumed to hold arrays that broadcast against a (128, 128, 128)
        mask -- TODO confirm against the code that writes these files.

    Returns
    -------
    pd.DataFrame
        Column ``'subjectkey'`` followed by one column per non-zero atlas
        label, named ``"%s" % label`` and ordered as ``np.unique`` returns
        the labels.
    """
    # loading mask image
    mask = np.array(nib.load(mask_img_dir).dataobj)

    # The same transform is applied to every ROI mask, so build it once.
    transform = Compose([AddChannel(), Resize((128, 128, 128))])

    # Prepare (resized mask, mask sum) for each non-background label up front
    # so that the subject loop below never repeats this work.
    roi_masks = {}
    for label in tqdm(np.unique(mask), desc="ROI masks"):
        if label == 0:
            continue
        seg_mask = transform(np.where(mask != label, 0, 1))[0, :, :, :]
        roi_masks["%s" % label] = (seg_mask, np.sum(seg_mask))

    # calculating mean attribution scores per ROI
    seg_attr_result = {roi_key: [] for roi_key in roi_masks}
    for subj in tqdm(subject_file_list, desc="subjects"):
        # standardizing subject's attribution score across the whole brain
        subj_img = np.load(subj)
        subj_img = (subj_img - np.mean(subj_img)) / np.std(subj_img)
        for roi_key, (seg_mask, mask_sum) in roi_masks.items():
            if mask_sum == 0:
                # the ROI vanished after resizing: nothing to average over
                seg_subj_attr_norm = 0
            else:
                # masking ROI, then mean attr score inside it
                seg_subj_attr_norm = np.sum(subj_img * seg_mask) / mask_sum
            seg_attr_result[roi_key].append(seg_subj_attr_norm)

    df = {'subjectkey': subject_list}
    df.update(seg_attr_result)
    return pd.DataFrame(df)


def normalizing(df:pd.DataFrame): 
    """Z-score every column except the first one, in place.

    The first column is left untouched. Each remaining column is replaced by
    ``(values - mean) / std`` using NumPy's population standard deviation
    (``np.std`` default). The same DataFrame object is modified and returned.
    """
    for column in list(df.keys())[1:]:
        values = df[column].values
        centered = values - np.mean(values)
        df[column] = centered / np.std(values)
    return df 


# BMI-sds baseline 

In [77]:

### HarvardOxford (FSL)
"""
ref: https://github.com/dmascali/mni2atlas
"""
# Atlas label images, ordered [cortical, subcortical]
mask_img_dir_list = [
    "/Users/wangheehwan/Desktop/CNN_for_BMI/paper/data/clustering/atlas/HarvardOxford/HarvardOxford-cort-maxprob-thr25-1mm.nii.gz",
    "/Users/wangheehwan/Desktop/CNN_for_BMI/paper/data/clustering/atlas/HarvardOxford/HarvardOxford-sub-maxprob-thr25-1mm.nii.gz",
]

# JSON label look-up tables, in the same order as the images above
mask_LUT_dir_list = [
    "/Users/wangheehwan/Desktop/CNN_for_BMI/paper/data/clustering/atlas/HarvardOxford/cort_label.json",
    "/Users/wangheehwan/Desktop/CNN_for_BMI/paper/data/clustering/atlas/HarvardOxford/subcort_label.json",
]

# Individual names for each atlas file
HarvardOxford_cort_mask, HarvardOxford_subcort_mask = mask_img_dir_list
HarvardOxford_cort_LUT, HarvardOxford_subcort_LUT = mask_LUT_dir_list


In [78]:
IG_map_dir = "/Users/wangheehwan/Desktop/CNN_for_BMI/interpretation/ABCD/MNI/BMI_sds"


# gathering the name of subject files
subject_file_list = []
subject_list = [] 

for file in  glob.glob(IG_map_dir + '/*'): 
    subject_file_list.append(file)
    # subject key = file name with the '.npy' text removed
    subject_list.append(os.path.split(file)[-1].replace('.npy', ''))

df_final = pd.DataFrame({'subjectkey': subject_list})
for (mask_img_dir, mask_LUT_dir) in zip(mask_img_dir_list, mask_LUT_dir_list):
    # calculating mean attribution score per ROI 
    df = extracting_score(mask_img_dir=mask_img_dir, subject_file_list=subject_file_list, subject_list=subject_list)

    # indexing ROI name based on the number of ROI 
    with open(mask_LUT_dir, 'r') as file:
        LUT = json.load(file) 
    # NOTE(review): this relies on the LUT values being listed in the same
    # order (and number) as the non-zero atlas labels -- confirm against the
    # label JSON files.
    df.columns = ['subjectkey'] + list(LUT.values())
    df_final = pd.merge(df_final, df, how='inner', on='subjectkey')

# remove duplicated ROI
df_final = df_final.drop(['Left Cerebral White Matter', 'Left Cerebral Cortex', 'Right Cerebral White Matter', 'Right Cerebral Cortex'], axis=1)

# Summary feature importance 
ROI_list = list(df_final.keys()[1:])
ROI_mean = [] 
ROI_std = []
# getting mean score 
for ROI in ROI_list:
    ROI_mean.append(np.mean(df_final[ROI]))
    ROI_std.append(np.std(df_final[ROI]))

# Descending order of mean attribution, computed once and shared by the
# means, the standard deviations and the ROI names so they stay aligned.
sort_order = np.argsort(ROI_mean)[::-1]
ROI_mean_sorted = np.array(ROI_mean)[sort_order]
ROI_mean_sorted = np.where(ROI_mean_sorted == 0, np.nan, ROI_mean_sorted) 
ROI_std_sorted = np.array(ROI_std)[sort_order]
ROI_std_sorted = np.where(ROI_std_sorted == 0, np.nan, ROI_std_sorted)
# ROI names come from ROI_list (the previous `index` name was never defined
# in this notebook and raised NameError on a fresh kernel).
index_sorted = [ROI_list[i] for i in sort_order] 

df_result = pd.DataFrame(data=index_sorted, columns=['ROI'])
df_result['attribution_mean'] = ROI_mean_sorted
df_result['Rank'] = [i+1 for i in range(len(index_sorted))]


df_result.to_csv('/Users/wangheehwan/Desktop/CNN_for_BMI/paper/experiments/XAI/Feature_Importance_HarvardOxford_BMI_sds.csv', index=False)



# After 1 year 

In [None]:
year = 'after1y'        #options = ['after1y', 'after2y'] 
### HarvardOxford (FSL)
"""
ref: https://github.com/dmascali/mni2atlas
"""
# Atlas label images, ordered [cortical, subcortical]
mask_img_dir_list = [
    "/Users/wangheehwan/Desktop/CNN_for_BMI/paper/data/clustering/atlas/HarvardOxford/HarvardOxford-cort-maxprob-thr25-1mm.nii.gz",
    "/Users/wangheehwan/Desktop/CNN_for_BMI/paper/data/clustering/atlas/HarvardOxford/HarvardOxford-sub-maxprob-thr25-1mm.nii.gz",
]

# JSON label look-up tables, in the same order as the images above
mask_LUT_dir_list = [
    "/Users/wangheehwan/Desktop/CNN_for_BMI/paper/data/clustering/atlas/HarvardOxford/cort_label.json",
    "/Users/wangheehwan/Desktop/CNN_for_BMI/paper/data/clustering/atlas/HarvardOxford/subcort_label.json",
]

# Individual names for each atlas file
HarvardOxford_cort_mask, HarvardOxford_subcort_mask = mask_img_dir_list
HarvardOxford_cort_LUT, HarvardOxford_subcort_LUT = mask_LUT_dir_list


In [None]:
IG_map_dir = "/Users/wangheehwan/Desktop/CNN_for_BMI/interpretation"
# get every partition data (partition0 ... partition4 under the selected `year`)
OBESITY_attr_dir = [os.path.join(IG_map_dir, year, 'partition%d' % k) for k in range(5)]

# gathering the name of subject files
subject_file_list = []
subject_list = [] 
for OBESITY_attr_dir_partition in OBESITY_attr_dir:
    for file in  glob.glob(OBESITY_attr_dir_partition + '/*'): 
        subject_file_list.append(file)
        # subject key = file name with the '.npy' text removed
        subject_list.append(os.path.split(file)[-1].replace('.npy', ''))

df_final = pd.DataFrame({'subjectkey': subject_list})
for (mask_img_dir, mask_LUT_dir) in zip(mask_img_dir_list, mask_LUT_dir_list):
    # calculating mean attribution score per ROI 
    df = extracting_score(mask_img_dir=mask_img_dir, subject_file_list=subject_file_list, subject_list=subject_list)

    # indexing ROI name based on the number of ROI 
    with open(mask_LUT_dir, 'r') as file:
        LUT = json.load(file) 
    # NOTE(review): this relies on the LUT values being listed in the same
    # order (and number) as the non-zero atlas labels -- confirm against the
    # label JSON files.
    df.columns = ['subjectkey'] + list(LUT.values())
    df_final = pd.merge(df_final, df, how='inner', on='subjectkey')

# remove duplicated ROI
df_final = df_final.drop(['Left Cerebral White Matter', 'Left Cerebral Cortex', 'Right Cerebral White Matter', 'Right Cerebral Cortex'], axis=1)

# Summary feature importance 
ROI_list = list(df_final.keys()[1:])
ROI_mean = [] 
ROI_std = []
# getting mean score 
for ROI in ROI_list:
    ROI_mean.append(np.mean(df_final[ROI]))
    ROI_std.append(np.std(df_final[ROI]))

# Descending order of mean attribution, computed once and shared by the
# means, the standard deviations and the ROI names so they stay aligned.
sort_order = np.argsort(ROI_mean)[::-1]
ROI_mean_sorted = np.array(ROI_mean)[sort_order]
ROI_mean_sorted = np.where(ROI_mean_sorted == 0, np.nan, ROI_mean_sorted) 
ROI_std_sorted = np.array(ROI_std)[sort_order]
ROI_std_sorted = np.where(ROI_std_sorted == 0, np.nan, ROI_std_sorted)
# ROI names come from ROI_list (the previous `index` name was never defined
# in this notebook and raised NameError on a fresh kernel).
index_sorted = [ROI_list[i] for i in sort_order] 

df_result = pd.DataFrame(data=index_sorted, columns=['ROI'])
df_result['attribution_mean'] = ROI_mean_sorted
df_result['Rank'] = [i+1 for i in range(len(index_sorted))]


# NOTE(review): the file name ends in "aftery" while the after2y cell writes
# "after2y" -- this looks like a typo for "after1y". Left unchanged because
# other code may read this exact path; confirm before renaming.
df_result.to_csv('/Users/wangheehwan/Desktop/CNN_for_BMI/paper/experiments/XAI/Feature_Importance_HarvardOxford_become_overweight_aftery.csv', index=False)

100%|██████████| 48/48 [00:02<00:00, 16.01it/s]
100%|██████████| 21/21 [00:01<00:00, 15.03it/s]


# After 2 years

In [None]:
year = 'after2y'        #options = ['after1y', 'after2y'] 
### HarvardOxford (FSL)
"""
ref: https://github.com/dmascali/mni2atlas
"""
# Atlas label images, ordered [cortical, subcortical]
mask_img_dir_list = [
    "/Users/wangheehwan/Desktop/CNN_for_BMI/paper/data/clustering/atlas/HarvardOxford/HarvardOxford-cort-maxprob-thr25-1mm.nii.gz",
    "/Users/wangheehwan/Desktop/CNN_for_BMI/paper/data/clustering/atlas/HarvardOxford/HarvardOxford-sub-maxprob-thr25-1mm.nii.gz",
]

# JSON label look-up tables, in the same order as the images above
mask_LUT_dir_list = [
    "/Users/wangheehwan/Desktop/CNN_for_BMI/paper/data/clustering/atlas/HarvardOxford/cort_label.json",
    "/Users/wangheehwan/Desktop/CNN_for_BMI/paper/data/clustering/atlas/HarvardOxford/subcort_label.json",
]

# Individual names for each atlas file
HarvardOxford_cort_mask, HarvardOxford_subcort_mask = mask_img_dir_list
HarvardOxford_cort_LUT, HarvardOxford_subcort_LUT = mask_LUT_dir_list


In [None]:
IG_map_dir = "/Users/wangheehwan/Desktop/CNN_for_BMI/interpretation"
# get every partition data (partition0 ... partition4 under the selected `year`)
OBESITY_attr_dir = [os.path.join(IG_map_dir, year, 'partition%d' % k) for k in range(5)]

# gathering the name of subject files
subject_file_list = []
subject_list = [] 
for OBESITY_attr_dir_partition in OBESITY_attr_dir:
    for file in  glob.glob(OBESITY_attr_dir_partition + '/*'): 
        subject_file_list.append(file)
        # subject key = file name with the '.npy' text removed
        subject_list.append(os.path.split(file)[-1].replace('.npy', ''))

df_final = pd.DataFrame({'subjectkey': subject_list})
for (mask_img_dir, mask_LUT_dir) in zip(mask_img_dir_list, mask_LUT_dir_list):
    # calculating mean attribution score per ROI 
    df = extracting_score(mask_img_dir=mask_img_dir, subject_file_list=subject_file_list, subject_list=subject_list)

    # indexing ROI name based on the number of ROI 
    with open(mask_LUT_dir, 'r') as file:
        LUT = json.load(file) 
    # NOTE(review): this relies on the LUT values being listed in the same
    # order (and number) as the non-zero atlas labels -- confirm against the
    # label JSON files.
    df.columns = ['subjectkey'] + list(LUT.values())
    
    df_final = pd.merge(df_final, df, how='inner', on='subjectkey')

# remove duplicated ROI
df_final = df_final.drop(['Left Cerebral White Matter', 'Left Cerebral Cortex', 'Right Cerebral White Matter', 'Right Cerebral Cortex'], axis=1)


# Summary feature importance 
ROI_list = list(df_final.keys()[1:])
ROI_mean = [] 
ROI_std = []
# getting mean score 
for ROI in ROI_list:
    ROI_mean.append(np.mean(df_final[ROI]))
    ROI_std.append(np.std(df_final[ROI]))

# Descending order of mean attribution, computed once and shared by the
# means, the standard deviations and the ROI names so they stay aligned.
sort_order = np.argsort(ROI_mean)[::-1]
ROI_mean_sorted = np.array(ROI_mean)[sort_order]
ROI_mean_sorted = np.where(ROI_mean_sorted == 0, np.nan, ROI_mean_sorted) 
ROI_std_sorted = np.array(ROI_std)[sort_order]
ROI_std_sorted = np.where(ROI_std_sorted == 0, np.nan, ROI_std_sorted)
# ROI names come from ROI_list (the previous `index` name was never defined
# in this notebook and raised NameError on a fresh kernel).
index_sorted = [ROI_list[i] for i in sort_order] 

df_result = pd.DataFrame(data=index_sorted, columns=['ROI'])
df_result['attribution_mean'] = ROI_mean_sorted
df_result['Rank'] = [i+1 for i in range(len(index_sorted))]


df_result.to_csv('/Users/wangheehwan/Desktop/CNN_for_BMI/paper/experiments/XAI/Feature_Importance_HarvardOxford_become_overweight_after2y.csv', index=False)

NameError: name 'mask_img_dir_list' is not defined