In [5]:
from nilearn._utils import check_niimg
from nilearn.input_data import NiftiLabelsMasker
import nibabel as nib
import numpy as np
import pandas as pd
from glob import glob
from nilearn.datasets import fetch_atlas_schaefer_2018

# Parcellate each subject's image with the Schaefer 2018 atlas (200 ROIs,
# 17 Yeo networks) and write one row per subject -- Subject, Age, Year, Month
# followed by one column per parcel -- to `output_csv`.

# Subject table; the 'Subject' and 'Age' columns are read below.
subjects = pd.read_csv('../FDG_BASELINE_HEALTHY_4_15_2021.csv')
subject_list = subjects['Subject'].tolist()
data_path = '/home/doeringe/Dokumente/brain age/4_SUVR/'
output_csv = '../data/parcels_FDG_tpm_ADNI.csv'
atlas = fetch_atlas_schaefer_2018(n_rois=200, yeo_networks=17)

# Positions of the year (4 characters) and month (2 characters) inside an
# image *file name*.  These used to be absolute offsets into the full path
# ([95:99] and [99:101]); with the 42-character `data_path` above that is the
# same as [53:57] and [57:59] of the file name, and the file-name form keeps
# working when `data_path` is changed.
# NOTE(review): presumably these characters are the scan date -- confirm
# against the file naming scheme.
YEAR_SLICE = slice(53, 57)
MONTH_SLICE = slice(57, 59)

image_list = []   # one parcellated array per successfully processed subject
subj_succ = []    # subject IDs with an image, in processing order
subj_miss = []    # subject IDs for which no image file was found
subj_year = []
subj_month = []
age = []

# Set of subject IDs already handled, so repeated IDs in the table are
# processed only once without rescanning a list each time.
seen = set()

# create list of regional data and subject IDs
for sub in subject_list:
    if sub in seen:
        continue
    seen.add(sub)

    foi = glob(data_path + "SUV*" + sub + "*.nii")
    if not foi:
        # Record the gap instead of dropping the subject silently.
        subj_miss.append(sub)
        continue

    # Only the first match is used when several files match the pattern.
    image_path = foi[0]
    file_name = image_path[len(data_path):]

    niimg = check_niimg(nib.load(image_path), atleast_4d=True)
    masker = NiftiLabelsMasker(labels_img=atlas.maps,
                               standardize=False,
                               memory='nilearn_cache',
                               resampling_target='data')
    image_list.append(masker.fit_transform(niimg))

    subj_succ.append(sub)
    subj_year.append(file_name[YEAR_SLICE])
    subj_month.append(file_name[MONTH_SLICE])
    # First 'Age' entry of the rows whose 'Subject' equals this ID.
    age.append(subjects.loc[subjects['Subject'] == sub, 'Age'].tolist()[0])

if not image_list:
    raise ValueError("No image matching 'SUV*<subject>*.nii' was found in "
                     + data_path)

# Stack to (subjects, volumes, parcels) and drop the volume axis, which must
# have length 1 for the rows to line up with the subject table below.
features = np.array(image_list)
n_subjects, n_volumes, n_parcels = features.shape
if n_volumes != 1:
    raise ValueError("Expected one volume per image, got %d" % n_volumes)
features = features.reshape(n_subjects, n_parcels)

# NOTE: the recorded preview of this frame shows the parcel headers rendered
# as b'...' (the atlas labels are bytes); they are passed through unchanged
# so the CSV header stays as it was.
df = pd.DataFrame(features, columns=atlas.labels)

# combine information on subjects, age and regional data
subs = {'Subject': subj_succ,
        'Age': age,
        'Year': subj_year,
        'Month': subj_month}
subs_pd = pd.DataFrame(subs)
df_new = pd.concat([subs_pd, df], axis=1)
df_new.to_csv(output_csv, index=False)


In [6]:
# Plain-text preview of the first five rows of the combined table
# (subject columns followed by the per-parcel columns).
print(df_new.head())

      Subject  Age  Year Month  b'17Networks_LH_VisCent_ExStr_1'  \
0  941_S_5193   73  2013    06                          1.436805   
1  941_S_5124   77  2013    05                          1.254059   
2  941_S_4376   77  2012    02                          1.413042   
3  941_S_4365   80  2012    01                          1.226467   
4  941_S_4292   71  2012    01                          1.637643   

   b'17Networks_LH_VisCent_ExStr_2'  b'17Networks_LH_VisCent_Striate_1'  \
0                          0.967843                            1.201370   
1                          1.102944                            0.936855   
2                          1.008970                            1.145184   
3                          1.029858                            1.014568   
4                          1.530058                            1.575166   

   b'17Networks_LH_VisCent_ExStr_3'  b'17Networks_LH_VisCent_ExStr_4'  \
0                          1.268873                          1.1570

In [None]:
from sklearn.model_selection import train_test_split

# Split the parcellated subjects into train (80%) and test (20%) sets,
# stratified on coarse age bins, and save the table with a train flag.

# NOTE: `df` is an alias of `df_new`, not a copy -- the 'Agebins' and 'train'
# columns added below therefore also appear on `df_new`.
df = df_new

# 7-year-wide age bins, used only as the stratification variable.
df['Agebins'] = (df['Age'] // 7).astype(int)

# Feature columns are those whose name contains an underscore, i.e. the atlas
# parcel columns; 'Subject', 'Age', 'Year', 'Month', 'Agebins' and 'train'
# contain none and are excluded.
col = [name for name in df.columns if '_' in str(name)]

X = df[col].values

y_pseudo = df['Agebins']
y = df['Age']

x_train, x_test, y_train, y_test, id_train, id_test = train_test_split(
    X, y, df['Subject'], test_size=.2, random_state=42,
    stratify=y_pseudo)

# Flag training subjects with "T" and the rest with "F".  A vectorised
# membership test replaces the per-row scan of the id_train array.
df['train'] = np.where(df['Subject'].isin(id_train), "T", "F")

# Written with pandas' default index=True, so the row index becomes the first
# (unnamed) CSV column.
df.to_csv('../data/test_train_FDG_tpm_ADNI.csv')