In [20]:
from nilearn._utils import check_niimg
from nilearn.input_data import NiftiLabelsMasker
import nibabel as nib
import numpy as np
import pandas as pd
from glob import glob
from nilearn.datasets import fetch_atlas_schaefer_2018

# --- Locations ---------------------------------------------------------------
# Directory that is searched for the SUV*.nii images, and the CSV that will
# receive the per-subject parcel table.
data_path = '/home/doeringe/Dokumente/brain age/4_SUVR/'
output_csv = '../data/parcels_FDG_tpm_ADNI.csv'

# --- Inputs ------------------------------------------------------------------
# Schaefer 2018 parcellation with 200 ROIs and the 17-network Yeo assignment.
atlas = fetch_atlas_schaefer_2018(n_rois=200, yeo_networks=17)

# Subject table; its 'Subject' column lists the IDs to process.
subjects = pd.read_csv('../FDG_BASELINE_HEALTHY_4_15_2021.csv')
subject_list = subjects['Subject'].tolist()

# --- Accumulators filled while iterating over the subjects -------------------
image_list, subj_succ, subj_miss = [], [], []
subj_year, subj_month, age = [], [], []

# Character positions of the acquisition year and month inside each matched
# file path.
# NOTE(review): these are absolute offsets into the full path string, so they
# depend on the length of data_path and on the file naming scheme -- confirm
# them whenever either changes.
YEAR_SLICE = slice(95, 99)
MONTH_SLICE = slice(99, 101)

# create list of regional data and subject IDs
for sub in subject_list:
    # A subject ID can occur more than once in subject_list; parcellate it
    # only the first time it is seen.
    if sub in subj_succ:
        continue

    # glob() returns matches in arbitrary order; sorting makes the choice
    # between several scans from the same year reproducible.
    foi = sorted(glob(data_path + "SUV*" + sub + "*.nii"))
    if not foi:
        # No image on disk for this subject. Taking the minimum of an empty
        # list of years raises ValueError and would abort the whole loop, so
        # the subject is recorded as missing and skipped instead.
        if sub not in subj_miss:
            subj_miss.append(sub)
        continue

    # Baseline scan = the file whose path carries the earliest year.
    years = [int(path[YEAR_SLICE]) for path in foi]
    base_ind_ = int(np.argmin(years))
    baseline_path = foi[base_ind_]

    this_image = nib.load(baseline_path)
    niimg = check_niimg(this_image, atleast_4d=True)
    # A new masker is constructed for every image, so no fitted state is
    # carried over from one subject to the next.
    masker = NiftiLabelsMasker(labels_img=atlas.maps,
                               standardize=False,
                               memory='nilearn_cache',
                               resampling_target='data')
    parcelled = masker.fit_transform(niimg)

    image_list.append(parcelled)
    subj_succ.append(sub)
    # Year and month are stored as the raw substrings of the path.
    subj_year.append(baseline_path[YEAR_SLICE])
    subj_month.append(baseline_path[MONTH_SLICE])
    # If the table holds several rows for this subject, the smallest 'Age'
    # among them is used.
    age.append(np.min(subjects['Age'][subjects['Subject'] == sub]))
        
    
# Stack the per-subject masker outputs into one 3-D array and flatten it to
# one row per subject. The reshape only succeeds when the middle dimension
# is 1.
features = np.array(image_list)
n_subjects, n_rows_per_subject, n_parcels = features.shape
features = features.reshape(n_subjects, n_parcels)
df = pd.DataFrame(features, columns=atlas.labels)

# combine information on subjects, age and regional data
subs = dict(Subject=subj_succ,
            Age=age,
            Year=subj_year,
            Month=subj_month)
subs_pd = pd.DataFrame(subs)
# Metadata columns first, parcel columns after; rows are aligned on the
# default integer index shared by both frames.
df_new = pd.concat([subs_pd, df], axis=1)
df_new.to_csv(output_csv, index=False)


In [18]:
# Spot check: smallest 'Age' recorded in the subject table for one subject.
sub = "137_S_4520"
ages_for_sub = subjects.loc[subjects['Subject'] == sub, 'Age'].tolist()
np.min(ages_for_sub)

68

In [None]:
from sklearn.model_selection import train_test_split

# Settings of the split, gathered in one place so they are easy to tune.
AGE_BIN_WIDTH = 7     # width of one stratification bin, in the units of 'Age'
TEST_FRACTION = .2    # share of subjects placed in the test set
SPLIT_SEED = 42       # fixed seed so the split is reproducible

# Work on a copy: the columns added below must not leak into df_new, so that
# re-running this cell always starts from the same input.
df = df_new.copy()

# Coarse age bins, used only to stratify the split.
df['Agebins'] = (df['Age'] // AGE_BIN_WIDTH).astype(int)

# Parcel columns are recognised by the underscore in their label; str() is
# applied because the labels are not guaranteed to be str objects.
col = [c for c in df.columns if '_' in str(c)]

X = df[col].values

y_pseudo = df['Agebins']
y = df['Age']

# Subject IDs are split together with X and y so that every row can be
# traced back to its partition afterwards.
x_train, x_test, y_train, y_test, id_train, id_test = train_test_split(
    X, y, df['Subject'], test_size=TEST_FRACTION, random_state=SPLIT_SEED,
    stratify=y_pseudo)

# "T" for subjects in the training partition, "F" otherwise. isin() does
# the membership test in one vectorised pass instead of scanning the array
# of training IDs once per row.
df['train'] = np.where(df['Subject'].isin(id_train), "T", "F")

# The index is written as well (pandas default), unlike the parcel table
# saved earlier with index=False.
df.to_csv('../data/test_train_FDG_tpm_ADNI.csv')