In [1]:
# Import the needed packages
import csv
import os
import pickle

import numpy as np
import pandas as pd

from bids import BIDSLayout

from nilearn.interfaces import fmriprep
from nilearn import datasets
from nilearn.maskers import NiftiLabelsMasker
from nilearn import image as nimg
from nilearn.signal import clean

import nibabel as nib

import scipy.io as sio


In [2]:
# Project root; every other path used in this notebook is built from it.
base_dir = '/home/riccardo/ADNI_Hopf'
# Dataset folder that is indexed below and queried for confounds and BOLD images.
BIDS_dir = f'{base_dir}/Data/ALL'

# Index the dataset with pybids. Validation is switched off and both the
# 'bids' and the 'derivatives' configurations are loaded.
layout = BIDSLayout(
    BIDS_dir,
    validate=False,
    config=['bids', 'derivatives'],
)

In [3]:
# Get the subject list
subjs = layout.get_subjects()

# Gross-motion exclusion criteria (as per the "Benchmarking..." paper by Ciric et al.):
# a subject is excluded when the mean RMS framewise displacement exceeds
# MEAN_RMSD_MAX (mm), or when more than HIGH_RMSD_MAX_VOLUMES volumes have an
# RMS framewise displacement above HIGH_RMSD_THRESHOLD (mm).
MEAN_RMSD_MAX = 0.2
HIGH_RMSD_THRESHOLD = 0.25
HIGH_RMSD_MAX_VOLUMES = 20

# Subjects excluded because of gross motion artifacts
motion_exclusion_list = []

for subj in subjs:
    # get the confounds.tsv table storing all confounds
    confounds = layout.get(subject=subj, extension='tsv',
                           datatype='func',
                           desc='confounds',
                           return_type='file')
    if not confounds:
        # Fail with the subject name instead of an anonymous IndexError.
        raise FileNotFoundError(f'No confounds .tsv file found for subject {subj}')
    confound_file = confounds[0]
    # create pandas dataframe for filtering
    df_confounds = pd.read_csv(confound_file, delimiter='\t')
    rmsd = df_confounds['rmsd']
    # Count the high-motion volumes directly on the 'rmsd' column. NaN entries
    # compare as False, so they are never counted as high-motion volumes.
    n_high_motion_volumes = int((rmsd > HIGH_RMSD_THRESHOLD).sum())
    # Series.mean() skips NaN entries by default.
    if rmsd.mean() > MEAN_RMSD_MAX or n_high_motion_volumes > HIGH_RMSD_MAX_VOLUMES:
        motion_exclusion_list.append(subj)

# Keep only the subjects without gross motion artifacts (original order preserved)
excluded_subjs = set(motion_exclusion_list)
new_subjs = [subj for subj in subjs if subj not in excluded_subjs]

In [5]:
# Split the retained subjects into MCI and HC: a subject is MCI when it also
# appears in the MCI-only dataset folder, otherwise it is treated as HC.
# Note that this is not proper BIDS and needs to change for the final project,
# e.g. by reading a csv/txt file that lists the subjects of each group.
# The MCI folder is derived from base_dir so it cannot drift from the other paths.
subjs_MCI = BIDSLayout(base_dir + '/Data/MCI/', validate=False, config=['bids', 'derivatives'])
MCI_subjs_initial = subjs_MCI.get_subjects()

# Set lookup keeps the membership tests cheap; both lists are returned sorted.
mci_ids = set(MCI_subjs_initial)
MCI_subjs = sorted(subj for subj in new_subjs if subj in mci_ids)
HC_subjs = sorted(subj for subj in new_subjs if subj not in mci_ids)

# Optional diagnostics (disabled): list the groups and the group of each
# motion-excluded subject.
# print(f'These are the MCI subjects:{MCI_subjs}')
# print(f'These are the HC subjects:{HC_subjs}')
# # for motion in motion_exclusion_list:
# #     if motion in HC_subjs:
# #         print(f'{motion} is a healthy control')
# #     elif motion in MCI_subjs:
# #         print(f'{motion} is a MCI')

In [9]:
# Get the 78 region AAL atlas and its labels
atlas_filename = base_dir + '/Utils/AAL_atlas_78Regions.nii.gz'

# Read the csv containing the names of the 78 cortical regions that we will use.
# The `with` block closes the file on exit, so no explicit close() is needed.
with open(base_dir + '/Utils/aal_regions_included.csv', newline='') as regions_file:
    included_regions = list(csv.reader(regions_file))

# Flatten the csv rows into a 1-D array of names so that it is more easily iterable.
# Its length is used later as the number of regions of the timeseries array.
included_regions_array = np.array(included_regions).flatten()

# Sort in place so the printed list (and any later use of new_subjs) is ordered.
new_subjs.sort()
print(f'This is the list of the included subjects: {new_subjs}')

This is the list of the included subjects: ['ADNI002S1155', 'ADNI002S4229', 'ADNI002S4654', 'ADNI002S4799', 'ADNI002S5178', 'ADNI002S6456', 'ADNI003S6258', 'ADNI003S6259', 'ADNI003S6268', 'ADNI003S6307', 'ADNI003S6432', 'ADNI006S6291', 'ADNI006S6651', 'ADNI011S6618', 'ADNI014S6424', 'ADNI018S2155', 'ADNI018S6414', 'ADNI019S6186', 'ADNI024S4674', 'ADNI024S6033', 'ADNI037S4214', 'ADNI037S4706', 'ADNI037S6083', 'ADNI068S2187', 'ADNI068S4431', 'ADNI070S6236', 'ADNI100S4556']


In [13]:
# This function is the main function at the moment, where we perform 36P confound regression with no scrubbing.

def get_fmri(subj):
    """Return the path of the preprocessed resting-state BOLD image of a subject.

    The module-level ``layout`` is queried for ``func`` files of task ``rest``
    with ``desc='preproc'`` in ``MNI152NLin2009cAsym`` space and extension
    ``nii.gz``. When several files match, only the first one is used.

    Parameters
    ----------
    subj : str
        Subject label, as returned by ``layout.get_subjects()``.

    Returns
    -------
    str
        Path of the first matching image.

    Raises
    ------
    FileNotFoundError
        If the layout has no matching image for ``subj``.
    """
    func_images_file = layout.get(subject=subj, datatype='func', task='rest',
                                  desc='preproc',
                                  space='MNI152NLin2009cAsym',
                                  extension='nii.gz',
                                  return_type='file')

    if not func_images_file:
        # Name the subject instead of failing with a bare IndexError.
        raise FileNotFoundError(
            f'No preprocessed resting-state image (space MNI152NLin2009cAsym) found for subject {subj}'
        )

    return func_images_file[0]

def process_timeseries_36P(func_image, func_image_smoothed, use_filter, filt_low, filt_high, TR,
                           n_dummy_scans=4, n_regions=90, global_signal='full'):
    """Extract atlas timeseries from one image and apply 36P confound regression.

    Signals are extracted from ``func_image_smoothed`` with a ``NiftiLabelsMasker``
    built on the module-level ``atlas_filename``. Confounds are loaded from the
    fMRIPrep outputs that belong to ``func_image`` and regressed out with
    ``nilearn.signal.clean`` (detrending and z-scoring enabled, no scrubbing).

    The 36P model is 6 motion parameters + white matter + CSF + global signal,
    each with its derivative, square and squared derivative. The global-signal
    regressors are therefore requested by default; without them only
    24 (motion) + 8 (WM/CSF) = 32 regressors are loaded.

    Parameters
    ----------
    func_image : str
        Path of the (unsmoothed) preprocessed image; used to load the confounds.
    func_image_smoothed : str or image
        Image the signals are extracted from.
    use_filter : bool
        If true, a Butterworth band-pass filter is applied while cleaning.
    filt_low : float
        Low-pass cutoff handed to ``clean`` (only used when ``use_filter``).
    filt_high : float
        High-pass cutoff handed to ``clean`` (only used when ``use_filter``).
    TR : float
        Repetition time handed to ``clean`` as ``t_r`` (only used when ``use_filter``).
    n_dummy_scans : int, optional
        Number of initial timepoints dropped from both signals and confounds.
    n_regions : int, optional
        Number of leading atlas columns to keep. The original intent was to drop
        the infratentorial regions (the last 26 labels of a 116-label atlas).
        NOTE(review): if the masker returns only 78 columns for the 78-region
        atlas, this slice keeps all of them - confirm.
    global_signal : str or None, optional
        Global-signal option forwarded to ``load_confounds``. Pass ``None`` to
        omit the global-signal regressors (the previous 32-regressor behaviour).

    Returns
    -------
    numpy.ndarray
        Cleaned signals, transposed to (regions, timepoints).
    """
    masker = NiftiLabelsMasker(labels_img=atlas_filename)
    signal = masker.fit_transform(func_image_smoothed)
    # Drop the initial timepoints and keep only the leading atlas regions.
    signal_selected = signal[n_dummy_scans:, :n_regions]

    # Build the confound model: motion and WM/CSF with derivatives, powers and
    # powers of derivatives, plus (by default) the same expansion of the global signal.
    strategy = ['motion', 'wm_csf']
    confound_options = {'motion': 'full', 'wm_csf': 'full'}
    if global_signal is not None:
        strategy.append('global_signal')
        confound_options['global_signal'] = global_signal
    # The second return value (sample mask) is unused because no scrubbing is done.
    confounds, _ = fmriprep.load_confounds(func_image, strategy=tuple(strategy),
                                           **confound_options)
    # Drop the same initial timepoints so confounds stay aligned with the signals.
    confounds_selected = confounds.iloc[n_dummy_scans:, :]

    clean_options = dict(detrend=True, standardize='zscore', confounds=confounds_selected,
                         standardize_confounds=True, ensure_finite=False)
    if use_filter:
        clean_options.update(filter='butterworth', low_pass=filt_low,
                             high_pass=filt_high, t_r=TR)
    cleaned_signal = clean(signal_selected, **clean_options)

    return cleaned_signal.T
    

# Get the timeseries divided according to the AAL atlas
def extract_timeseries_group(subjs, use_filter, filt_low=0.08, filt_high=0.008, TR=3):
    """Smooth, parcellate and clean the resting-state image of every subject.

    For each subject the preprocessed image is located with ``get_fmri``,
    smoothed with ``nimg.smooth_img(..., 5)``, written to ``<base_dir>/Temp`` and
    passed to ``process_timeseries_36P``. The number of timepoints is taken from
    the data instead of being hard-coded, so scans of any length are supported
    as long as all subjects of the group agree.

    Parameters
    ----------
    subjs : list of str
        Subject labels to process.
    use_filter : bool
        Forwarded to ``process_timeseries_36P``.
    filt_low, filt_high : float, optional
        Filter cutoffs forwarded to ``process_timeseries_36P``.
    TR : float, optional
        Repetition time forwarded to ``process_timeseries_36P``.

    Returns
    -------
    numpy.ndarray
        Float array of shape (n_subjects, n_regions, n_timepoints), in the order
        of ``subjs``. For an empty ``subjs`` the shape is (0, n_regions, 0).

    Raises
    ------
    ValueError
        If a subject's timeseries does not have ``included_regions_array.shape[0]``
        regions, or its shape differs from the first subject's.
    """
    n_regions = included_regions_array.shape[0]
    n_subjs = len(subjs)

    # The smoothed images are written here; make sure the folder exists.
    temp_dir = base_dir + '/Temp'
    os.makedirs(temp_dir, exist_ok=True)

    group_timeseries = []
    for n, subj in enumerate(subjs):

        print(f'Processing subject {subj}... ({n+1}/{n_subjs})')
        func_image = get_fmri(subj)
        print(f'--Smoothing image for {subj}... ({n+1}/{n_subjs})')
        func_image_smoothed = nimg.smooth_img(func_image, 5)
        func_image_smoothed_file = temp_dir + f'/sub-{subj}_space-MNI152NLin2009cAsym_task-rest_desc-preprocSmoothed.nii.gz'
        nib.save(func_image_smoothed, func_image_smoothed_file)
        print(f'----Processing timeseries for {subj}... ({n+1}/{n_subjs})')
        ts = process_timeseries_36P(func_image, func_image_smoothed_file, use_filter, filt_low, filt_high, TR)

        # Report shape problems with the offending subject instead of a
        # bare broadcasting error.
        if ts.shape[0] != n_regions:
            raise ValueError(
                f'Subject {subj}: expected {n_regions} regions, got timeseries of shape {ts.shape}'
            )
        if group_timeseries and ts.shape != group_timeseries[0].shape:
            raise ValueError(
                f'Subject {subj}: timeseries shape {ts.shape} differs from '
                f'{group_timeseries[0].shape} of subject {subjs[0]}'
            )
        group_timeseries.append(ts)

    if not group_timeseries:
        return np.zeros([0, n_regions, 0])

    return np.stack(group_timeseries).astype(np.float64, copy=False)



In [39]:
# Temporal filtering is optional. To enable it, set use_filter = True; the
# Butterworth cutoffs and the TR of the timeseries can then be passed to
# extract_timeseries_group through its filt_low, filt_high and TR arguments
# (defaults: filt_low = 0.08, filt_high = 0.008, TR = 3).
use_filter = False

# Healthy controls first, then MCI; each call returns one array per group.
ts_HC = extract_timeseries_group(subjs=HC_subjs, use_filter=use_filter)
print('Done for HC!')

ts_MCI = extract_timeseries_group(subjs=MCI_subjs, use_filter=use_filter)
print('Done for MCI!')

Processing subject ADNI002S4799: 1/9
Smoothing image for ADNI002S4799: 1/9


KeyboardInterrupt: 

In [51]:

# All outputs go to <base_dir>/Results; make sure the folder exists.
results_dir = base_dir + '/Results'
os.makedirs(results_dir, exist_ok=True)

# Save the per-group outputs: subject list and timeseries array, in matching order
np.save(results_dir + '/subject_list_timeseries_HC.npy', np.array(HC_subjs))
np.save(results_dir + '/timeseries_HC.npy', ts_HC)
np.save(results_dir + '/subject_list_timeseries_MCI.npy', np.array(MCI_subjs))
np.save(results_dir + '/timeseries_MCI.npy', ts_MCI)

# Create a concatenated timeseries for all patients (HC first, then MCI), with
# the subject list concatenated in the same order so indices stay aligned.
new_subjs_all = HC_subjs + MCI_subjs
timeseries_all = np.vstack([ts_HC, ts_MCI])
np.save(results_dir + '/subject_list_timeseries_all.npy', np.array(new_subjs_all))
np.save(results_dir + '/timeseries_all.npy', timeseries_all)

# Also save a subject -> timeseries dictionary, which avoids relying on the
# order of two separate files.
full_dictionary_timeseries = {subj: timeseries_all[n] for n, subj in enumerate(new_subjs_all)}
# The `with` block closes the file even if pickling fails.
with open(results_dir + '/dictionary_timeseries_all.pkl', 'wb') as pkl_file:
    pickle.dump(full_dictionary_timeseries, pkl_file)
