In [None]:
# Import stuff
from os import listdir, makedirs
from os.path import isdir
from pandas import DataFrame, Series, read_csv

from nipype.pipeline.engine import Workflow, Node, MapNode, JoinNode
from nipype.interfaces.utility import IdentityInterface, Function
from nipype.interfaces.io import DataSink, FreeSurferSource, SelectFiles
from nipype.algorithms.misc import Gunzip

from nipype.interfaces.freesurfer.preprocess import MRIConvert
from nipype.interfaces.freesurfer.model import Binarize
from nipype.interfaces.freesurfer import FSCommand, MRIConvert, ReconAll
from nipype.interfaces.fsl.utils import Reorient2Std, MotionOutliers, Merge
from nipype.interfaces.fsl.preprocess import MCFLIRT, SliceTimer, FLIRT
from nipype.interfaces.fsl.maths import ApplyMask
from nipype.interfaces.fsl.model import GLM
from nipype.interfaces.fsl.epi import ApplyTOPUP, TOPUP
from nipype.algorithms.rapidart import ArtifactDetect
from nipype.interfaces.nipy.preprocess import Trim
from nipype.interfaces.afni.preprocess import Bandpass

# FSL set up- change default file output type
from nipype.interfaces.fsl import FSLCommand
FSLCommand.set_default_output_type('NIFTI_GZ')

# Study-level locations: analysis outputs, raw data and workflow scratch space
analysis_home = '/data/perlman/moochie/user_data/CamachoCat/ChEC/fmri_proc'
raw_dir = '/data/perlman/moochie/study_data/ChEC/MRI_data'
preproc_dir = f'{analysis_home}/preproc'
firstlevel_dir = f'{analysis_home}/subjectlevel'
secondlevel_dir = f'{analysis_home}/grouplevel'
workflow_dir = f'{analysis_home}/workflows'
phase_encoding_file = f'{analysis_home}/misc/chec_encoding_file.txt'

# Sample template images (2mm) and the masks that go with them
templates_dir = '/data/perlman/moochie/user_data/CamachoCat/Aggregate_anats/templates'
template_brain = f'{templates_dir}/lcbd_template_2mm_brain.nii.gz'
template_mask = f'{templates_dir}/lcbd_template_2mm_mask.nii.gz'
template_mask_dilated = f'{templates_dir}/lcbd_template_2mm_maskD1.nii.gz'
gm_mask = f'{templates_dir}/lcbd_template_2mm_gm.nii.gz'
wmcsf_mask = f'{templates_dir}/lcbd_template_2mm_wmcsf.nii.gz'

# Participants to process: every entry of the SubjID column.
# To try a single participant, override with e.g. subjects_list = ['1032']
subject_info = read_csv(f'{analysis_home}/misc/subjectinfo.csv', index_col=None)
subjects_list = subject_info['SubjID'].tolist()

# FreeSurfer set up - point SUBJECTS_DIR at the aggregate anatomical folder
fs_dir = '/data/perlman/moochie/user_data/CamachoCat/Aggregate_anats/subjects_dir'
FSCommand.set_default_subjects_dir(fs_dir)

# Data collection specs
TR = 0.8  # in seconds
num_slices = 40
slice_direction = False  # True = z direction, top to bottom
interleaved = True
slice_timing = f'{analysis_home}/misc/slice_timing.txt'
echo_train_length = 0.051  # in seconds, loc (0x18,0x91) in the dcm header
TEs = [13.2, 38.76, 64.32]  # in milliseconds

# Cutoffs handed to the band-pass filter node
highpass_freq = 0.008
lowpass_freq = 0.1

In [None]:
## Data handling Nodes

# Iterate the workflow over every participant ID
infosource = Node(IdentityInterface(fields=['subjid']), name='infosource')
infosource.iterables = [('subjid', subjects_list)]

# FreeSurferSource - grabs a subject's FreeSurfer outputs from fs_dir
fssource = Node(FreeSurferSource(subjects_dir=fs_dir),
                name='fssource',
                run_without_submitting=True)

# Sink data into preproc_dir; the substitutions strip the iterable
# prefixes ('_subjid_', '_func_') from the output paths
substitutions = [(prefix, '') for prefix in ('_subjid_', '_func_')]
datasink = Node(
    DataSink(base_directory=preproc_dir,
             container=preproc_dir,
             substitutions=substitutions),
    name='datasink')

## fMRI Data Prep
This workflow carries out the following processing steps:
1. Slice-time correction
2. Realignment (motion parameters derived from this step)
3. Multi-echo denoising
4. Distortion correction

In [None]:
## File handling nodes

# Phase-encoding reference scans in the fmap folder; one branch per pe_dir value
pes_template = dict(
    PE_vol=raw_dir + '/sub-{subjid}/fmap/sub-{subjid}_task-AHKJ-{pe_dir}_bold.nii.gz')
pes_selectfiles = Node(SelectFiles(pes_template), name='pes_selectfiles')
pes_selectfiles.iterables = ('pe_dir', ['1', '2'])

# Second echo of the functional run on its own
echo2_template = dict(
    echo2=raw_dir + '/sub-{subjid}/func/sub-{subjid}_task-AHKJ-ep2-e2_bold.nii.gz')
echo2_selectfiles = Node(SelectFiles(echo2_template), name='echo2_selectfiles')

# All three echoes of the functional run; one branch per func_te value
funcs_template = dict(
    func=raw_dir + '/sub-{subjid}/func/sub-{subjid}_task-AHKJ-ep2-{func_te}_bold.nii.gz')
funcs_selectfiles = Node(SelectFiles(funcs_template), name='funcs_selectfiles')
funcs_selectfiles.iterables = ('func_te', ['e1', 'e2', 'e3'])

In [None]:
# Apply ME-ICA (Kundu, et al., 2011)
def tedana_clean(TE_niftis, TEs):
    """Run the tedana workflow on a set of echoes and return the denoised series.

    Parameters
    ----------
    TE_niftis : list
        Echo image files, passed unchanged as the first argument of
        ``tedana_workflow``.
    TEs : list
        Echo times, passed unchanged as the second argument of
        ``tedana_workflow``.

    Returns
    -------
    str
        Absolute path of ``dn_ts_OC.nii`` resolved against the current
        working directory.
    """
    # Imports stay inside the function so it remains self-contained when it
    # is wrapped in a nipype Function node.
    from nipype import config, logging
    config.enable_debug_mode()
    logging.update_logging(config)
    from os.path import abspath

    from tedana.workflows import tedana_workflow

    # The workflow is run for its side effect of writing output files; its
    # return value is not used.
    tedana_workflow(TE_niftis, TEs)

    # NOTE(review): presumably tedana writes 'dn_ts_OC.nii' into the working
    # directory - confirm the file name against the installed tedana version.
    denoised_func = abspath('dn_ts_OC.nii')

    return(denoised_func)

In [None]:
# Slice-time correction driven by the custom timing file
slicetime = Node(
    SliceTimer(time_repetition=TR,
               custom_timings=slice_timing,
               index_dir=slice_direction),
    name='slicetime')

# Realignment with mcflirt, keeping both the parameter plots and the matrices
realignmc = Node(MCFLIRT(save_mats=True, save_plots=True), name='realignmc')

# mcflirt node that is run on each echo branch; keeps the parameter plots
applyrealign = Node(MCFLIRT(save_plots=True), name='applyrealign')

## Unwarping and ME cleaning nodes
# ME denoising using Tedana; the JoinNode gathers the per-echo branches
# created by funcs_selectfiles into the TE_niftis list
tedana_interface = Function(input_names=['TE_niftis', 'TEs'],
                            output_names=['denoised_func'],
                            function=tedana_clean)
me_denoise = JoinNode(tedana_interface,
                      joinsource='funcs_selectfiles',
                      joinfield='TE_niftis',
                      name='me_denoise')
me_denoise.inputs.TEs = TEs

# Keep only the first volume of each PE scan
trim_PEs = Node(Trim(end_index=1), name='trim_PEs')

# Gather the PE branches into a single 4D file for topup to calculate the fieldcoef
merge_interface = Merge(dimension='t', merged_file='merged_pes.nii.gz')
merge_pes = JoinNode(merge_interface,
                     joinsource='pes_selectfiles',
                     joinfield='in_files',
                     name='merge_pes')

# Estimate the field from the merged PE volumes
topup = Node(TOPUP(encoding_file=phase_encoding_file), name='topup')

# Apply the estimated field to the denoised functional series
apply_topup = Node(
    ApplyTOPUP(encoding_file=phase_encoding_file,
               in_index=[2],
               method='jac',
               out_corrected='func_unwarped.nii.gz'),
    name='apply_topup')

In [None]:
prepreprocflow = Workflow(name='prepreprocflow')

# Subject IDs drive both file grabbers
prepreprocflow.connect(infosource, 'subjid', pes_selectfiles, 'subjid')
prepreprocflow.connect(infosource, 'subjid', funcs_selectfiles, 'subjid')

# Per-echo branch: slice timing -> mcflirt -> multi-echo denoising
prepreprocflow.connect(funcs_selectfiles, 'func', slicetime, 'in_file')
prepreprocflow.connect(slicetime, 'slice_time_corrected_file', applyrealign, 'in_file')
prepreprocflow.connect(applyrealign, 'out_file', me_denoise, 'TE_niftis')

# Per-PE branch: trim -> merge -> topup estimation
prepreprocflow.connect(pes_selectfiles, 'PE_vol', trim_PEs, 'in_file')
prepreprocflow.connect(trim_PEs, 'out_file', merge_pes, 'in_files')
prepreprocflow.connect(merge_pes, 'merged_file', topup, 'in_file')

# Apply the estimated field to the denoised series
prepreprocflow.connect(topup, 'out_fieldcoef', apply_topup, 'in_topup_fieldcoef')
prepreprocflow.connect(topup, 'out_movpar', apply_topup, 'in_topup_movpar')
prepreprocflow.connect(me_denoise, 'denoised_func', apply_topup, 'in_files')

# Outputs kept in the datasink
prepreprocflow.connect(applyrealign, 'par_file', datasink, 'motion_file')
prepreprocflow.connect(apply_topup, 'out_corrected', datasink, 'unwarped_funcs')

prepreprocflow.base_dir = workflow_dir
# Optional: prepreprocflow.write_graph(graph2use='flat')
prepreprocflow.run(plugin='MultiProc',
                   plugin_args=dict(n_procs=8, memory_gb=40))

## Registration to common space and denoising

In [None]:
# File handling: outputs sunk by the first workflow plus the raw echo-2 series
preproc_template = dict(
    func=preproc_dir + '/unwarped_funcs/{subjid}/func_unwarped.nii.gz',
    motion=preproc_dir + '/motion_file/{subjid}/te_e2/sub-{subjid}_task-AHKJ-ep2-e2_bold_st_mcf.nii.gz.par',
    orig_file=raw_dir + '/sub-{subjid}/func/sub-{subjid}_task-AHKJ-ep2-e2_bold.nii.gz')
preproc_selectfiles = Node(SelectFiles(preproc_template),
                           name='preproc_selectfiles')

In [None]:
def fsid_convert(subject_id):
    """Return the FreeSurfer subject name: the string form of ``subject_id`` prefixed with 'C'."""
    return f'C{subject_id!s}'

# Workflow node wrapping fsid_convert: subject_id in, fs_subject out
fsid_interface = Function(function=fsid_convert,
                          input_names=['subject_id'],
                          output_names=['fs_subject'])
fsid = Node(fsid_interface, name='fsid')

def makenoisemat(motion_file,wmcsf_nifti):
    """Build the nuisance-regressor matrix and save it as ``noisemat.txt``.

    Columns, in order:
      1. lagged temporal derivatives of every motion parameter
         (6 lags per parameter: shifts of 0-5 volumes, zero-padded at the start),
      2. the same lags of the squared derivatives,
      3. the per-volume mean of the WM/CSF image over its three spatial axes,
      4. a constant column of ones.

    Parameters
    ----------
    motion_file : str
        Text file readable by ``numpy.loadtxt`` with one row per volume and
        one column per motion parameter.
    wmcsf_nifti : str
        4D image whose last axis is volumes; it is averaged over axes 0-2.

    Returns
    -------
    str
        Absolute path of the saved ``noisemat.txt``.

    Raises
    ------
    ValueError
        If the image and the motion file have different numbers of volumes.
    """
    from nipype import config, logging
    config.enable_debug_mode()
    logging.update_logging(config)
    from os.path import abspath
    import numpy as np
    from nibabel import load

    n_lags = 6  # shifts of 0..5 volumes for every parameter

    motion_params = np.loadtxt(motion_file, dtype=float)
    trs, params = motion_params.shape
    derivatives = np.gradient(motion_params, axis=0)
    derivativessq = derivatives**2

    leadlagderivs = np.zeros((trs, params*n_lags))
    leadlagderivssq = np.zeros((trs, params*n_lags))
    for i in range(params):
        for j in range(n_lags):
            # Each parameter owns n_lags consecutive columns. Indexing by
            # n_lags (not by the parameter count) keeps columns distinct
            # for any number of parameters.
            col = i*n_lags + j
            keep = max(trs - j, 0)
            # Shift the series down by j volumes; the first j rows stay zero.
            leadlagderivs[j:, col] = derivatives[:keep, i]
            leadlagderivssq[j:, col] = derivativessq[:keep, i]

    img = load(wmcsf_nifti)
    data = img.get_fdata()
    # Column vector so it can be stacked next to the 2D regressor blocks.
    wmcsf = np.mean(data, axis=(0,1,2)).reshape(-1, 1)
    if wmcsf.shape[0] != trs:
        raise ValueError('wmcsf image has {0} volumes but the motion file has {1} rows'
                         .format(wmcsf.shape[0], trs))

    # np.hstack takes ONE sequence of arrays; the intercept matches the
    # number of volumes instead of assuming a fixed run length.
    noise = np.hstack((leadlagderivs, leadlagderivssq, wmcsf, np.ones((trs, 1))))
    np.savetxt('noisemat.txt', noise)
    noise_mat = abspath('noisemat.txt')

    return(noise_mat)

def voxelwise_glm(func,shared_noise_file,mask):
    """Regress a shared nuisance matrix out of every voxel's time series.

    Parameters
    ----------
    func : image accepted by ``nilearn.masking.apply_mask``
        Functional series to clean.
    shared_noise_file : str
        Text file holding the design matrix (timepoints x regressors),
        read with ``numpy.loadtxt``.
    mask : image accepted by ``nilearn.masking.apply_mask``
        Mask used to extract the voxels and to rebuild the output images.

    Returns
    -------
    tuple of str
        ``(weights, residuals)``: absolute paths of ``weights.nii.gz``
        (regression coefficients) and ``residuals.nii.gz``.
    """
    from os.path import abspath
    import numpy as np
    from numpy.linalg import pinv
    from nilearn.masking import apply_mask, unmask

    # import data into an array that is timepoints (rows) by voxel number (columns)
    noise_mat = np.loadtxt(shared_noise_file, ndmin=2)
    func_data = apply_mask(func, mask)

    # The pseudo-inverse depends only on the design matrix, so it is computed
    # once and applied to all voxels in a single matrix product.
    inv_mat = pinv(noise_mat)
    coefficients = np.dot(inv_mat, func_data)
    resid_data = func_data - np.dot(noise_mat, coefficients)

    resid_image = unmask(resid_data, mask)
    resid_image.to_filename('residuals.nii.gz')

    coeff_image = unmask(coefficients, mask)
    coeff_image.to_filename('weights.nii.gz')

    weights = abspath('weights.nii.gz')
    residuals = abspath('residuals.nii.gz')

    return(weights, residuals)

def censor_interp(in_file,mask,fd_file,dvars_file,fd_threshold=0.25,dvars_z_threshold=2):
    """Flag high-motion volumes and replace them by interpolation.

    A volume is flagged when its FD value exceeds ``fd_threshold`` or when
    its z-scored DVARS value exceeds ``dvars_z_threshold``. Flagged volumes
    are set to NaN and filled with ``DataFrame.interpolate`` (both directions).

    Parameters
    ----------
    in_file : image accepted by ``nilearn.masking.apply_mask``
        Functional series to process.
    mask : image accepted by ``nilearn.masking.apply_mask``
        Mask used to extract voxels and to rebuild the output image.
    fd_file, dvars_file : str
        Text files with one metric value per volume.
    fd_threshold : float, optional
        FD cutoff (default 0.25, the original hard-coded value).
    dvars_z_threshold : float, optional
        Cutoff on z-scored DVARS (default 2, the original hard-coded value).

    Returns
    -------
    tuple of str
        ``(interpolated_func, censored_vols_file)``: absolute paths of
        ``interpolated_func.nii.gz`` and ``volstocensor.txt`` (one 0/1 flag
        per volume).
    """
    from os.path import abspath
    from nipype import config, logging
    config.enable_debug_mode()
    logging.update_logging(config)
    from nilearn.masking import apply_mask, unmask
    import numpy as np
    from pandas import DataFrame
    from scipy.stats import zscore

    func_data = apply_mask(in_file,mask)
    # The metrics are real-valued: reading them as integers would either fail
    # or truncate them, making the FD threshold meaningless.
    fd = np.loadtxt(fd_file, dtype=float)
    dvars = np.loadtxt(dvars_file, dtype=float)
    dvarsz = zscore(dvars)

    vols_to_censor = np.zeros(fd.shape, dtype=int)
    vols_to_censor[fd>fd_threshold] = 1
    vols_to_censor[dvarsz>dvars_z_threshold] = 1

    func_data[vols_to_censor==1] = np.nan
    # put func data into pandas dataframe to make interpolation easier/faster
    func_data_df = DataFrame(func_data)
    interp_func_df = func_data_df.interpolate(limit_direction='both')

    interp_func = interp_func_df.to_numpy()
    interp_func_img = unmask(interp_func, mask)
    interp_func_img.to_filename('interpolated_func.nii.gz')
    interpolated_func = abspath('interpolated_func.nii.gz')

    # Integer format so the flags can be read back with an integer dtype.
    np.savetxt('volstocensor.txt',vols_to_censor,fmt='%d')
    censored_vols_file = abspath('volstocensor.txt')

    return(interpolated_func, censored_vols_file)

def nan_high_motion_trs(in_file, mask, vols_to_censor):
    """Set every flagged volume of a functional series to NaN.

    Parameters
    ----------
    in_file : image accepted by ``nilearn.masking.apply_mask``
        Functional series to process.
    mask : image accepted by ``nilearn.masking.apply_mask``
        Mask used to extract voxels and to rebuild the output image.
    vols_to_censor : str
        Text file with one flag per volume; volumes whose flag equals 1
        are replaced by NaN.

    Returns
    -------
    str
        Absolute path of the saved ``lomo_func.nii.gz``.
    """
    from os.path import abspath
    from nipype import config, logging
    config.enable_debug_mode()
    logging.update_logging(config)
    from nilearn.masking import apply_mask, unmask
    import numpy as np

    # Read as float and compare: this works whether the flags were written as
    # '1' or '1.000000e+00' (integer parsing of the latter is deprecated in
    # recent NumPy). atleast_1d keeps a single-volume file indexable.
    censor_flags = np.atleast_1d(np.loadtxt(vols_to_censor)) == 1
    func_data = apply_mask(in_file,mask)
    func_data[censor_flags] = np.nan

    lomo_image = unmask(func_data,mask)
    lomo_image.to_filename('lomo_func.nii.gz')
    out_file = abspath('lomo_func.nii.gz')

    return(out_file)

In [None]:
## Anatomical processing
# Convert the input image to compressed NIfTI with 2x2x2 voxels
resample = Node(
    MRIConvert(vox_size=(2, 2, 2), out_type='niigz'),
    name='resample')

# Put the anatomical image into the standard orientation
reorientanat = Node(Reorient2Std(terminal_output='file'), name='reorientanat')

## fMRI Data processing nodes
# Put the functional image into the standard orientation
reorientfunc = Node(Reorient2Std(terminal_output='file'), name='reorientfunc')

# Coregistration using flirt: the first node estimates the transform,
# the second applies a supplied matrix and writes preproc_func.nii.gz
coregflt = Node(FLIRT(), name='coregflt')
coregflt2 = Node(
    FLIRT(out_file='preproc_func.nii.gz', apply_xfm=True),
    name='coregflt2')

# Registration to the sample template: estimate, then apply the matrix
# and write proc_func.nii.gz
reg2mni = Node(FLIRT(reference=template_brain), name='reg2mni')
reg2mni2 = Node(
    FLIRT(reference=template_brain,
          out_file='proc_func.nii.gz',
          apply_xfm=True),
    name='reg2mni2')

In [None]:
## motion and artifact denoising

# compute DVARS (metric values and plot are written to fixed file names)
calc_dvars = Node(MotionOutliers(metric='dvars',out_metric_values='dvars.txt',
                                 out_metric_plot='dvars_plot.png'),name='calc_dvars')

# compute FD
calc_fd = Node(MotionOutliers(metric='fd',out_metric_values='fd.txt',
                              out_metric_plot='fd_plot.png'),name='calc_fd')

# restrict the functional data to the WM/CSF mask for the noise regressor
mask_wmcsf = Node(ApplyMask(mask_file=wmcsf_mask), name='mask_wmcsf')

# build lagged motion-derivative regressors and compile the noise matrix
# (the node keeps its original name 'prep_motion')
prep_noise = Node(Function(input_names=['motion_file','wmcsf_nifti'], 
                           output_names=['noise_mat'],
                           function=makenoisemat), 
                  name='prep_motion')

# Denoise data: voxelwise regression of the noise matrix
denoise_func = Node(Function(input_names=['func','shared_noise_file','mask'], 
                             output_names=['weights','residuals'],
                             function=voxelwise_glm),
                       name='denoise_func')
denoise_func.inputs.mask = template_mask_dilated

# Censor and interpolate data
censor_interpolate = Node(Function(input_names=['in_file','mask','fd_file','dvars_file'],
                                   output_names=['interpolated_func','censored_vols_file'], 
                                   function=censor_interp), 
                          name='censor_interpolate')
# gm_mask is the gray-matter template mask defined with the study variables;
# the name previously used here (template_gmmask) does not exist.
censor_interpolate.inputs.mask = gm_mask

# band pass filtering - cutoffs come from highpass_freq / lowpass_freq
bandpass = Node(Bandpass(tr=TR, highpass=highpass_freq,
                         lowpass=lowpass_freq, 
                         out_file='resids_filt.nii.gz', 
                         normalize=True), 
                name='bandpass')

# NaN the interpolated (censored) volumes after filtering.
# The wrapped function is nan_high_motion_trs; the name previously used
# here (drop_high_motion_trs) is not defined anywhere in this notebook.
nan_himo = Node(Function(input_names=['in_file','mask','vols_to_censor'], 
                          output_names=['out_file'], 
                          function=nan_high_motion_trs), 
                 name='nan_himo')
nan_himo.inputs.mask = gm_mask

In [None]:
# Data QC nodes
motion_summary_dir = preproc_dir + '/motion_summary'
motion_df_file = motion_summary_dir + '/motionSummary.csv'

# When the summary folder does not exist yet, create it and seed an empty table
if not isdir(motion_summary_dir):
    makedirs(motion_summary_dir)
    summary_columns = ['NumCensoredVols','percentCensored','secondsNotCensored','rawFD','censoredFD']
    motion_df = DataFrame(columns=summary_columns)
    motion_df.to_csv(motion_df_file)
    
def summarize_motion(motion_df_file, vols_to_censor, FD, TR,subject):
    """Add (or overwrite) one subject's row in the motion summary CSV.

    Parameters
    ----------
    motion_df_file : str
        CSV file read with its first column as the index and rewritten in place.
    vols_to_censor : str
        Text file with one flag per volume; a value of 1 marks a censored volume.
    FD : str
        Text file with one framewise-displacement value per volume.
    TR : float
        Repetition time, used to convert retained volumes into seconds.
    subject : hashable
        Row label for this subject.

    Returns
    -------
    tuple
        Always the empty tuple; the result is the updated CSV.

    Row contents, in column order: number of censored volumes, fraction of
    volumes censored, seconds of retained data, mean FD over all volumes,
    mean FD over retained volumes.
    """
    # NOTE(review): the CSV is read and rewritten without any locking; if
    # several subjects run this at the same time, rows may be lost - confirm.
    from nipype import config, logging
    config.enable_debug_mode()
    logging.update_logging(config)
    import numpy as np
    from pandas import read_csv

    motion_df = read_csv(motion_df_file, index_col=0)
    # Read flags as float and compare, so both '1' and '1.000000e+00' parse.
    censored = np.atleast_1d(np.loadtxt(vols_to_censor)) == 1
    fd = np.atleast_1d(np.loadtxt(FD))

    n_vols = len(censored)
    n_censored = int(censored.sum())
    fd_censored = fd[~censored]
    # Retained time is the number of kept volumes times TR.
    sec_not_censored = len(fd_censored)*TR

    motion_df.loc[subject] = [n_censored, n_censored/n_vols, sec_not_censored,
                              np.mean(fd), np.mean(fd_censored)]
    motion_df.to_csv(motion_df_file)
    return()

# Node wrapping summarize_motion. 'subject' must be listed as an input:
# the function requires it and the workflow connects the subject ID to it.
motion_summary = Node(Function(input_names=['motion_df_file','vols_to_censor','FD','TR','subject'], 
                               output_names=[], 
                               function=summarize_motion), 
                      name='motion_summary')
# Static inputs; vols_to_censor, FD and subject are supplied by workflow connections
motion_summary.inputs.motion_df_file = motion_df_file
motion_summary.inputs.TR = TR

In [None]:
preprocflow = Workflow(name='preprocflow')

# Only nodes defined in this notebook are wired. Connections to binarize,
# gunzip_mask, gunzip_func, select_sub_files and drop_himo were removed
# because those names are never defined; the final censoring node is nan_himo.
preprocflow.connect([
    # per-subject inputs
    (infosource, preproc_selectfiles, [('subjid','subjid')]),
    (infosource, motion_summary, [('subjid','subject')]),
    (infosource, fsid, [('subjid','subject_id')]),
    (fsid, fssource, [('fs_subject','subject_id')]),

    # anatomical preparation
    (fssource, resample, [('brainmask','in_file')]),
    (resample, reorientanat, [('out_file','in_file')]),

    # functional -> anatomical coregistration.
    # NOTE(review): the original fed coregflt from realignmc, which receives no
    # input in this workflow, while reorientfunc's output went unused; the
    # reoriented unwarped series is used here instead - confirm this is intended.
    (preproc_selectfiles, reorientfunc, [('func','in_file')]),
    (reorientfunc, coregflt, [('out_file','in_file')]),
    (reorientfunc, coregflt2, [('out_file','in_file')]),
    (reorientanat, coregflt, [('out_file','reference')]),
    (reorientanat, coregflt2, [('out_file','reference')]),
    (coregflt, coregflt2, [('out_matrix_file','in_matrix_file')]),

    # registration to the sample template
    (coregflt, reg2mni, [('out_file','in_file')]),
    (coregflt2, reg2mni2, [('out_file','in_file')]),
    (reg2mni, reg2mni2, [('out_matrix_file','in_matrix_file')]),

    # motion metrics from the raw echo-2 series
    (preproc_selectfiles, calc_dvars, [('orig_file','in_file')]),
    (preproc_selectfiles, calc_fd, [('orig_file','in_file')]),
    (calc_dvars, censor_interpolate, [('out_metric_values','dvars_file')]),
    (calc_fd, censor_interpolate, [('out_metric_values','fd_file')]),

    # QC summary needs the FD values and the censoring flags
    (calc_fd, motion_summary, [('out_metric_values','FD')]),
    (censor_interpolate, motion_summary, [('censored_vols_file','vols_to_censor')]),

    # nuisance regression.
    # NOTE(review): denoise_func now receives the template-space series
    # (reg2mni2) rather than the native-space file, because its mask and the
    # WM/CSF regressor are both template-space - confirm.
    (reg2mni2, mask_wmcsf, [('out_file','in_file')]),
    (mask_wmcsf, prep_noise, [('out_file','wmcsf_nifti')]),
    (preproc_selectfiles, prep_noise, [('motion','motion_file')]),
    (prep_noise, denoise_func, [('noise_mat','shared_noise_file')]),
    (reg2mni2, denoise_func, [('out_file','func')]),

    # censor/interpolate -> band-pass -> NaN the censored volumes
    (denoise_func, censor_interpolate, [('residuals','in_file')]),
    (censor_interpolate, bandpass, [('interpolated_func','in_file')]),
    (bandpass, nan_himo, [('out_file','in_file')]),
    (censor_interpolate, nan_himo, [('censored_vols_file','vols_to_censor')]),

    # outputs (the missing comma before the reg2mni entry is restored)
    (nan_himo, datasink, [('out_file','fully_processed_func')]),
    (denoise_func, datasink, [('weights','denoising_weights'),
                              ('residuals','orig_denoised_func')]),
    (reg2mni, datasink, [('out_file','reoriented_anat')]),
    (reg2mni2, datasink, [('out_file','mnireg_func')])
])
preprocflow.base_dir = workflow_dir
preprocflow.write_graph(graph2use='flat')
preprocflow.run('MultiProc', plugin_args={'n_procs': 8, 'memory_gb':32})