# Infant resting state fMRI preprocessing
This notebook contains preprocessing tailored to infant resting state fMRI collected in 5-8 month olds. 

The processing steps for the fMRI broadly include:
* Slice-time correction
* Rigid realignment
* Co-registration to the sMRI (T2-weighted structural MRI)
* Co-registration to template
* De-noising to remove:
    - Mean timeseries for that voxel
    - Component noise associated with white matter and CSF (computed by removing the GM and smoothing what is left)
    - Motion regressors
    - Motion derivatives (lagged 6 times)
    - Squared derivatives (lagged 6 times), as an exploratory step
* Bandpass filtering

In [29]:
#import packages
from os import listdir, makedirs
from os.path import isdir
from nipype.interfaces.io import DataSink, SelectFiles, DataGrabber # Data i/o
from nipype.interfaces.utility import IdentityInterface, Function     # utility
from nipype.pipeline.engine import Node, Workflow, MapNode, JoinNode        # pypeline engine
from nipype.interfaces.nipy.preprocess import Trim
from nipype.interfaces.ants import N4BiasFieldCorrection
from nipype.interfaces.fsl import SliceTimer, MCFLIRT, FLIRT, SUSAN, BET
from nipype.interfaces.fsl.utils import Reorient2Std, MotionOutliers
from nipype.interfaces.fsl.model import GLM
from nipype.interfaces.fsl.maths import ApplyMask, MeanImage
from nipype.interfaces.freesurfer import Resample, Binarize
from nipype.algorithms.confounds import CompCor
from pandas import DataFrame, Series

# Set the default output file type for all FSL interfaces to gzip-compressed NIfTI (NIFTI_GZ)
from nipype.interfaces.fsl.preprocess import FSLCommand
FSLCommand.set_default_output_type('NIFTI_GZ')

# Study variables: every path below is built from the study home directory
studyhome = '/Users/catcamacho/Box/SNAP/BABIES'
raw_data = '{0}/BABIES_rest/raw'.format(studyhome)
output_dir = '{0}/BABIES_rest/processed/preproc'.format(studyhome)
workflow_dir = '{0}/BABIES_rest/workflows'.format(studyhome)

# Subjects to process (alternatively, read one ID per line from a text file):
#subjects_list = open(studyhome + '/rest_misc/subjects.txt').read().splitlines()
subjects_list = [
    '0002x',
    '0010',
    '0020',
    '0023',
    '0027',
    '0032',
    '0033x',
]

# Template images (6 month T2w, 2mm)
template_brain = '{0}/templates/6mo_T2w_template_2mm.nii.gz'.format(studyhome)
template_mask = '{0}/templates/6mo_T2w_template_2mm_mask.nii.gz'.format(studyhome)
# need to update mask
template_gmmask = '{0}/templates/6mo_T2w_template_2mm_gm.nii.gz'.format(studyhome)

# Number of cores of processing for the workflows
proc_cores = 2

# Acquisition and processing parameters
vols_to_trim = 4
interleave = False
TR = 2.5                          # in seconds
slice_dir = 3                     # 1=x, 2=y, 3=z
resampled_voxel_size = (2, 2, 2)
fwhm = 4                          # fwhm for smoothing with SUSAN

# Mask erosion/dilation settings
mask_erosion = 1
mask_dilation = 1

In [30]:
## File handling Nodes

# Identity node: iterating over 'subject_id' runs the workflow once per subject
infosource = Node(IdentityInterface(fields=['subject_id']), name='infosource')
infosource.iterables = ('subject_id', subjects_list)

# Datasink: where the selected outputs are written. The substitution removes
# the '_subject_id_' string from the output paths.
substitutions = [('_subject_id_', '')]
datasink = Node(DataSink(base_directory=output_dir,
                         container=output_dir,
                         substitutions=substitutions),
                name='datasink')

## Preprocess T2w anatomical images
These nodes and the workflow (anat_preprocflow) perform N4 bias correction and skull-stripping.

In [None]:
## File handling nodes

# Label used in the output file names of this workflow.
# NOTE(review): `anat_type` was referenced below without being defined anywhere
# in this notebook (NameError); 'T2w' matches the file pattern grabbed here --
# confirm this is the intended label.
anat_type = 'T2w'

# Grab each subject's raw anatomical image(s). The subject ID is passed twice
# because it fills both the folder name and the file-name prefix in the template.
template={'anat': raw_data + '/%s/%s_T2w*.nii.gz'}
selectfiles = Node(DataGrabber(sort_filelist=True,
                               template = raw_data + '/%s/%s_T2w*.nii.gz',
                               field_template = template,
                               base_directory=raw_data,
                               infields=['subject_id','subject_id2'],
                               template_args={'anat':[['subject_id','subject_id2']]}),
                   name='selectfiles')

# N4 bias field correction of the 3D anatomical image (ANTs)
n4biascorr = Node(N4BiasFieldCorrection(dimension=3,
                                        output_image='{0}_nucorrect.nii.gz'.format(anat_type)),
                  name='n4biascorr')

# Skull-strip the bias-corrected image (FSL BET)
skullstrip = Node(BET(out_file='{0}_nucorrect_strip.nii.gz'.format(anat_type)), name='skullstrip')

In [None]:
# Anatomical preprocessing workflow: grab the raw anatomical image, bias-correct
# it, skull-strip it, and save both intermediate and final images.
anat_preprocflow = Workflow(name='anat_preprocflow')
anat_preprocflow.connect([# the subject ID fills both placeholders of the grabber template
                          (infosource,selectfiles, [('subject_id','subject_id'),
                                                    ('subject_id','subject_id2')]),
                          (selectfiles, n4biascorr, [('anat','input_image')]),
                          (n4biascorr, skullstrip, [('output_image','in_file')]),

                          (n4biascorr, datasink, [('output_image','nu_corrected_anat')]),
                          (skullstrip, datasink, [('out_file','skullstripped_anat')])
                         ])

anat_preprocflow.base_dir = workflow_dir
#anat_preprocflow.write_graph(graph2use='flat')
# Use the shared `proc_cores` setting (as the other workflows do) rather than a
# separately hard-coded core count
anat_preprocflow.run('MultiProc', plugin_args={'n_procs': proc_cores, 'memory_gb':10})

## Preprocess fMRI resting state data
These nodes and the workflow (preprocflow) perform basic preprocessing to align the functional volumes into a common space.
1. Reorient images to standard space
2. Reslice the structural image to 2mm isotropic
3. Functional image slice time correction
4. Rigid realignment to first volume of functional image
5. Coregistration of functional images to structural image
6. Coregistration of functional images to template image
7. Trim first 4 volumes of the functional images to remove pre-steady-state images

In [31]:
## File handling Nodes

# Select each subject's raw structural image (sMRI).
# The node name now matches the variable name (it was 'selectfiles'), which is
# also how the structural selector is named later in this notebook.
anat_template = {'struct': raw_data + '/{subject_id}/t2w_anat.nii.gz'}
selectanat = Node(SelectFiles(anat_template), name='selectanat')

# Data grabber- select every raw resting state run (rest*.nii.gz) of a subject
func_template = {'func':raw_data + '/%s/rest*.nii.gz'}
selectfunc = Node(DataGrabber(sort_filelist=True,
                              template = raw_data + '/%s/rest*.nii.gz',
                              field_template = func_template,
                              base_directory=raw_data,
                              infields=['subject_id'],
                              template_args={'func':[['subject_id']]}), name='selectfunc')

In [32]:
## Nodes for preprocessing

# Reorient to standard space using FSL
reorientfunc = MapNode(Reorient2Std(), name='reorientfunc', iterfield=['in_file'])
reorientstruct = Node(Reorient2Std(), name='reorientstruct')

# Reslice the structural image to `resampled_voxel_size` - using MRI_convert
reslice_struct = Node(Resample(voxel_size=resampled_voxel_size), name='reslice_struct')

# Slice timing correction using FSL. The acquisition order, slice axis and TR
# come from the study variables `interleave`, `slice_dir` and `TR`.
slicetime_correct = MapNode(SliceTimer(interleaved=interleave,
                                       slice_direction=slice_dir,
                                       time_repetition=TR),
                            name='slicetime_correct', iterfield=['in_file'])
# Rigid realignment; save_plots=True makes MCFLIRT write the motion parameters
realign = MapNode(MCFLIRT(save_plots=True), name='realign', iterfield=['in_file'])

# Registration- using FLIRT
# The BOLD image is 'in_file', the anat is 'reference', the output is 'out_file'
# firstvol keeps only the first volume of each run; coreg1 estimates the
# transform from that volume and coreg2 applies it to the full run.
firstvol = MapNode(Trim(end_index=1), name='firstvol',iterfield=['in_file'])
coreg1 = MapNode(FLIRT(), name='coreg1', iterfield=['in_file'])
coreg2 = MapNode(FLIRT(apply_xfm=True), name='coreg2', iterfield=['in_file','in_matrix_file'])

# Registration of the structural image to the template
register_template = Node(FLIRT(reference=template_brain,
                               out_file='preproc_anat.nii.gz'),
                         name='register_template')

# Apply a precomputed transform to move the functional runs into template space
xfmFUNC = MapNode(FLIRT(reference=template_brain,apply_xfm=True),
                  name='xfmFUNC', iterfield=['in_file'])

# Drop the first `vols_to_trim` volumes (pre-steady-state images); this uses the
# study variable instead of repeating its value as a literal
trim = MapNode(Trim(begin_index=vols_to_trim), name='trim', iterfield=['in_file'])

In [None]:
## Preprocessing Workflow

preprocflow = Workflow(name='preprocflow')

# Structural stream: select -> reorient -> reslice, then feed the resliced
# image to both coregistration steps and to the template registration
preprocflow.connect([
    (infosource, selectanat, [('subject_id', 'subject_id')]),
    (selectanat, reorientstruct, [('struct', 'in_file')]),
    (reorientstruct, reslice_struct, [('out_file', 'in_file')]),
    (reslice_struct, coreg1, [('resampled_file', 'reference')]),
    (reslice_struct, coreg2, [('resampled_file', 'reference')]),
    (reslice_struct, register_template, [('resampled_file', 'in_file')]),
])

# Functional stream: select -> reorient -> slice timing -> realign ->
# coregister to the structural image -> transform to template -> trim
preprocflow.connect([
    (infosource, selectfunc, [('subject_id', 'subject_id')]),
    (selectfunc, reorientfunc, [('func', 'in_file')]),
    (reorientfunc, slicetime_correct, [('out_file', 'in_file')]),
    (slicetime_correct, realign, [('slice_time_corrected_file', 'in_file')]),
    (realign, firstvol, [('out_file', 'in_file')]),
    (firstvol, coreg1, [('out_file', 'in_file')]),
    (realign, coreg2, [('out_file', 'in_file')]),
    (coreg1, coreg2, [('out_matrix_file', 'in_matrix_file')]),
    (register_template, xfmFUNC, [('out_matrix_file', 'in_matrix_file')]),
    (coreg2, xfmFUNC, [('out_file', 'in_file')]),
    (xfmFUNC, trim, [('out_file', 'in_file')]),
])

# Outputs kept in the datasink
preprocflow.connect([
    (realign, datasink, [('par_file', 'motion_parameters')]),
    (register_template, datasink, [('out_file', 'proc_struct')]),
    (trim, datasink, [('out_file', 'registered_func')]),
])

preprocflow.base_dir = workflow_dir
#preprocflow.write_graph(graph2use='flat')
preprocflow.run('MultiProc', plugin_args={'n_procs': proc_cores})

## Create Nuisance Regressors
These nodes and the workflow create both the subject-specific and general nuisance regressors needed for preprocessing the rest data per the process developed by David Montez. 

In [None]:
# Data grabber- select the registered fMRI of every subject at once
funcs_template = {'funcs':output_dir + '/registered_func/*/*/rest*.nii.gz'}
selectallfunc = Node(DataGrabber(sort_filelist=True,
                              template = output_dir + '/registered_func/*/*/rest*.nii.gz',
                              field_template = funcs_template,
                              base_directory=output_dir,), name='selectallfunc')

# Data grabber- select the registered fMRI of one subject
func_template = {'func':output_dir + '/registered_func/%s/*/rest*.nii.gz'}
selectfunc = Node(DataGrabber(sort_filelist=True,
                              template = output_dir + '/registered_func/%s/*/rest*.nii.gz',
                              field_template = func_template,
                              base_directory=output_dir,
                              infields=['subject_id'],
                              template_args={'func':[['subject_id']]}), name='selectfunc')

# select motion params
# The template_args key must match the field_template key ('motion'); it was
# 'func', which left the 'motion' field template unused and named the output 'func'.
mot_template={'motion':output_dir + '/motion_parameters/%s/*/rest_reoriented_st_mcf.nii.gz.par'}
select_motion = Node(DataGrabber(sort_filelist=True,
                              template = output_dir + '/motion_parameters/%s/*/rest_reoriented_st_mcf.nii.gz.par',
                              field_template = mot_template,
                              base_directory=output_dir,
                              infields=['subject_id'],
                              template_args={'motion':[['subject_id']]}), name='select_motion')

# select the preprocessed sMRI of one subject
struct_template = {'anat': output_dir + '/proc_struct/{subject_id}/preproc_anat.nii.gz'}
selectanat = Node(SelectFiles(struct_template), name='selectanat')

In [None]:
def multi_image_mean(in_files):
    '''Average several 4D images timepoint-by-timepoint.

    The acquisitions do not need to be exactly the same length in the t
    direction (4th dimension): each timepoint is averaged over the images that
    actually contain it, so shorter acquisitions simply stop contributing past
    their last volume.

    Parameters
    ----------
    in_files : list of str
        Paths of the 4D images to average. All images must share the same
        spatial (first three) dimensions.

    Returns
    -------
    mean_file : str
        Absolute path of 'mean_func.nii.gz', written to the current working
        directory. Its header and affine are taken from the first image that
        has the largest number of timepoints.

    Raises
    ------
    ValueError
        If `in_files` is empty.
    '''
    # Imports live inside the function because its source is run stand-alone
    # by the nipype Function interface.
    from nipype import config, logging
    config.enable_debug_mode()
    logging.update_logging(config)
    from os.path import abspath
    from nibabel import load, save, Nifti1Image
    import numpy as np

    if not in_files:
        raise ValueError('multi_image_mean needs at least one input image')

    images = [load(f) for f in in_files]
    lengths = [img.shape[3] for img in images]
    longest = max(lengths)
    # First image with the maximum length provides the output geometry
    reference = images[lengths.index(longest)]

    # Accumulate a running sum plus a per-timepoint count instead of stacking
    # every acquisition into one 5D array: lower memory, and the padding that
    # unequal lengths would require is never needed.
    total = np.zeros(reference.shape[:3] + (longest,), dtype=float)
    counts = np.zeros(longest, dtype=int)
    for img, n_vols in zip(images, lengths):
        total[..., :n_vols] += img.get_fdata()
        counts[:n_vols] += 1

    # Every timepoint is covered by at least the longest image, so counts >= 1
    mean_data = total / counts
    mean_img = Nifti1Image(mean_data, header=reference.header, affine=reference.affine)

    save(mean_img,'mean_func.nii.gz')
    mean_file = abspath('mean_func.nii.gz')
    return(mean_file)



In [None]:
# Average the timeseries across the whole sample: the JoinNode gathers the
# inputs from every iteration of the 'infosource' node into `in_files` and
# passes them to multi_image_mean, which returns the path of the mean image.
avg_sample = JoinNode(Function(input_names=['in_files'], 
                              output_names=['mean_file'], 
                              function=multi_image_mean), 
                     name='avg_sample', 
                     joinfield=['in_files'], 
                      joinsource='infosource')

# Get principal components of scanner noise from the sample mean image.
# NOTE(review): `afni3DmaskSVD` is not defined in the visible part of this
# notebook -- it must be defined before this cell can run.
scanner_noise = Node(Function(input_names=['mean_file'],
                              output_names=['component_noise'],
                              function=afni3DmaskSVD),
                     name='scanner_noise')


In [None]:
# Denoise

# Remove scanner artifact with an FSL GLM; the residuals are written to
# 'denoised_residuals.nii.gz'.
denoise = Node(GLM(out_res_name='denoised_residuals.nii.gz', 
                   out_data_name='denoised_func.nii.gz'), 
               name='denoise')
# NOTE(review): `scanner_components` is not defined in the visible part of this
# notebook -- presumably the design file produced from the scanner noise
# components; confirm where it is set.
denoise.inputs.design = scanner_components

# Restrict the result to the template brain mask
mask_func = Node(ApplyMask(mask_file=template_mask), 
                 name='mask_func')

In [None]:
# Denoising workflow: run the GLM on each subject's registered functional data,
# mask the residuals with the template mask, and save the result.
# The functional data come from `selectfunc`, the grabber that exposes a 'func'
# output; `selectfiles` (previously wired here) only exposes 'anat', so the
# connection could not be made.
# NOTE(review): `denoise` is a plain Node -- confirm each subject yields a
# single functional file, otherwise it needs to become a MapNode.
denoise_flow = Workflow(name='denoise_flow')
denoise_flow.connect([(infosource, selectfunc,[('subject_id','subject_id')]),
                      (selectfunc, denoise, [('func','in_file')]),
                      (denoise, mask_func,[('out_res','in_file')]),

                      (mask_func, datasink, [('out_file','denoised_func')])
                     ])
denoise_flow.base_dir = workflow_dir
denoise_flow.write_graph(graph2use='flat')
denoise_flow.run('MultiProc', plugin_args={'n_procs': proc_cores})

In [None]:
# Identity node: iterating over 'subject_id' runs the workflow once per subject
infosource = Node(IdentityInterface(fields=['subject_id']), name='infosource')
infosource.iterables = ('subject_id', subjects_list)

# File selector: the final denoised functional image of each subject
templates = {'denoised_func': output_dir + '/denoised_func/{subject_id}/denoised_func_final.nii.gz'}
selectfiles = Node(SelectFiles(templates), name='selectfiles')

# Datasink: where the selected outputs are written. The substitution removes
# the '_subject_id_' string from the output paths.
substitutions = [('_subject_id_', '')]
datasink = Node(DataSink(base_directory=output_dir,
                         container=output_dir,
                         substitutions=substitutions),
                name='datasink')

In [None]:
# Data QC nodes
def create_coreg_plot(epi,anat):
    '''Save a QC figure of the functional image with the anatomical edges overlaid.

    The figure is written as 'coregistration.png' in the current working
    directory and its absolute path is returned.
    '''
    from os.path import abspath
    from nilearn import plotting
    from nipype import config, logging
    config.enable_debug_mode()
    logging.update_logging(config)

    png_name = 'coregistration.png'
    figure = plotting.plot_anat(epi,
                                display_mode='ortho',
                                draw_cross=False,
                                title='coregistration to anatomy')
    # Overlay the outline of the anatomical image on the functional image
    figure.add_edges(anat)
    figure.savefig(png_name)
    figure.close()

    return abspath(png_name)

def check_mask_coverage(epi,brainmask):
    '''Save a QC figure of the functional image with the brain mask outlined in red.

    The figure is written as 'maskcheck.png' in the current working directory
    and its absolute path is returned.
    '''
    from os.path import abspath
    from nilearn import plotting
    from nipype import config, logging
    config.enable_debug_mode()
    logging.update_logging(config)

    png_name = 'maskcheck.png'
    figure = plotting.plot_anat(epi,
                                display_mode='ortho',
                                draw_cross=False,
                                title='brainmask coverage')
    # Draw the mask boundary as the 0.5 contour level
    figure.add_contours(brainmask, levels=[.5], colors='r')
    figure.savefig(png_name)
    figure.close()

    return abspath(png_name)

# Node wrapping create_coreg_plot (functional image + anatomical edges figure)
make_coreg_img = Node(Function(input_names=['epi','anat'],
                               output_names=['coreg_file'],
                               function=create_coreg_plot),
                      name='make_coreg_img')

# Node wrapping check_mask_coverage (functional image + brain mask contour figure)
make_checkmask_img = Node(Function(input_names=['epi','brainmask'],
                                   output_names=['maskcheck_file'],
                                   function=check_mask_coverage),
                          name='make_checkmask_img')



In [None]:
## Pull motion info for all subjects

# CSV that accumulates one row of motion summary metrics per subject
motion_df_file = output_dir + '/motion_summary/motionSummary.csv'

# Make sure both the folder and the (initially empty) summary table exist.
# The table is created whenever the CSV itself is missing, so a folder that
# already exists without the CSV no longer leaves the table uncreated.
from os.path import isfile
makedirs(output_dir + '/motion_summary', exist_ok=True)
if not isfile(motion_df_file):
    motion_df = DataFrame(columns=['meanFD','maxFD','NumCensoredVols'])
    motion_df.to_csv(motion_df_file)

def summarize_motion(motion_df_file, motion_file, vols_to_censor):
    '''Add (or overwrite) one subject's row in the motion summary CSV.

    Parameters
    ----------
    motion_df_file : str
        Path of the summary CSV; it is read, updated and written back in place.
    motion_file : str
        Text file with one numeric motion value per line. The name of the
        folder containing this file is used as the row label (subject).
    vols_to_censor : str
        Text file with one line per censored volume; only the number of lines
        is used.

    Returns
    -------
    tuple
        Always an empty tuple; the result of interest is the updated CSV.
    '''
    # Imports live inside the function because its source is run stand-alone
    # by the nipype Function interface.
    from nipype import config, logging
    config.enable_debug_mode()
    logging.update_logging(config)
    from os.path import dirname, basename
    from numpy import asarray, mean
    from pandas import read_csv

    motion_df = read_csv(motion_df_file, index_col=0)

    # Context managers make sure both files are closed after reading
    with open(motion_file) as motion_fh:
        motion = asarray(motion_fh.read().splitlines()).astype(float)
    with open(vols_to_censor) as censor_fh:
        censvols = censor_fh.read().splitlines()

    subject = basename(dirname(motion_file))

    # Columns: mean value, max value, number of censored volumes
    motion_df.loc[subject] = [mean(motion),max(motion),len(censvols)]
    # NOTE(review): this read-modify-write of a shared CSV is not guarded; if
    # several subjects run in parallel, rows could be lost -- confirm.
    motion_df.to_csv(motion_df_file)

    return()

# Make a list of tissues for component noise removal
def combine_masks(mask1,mask2):
    '''Return the two masks as a two-element list, in the order given.'''
    from nipype import config, logging
    from nipype.interfaces.fsl.utils import Merge
    from os.path import abspath
    config.enable_debug_mode()
    logging.update_logging(config)

    return [mask1, mask2]
    
# Remove all noise (GLM with noise params)
def create_noise_matrix(vols_to_censor,motion_params,comp_noise):
    '''Assemble the nuisance regressor matrix and save it as a text file.

    The matrix has one row per volume and its columns are, in order: the
    motion values, the noise components, and one scrubbing column per censored
    volume (all zeros except a 1 at the censored volume's row).

    Parameters
    ----------
    vols_to_censor : str
        Text file listing the (0-based) indices of the volumes to censor. May
        hold no, one or several indices.
    motion_params : str
        Space-delimited text file of motion values, one row per volume.
    comp_noise : str
        Tab-delimited text file of noise components with one header line and
        one row per volume. May hold one or several component columns.

    Returns
    -------
    noise_filepath : str
        Absolute path of 'noise_matrix.txt' (tab-delimited), written to the
        current working directory.
    '''
    from numpy import arange, atleast_1d, concatenate, genfromtxt, savetxt, zeros
    from os import path

    motion = atleast_1d(genfromtxt(motion_params, delimiter=' ', dtype=None, skip_header=0))
    components = atleast_1d(genfromtxt(comp_noise, delimiter='\t', dtype=None, skip_header=1))
    # A file with a single index is parsed as a 0-d array; atleast_1d keeps the
    # single-volume case from breaking the length check and the indexing below.
    censor_vol_list = atleast_1d(genfromtxt(vols_to_censor, delimiter='\t', dtype=None, skip_header=0))

    # Single-column files are parsed as 1-D arrays; turn them into columns so
    # every block is 2-D (volumes x regressors) before concatenation.
    if motion.ndim == 1:
        motion = motion[:, None]
    if components.ndim == 1:
        components = components[:, None]

    n_vols = len(components)
    n_censored = censor_vol_list.size

    blocks = [motion, components]
    if n_censored > 0:
        # One indicator column per censored volume
        scrubbing = zeros((n_vols, n_censored), dtype=int)
        scrubbing[censor_vol_list.astype(int), arange(n_censored)] = 1
        blocks.append(scrubbing)
    noise_matrix = concatenate(blocks, axis=1)

    noise_file = 'noise_matrix.txt'
    savetxt(noise_file, noise_matrix, delimiter='\t')
    noise_filepath = path.abspath(noise_file)

    return(noise_filepath)

