# Mega-Meta Functional Connectivity Pipeline

_________
#### Description
Extracts signal from the Power ROI spheres (264) for a given task condition, as defined by a model spec file, and creates a condition-specific adjacency matrix.

In [1]:
import numpy as np
import pandas as pd
import os,glob,sys,pickle,json

from IPython.display import Image


# NIPYPE FUNCTIONS
import nipype.interfaces.io as nio           # Data i/o
from nipype.interfaces.utility import IdentityInterface, Function    # utility
from nipype.pipeline.engine import Node
from nipype.pipeline.engine.workflows import Workflow


from nilearn import image, plotting, input_data
from nilearn.connectome import ConnectivityMeasure
%matplotlib inline

  from ._conv import register_converters as _register_converters
  return f(*args, **kwds)


# Nipype Setup

1. Infosource for iterating over subjects
2. create subject information structure
3. process confounds
4. subset TR
5. process signal
6. correlation pairwise
7. save function


### Infosource for iterating subjects

In [3]:
# set up infosource
# IdentityInterface node with a single field, `subject_id`; it does no
# processing of its own and only serves as the entry point of the workflow.
infoSource = Node(IdentityInterface(fields = ['subject_id']),
                 name = 'infosource')

# Iterate the node over the entries of SUBJECT_LIST (one value of `subject_id`
# per entry).
# NOTE(review): SUBJECT_LIST is not defined in any visible cell (the recorded
# cell output is a NameError) -- it must be defined before this cell is run.
infoSource.iterables = [('subject_id',SUBJECT_LIST)]


NameError: name 'SUBJECT_LIST' is not defined

### Get subject information
This function finds those runs for a given subject that are complete (e.g. have motion, events and functional data). The function then creates the `subject_str` which is a modified `model_str` with subject specific information.

In [4]:
def get_subject_info(subject_id,model_str):
    """
    Build a subject-specific copy of the model structure.

    The model is restricted to the banner task, and only those banner runs
    for which the functional, motion and event files all exist on disk are
    kept.

    Parameters
    ----------
    subject_id : str
        Subject identifier, substituted for ``{PID}`` in the path templates.
    model_str : dict
        Model specification. Keys read here: ``'TaskName'`` (sequence, the
        first entry is used), ``'Runs'`` (mapping with a ``'banner'`` entry),
        ``'sub_path'``, ``'ProjectID'``, ``'task_func_template'``,
        ``'motion_template'`` and ``'event_template'``.
        The input dict is NOT modified.

    Returns
    -------
    dict
        Shallow copy of ``model_str`` in which ``'TaskName'`` is the first
        task name, ``'Runs'`` is the list of complete banner runs, and
        ``'subject_id'`` is added.
    """

    # Imports live inside the function because it is wrapped in a nipype
    # Function node.
    import os

    # Work on a shallow copy: re-binding keys on the caller's dict would make
    # a second call fail (TaskName[0] would then index into a string and
    # Runs['banner'] into a list).
    subj_str = dict(model_str)

    # THIS IS ONE of TWO DIFFERENCES FOR THIS PIPELINE. ONLY TAKES BANNER TASK
    task_name = model_str['TaskName'][0]
    banner_runs = model_str['Runs']['banner']

    subPath = model_str['sub_path'].format(PROJECT=model_str['ProjectID'],PID=subject_id)

    # The three per-run files that must all be present for a run to count.
    required_templates = (model_str['task_func_template'],
                          model_str['motion_template'],
                          model_str['event_template'])

    Runs = []
    for r in banner_runs:  # THIS IS THE OTHER DIFFERENCE, ONLY TAKES BANNER TASK RUNS
        run_files = [template.format(PID=subject_id, TASK=task_name, RUN=r)
                     for template in required_templates]

        # keep the run only if every required file exists
        if all(os.path.isfile(os.path.join(subPath, f)) for f in run_files):
            Runs.append(r)

    # return a subject modified model_structure
    subj_str['TaskName'] = task_name
    subj_str['subject_id'] = subject_id
    subj_str['Runs'] = Runs

    return subj_str
            
    
# Wrap get_subject_info in a nipype Function node. Its single output,
# `subj_str`, is the subject-specific model structure.
get_sub_info = Node(Function(input_names=['subject_id','model_str'],
                             output_names=['subj_str'],
                            function = get_subject_info),
                   name = "get_subject_info")

# The model specification is a fixed input; `subject_id` is supplied through
# the workflow connection from the infosource node.
# NOTE(review): model_def is not defined in any visible cell (the recorded
# cell output is a NameError) -- it must be loaded before this cell is run.
get_sub_info.inputs.model_str = model_def

NameError: name 'model_def' is not defined

### Extract Confounds
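This function extracts matter and motion confounds. Matter confounds include the global average signal (from the grey matter mask), white matter, and CSF average signal. There are 24 motion parameters, as per Power (2012). These include all 6 motion regressors, their derivatives, the quadratic of the motion params, and the squared derivatives. The matter confounds are expanded in the same way (squares, derivatives, and squared derivatives), and nilearn's high-variance confounds are appended as additional columns.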

In [30]:
def extract_confounds(subject_str):
    """
    Extract the confound matrix for every available run of one subject.

    For each run the columns are, in order: the 6 motion parameters, their
    squares, their temporal derivatives and the squared derivatives
    (24 columns); the same four expansions of the per-mask average "matter"
    signals (one signal per anatomical mask file found); and the
    high-variance confounds returned by
    ``nilearn.image.high_variance_confounds``.

    Parameters
    ----------
    subject_str : dict
        Subject-specific model structure. Keys read here: ``'sub_path'``,
        ``'ProjectID'``, ``'subject_id'``, ``'anat_template'``, ``'Runs'``,
        ``'task_func_template'``, ``'motion_template'`` and ``'TaskName'``.

    Returns
    -------
    dict
        Maps each run in ``subject_str['Runs']`` to a 2-D numpy array with
        one row per line of the motion file (after its header) and one column
        per confound.

    Raises
    ------
    ValueError
        If there are runs to process but no anatomical mask file matches
        ``anat_template``.
    """

    # Imports live inside the function because it is wrapped in a nipype
    # Function node.
    import numpy as np
    import glob
    import os
    from nilearn import image, input_data

    def _temporal_derivative(series):
        # Backward difference along time, padded with a zero first row so the
        # result has the same number of rows as `series`.
        return np.concatenate([np.zeros([1, series.shape[1]]),
                               np.diff(series, axis=0)], axis=0)

    subPath = subject_str['sub_path'].format(PROJECT=subject_str['ProjectID'],PID=subject_str['subject_id'])
    # The first four characters of the subject id are dropped for the
    # anatomical template -- presumably a 'sub-' prefix; TODO confirm.
    # NOTE(review): the glob pattern is used as-is, it is not joined to subPath.
    struc_files = glob.glob(subject_str['anat_template'].format(PID=subject_str['subject_id'][4:]))

    # make matter masks
    maskers = [input_data.NiftiLabelsMasker(labels_img=struc,standardize=True,memory='nilearn_cache') for struc in struc_files]

    confound = {}
    for r in subject_str['Runs']:

        if not maskers:
            # Previously this surfaced later as an opaque IndexError.
            raise ValueError("no anatomical mask files matched 'anat_template' "
                             "for subject {}".format(subject_str['subject_id']))

        func = subject_str['task_func_template'].format(PID=subject_str['subject_id'],
                                                      TASK=subject_str['TaskName'],
                                                      RUN=r)
        func_file = os.path.join(subPath,func)

        # high variance confounds
        hv_confounds = image.high_variance_confounds(func_file)

        # get this run's matter confounds (grand mean, white matter, CSF):
        # one average time course per mask.
        matter_means = []
        for mask in maskers:
            mt = mask.fit_transform(func_file)
            matter_means.append(np.nanmean(mt,axis=1)) # get average signal

        # column_stack always yields a 2-D array, including when there is a
        # single mask (the incremental stacking used before left a 1-D array
        # in that case and the derivative step failed).
        matter_confounds = np.column_stack(matter_means)

        # Motion includes xyz,roll,pitch,yaw
        # their derivatives, the quadratic term, and quadratic derivatives
        motion = subject_str['motion_template'].format(PID=subject_str['subject_id'],
                                                      TASK=subject_str['TaskName'],
                                                      RUN=r)

        # tab-separated file; the first line is skipped as a header
        motion = np.genfromtxt(os.path.join(subPath,motion),delimiter='\t',skip_header=1)
        motion = motion[:,:6] # dont take framewise displacement

        # derivatives of motion and matter signals
        motion_deriv = _temporal_derivative(motion)
        matter_deriv = _temporal_derivative(matter_confounds)

        confound[r] = np.concatenate([motion,motion**2,motion_deriv,motion_deriv**2,
                                      matter_confounds,matter_confounds**2,matter_deriv,matter_deriv**2,
                                      hv_confounds],axis=1)
    return confound
    
# Wrap extract_confounds in a nipype Function node. Its single output,
# `confound`, is the per-run confound dictionary returned by the function.
confounds = Node(Function(input_names=['subject_str'],
                         output_names = ['confound'],
                         function = extract_confounds),
                name = 'get_confounds')
    

### Condition TR
This function finds the TRs of each run that match the condition labels of a given model specification. The `condition` entry of the subject structure must be set for a given pipeline.

In [363]:
def get_condition_TR(subject_str):
    """
    Get the list of TR indices belonging to the condition of interest.

    For every run the events file is read, events whose ``trial_type`` is one
    of the labels of the selected condition are kept, and each kept event
    contributes the TR indices ``onset_TR, ..., onset_TR + duration_TR - 1``.
    Onsets and durations are divided by the repetition time and truncated to
    integers.

    Parameters
    ----------
    subject_str : dict
        Subject-specific model structure. Keys read here: ``'sub_path'``,
        ``'ProjectID'``, ``'subject_id'``, ``'Conditions'``, ``'condition'``,
        ``'Runs'``, ``'event_template'``, ``'TaskName'`` and ``'TR'``.

    Returns
    -------
    dict
        Maps each run to a list of TR indices, in event order. A run without
        matching events maps to an empty list.
    """

    # Imports live inside the function because it is wrapped in a nipype
    # Function node.
    import os
    import pandas as pd

    subPath = subject_str['sub_path'].format(PROJECT=subject_str['ProjectID'],PID=subject_str['subject_id'])

    # trial_type labels that make up the selected condition
    conditions = subject_str['Conditions'][subject_str['condition']]

    TRs = {}
    for r in subject_str['Runs']:
        ev = subject_str['event_template'].format(PID=subject_str['subject_id'],
                                                      TASK=subject_str['TaskName'],
                                                      RUN=r)

        events_df = pd.read_csv(os.path.join(subPath,ev),delimiter='\t')
        rel_events = events_df.loc[events_df.trial_type.isin(conditions)]

        # convert onset / duration to whole TRs
        onset_TR = (rel_events['onset']/subject_str['TR']).astype('int')
        dur_TR = (rel_events['duration']/subject_str['TR']).astype('int')

        condition_TR = []
        # Each event uses its OWN duration (previously the duration of the
        # first event was applied to every event).
        for start, dur in zip(onset_TR, dur_TR):
            condition_TR.extend(range(start, start + dur))
        TRs[r] = condition_TR

    return TRs

# Wrap get_condition_TR in a nipype Function node. Its single output, `TRs`,
# is the per-run dictionary of condition TR indices returned by the function.
events = Node(Function(input_names=['subject_str'],
                         output_names = ['TRs'],
                         function = get_condition_TR),
                name = 'get_TRs')



### Get Signal
This is where things all come together. Data is masked and confounds are regressed from the masked signal. Only the TRs belonging to the condition are then kept from each run (for the `rest` condition, all TRs *not* in the list are kept instead). Currently the Power atlas is used as a masker (264 nodes).

In [376]:
def get_signal(subject_str,confound,TRs,mask):
    """
    Get task data, regress confounds and subset the relevant TRs.

    Each run is passed through ``mask.fit_transform(func_file, confounds)``
    and the rows (TRs) of interest are kept; the runs are then stacked along
    the time axis.

    Parameters
    ----------
    subject_str : dict
        Subject-specific model structure. Keys read here: ``'sub_path'``,
        ``'ProjectID'``, ``'subject_id'``, ``'Runs'``,
        ``'task_func_template'``, ``'TaskName'`` and ``'condition'``.
    confound : dict
        Maps each run to the confounds handed to the masker.
    TRs : dict
        Maps each run to a list of TR indices for the condition.
    mask : object
        Masker exposing ``fit_transform(func_file, confounds)`` that returns
        a 2-D array with one row per TR.

    Returns
    -------
    numpy.ndarray or None
        Selected rows of all runs concatenated along axis 0, or ``None`` when
        the subject has no runs.
    """

    # Imports live inside the function because it is wrapped in a nipype
    # Function node.
    import numpy as np
    import os

    subPath = subject_str['sub_path'].format(PROJECT=subject_str['ProjectID'],PID=subject_str['subject_id'])

    per_run = []
    for r in subject_str['Runs']:
        runTR = TRs[r]
        con = confound[r]
        func = subject_str['task_func_template'].format(PID=subject_str['subject_id'],
                                                      TASK=subject_str['TaskName'],
                                                      RUN=r)
        func_file = os.path.join(subPath,func)

        masked_fun = mask.fit_transform(func_file,con)
        n_TR = masked_fun.shape[0]

        # drop TR indices that run past the end of the acquired data
        condition_TR = [tr for tr in runTR if tr < n_TR]

        # if condition is rest, take all TR that are unmodeled
        if subject_str['condition'] == 'rest':
            modeled = set(condition_TR)  # O(1) membership test per TR
            keep = [i for i in range(n_TR) if i not in modeled]
        else:
            keep = condition_TR

        per_run.append(masked_fun[keep,:])

    if not per_run:
        return None

    return np.concatenate(per_run,axis=0)


# Wrap get_signal in a nipype Function node. Its single output, `signal`, is
# the array returned by the function.
signal = Node(Function(input_names=['subject_str','confound','TRs','mask'],
                         output_names = ['signal'],
                         function = get_signal),
                name = 'get_signal')

# The masker is a fixed input; the other three inputs are supplied through
# workflow connections.
# NOTE(review): NODE_MASKER is not defined in any visible cell -- it must be
# created before this cell is run.
signal.inputs.mask = NODE_MASKER
    

### Adjacency matrix

The final step of the pipeline. Data is pairwise correlated using Pearson's r, and the output is a 264x264 adjacency matrix of r values together with a matching matrix of p-values.

In [26]:
def make_adj_matrix(signal):
    """
    Compute the pairwise Pearson correlation between all columns of `signal`.

    NaN entries are treated as 0. The caller's array is left untouched.

    Parameters
    ----------
    signal : numpy.ndarray
        2-D array; rows are samples and columns are the features to correlate.

    Returns
    -------
    r_adj : numpy.ndarray
        ``features x features`` matrix of Pearson r values.
    p_adj : numpy.ndarray
        ``features x features`` matrix of the p-values returned by
        ``scipy.stats.pearsonr``.
    """
    # Imports live inside the function because it is wrapped in a nipype
    # Function node.
    import numpy as np
    from scipy import stats

    # Replace NaN by 0 on a copy rather than writing into the input array.
    signal = np.asarray(signal)
    signal = np.where(np.isnan(signal), 0, signal)

    features = signal.shape[1]

    r_adj = np.zeros([features,features])
    p_adj = np.zeros([features,features])
    for i in range(features):
        # The correlation is symmetric: compute the upper triangle (diagonal
        # included) and mirror it, halving the number of pearsonr calls.
        for i2 in range(i, features):
            r_val, p_val = stats.pearsonr(signal[:,i],signal[:,i2])
            r_adj[i,i2] = r_adj[i2,i] = r_val
            p_adj[i,i2] = p_adj[i2,i] = p_val

    return r_adj,p_adj

# Wrap make_adj_matrix in a nipype Function node. Its two outputs, `r_adj`
# and `p_adj`, correspond to the two arrays returned by the function.
adj_matrix = Node(Function(input_names=['signal'],
                          output_names = ['r_adj','p_adj'],
                          function = make_adj_matrix),
                 name = 'adjacency_matrix')

### Data output
Output is a pickle (`.pkl`) file containing  
* the subject ID
* Project
* Task name
* Condition
* Pearson r value adj matrix
* p.value adj matrix

In [31]:
def data_out(subject_str,r_adj,p_adj):
    """
    Pickle the connectivity result of one subject to the output directory.

    The pickled object is a dict with the keys ``SubjectID``, ``Project``,
    ``Task``, ``Condition``, ``r_adj`` and ``p_adj``. The file name is built
    from the subject id, task name and condition, and the file is written
    inside ``subject_str['output_dir']``.

    Parameters
    ----------
    subject_str : dict
        Subject-specific model structure. Keys read here: ``'subject_id'``,
        ``'ProjectID'``, ``'TaskName'``, ``'condition'`` and ``'output_dir'``.
    r_adj, p_adj
        Correlation and p-value adjacency matrices, stored as given.

    Returns
    -------
    None
    """
    # Imports live inside the function because it is wrapped in a nipype
    # Function node.
    import pickle,os

    # Read the identifying fields once; they feed both the record and the name.
    pid = subject_str['subject_id']
    project = subject_str['ProjectID']
    task = subject_str['TaskName']
    cond = subject_str['condition']

    record = {"SubjectID":pid,
              "Project":project,
              "Task":task,
              "Condition":cond,
              'r_adj':r_adj,
              'p_adj':p_adj}

    name_template = '{PID}_task-{TASK}_condition-{CONDITION}_parcellation-POWER2011_desc-FCcorrelation_adj.pkl'
    target = os.path.join(subject_str['output_dir'],
                          name_template.format(PID=pid, TASK=task, CONDITION=cond))

    with open(target,'wb') as handle:
        pickle.dump(record,handle)

# Wrap data_out in a nipype Function node. No output_names are declared:
# the function returns nothing and only writes the pickle file.
data_save = Node(Function(input_names=['subject_str','r_adj','p_adj'],
                        function = data_out),
               name = 'data_out')
    

______

## WIRE UP

In [27]:
# Assemble the workflow graph from the nodes defined in the cells above.
wfl = Workflow(name='workflow')
# NOTE(review): working_dir is not defined in any visible cell -- it must be
# set before this cell is run.
wfl.base_dir = working_dir

# Each tuple is (source node, destination node, [(source output, destination input)]).
# Data flow:
#   infosource -> get_subject_info -> {get_confounds, get_TRs, get_signal, data_out}
#   get_confounds, get_TRs -> get_signal -> adjacency_matrix -> data_out
wfl.connect([(infoSource,get_sub_info,[("subject_id","subject_id")]),
            (get_sub_info, confounds,[("subj_str","subject_str")]),
            (get_sub_info, events,[('subj_str','subject_str')]),
            (get_sub_info,signal,[('subj_str','subject_str')]),
            (confounds,signal,[('confound','confound')]),
            (events,signal,[('TRs','TRs')]),
            (signal, adj_matrix,[('signal','signal')]),
            (get_sub_info,data_save,[('subj_str','subject_str')]),
            (adj_matrix, data_save,[('r_adj','r_adj'),('p_adj','p_adj')]),
            ])

NameError: name 'infoSource' is not defined