# Building second level models using _nipype_ and _SPM12_

## Base functionality for _megameta_ project

-------
#### History
* 11/14/19 mbod - add contrast checks and flow
* 11/6/19 mbod - update and test for ind behavior change for pure message
* 5/4/19 cscholz - add datasink, incorporate mreg design, incorporate sampling of first-level contrast based on percentage of available first-level models per project
* 4/15/19 mbod - incorporate function to read the 2nd level JSON model config
* 4/9/19 mbod - modify template to work with fmriprep processed data
* 3/20/19 mbod - initial setup for testing some simple one sample t-test models
-----

### Description

* Set up a nipype workflow to use SPM12 to make second level models for _megameta_ task data (preprocessed using `batch8` SPM8 scripts) in BIDS derivative format   


### Setup

In [1]:
import os  # system functions

# NIPYPE FUNCTIONS
import nipype.interfaces.io as nio           # Data i/o
import nipype.interfaces.spm as spm          # spm
import nipype.interfaces.matlab as mlab      # how to run matlab
import nipype.interfaces.utility as util     # utility
import nipype.pipeline.engine as pe          # pypeline engine
import nipype.algorithms.modelgen as model   # model specification
from nipype.interfaces.base import Bunch
from nipype.algorithms.misc import Gunzip

import scipy.io as sio
import numpy as np
import json
import re
import pandas as pd

import random

from IPython.display import Image


from itertools import product

  from ._conv import register_converters as _register_converters


#### Matlab path


In [2]:
# Set the way matlab should be called (no desktop GUI, no splash screen)
mlab.MatlabCommand.set_default_matlab_cmd("matlab -nodesktop -nosplash")
# If SPM is not in your MATLAB path you should add it here.
# NOTE(review): PATH_TO_SPM_FOLDER is not defined anywhere in this notebook, so
# this call raises NameError unless the name is set before this cell runs --
# presumably by the calling notebook/script; TODO confirm.
mlab.MatlabCommand.set_default_paths(PATH_TO_SPM_FOLDER)

NameError: name 'PATH_TO_SPM_FOLDER' is not defined

In [79]:
# Base folder for the group-level (second-level) models; the output and
# working directories of this notebook are created beneath it.
GROUP_DIR = '/data00/projects/megameta/group_models/'

#### Load JSON model config

In [80]:
# Full path to the JSON file that specifies the second-level model.
# NOTE(review): MODEL_SPEC_FILE is not defined in this notebook -- presumably
# it is supplied by the calling notebook/script; TODO confirm.
JSON_MODEL_FILE = os.path.join('/data00/projects/megameta/scripts/jupyter_megameta/second_level_models',
                               'model_specifications',
                               MODEL_SPEC_FILE)

In [61]:
# Parse the JSON model specification; the file is closed as soon as it is read.
with open(JSON_MODEL_FILE) as fh:
    model_def = json.load(fh)

In [81]:
# Name of this second-level model, taken from the model specification.
MODEL_NAME = model_def['ModelName']

# Contrasts listed in the model specification.
CONTRASTS = model_def['Contrasts']

# Top-level megameta folder.
ROOT_DIR = '/data00/projects/megameta'


In [63]:
# Contrasts to run in the second-level model.
# NOTE(review): the original comment asks whether the entries should include
# the ``.nii`` extension -- still an open question.
l2_contrast_list = CONTRASTS

# Folder that receives the results of this second-level model.
output_dir = os.path.join(
    GROUP_DIR,
    'derivatives',
    'nipype',
    'model_2nd-level_{}'.format(MODEL_NAME),
)

# Folder used as the nipype working directory for this model.
working_dir = os.path.join(
    GROUP_DIR,
    'working',
    'nipype',
    'workingdir_model_2nd-level_{}'.format(MODEL_NAME),
)

In [64]:
# Create the output and working folders if they are missing.
# ``exist_ok=True`` replaces the separate "exists?" check, so there is no
# window in which another process can create the folder between the check
# and the ``makedirs`` call.
os.makedirs(output_dir, exist_ok=True)
os.makedirs(working_dir, exist_ok=True)

## Get list of contrast files

## Define nodes

In [66]:
# Infosource - a function-free node that iterates over the second-level
# contrasts and over the (resolution, smoothing kernel) combinations.
# Both iterated names are declared as fields so that each one is a real
# input/output of the identity node.
l2_infosource = pe.Node(
    util.IdentityInterface(fields=['contrast_id', 'resolution_and_smoothing']),
    name="infosource")

smoothing_kernels = [ 8 ]
resolutions = ['medium']

# ``product`` returns a one-shot iterator; store a list so the combinations
# can be traversed more than once.
resolution_and_kernel_list = list(product(resolutions, smoothing_kernels))


l2_infosource.iterables = [('contrast_id', l2_contrast_list),
                           ('resolution_and_smoothing', resolution_and_kernel_list)
                        ]

In [67]:
# SelectFiles - to grab the data (alternative to DataGrabber)

subject_pattern = '*'
OUTPUT_DIR = output_dir
l2_output_dir = output_dir

# Template for the contrast images; the ``{...}`` placeholders are filled in
# from the node's inputs.
l2_templates = {
    'cons': os.path.join(
        output_dir,
        MODEL_NAME,
        subject_pattern,
        '{smoothing_ksize}',
        '{contrast_id}.nii',
    ),
}

l2_selectfiles = pe.Node(
    nio.SelectFiles(l2_templates, base_directory=OUTPUT_DIR, sort_filelist=True),
    name="selectfiles",
)

In [38]:
def make_contrast_list(model_path, cname, sample_perc=100,
                       root_dir='/data00/projects/megameta'):
    """Collect first-level contrast images (and covariates) for a second-level model.

    For every project listed in the JSON model specification, a random sample
    of the subjects that have a first-level model is drawn (sampling added by
    Christin), and the first-level contrast image mapped to ``cname`` is
    looked up for each sampled subject.  When the specification has a
    ``Regressors`` section, the images are matched to the regressor values in
    each project's phenotype file, and only subjects with complete regressor
    values are kept.

    Imports and helpers live inside the function so that it stays
    self-contained (it is wrapped in a nipype ``Function`` node elsewhere in
    this notebook).

    Parameters
    ----------
    model_path : str
        Path to the JSON second-level model specification.
    cname : str
        Key of each project's ``ContrastMap`` naming the contrast to collect.
    sample_perc : int or float, optional
        Percentage of the subjects with a first-level model to sample from
        each project (default 100).
    root_dir : str, optional
        Folder that contains one sub-folder per project.

    Returns
    -------
    con_list : list of str
        Paths of the selected first-level contrast images.
    mregs : list of dict
        One ``{'name', 'vector', 'centering'}`` dict per regressor column,
        with ``vector`` in the same order as ``con_list``.  Empty when the
        specification has no ``Regressors`` section.

    Raises
    ------
    ValueError
        If regressors are requested but no phenotype file could be found.
    KeyError
        If ``cname`` is missing from a project's ``ContrastMap``, or a mapped
        contrast name is not among the contrasts stored in ``SPM.mat``.
    """
    import json
    import os
    import random

    import pandas as pd
    import scipy.io as sio

    # Private merge key: subject ids are only matched within the same project,
    # so identical ids used by two different projects cannot be cross-matched.
    project_col = '_l2_project'

    def get_mreg(model_def):
        """Return (regressor DataFrame over all projects, regressor column names)."""
        mreg_file = '{}.tsv'.format(model_def['Regressors']['Name'])
        mreg_cols = model_def['Regressors']['Columns']

        frames = []
        for project in model_def['Projects']:
            pheno_path = os.path.join(root_dir, project['Name'], 'phenotype', mreg_file)
            if not os.path.exists(pheno_path):
                print('ERROR cannot find', pheno_path)
                continue

            # Read ids as text so that purely numeric ids keep leading zeros
            # and the ``.str`` accessor below is always available.
            df = pd.read_csv(pheno_path, sep='\t', dtype={'participant_id': str})

            # Make the participant_id column BIDS compliant (``sub-`` prefix).
            needs_prefix = ~df['participant_id'].str.startswith('sub-', na=True)
            df.loc[needs_prefix, 'participant_id'] = (
                'sub-' + df.loc[needs_prefix, 'participant_id'])

            # Drop rows with NAs in any regressor column.
            df = df[df[mreg_cols].notnull().all(axis=1)].copy()
            df[project_col] = project['Name']
            frames.append(df)

        if not frames:
            raise ValueError(
                'No phenotype file {!r} found for any project under {!r}'.format(
                    mreg_file, root_dir))

        return pd.concat(frames, ignore_index=True), mreg_cols

    def process_project(project_name, model_def, scan_all_subjs=False, DEBUG=False):
        """Return ({contrast: [(subject, image path), ...]}, subjects with models).

        Returns None when the project has no specification or no first-level
        model folder.
        """
        project_spec = [pspec for pspec in model_def['Projects']
                        if pspec['Name'] == project_name]

        if not project_spec:
            print('Cannot find specification for project: ', project_name)
            return None

        model_name = project_spec[0]['Model']
        cmap = project_spec[0]['ContrastMap']

        model_dir = os.path.join(root_dir, project_name,
                                 "derivatives", "nipype",
                                 "model_{}".format(model_name))

        if not os.path.exists(model_dir):
            print('Cannot find first level model directory:', model_dir)
            return None

        subjs_with_models = [s for s in os.listdir(model_dir) if s.startswith('sub-')]

        # Random sample of participants (based on a percentage).  Multiplying
        # before dividing avoids float truncation such as 0.29 * 100 -> 28.
        sample_size = int(sample_perc * len(subjs_with_models) / 100)
        subj_list = random.sample(subjs_with_models, sample_size)

        print('Project: {}, Sampling {} of {} participants with a model'.format(
            project_name, sample_size, len(subjs_with_models)))

        if DEBUG:
            print("Found {} first level subject models\n".format(len(subjs_with_models)))

        contrast_lists = {model_con: [] for model_con in cmap}

        model_contrasts = []
        con_map = {}
        for sidx, subj in enumerate(subj_list):

            if DEBUG:
                print('Processing', subj, '-', end='')

            first_level_dir = os.path.join(model_dir, subj, 'medium', 'fwhm_8')

            # Unless scan_all_subjs is set, the contrast names are read from
            # the first sampled subject only and reused for the others.
            if scan_all_subjs or sidx == 0:
                spm_mat_file = os.path.join(first_level_dir, 'SPM.mat')

                SPM = sio.loadmat(spm_mat_file, squeeze_me=True,
                                  struct_as_record=False)['SPM']

                model_contrasts = SPM.xCon
                # squeeze_me collapses a single contrast to a bare struct.
                if not hasattr(model_contrasts, '__iter__'):
                    model_contrasts = [model_contrasts]

                con_map = {con.name: 'con_{:0>4}.nii'.format(cidx)
                           for cidx, con in enumerate(model_contrasts, 1)}

            if DEBUG:
                print(' found {} contrasts'.format(len(model_contrasts)))
                print('\tContrasts are:', con_map)

            for model_con, proj_con in cmap.items():
                path_to_con = os.path.join(first_level_dir, con_map[proj_con])

                if os.path.exists(path_to_con):
                    contrast_lists[model_con].append((subj, path_to_con))

        return contrast_lists, subjs_with_models

    with open(model_path) as fh:
        model_def = json.load(fh)

    has_regressors = bool(model_def.get('Regressors', False))
    if has_regressors:
        mreg_df, mreg_cols = get_mreg(model_def)

    rows = []
    for p in model_def['Projects']:
        result = process_project(p['Name'], model_def)
        if result is None:
            # Problem already reported by process_project; skip this project.
            continue
        cons, _ = result
        # The subject id is carried along with each path instead of being
        # parsed back out of the path at a fixed depth.
        rows.extend((p['Name'], subj, path) for subj, path in cons[cname])

    con_df = pd.DataFrame(rows, columns=[project_col, 'participant_id', 'conpath'])

    if not has_regressors:
        # No covariates requested: return every contrast image that was found.
        return con_df['conpath'].tolist(), []

    final_df = con_df.merge(mreg_df, on=[project_col, 'participant_id'])

    # Covariates; a centering value of 5 is iCC = 5
    # (no centering in the spm_factorial model).
    mregs = [{'name': col, 'vector': values, 'centering': 5}
             for col, values in final_df[mreg_cols].to_dict(orient='list').items()]

    con_list = final_df['conpath'].tolist()

    return con_list, mregs

In [72]:
# Function node wrapping make_contrast_list; its two outputs are exposed as
# 'contrasts' and 'covariates'.
l2_getcontrasts = pe.Node(
    util.Function(
        input_names=['model_path', 'cname'],
        output_names=['contrasts', 'covariates'],
        function=make_contrast_list,
    ),
    name='makecontrasts',
)

# The model specification is located relative to the current working directory.
MDIR = os.path.abspath('../model_specifications')
l2_getcontrasts.inputs.model_path = os.path.join(MDIR, MODEL_SPEC_FILE)
l2_getcontrasts.inputs.cname = CONTRAST_NAME

In [30]:
# EDITED BY CHRISTIN (adding datasink)
# Datasink - creates the output folder for the important outputs
datasink = pe.Node(
    nio.DataSink(base_directory=OUTPUT_DIR, container=l2_output_dir),
    name="datasink",
)

# DataSink output substitutions: drop the '_contrast_id_' text from output paths
substitutions = [('_contrast_id_', '')]
datasink.inputs.substitutions = substitutions

## Model nodes

In [31]:
# One-sample t-test design restricted to an explicit brain mask, with
# threshold masking switched off.
osttdesign = pe.Node(
    spm.model.OneSampleTTestDesign(
        explicit_mask_file='/data00/tools/spm8/apriori/brainmask_th25.nii',
        threshold_mask_none=True,
    ),
    name="osttdesign",
)

In [4]:
# EDITED BY CHRISTIN TO IMPLEMENT MREG

# Multiple Regression Design - creates the mreg design, with threshold
# masking switched off.
mregdesign = pe.Node(
    spm.model.MultipleRegressionDesign(threshold_mask_none=True),
    name="mregdesign",
)
# Explicit brain mask (currently disabled):
#mregdesign.inputs.explicit_mask_file='/data00/tools/spm8/apriori/brainmask_th25.nii'

In [None]:
# EstimateModel - estimates the parameters of the model (classical estimation)
level2estimate = pe.Node(spm.model.EstimateModel(), name="level2estimate")
level2estimate.inputs.estimation_method = {'Classical': 1}



In [None]:
# EstimateContrast - estimates simple group contrasts
level2conestimate = pe.Node(spm.model.EstimateContrast(), name="level2conestimate")
level2conestimate.inputs.group_contrast = True


In [7]:
# The string literal below is never used; it only preserves an earlier
# four-regressor contrast set for reference.
'''
cont1 = ['QuitIntent', 'T', ['QuitIntent', 'FTND', 'mean_WC', 'mean'], [1, 0, 0, 0]]
cont2 = ['FTND', 'T', ['QuitIntent', 'FTND', 'mean_WC', 'mean'], [0, 1, 0, 0]]
cont3 = ['mean_WC', 'T', ['QuitIntent', 'FTND', 'mean_WC', 'mean'], [0, 0, 1, 0]]
cont4 = ['mean', 'T', ['QuitIntent', 'FTND', 'mean_WC', 'mean'], [0, 0, 0, 1]]
'''
# Alternative three-regressor contrast set (disabled):
# cont1 = ['Change', 'T', ['change', 'baseline','mean'], [1,0,0]] 
# cont2 = ['Baseline', 'T', ['change', 'baseline', 'mean'], [0,1,0]]
# cont3 = ['Group', 'T', ['change','baseline', 'mean'], [0,0,1]] 

# level2conestimate.inputs.contrasts = [cont1,cont2, cont3]

# Active contrasts. Each entry is [contrast name, 'T', regressor names,
# weights], with one weight per listed regressor name.
cont1 = ['Change', 'T', ['change', 'mean'], [1,0]] 
cont2 = ['Group', 'T', ['change','mean'], [0,1]] 

# Hand both contrasts to the second-level contrast estimation node.
level2conestimate.inputs.contrasts = [cont1,cont2]

NameError: name 'level2conestimate' is not defined

## Setup second level workflow

In [None]:
# Earlier, project-specific location (disabled):
#l2_working_dir = os.path.join(PROJECT_DIR, 'nipype', 'workingdir_banner_2nd_level')
# The workflow uses the working directory set up earlier in this notebook.
l2_working_dir = working_dir

In [None]:
# EDITED BY CHRISTIN (adding datasink to the workflow)
l2analysis = pe.Workflow(name='l2analysis')
l2analysis.base_dir = l2_working_dir

# Connect up the 2nd-level analysis components:
# contrast list -> mreg design -> model estimation -> contrast estimation -> datasink
#
# Disabled connection, kept for reference:
#   (l2_infosource, l2_getcontrasts, [('contrast_id', 'contrast_id'),
#                                     ('model_path')]),
l2analysis.connect([
    (l2_getcontrasts, mregdesign, [
        ('contrasts', 'in_files'),
        ('covariates', 'covariates'),
    ]),
    (mregdesign, level2estimate, [
        ('spm_mat_file', 'spm_mat_file'),
    ]),
    (level2estimate, level2conestimate, [
        ('spm_mat_file', 'spm_mat_file'),
        ('beta_images', 'beta_images'),
        ('residual_image', 'residual_image'),
    ]),
    (level2conestimate, datasink, [
        ('spm_mat_file', 'contrasts.@spm_mat'),
        ('spmT_images', 'contrasts.@T'),
        ('con_images', 'contrasts.@con'),
    ]),
])
