# Set up design files for a PPI (psychophysiological interaction) analysis in lyman

In [1]:
#os and i/o
import os
import numpy as np
import glob
from os.path import abspath
import csv

#scientific computing
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
from scipy import stats, optimize
from pandas import DataFrame, Series
from moss import glm
import seaborn as sns
import random as rd
from statsmodels.formula.api import ols
from statsmodels.stats.anova import anova_lm
import scipy.stats

#ipython add-ons
from IPython.parallel import Client
from IPython.display import Image
import multiprocessing

##nipype
import nibabel as nib
from nipype.pipeline.engine import Node, MapNode, Workflow
from nipype.interfaces.io import DataGrabber, DataFinder, DataSink
from nipype.interfaces import fsl
from nipype.interfaces.fsl import ImageMeants
from nipype.interfaces.fsl import ImageStats

%matplotlib inline



In [2]:
#preliminary housekeeping
# Root of the project tree. The trailing slash matters: the paths below are
# built by plain string concatenation onto this value.
home_dir = '/data/home/iballard/fd/'
subj_file = home_dir + 'subjects.txt'
# Subject IDs are read as text. ndmin=1 keeps the result iterable when the
# file holds a single ID (loadtxt would otherwise return a 0-d array and
# list() on it raises). `str` is used instead of the 'string' dtype alias,
# which is not portable across NumPy versions.
sub_list = [str(sub_id) for sub_id in np.loadtxt(subj_file, dtype=str, ndmin=1)]
os.chdir(home_dir)
exps = ['sim','ser']
# Built as a real list (not a lazy map object) because it is iterated once
# per subject further down.
runs = [str(run_idx) for run_idx in range(1,4)]

In [3]:
def vector_rejection(a,b):
    """Return the rejection of ``a`` from ``b``.

    This is ``a`` minus its projection onto ``b``, i.e. the part of ``a``
    that is orthogonal to ``b``.
    """
    # Scalar coefficient of the projection of a onto b.
    scale = np.dot(a, b) / np.dot(b, b)
    projection = scale * b
    return a - projection

# Extract timeseries from the mask

In [10]:
def extract_roi(in_tuple):
    """Extract an ROI timeseries for one subject/experiment/run/mask.

    Runs ``ImageMeants`` with ``eig=True, order=1`` on the coregistered,
    unsmoothed timeseries of the run, restricted to the subject's mask, and
    writes the result to
    ``<home_dir>data/<sub>/masks/extractions/<sub><exp><run><mask>.txt``.

    Parameters
    ----------
    in_tuple : tuple
        ``(sub, exp, run, mask)``. Packed into a single argument so the
        function can be dispatched with ``Pool.map``. ``mask`` is the
        basename of ``<mask>.nii.gz`` in the subject's ``masks`` directory.

    Notes
    -----
    The output path is printed for every call. The extraction itself is
    skipped when the run's ``preproc`` directory does not exist.
    """
    sub,exp,run,mask = in_tuple
    run = str(run)  # accept integer run numbers; both paths below need text

    exp_dir = home_dir + 'analysis/' + exp + '_4mm/' + sub
    sub_path = exp_dir + '/preproc/run_' + run + '/'

    #make sure to get coregistered preproc data
    preproc_data = exp_dir + '/reg/epi/unsmoothed/run_' + run + '/timeseries_xfm.nii.gz'

    mask_dir = home_dir + 'data/' + sub + '/masks/'
    out_dir = mask_dir + 'extractions/'

    # Pool workers handling the same subject can reach this point together,
    # so a check-then-mkdir can fail in the worker that loses the race.
    # Attempt the creation and re-raise only if the directory is truly absent.
    try:
        os.mkdir(out_dir)
    except OSError:
        if not os.path.isdir(out_dir):
            raise

    mask_file = mask_dir + mask + '.nii.gz'
    out_f = out_dir + ('').join(map(str,in_tuple))+ '.txt'
    print(out_f)

    if os.path.exists(sub_path):# and not os.path.exists(out_f):
        meants = ImageMeants(in_file = preproc_data, eig = True, order = 1,
                             mask = mask_file, out_file = out_f)
        meants.run()

# Try the extraction on a single (subject, experiment, run, mask) tuple.
extract_roi(('fd_104','sim','1','hipp'))

/data/home/iballard/fd/data/fd_104/masks/extractions/fd_104sim1hipp.txt


In [6]:
def extract_roi_prob(in_tuple):
    """Extract an ROI timeseries from data weighted by a functional-space map.

    The run's coregistered, unsmoothed timeseries is multiplied by
    ``<exp>_<mask>_func_space.nii.gz`` with ``fslmaths -mul`` into a
    temporary image. ``ImageMeants`` (``eig=True, order=1``) is then run on
    that image within ``<exp>_<mask>_mask.nii.gz``. The result is written to
    ``<home_dir>data/<sub>/masks/<mask>/extractions/<sub><exp><run><mask>.txt``.

    Parameters
    ----------
    in_tuple : tuple
        ``(sub, exp, run, mask)``. Packed into a single argument so the
        function can be dispatched with ``Pool.map``.

    Raises
    ------
    subprocess.CalledProcessError
        If ``fslmaths`` exits with a non-zero status.

    Notes
    -----
    Nothing is done when the run's ``preproc`` directory does not exist.
    """
    # Local import: subprocess is not part of the notebook's import cell.
    import subprocess

    sub,exp,run,mask = in_tuple
    run = str(run)  # accept integer run numbers; the paths below need text

    exp_dir = home_dir + 'analysis/' + exp + '_4mm/' + sub
    sub_path = exp_dir + '/preproc/run_' + run + '/'

    #make sure to get coregistered preproc data
    preproc_data = exp_dir + '/reg/epi/unsmoothed/run_' + run + '/timeseries_xfm.nii.gz'

    mask_dir = home_dir + 'data/' + sub + '/masks/' + mask + '/'
    out_dir = mask_dir + 'extractions/'

    prob_file = mask_dir + exp + '_' + mask + '_func_space.nii.gz'
    mask_file = mask_dir + exp + '_' + mask + '_mask.nii.gz'
    out_f = out_dir + ('').join(map(str,in_tuple))+ '.txt'
    tmp_out = mask_dir + sub + exp + run + '.nii.gz'

    if os.path.exists(sub_path):# and not os.path.exists(out_f):
        # Create the output directory if needed. Another worker may create it
        # concurrently, so only re-raise when it is genuinely missing.
        try:
            os.mkdir(out_dir)
        except OSError:
            if not os.path.isdir(out_dir):
                raise

        try:
            # Argument list (no shell) so paths containing spaces survive.
            # check_call raises if fslmaths fails instead of carrying on
            # with a missing temp image.
            subprocess.check_call(['fslmaths',preproc_data,'-mul',prob_file,tmp_out])

            meants = ImageMeants(in_file = tmp_out, eig = True, order = 1,
                                 mask = mask_file, out_file = out_f)
            meants.run()
        finally:
            # Remove the temp image even when a step above raised.
            if os.path.exists(tmp_out):
                os.remove(tmp_out)

In [7]:
# Restrict this pass to one experiment and one ROI.
exps = ['sim']
rois = ['hipp']
# One (subject, experiment, run, mask) work item per combination; subjects
# vary slowest and masks fastest.
in_tuples = [(sub, exp, run, mask)
             for sub in sub_list
             for exp in exps
             for run in runs
             for mask in rois]

In [8]:
# Run the ROI extraction for every work item in parallel.
pool = multiprocessing.Pool(processes = 14)
try:
    pool.map(extract_roi,in_tuples)
finally:
    # Shut the workers down even if an extraction raised, so a failed batch
    # does not leave worker processes behind.
    pool.terminate()
    pool.join()

# Set up design matrix

In [9]:
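# NOTE: earlier version of write_design, kept commented out for reference;
# the active definition is in the next cell.
# def write_design(in_tuple):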
#     mask = 'hipp'
#     sub,exp = in_tuple
#     #hrf params
#     hrf = getattr(glm,'GammaDifferenceHRF')
#     tr = 1.5
#     hrf = hrf(tr = tr)

#     out_f = home_dir + 'data/' + sub + '/design/ppi_regressors_' + exp + '_' + mask + '.csv' #out file

#     if True:#not os.path.exists(out_f):
#         #load design data for this subjects 
#         design_dir = home_dir + 'data/' + sub + '/design/'
#         design_file = design_dir + exp + '_ppi.csv'
#         design_data = pd.read_csv(design_file)

#         #load in pre-existing noise regressors
#         reg_file = design_dir + 'noise_regressors_' + exp + '.csv'
#         regressors = pd.read_csv(reg_file)

#         #initialize vars to fill
#         convolved_ev = []
#         ts = []
#         for run in runs:
#             sub_file = home_dir + 'analysis/' + exp + '_4mm/' + sub + '/preproc/run_' + str(run) + '/unsmoothed_timeseries.nii.gz'

#             if os.path.exists(sub_file):
#                 ntp = nib.load(sub_file).shape[-1] #get number of time points
#                 design = design_data[design_data['run']==int(run)]

#                 model = glm.DesignMatrix(design = design, tr = tr, ntp = ntp, hrf_model = hrf, hpf_cutoff = 128)
#                 convolved_ev.extend(model.design_matrix['state'].values) #get timeseries for regressor of interest

#                 #load ts data
#                 mask_dir = home_dir + 'data/' + sub + '/masks/vta/extractions/' 
#                 fid = (sub,exp,run,mask)
#                 mask_f = mask_dir + ('').join(map(str,fid))+ '.txt'
#                 roi_ts = np.loadtxt(mask_f)
#                 roi_ts = roi_ts - np.mean(roi_ts) #mean center
#                 ts.extend(roi_ts)

#         #update regressors dataframe
#         ts = scipy.stats.zscore(ts) #add ts to the regressors DF

#         ##centre convolved ev (see fsl docs)
#         diff = max(convolved_ev) - (max(convolved_ev) - min(convolved_ev))/2.0
#         convolved_ev = convolved_ev - diff
#         regressors['interaction'] = convolved_ev * ts #interaction regressor

#         #orthogonalize noise regressors to speed up computation
#         ts = vector_rejection(ts,regressors['ventricles'])
#         ts = vector_rejection(ts,regressors['wm'])
#         regressors[mask] = ts

#         #write output
#         regressors.to_csv(out_f, header=True,index = False, columns = ['wm','ventricles',mask,'interaction','run'])

In [21]:
def write_design(in_tuple, mask='hipp'):
    """Write the PPI regressor CSV for one subject and experiment.

    For every run that has a smoothed timeseries on disk, this collects the
    ``event`` column of the convolved design matrix and the extracted ROI
    timeseries (mean-centred within the run). Both are concatenated across
    runs. The output file
    ``<home_dir>data/<sub>/design/ppi_regressors_<exp>_<mask>.csv`` has three
    columns:

    * ``<mask>``: the ROI timeseries, z-scored over the concatenated runs
    * ``interaction``: the centred ``event`` regressor times that timeseries
    * ``run``: the run label of each time point

    Parameters
    ----------
    in_tuple : tuple
        ``(sub, exp)``. Packed into a single argument so the function can be
        dispatched with ``Pool.map``.
    mask : str, optional
        ROI name used to locate the extraction files and to name the output
        column and file. Defaults to ``'hipp'``.

    Raises
    ------
    ValueError
        If an ROI extraction does not have one value per time point of its
        run.

    Notes
    -----
    When no run has data, a message is printed and no file is written.
    """
    sub,exp = in_tuple
    #hrf params
    tr = 1.5
    hrf = glm.GammaDifferenceHRF(tr = tr)

    design_dir = home_dir + 'data/' + sub + '/design/'
    out_f = design_dir + 'ppi_regressors_' + exp + '_' + mask + '.csv' #out file

    #load design data for this subject
    design_data = pd.read_csv(design_dir + exp + '_one_condition.csv')

    #directory holding the ROI extraction text files
    extraction_dir = home_dir + 'data/' + sub + '/masks/extractions/'

    #initialize vars to fill
    convolved_ev = []
    ts = []
    run_col = []
    for run in runs:
        sub_file = home_dir + 'analysis/' + exp + '_4mm/' + sub + '/preproc/run_' + str(run) + '/smoothed_timeseries.nii.gz'
        if not os.path.exists(sub_file):
            continue

        ntp = nib.load(sub_file).shape[-1] #get number of time points
        design = design_data[design_data['run']==int(run)]

        model = glm.DesignMatrix(design = design, tr = tr, ntp = ntp, hrf_model = hrf, hpf_cutoff = 128)
        run_ev = model.design_matrix['event'].values #get timeseries for regressor of interest

        #load ts data
        mask_f = extraction_dir + ('').join(map(str,(sub,exp,run,mask)))+ '.txt'
        roi_ts = np.atleast_1d(np.loadtxt(mask_f))
        # Fail here with a clear message rather than later with a pandas
        # length error (or, worse, a silent misalignment across runs).
        if roi_ts.shape[0] != ntp:
            raise ValueError('ROI timeseries ' + mask_f + ' has ' + str(roi_ts.shape[0]) +
                             ' points but the run has ' + str(ntp))
        roi_ts = roi_ts - np.mean(roi_ts) #mean center

        # Append only after everything for this run loaded, so the three
        # lists always stay the same length.
        convolved_ev.extend(run_ev)
        ts.extend(roi_ts)
        run_col.extend(np.repeat(run,ntp))

    # No usable run: skip this subject instead of failing on empty data,
    # which would abort the whole Pool.map batch.
    if not ts:
        print('write_design: no runs found for ' + sub + ' ' + exp + '; nothing written')
        return

    #z-score the concatenated ROI timeseries
    ts = scipy.stats.zscore(ts)

    ##centre convolved ev (see fsl docs)
    convolved_ev = np.asarray(convolved_ev)
    diff = convolved_ev.max() - (convolved_ev.max() - convolved_ev.min())/2.0
    convolved_ev = convolved_ev - diff

    regressors = pd.DataFrame()
    regressors[mask] = ts
    #add in run idx
    regressors['run'] = run_col
    regressors['interaction'] = convolved_ev * ts #interaction regressor

    #write output
    regressors.to_csv(out_f, header=True,index = False, columns = [mask,'interaction','run'])


In [23]:
# One (subject, experiment) work item per design file to write.
exps = ['sim']
in_tuples = [(sub, exp) for sub in sub_list for exp in exps]

In [24]:
# Write the design files in parallel. The worker count is capped at the
# number of CPUs rather than one process per work item, and floored at 1
# because Pool rejects processes=0 when the work list is empty.
n_workers = max(1, min(len(in_tuples), multiprocessing.cpu_count()))
pool = multiprocessing.Pool(processes = n_workers)
try:
    pool.map(write_design,in_tuples)
finally:
    # Shut the workers down even if a design failed to write.
    pool.terminate()
    pool.join()