# StroopActFlow
### Identify motor response layer for Stroop Model
### Using a GLM of motor responses, for neutral trials only

### Limit only to SMN network


#### Taku Ito
#### 02/22/17


In [124]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import scipy.stats as stats
import statsmodels.sandbox.stats.multicomp as mc
import os
import nibabel as nib
from sklearn.svm import SVC
import utils
glasserfile = '/projects3/StroopActFlow/data/Q1-Q6_RelatedParcellation210.LR.CorticalAreas_dil_Colors.32k_fs_LR.dlabel.nii'

## 0.0 Specify functions for rest of script

In [125]:
def convertCSVToCIFTI64k(inputfilename,outputfilename):
    """
    Convert a comma-delimited text file to a CIFTI file by calling
    Connectome Workbench: `wb_command -cifti-convert -from-text`.

    The module-level `glasserfile` is used as the CIFTI template, the column
    delimiter is set to ',' and the `-reset-scalars` option is passed.

    Parameters
    ----------
    inputfilename : str
        Path of the comma-delimited text file to convert.
    outputfilename : str
        Path of the CIFTI file to write.

    Raises
    ------
    subprocess.CalledProcessError
        If wb_command exits with a non-zero status.
    OSError
        If the wb_command executable cannot be launched.
    """
    # Local import so this cell does not depend on the notebook's import cell.
    import subprocess

    ciftitemplate = glasserfile
    # Passing an argument list (no shell) keeps paths containing spaces or
    # shell metacharacters intact; check_call surfaces a failed conversion
    # instead of silently continuing with a missing/stale output file.
    wb_command = ['wb_command', '-cifti-convert', '-from-text',
                  inputfilename,
                  ciftitemplate,
                  outputfilename,
                  '-col-delim', ',',
                  '-reset-scalars']
    subprocess.check_call(wb_command)

In [126]:
def loadBetas(subj):
    """
    Load one subject's motor-response GLM betas.

    Reads the comma-delimited file
    `<subj>_motorResponse_taskBetas_Surface64k.csv` from the GlasserResults
    motor-response localizer directory and returns only its last two columns.

    Parameters
    ----------
    subj : str
        Subject identifier used as the file-name prefix.

    Returns
    -------
    numpy.ndarray
        Two-column array holding the final two beta columns of the file.
    """
    datadir = '/projects3/StroopActFlow/data/results/GlasserResults/glm_motorResponseLocalizer/'
    betafile = datadir + subj + '_motorResponse_taskBetas_Surface64k.csv'
    allbetas = np.loadtxt(betafile,delimiter=',')

    # Only the final two regressors (the condition manipulation betas) are kept
    return allbetas[:,-2:]

def loadTvals(subj):
    """
    Load one subject's motor-response GLM t-statistics.

    Reads the comma-delimited file
    `<subj>_motorResponse_taskTstat_Surface64k.csv` from the GlasserResults
    motor-response localizer directory and returns only its last two columns.

    Parameters
    ----------
    subj : str
        Subject identifier used as the file-name prefix.

    Returns
    -------
    numpy.ndarray
        Two-column array holding the final two t-statistic columns of the file.
    """
    datadir = '/projects3/StroopActFlow/data/results/GlasserResults/glm_motorResponseLocalizer/'
    tstatfile = datadir + subj + '_motorResponse_taskTstat_Surface64k.csv'
    alltvals = np.loadtxt(tstatfile,delimiter=',')

    # Only the final two columns (the condition manipulation t-statistics) are kept
    return alltvals[:,-2:]


def loadSmoothedBetas(subj):
    """
    Load one subject's smoothed motor-response betas from a dscalar CIFTI file.

    Reads `<subj>_motorResponse_taskBetas_Surface64k_sm.dscalar.nii` with
    nibabel, squeezes out singleton dimensions, transposes the result and
    returns its last two columns.

    NOTE(review): this reads from .../results/GlasserResults/glm_motorResponseLocalizer/
    while smoothMap writes its smoothed file to .../results/glm_motorResponseLocalizer/
    (no GlasserResults/) -- confirm which directory is intended.

    Parameters
    ----------
    subj : str
        Subject identifier used as the file-name prefix.

    Returns
    -------
    numpy.ndarray
        The last two columns of the squeezed, transposed CIFTI data.
    """
    datadir = '/projects3/StroopActFlow/data/results/GlasserResults/glm_motorResponseLocalizer/'
    sm_filename = datadir + subj + '_motorResponse_taskBetas_Surface64k_sm'

    cifti = nib.load(sm_filename+'.dscalar.nii')
    # Drop singleton axes, then transpose so that maps run along the columns
    smoothed = np.squeeze(cifti.get_data()).T
    return smoothed[:,-2:]

In [127]:
def smoothMap(subj):
    print 'Subject', subj
    glmdir = '/projects3/StroopActFlow/data/results/glm_motorResponseLocalizer/' 
    
    print 'Converting CSV to dscalar...'
    filename = glmdir + subj + '_motorResponse_taskBetas_Surface64k'
    convertCSVToCIFTI64k(filename+'.csv', filename+'.dscalar.nii')
    
    print 'Smoothing surface files...'
    sm_filename = glmdir + subj + '_motorResponse_taskBetas_Surface64k_sm'
    subjdir = '/projects3/StroopActFlow/data/' + subj + '/MNINonLinear/fsaverage_LR32k/'
    lsurf = subjdir + subj + '.L.inflated.32k_fs_LR.surf.gii'
    rsurf = subjdir + subj + '.R.inflated.32k_fs_LR.surf.gii'
    
    wb_command = 'wb_command -cifti-smoothing ' + filename+'.dscalar.nii' + ' 4 4 COLUMN ' + sm_filename+'.dscalar.nii'
    wb_command += ' -left-surface ' + lsurf
    wb_command += ' -right-surface ' + rsurf
    os.system(wb_command)

## 0.1 Set up

In [128]:
# Subject identifiers processed by every per-subject loop below
subjNums = ['101', '102']

# Root data directory and derived results directories
basedir = '/projects3/StroopActFlow/data/'
resultsdir = basedir + 'results/'
restdir = resultsdir + 'glmRest_GlasserParcels/'

# Parcel ordering read from indexsort.txt
# (presumably sorts parcels by network affiliation -- TODO confirm)
glasser_nets = np.loadtxt(basedir + 'NetworkPartitions3.1/indexsort.txt', delimiter=',')
# Make into python numbering (starts from 0)
glasser_nets -= 1.0
networkorder = glasser_nets.astype(int)
# Reshape in place into a column vector (n x 1)
networkorder.shape = (len(networkorder),1)

# Network name -> numeric code
# (presumably the codes used in network_partition.txt -- verify against that file)
networkmappings = {'fpn':6, 'vis':1, 'smn':2, 'con':3, 'dmn':8, 'aud':7, 'van':5, 'dan':4}
networks = networkmappings.keys()

# Network assignment per parcel, loaded from network_partition.txt
networkdef = basedir + 'NetworkPartitions3.1/network_partition.txt'
networkdef = np.loadtxt(networkdef, delimiter=',')
# Swap the first 180 entries with the remaining entries
# (presumably to exchange the two hemispheres' parcel blocks -- TODO confirm)
networkdef2 = np.zeros((networkdef.shape))
networkdef2[0:180] = networkdef[180:]
networkdef2[180:] = networkdef[0:180]
networkdef = networkdef2

# For each network, record the largest position it occupies in the reordered
# affiliation vector, keyed position -> network name
xticks = {}
reorderednetworkaffil = networkdef[networkorder]
for net in networks:
    netNum = networkmappings[net]
    netind = np.where(reorderednetworkaffil==netNum)[0]
    tick = np.max(netind)
    xticks[tick] = net
    
# Load in Glasser parcels
# (later cells treat the values as 1-based parcel labels, via glasser==roi+1)
glasser = nib.load(glasserfile).get_data()
glasser = np.squeeze(glasser)

## 2.1 Run GLM analysis between left and right motor response
* Rule 1: Color rule
* Rule 2: Word rule

##### Load in data as dicts

In [129]:
# Behavioural data for every subject, keyed by subject ID
behavdata = dict((subj, utils.loadBehavData(subj)) for subj in subjNums)


## 2.2 Load in beta maps for each hand response, and compute the difference maps. Then Z-score the maps

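##### This is not a rigorous statistical test. Perform this for each subject, and then compute an 'average' contrast map (the z-scored mean of the subjects' left-minus-right beta differences; these are z-scores, not t-values)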

In [130]:
nrules = 2
zthresh = 0

beta_dict = {}
zcontrast = {}
behavdata = {}

# Organize subject betas by stimulus
zcontrast = np.zeros((360,3)) # 1st subj; 2nd subj; average
i = 0
for subj in subjNums:
    beta_dict[subj] = np.zeros((zcontrast.shape[0],2))
    print 'Loading betas for subject', subj
#     smoothMap(subj)
    beta_dict[subj][:,:] = loadBetas(subj)
    # Only include SMN vertices
    rois = np.where(networkdef==networkmappings['smn'])[0]
#     rois = rois + 1
    roi_ind = []
    for roi in rois:
        roi_ind.append(roi)
    roi_ind = np.reshape(np.asarray(roi_ind),-1)
#     left_ind = stats.zscore(beta_dict[subj][roi_ind,0],axis=0)
#     right_ind = stats.zscore(beta_dict[subj][roi_ind,1],axis=0)
    left_ind = beta_dict[subj][roi_ind,0]
#     tmp = np.abs(left_ind)
#     left_ind = np.multiply(left_ind,tmp)
    right_ind = beta_dict[subj][roi_ind,1]
#     tmp = np.abs(right_ind)
#     right_ind = np.multiply(right_ind,tmp)
    zcontrast[roi_ind,i] = stats.zscore(left_ind - right_ind,axis=0)
    thresh = np.abs(zcontrast[roi_ind,i]) > zthresh 
    zcontrast[roi_ind,i] = np.multiply(zcontrast[roi_ind,i],thresh)
    zcontrast[roi_ind,2] = zcontrast[roi_ind,2] + (left_ind - right_ind)*.5
    i += 1
# Average
zcontrast[roi_ind,2] = stats.zscore(zcontrast[roi_ind,2],axis=0)
thresh = np.abs(zcontrast[roi_ind,2]) > zthresh
zcontrast[roi_ind,2] = np.multiply(zcontrast[roi_ind,2],thresh)


        

Loading betas for subject 101
Loading betas for subject 102


## 2.3 Save z-scored contrast maps for each subject, and their average, to CSV and dscalar files

In [131]:
basedir = '/projects3/StroopActFlow/data/results/GlasserResults/glm_motorResponseLocalizer/'
filerule = basedir+'zcontrast_motorresponse'

# Remap parcel-level values to vertices.
# Row k of zcontrast is written to every vertex whose parcel label is k+1
# (labels are treated as 1-based, matching the glasser==roi+1 lookups used by
# the later cells). Looking labels up explicitly, rather than pairing a running
# counter with np.unique(glasser), keeps the mapping correct even if an
# unlabelled value (e.g. 0) or a gap in the labels is present.
zcontrast_64k = np.zeros((glasser.shape[0],zcontrast.shape[1]))
for roicount in range(zcontrast.shape[0]):
    v_ind = np.where(glasser==roicount+1)[0]
    # Copy every column (each subject and the average) in one assignment
    zcontrast_64k[v_ind,:] = zcontrast[roicount,:]

np.savetxt(filerule+'.csv', zcontrast_64k, delimiter=',')

convertCSVToCIFTI64k(filerule+'.csv', filerule+'.dscalar.nii')


# End

___

##### Run 2 classifications

Function for cross-validation

In [12]:
def runCV(svmmat, labels, cvs=10, leaveout=5, replacement=False, seed=None):
    """
    Repeated random hold-out classification with a linear SVM.

    For each of `cvs` iterations, `leaveout` samples of every class are drawn
    at random as the test set, the remaining samples form the training set,
    both sets are normalized with the training set's feature-wise mean and
    standard deviation, and a linear SVC is fit and scored.

    Before any splitting, each sample is spatially demeaned (its mean across
    features is subtracted) so that mean activity cannot drive classification.

    Parameters
    ----------
    svmmat : array-like, shape (samples, features)
        Data matrix.
    labels : array-like, shape (samples,)
        Class label of each sample.
    cvs : int
        Number of random train/test splits.
    leaveout : int
        Number of samples per class held out for testing in each split.
    replacement : bool
        Whether the held-out samples of a class are drawn with replacement.
    seed : int or None
        If given, the splits are drawn from a private
        numpy.random.RandomState(seed) so the result is reproducible.
        If None (default), numpy's global random state is used.

    Returns
    -------
    avg_acc : float
        Mean test accuracy across the splits.
    p : float
        One-sided (upper-tail) binomial p-value, P(X >= number correct), with
        chance = 1/number_of_classes and cvs*leaveout*number_of_classes trials.
        Note this treats all test predictions across splits as independent.
    """
    svmmat = np.asarray(svmmat, dtype=float)
    labels = np.asarray(labels)
    # np.random (module) and RandomState both expose .choice
    rng = np.random if seed is None else np.random.RandomState(seed)

    # Get number of classes
    classes = np.unique(labels)

    # Total number of held-out samples per split
    leaveout_total = leaveout*len(classes)

    # Spatially demean (across features, to ensure that mean activity does not factor into classification)
    spatialmean = np.mean(svmmat,axis=1)
    spatialmean.shape = (len(spatialmean),1)
    svmmat = svmmat - spatialmean

    # Start cross-validation
    nsamples = svmmat.shape[0]
    acc = []
    for cv in range(cvs):
        test_ind = []
        for stim in classes:
            stimlabs = np.where(labels==stim)[0]
            test_ind.extend(rng.choice(stimlabs,leaveout,replace=replacement))

        test_ind = np.asarray(test_ind)
        train_ind = np.delete(np.arange(nsamples),test_ind)

        trainset = svmmat[train_ind,:]
        testset = svmmat[test_ind,:]
        # normalize train and test set according to train set mean & std
        mean = np.mean(trainset,axis=0)
        std = np.std(trainset,axis=0)
        # A feature that is constant in the training set has std == 0; leave it
        # centered but unscaled instead of producing NaN/inf.
        std[std == 0] = 1.0
        trainset = np.divide((trainset - mean),std)
        testset = np.divide((testset - mean),std)

        # Construct classifier and fit
        svc = SVC(kernel='linear')
        svc.fit(trainset,labels[train_ind])
        # Get acc
        acc.append(svc.score(testset,labels[test_ind]))

    # Get average accuracy
    avg_acc = np.mean(acc)

    # Perform within-subject binomial test
    ntotal = leaveout_total*cvs
    # avg_acc*ntotal is a float product that can land just off an integer;
    # round it back to the actual count of correct predictions.
    nsuccess = int(round(avg_acc*ntotal))
    chance = 1.0/len(classes)

    # Exact one-sided test: P(X >= nsuccess) under Binomial(ntotal, chance).
    # (Halving a two-sided p-value is only valid for a symmetric null, i.e.
    # two classes, and is inexact at or below chance.)
    p = float(stats.binom.sf(nsuccess - 1, ntotal, chance))

    return avg_acc, p


#### Run actual code/cross-validations

In [13]:
accmat = {}
pmat = {}
qmat = {}
accmat_thresh = {}
nparcels = 360

for subj in subjNums:
    print 'Running classifications for subject', subj
    accmat[subj] = np.zeros((glasser.shape[0],2)) # 2 for accuracies and thresholded accuracies    
    pmat[subj] = np.zeros((nparcels,))
    
    for roi in range(nparcels):
        if nparcels%100==0:
            print 'Running classifications on ROI,', roi + 1
        roi_ind = np.where(glasser==roi+1)[0]
        
        ####
        ## Color V Word Rule classification        
        # Set up SVM Matrix
        mat_tmp = [] # For a 2-way classification
        labels_tmp = [] 
        for rule in range(nrules):
            mat_tmp.append(beta_dict[subj][roi_ind,:,rule].T)
            labels_tmp.append(np.ones((nsamples_perrule,1))*rule)
        svmmat_color = np.vstack(mat_tmp)
        labels_color = np.squeeze(np.vstack(labels_tmp))
        
        # Run cross-validation using Linear SVMs
        crossvalidations = 30 # 30-fold cross validation; 60 samples, 58 train; 2 test
        leaveout = 2 # leave out 2 samples of each stim

        acc, p = runCV(svmmat_color, labels_color, cvs=crossvalidations, leaveout=leaveout, replacement=False)
        accmat[subj][roi_ind,0] = acc
        pmat[subj][roi] = p
#         h0, qs = mc.fdrcorrection0(pmat[subj][:,0])
        
   

Running classifications for subject 101
Running classifications for subject 102


#### Run FDR correction for each test

In [14]:
# Sanity check: accuracy array is (number of vertices, 2) for subject '101'
accmat['101'].shape

(59412, 2)

In [16]:
qmat = {}
mvpa_mask = {}

for subj in subjNums:
    mvpa_mask[subj] = np.zeros(accmat[subj].shape)
    # Clear the thresholded-accuracy column so that re-running this cell never
    # leaves values from an earlier run behind.
    accmat[subj][:,1] = 0

    # FDR-correct the per-ROI p-values; keep the corrected values per subject
    h0,qs = mc.fdrcorrection0(pmat[subj])
    qmat[subj] = qs

    # Iterate through each ROI and make sure it is significant
    for roi in range(nparcels):
        roi_ind = np.where(glasser==roi+1)[0]
        if qs[roi] < 0.05:
            # Column 0 of the mask flags significant vertices; column 1 of
            # accmat keeps the accuracy only where the ROI is significant.
            mvpa_mask[subj][roi_ind,0] = 1.0
            accmat[subj][roi_ind,1] = accmat[subj][roi_ind,0]
            


### 2.2 Save MVPA maps to CSV and dscalar files

In [17]:
# Write each subject's accuracy array (raw and thresholded columns) to CSV,
# then convert that CSV to a dscalar CIFTI file.
basedir = '/projects3/StroopActFlow/data/results/glm_ruleStimBetaSeries/taskRule_localizer/'
for subj in subjNums:
    filename_mvpa = basedir + subj + '_taskEncoding_MVPA_WordVColor_v2'
    csv_path = filename_mvpa + '.csv'
    cifti_path = filename_mvpa+'.dscalar.nii'

    np.savetxt(csv_path, accmat[subj], delimiter=',')
    convertCSVToCIFTI64k(csv_path, cifti_path)
