# StroopActFlow
### Identify stimuli localizers for input layer for Stroop Model
### Identify top-down task rule inputs (e.g., PFC) for Stroop Model
### First use MVPA to identify ROIs for each stimulus type; then use GLM to find activation patterns
### Using new GLM with rule and trial beta series model

#### Taku Ito
#### 02/22/17


In [6]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import scipy.stats as stats
import statsmodels.sandbox.stats.multicomp as mc
import os
import nibabel as nib
from sklearn.svm import SVC
import utils

In [2]:
def convertCSVToCIFTI64k(inputfilename,outputfilename):
    """
    Convert a comma-delimited text file into a CIFTI file by shelling out to
    Connectome Workbench (`wb_command -cifti-convert -from-text`).

    Parameters:
        inputfilename  - path of the CSV file to convert
        outputfilename - path of the CIFTI file to write

    The Glasser dlabel file below is passed as the CIFTI template.
    The command is run through the shell and its exit status is not checked,
    so a failed conversion is silent. Paths are inserted unquoted, so they
    must not contain spaces or shell metacharacters.
    """
    ciftitemplate = '/projects3/StroopActFlow/data/Q1-Q6_RelatedParcellation210.LR.CorticalAreas_dil_Colors.32k_fs_LR.dlabel.nii'
    # Pieces of the command line, in the order wb_command expects them
    command_parts = [
        'wb_command -cifti-convert -from-text',
        inputfilename,
        ciftitemplate,
        outputfilename,
        "-col-delim ','",   # columns in the text file are comma separated
        '-reset-scalars',
    ]
    os.system(' '.join(command_parts))

## 0.0 Set up

In [3]:
subjNums = ['101', '102']

# Directory layout
basedir = '/projects3/StroopActFlow/data/'
resultsdir = basedir + 'results/'
restdir = resultsdir + 'glmRest_GlasserParcels/'

# Parcel ordering by network community; the file is 1-based, so shift to
# python numbering (starts from 0) and store as an integer column vector
glasser_nets = np.loadtxt('/projects/AnalysisTools/netpartitions/ColeLabNetPartition_v1.1/community_order.txt', delimiter=',')
glasser_nets -= 1.0
networkorder = glasser_nets.astype(int).reshape(len(glasser_nets), 1)

# Network name -> network number used in the parcel assignment file
networkmappings = {'fpn':7, 'vis':1, 'smn':2, 'con':3, 'dmn':6, 'aud1':8, 'aud2':9, 'dan':11}
networks = networkmappings.keys()

# Network assignment of every parcel, re-ordered by community
networkdef = np.loadtxt('/projects/AnalysisTools/netpartitions/ColeLabNetPartition_v1.1/parcel_network_assignments.txt', delimiter=',')
reorderednetworkaffil = networkdef[networkorder]

# Tick position of each network = last index it occupies in the re-ordered list
xticks = {}
for net, netNum in networkmappings.items():
    netind = np.where(reorderednetworkaffil==netNum)[0]
    xticks[np.max(netind)] = net

# Load in Glasser parcels (one parcel label per surface vertex)
glasserfile = '/projects3/StroopActFlow/data/Q1-Q6_RelatedParcellation210.LR.CorticalAreas_dil_Colors.32k_fs_LR.dlabel.nii'
glasser = np.squeeze(nib.load(glasserfile).get_data())

## 2.0 Run task activation analysis on 64k Surface for four different stimuli localizers

In [18]:
def loadStimulusBetas(subj, datadir='/projects3/StroopActFlow/data/results/glm_ruleStimBetaSeries/',
                      nleading=18, nruleBetas=60):
    """
    Load one subject's beta-series CSV and return only the stimulus (trial) betas.

    Parameters:
        subj       - subject ID string, used as the file name prefix
        datadir    - directory holding '<subj>_RuleAndStimBetaSeries_taskBetas_Surface64k.csv'
                     (default: the project's glm_ruleStimBetaSeries results directory)
        nleading   - number of columns that precede the rule betas in the file
                     (presumably non-task regressors -- confirm against the GLM design)
        nruleBetas - number of rule beta columns that follow them

    Returns:
        2D array (rows of the file x remaining columns) holding every column
        after the first nleading + nruleBetas columns.
    """
    betafile = os.path.join(datadir, subj + '_RuleAndStimBetaSeries_taskBetas_Surface64k.csv')
    # ndmin=2 keeps a single-row file two-dimensional so the column slice below is always valid
    betas = np.loadtxt(betafile, delimiter=',', ndmin=2)

    # Drop the leading columns and the rule betas; what remains are the stimulus betas
    return betas[:, nleading + nruleBetas:]

## 2.1 Run two MVPA analyses
### Color Localizer: Green V Red
### Word Localizer: Green V Red
* Stim 1: Neutral Color Green
* Stim 2: Neutral Color Red
* Stim 3: Neutral Word Green
* Stim 4: Neutral Word Red

##### Load in data as dicts

In [37]:
ntrials_perstim = 45
nstims = 4
colorstims = [0,1] # First 2 are color
wordstims = [2,3] # second 2 stims are word

beta_dict = {}
behavdata = {}

# Organize subject betas by stimulus: beta_dict[subj] is (rows of betas x trials x stimulus)
for subj in subjNums:
    print('Loading betas for subject' + ' ' + subj)
    betas = loadStimulusBetas(subj)
    behav = utils.loadBehavData(subj)
    behavdata[subj] = behav

    # Trial indices by condition / stimulus identity
    neutral_ind = np.where(behav['condition']=='neutral')[0]
    colorgreen = np.where(behav['colorStim']=='green')[0]
    colorred = np.where(behav['colorStim']=='red')[0]
    wordgreen = np.where(behav['wordStim']=='GREEN')[0]
    wordred = np.where(behav['wordStim']=='RED')[0]

    # Restrict each stimulus to the neutral trials
    neutral_colorgreen_ind = np.intersect1d(neutral_ind, colorgreen)
    neutral_colorred_ind = np.intersect1d(neutral_ind, colorred)
    neutral_wordgreen_ind = np.intersect1d(neutral_ind, wordgreen)
    neutral_wordred_ind = np.intersect1d(neutral_ind, wordred)

    # Stimulus order along the last axis: color green, color red, word green, word red.
    # Each index set must contain exactly ntrials_perstim trials or the assignment fails.
    stim_trials = [neutral_colorgreen_ind, neutral_colorred_ind,
                   neutral_wordgreen_ind, neutral_wordred_ind]
    beta_dict[subj] = np.zeros((betas.shape[0],ntrials_perstim,nstims))
    for stim, trial_ind in enumerate(stim_trials):
        beta_dict[subj][:,:,stim] = betas[:,trial_ind]

        

Loading betas for subject 101
Loading betas for subject 102


##### Run 2 classifications

Function for cross-validation

In [39]:
def runCV(svmmat, labels, cvs=10, leaveout=5, replacement=False, classifier=None):
    """
    Runs a repeated random-split cross validation and tests the pooled accuracy
    against chance with a one-sided binomial test.

    Parameters:
        svmmat      - samples x features matrix
        labels      - 1D array with one class label per sample (row of svmmat)
        cvs         - number of train/test splits to run
        leaveout    - number of samples of EACH class held out for testing in every split
        replacement - whether the held-out samples of a class are drawn with replacement
        classifier  - optional zero-argument callable returning a fresh estimator with
                      fit(X, y) and predict(X); defaults to a linear SVM (SVC(kernel='linear'))

    Returns:
        avg_acc - mean test accuracy across the splits
        p       - one-sided p-value, P(X >= number of correct test predictions) for
                  X ~ Binomial(total test predictions, 1/number of classes)

    Notes:
        * Test samples are drawn independently for every split (np.random.choice), so
          the test sets of different splits may overlap.
        * The p-value is taken directly from the upper tail of the binomial distribution.
          Halving a two-sided binomial p-value is only valid when chance is 0.5, and
          is not exact at or below chance.
    """
    svmmat = np.asarray(svmmat)
    labels = np.asarray(labels)

    # Get number of classes
    classes = np.unique(labels)
    if len(classes) < 2:
        raise ValueError('runCV needs at least two classes in labels')

    # Spatially demean (across features, to ensure that mean activity does not factor into classification)
    svmmat = svmmat - np.mean(svmmat, axis=1, keepdims=True)

    # Start cross-validation
    nsamples = svmmat.shape[0]
    acc = []
    ncorrect = 0
    ntotal = 0
    for cv in range(cvs):
        # Hold out `leaveout` samples of every class
        test_ind = []
        for stim in classes:
            stimlabs = np.where(labels==stim)[0]
            test_ind.extend(np.random.choice(stimlabs,leaveout,replace=replacement))

        test_ind = np.asarray(test_ind)
        train_ind = np.delete(np.arange(nsamples),test_ind)

        trainset = svmmat[train_ind,:]
        testset = svmmat[test_ind,:]
        # normalize train and test set according to train set mean & std
        mean = np.mean(trainset,axis=0)
        std = np.std(trainset,axis=0)
        # Constant features would otherwise divide by zero and feed NaN/inf to the classifier
        std[std == 0] = 1.0
        trainset = np.divide((trainset - mean),std)
        testset = np.divide((testset - mean),std)

        # Construct classifier and fit
        clf = SVC(kernel='linear') if classifier is None else classifier()
        clf.fit(trainset,labels[train_ind])

        # Count hits directly so the binomial test gets an exact integer
        hits = int(np.sum(np.asarray(clf.predict(testset)) == labels[test_ind]))
        ncorrect += hits
        ntotal += len(test_ind)
        acc.append(hits / float(len(test_ind)))

    # Get average accuracy
    avg_acc = np.mean(acc)

    # Perform within-subject one-sided binomial test: P(X >= ncorrect) under chance
    chance = 1.0/len(classes)
    p = stats.binom.sf(ncorrect - 1, ntotal, chance)

    return avg_acc, p


#### Run actual code/cross-validations

In [40]:
accmat = {}
pmat = {}
qmat = {}
accmat_thresh = {}
nparcels = 360

# Cross-validation settings shared by all three classifications:
# 10 random splits, holding out 5 samples of each stimulus per split
# (2-way: 90 samples -> 80 train / 10 test; 4-way: 180 samples -> 160 train / 20 test)
crossvalidations = 10
leaveout = 5

def _stackStimTrials(betas, roi_ind, stims, ntrials):
    """Stack the trials of each stimulus in `stims` into a (samples x ROI vertices) matrix
    and build the matching label vector (the stimulus index, repeated once per trial)."""
    mat = np.vstack([betas[roi_ind,:,stim].T for stim in stims])
    labels = np.repeat(np.asarray(stims, dtype=float), ntrials)
    return mat, labels

for subj in subjNums:
    print('Running classifications for subject' + ' ' + subj)
    # Accuracies are stored per vertex (every vertex of a parcel gets the parcel's accuracy);
    # p-values are stored per parcel. Columns: 0 = color, 1 = word, 2 = 4-way classification
    accmat[subj] = np.zeros((glasser.shape[0],3))
    pmat[subj] = np.zeros((nparcels,3))

    for roi in range(nparcels):
        # Progress report every 100 parcels (was testing nparcels instead of roi, so it never printed)
        if roi%100==0:
            print('Running classifications on ROI,' + ' ' + str(roi + 1))
        roi_ind = np.where(glasser==roi+1)[0]

        # Color STIM (2-way) and word STIM (2-way) SVM matrices
        svmmat_color, labels_color = _stackStimTrials(beta_dict[subj], roi_ind, colorstims, ntrials_perstim)
        svmmat_word, labels_word = _stackStimTrials(beta_dict[subj], roi_ind, wordstims, ntrials_perstim)
        # 4-way STIM classification pools both sets
        svmmat_4way = np.vstack((svmmat_color,svmmat_word))
        labels_4way = np.hstack((labels_color,labels_word))

        # Run cross-validation using Linear SVMs, in column order
        classifications = [(svmmat_color, labels_color),
                           (svmmat_word, labels_word),
                           (svmmat_4way, labels_4way)]
        for col, (svmmat, labels) in enumerate(classifications):
            acc, p = runCV(svmmat, labels, cvs=crossvalidations, leaveout=leaveout, replacement=False)
            accmat[subj][roi_ind,col] = acc
            pmat[subj][roi,col] = p
        
        
        
        

Running classifications for subject 101
Running classifications for subject 102


#### Run FDR correction for each test

In [56]:
# Build, per subject, a vertex x classification mask (mvpa_mask) used by the t-test step below.
# NOTE(review): the last statement of the inner loop sets EVERY vertex of the column to 1.0,
# so the max-accuracy selection just above it has no effect and the mask is not actually
# constrained by the MVPA results -- confirm whether this override is intentional.
# qmat is initialised here but not filled in by this cell.
qmat = {}
mvpa_mask = {}

for subj in subjNums:
    mvpa_mask[subj] = np.zeros(accmat[subj].shape)
    
    for test in range(accmat[subj].shape[1]):
        # FDR-correct the parcel-wise p-values of this classification
        # (h0 and qs are only used by the commented-out thresholding below)
        h0,qs = mc.fdrcorrection0(pmat[subj][:,test])
        
        # Iterate through each ROI and make sure it is significant
#         for roi in range(nparcels):
#             roi_ind = np.where(glasser==roi+1)[0]
#             if qs[roi] < 0.01:
#                 mvpa_mask[subj][roi_ind,test] = 1.0
        # Select the vertices whose accuracy equals the maximum accuracy of this classification
        tmp = np.max(accmat[subj][:,test])
        roi_ind = accmat[subj][:,test] == tmp
#         for roi in rois: 
#             roi_ind = np.where(glasser==roi+1)[0]
        mvpa_mask[subj][roi_ind,test] = 1.0
        # Overrides the selection above: the whole column becomes 1.0 (see NOTE at top)
        mvpa_mask[subj][:,test] = 1.0


## 3.0 Run GLM analysis on MVPA masks for Red V Green for Color and Word neutral stims separately

In [42]:
# Stimulus indices along the last axis of beta_dict[subj]:
# 0/1 = neutral color green/red, 2/3 = neutral word green/red
colorstims = [0, 1]
wordstims = [2, 3]

### 3.1 Perform univariate t-tests on color and word stims

In [57]:
ttest_loc = {}

def _pairedTtestMap(betas, vert_ind, stims, nvertices, alpha=0.05):
    """
    Paired t-test (first stimulus in `stims` vs. second, across trials) at each vertex in vert_ind.

    Returns a vertex x 3 matrix (zero outside vert_ind):
        column 0 - t-values
        column 1 - uncorrected p-values
        column 2 - t-values where p < alpha, otherwise 0
    """
    statsmat = np.zeros((nvertices,3))
    if len(vert_ind) == 0:
        return statsmat

    # vertices x trials for each of the two stimuli; one vectorized test over all vertices
    first = betas[:,:,stims[0]][vert_ind,:]
    second = betas[:,:,stims[1]][vert_ind,:]
    t, p = stats.ttest_rel(first, second, axis=1)
    t, p = np.asarray(t), np.asarray(p)

    statsmat[vert_ind,0] = t
    statsmat[vert_ind,1] = p
    # Uncorrected threshold (FDR correction is not applied). np.where keeps undefined (NaN)
    # t-values out of the thresholded column, which multiplying by the boolean mask did not.
    statsmat[vert_ind,2] = np.where(p < alpha, t, 0.0)
    return statsmat

for subj in subjNums:
    nvertices = mvpa_mask[subj].shape[0]

    # vertices included in the MVPA mask (column 0 = color, column 1 = word)
    color_ind = np.where(mvpa_mask[subj][:,0])[0]
    word_ind = np.where(mvpa_mask[subj][:,1])[0]

    ttest_loc[subj] = {}
    ttest_loc[subj]['color'] = _pairedTtestMap(beta_dict[subj], color_ind, colorstims, nvertices)
    ttest_loc[subj]['word'] = _pairedTtestMap(beta_dict[subj], word_ind, wordstims, nvertices)
        
    
        
        

### 3.2 Save statistical maps to CSV and dscalar files

In [58]:
# Write each subject's color and word t-test maps to CSV, then convert them to dscalar files
for subj in subjNums:
    # NOTE(review): this rebinds the notebook-level `basedir` defined in the set-up cell
    basedir = '/projects3/StroopActFlow/data/results/glm_ruleStimBetaSeries/neutralStims_localizer/'
    filecolor = basedir+subj+'_MVPAconstrained_ttestContrast_color_v2'
    fileword = basedir+subj+'_MVPAconstrained_ttestContrast_word_v2'
    outputs = (('color', filecolor), ('word', fileword))

    # CSVs first (vertex x stats matrices) ...
    for stimtype, fileprefix in outputs:
        np.savetxt(fileprefix+'.csv', ttest_loc[subj][stimtype], delimiter=',')

    # ... then the CIFTI conversions
    for stimtype, fileprefix in outputs:
        convertCSVToCIFTI64k(fileprefix+'.csv', fileprefix+'.dscalar.nii')

In [55]:
qmat = {}
mvpa_mask = {}
accmat_thresh = {}

# Vertex indices of every parcel (parcel labels in `glasser` run from 1 to nparcels)
parcel_vertices = [np.where(glasser==parcel)[0] for parcel in range(1, nparcels+1)]

# Threshold the accuracy maps: keep a parcel's accuracy only if its FDR-corrected p-value is < 0.05
for subj in subjNums:
    mask_shape = accmat[subj].shape
    mvpa_mask[subj] = np.zeros(mask_shape)
    accmat_thresh[subj] = np.zeros(mask_shape)
    for test in range(mask_shape[1]):
        h0,qs = mc.fdrcorrection0(pmat[subj][:,test])

        # Mark every vertex of each significant parcel
        for roi, roi_ind in enumerate(parcel_vertices):
            if qs[roi] < 0.05:
                mvpa_mask[subj][roi_ind,test] = 1.0

        accmat_thresh[subj][:,test] = mvpa_mask[subj][:,test] * accmat[subj][:,test]

# Save the raw and thresholded accuracy maps as CSV and convert both to dscalar files
for subj in subjNums:
    # NOTE(review): this rebinds the notebook-level `basedir` defined in the set-up cell
    basedir = '/projects3/StroopActFlow/data/results/glm_ruleStimBetaSeries/neutralStims_localizer/'
    filemvpaacc = basedir+subj+'_neutralStimuliMVPA_Acc_v2'
    filemvpaaccthresh = basedir+subj+'_neutralStimuliMVPA_AccThresh_v2'
    outputs = ((filemvpaacc, accmat[subj]), (filemvpaaccthresh, accmat_thresh[subj]))

    for fileprefix, vertexmap in outputs:
        np.savetxt(fileprefix+'.csv', vertexmap, delimiter=',')

    for fileprefix, vertexmap in outputs:
        convertCSVToCIFTI64k(fileprefix+'.csv', fileprefix+'.dscalar.nii')


___