# StroopActFlow
### Identify stimuli localizers for input layer for Stroop Model
### Identify top-down task rule inputs (e.g., PFC) for Stroop Model
### First use MVPA to identify ROIs for each stimulus type; then use GLM to find activation patterns
### Using new GLM with rule and trial beta series model


#### Taku Ito
#### 02/22/17


In [27]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import scipy.stats as stats
import statsmodels.sandbox.stats.multicomp as mc
import os
import nibabel as nib
from sklearn.svm import SVC
import utils

In [28]:
def convertCSVToCIFTI64k(inputfilename,outputfilename):
    """
    Convert a comma-delimited text file to a CIFTI file using Connectome
    Workbench (``wb_command -cifti-convert -from-text``).

    The Glasser parcellation dlabel file is passed as the CIFTI template, the
    column delimiter is set to ',' and ``-reset-scalars`` is requested.

    Parameters
    ----------
    inputfilename : str
        Path to the input CSV file.
    outputfilename : str
        Path of the CIFTI file to write (e.g. ending in '.dscalar.nii').

    Returns
    -------
    int
        Exit status of wb_command (0 on success). Callers that ignored the
        previous ``None`` return value are unaffected.

    Raises
    ------
    OSError
        If the ``wb_command`` executable cannot be found/launched.
    """
    # Local import so this function is self-contained.
    import subprocess

    ciftitemplate = '/projects3/StroopActFlow/data/Q1-Q6_RelatedParcellation210.LR.CorticalAreas_dil_Colors.32k_fs_LR.dlabel.nii'

    # Pass the arguments as a list (no shell): file names containing spaces or
    # shell metacharacters are handed to wb_command verbatim.
    command = [
        'wb_command', '-cifti-convert', '-from-text',
        inputfilename,
        ciftitemplate,
        outputfilename,
        '-col-delim', ',',
        '-reset-scalars',
    ]
    return subprocess.call(command)

## 0.0 Set up

In [29]:
# Subjects included in this analysis
subjNums = ['101', '102']

# Directory layout
basedir = '/projects3/StroopActFlow/data/'
resultsdir = basedir + 'results/'
restdir = resultsdir + 'glmRest_GlasserParcels/'

# Parcel ordering by network community; the file is 1-based, so shift to
# python numbering (starts from 0) and store as an (nparcels, 1) int column.
glasser_nets = np.loadtxt('/projects/AnalysisTools/netpartitions/ColeLabNetPartition_v1.1/community_order.txt', delimiter=',')
glasser_nets = glasser_nets - 1.0
networkorder = glasser_nets.astype(int).reshape(-1, 1)

# Network name -> numeric code used in the network assignment file
networkmappings = {'fpn':7, 'vis':1, 'smn':2, 'con':3, 'dmn':6, 'aud1':8, 'aud2':9, 'dan':11}
networks = networkmappings.keys()

# Per-parcel network assignment, and the same assignment in community order
networkdef = '/projects/AnalysisTools/netpartitions/ColeLabNetPartition_v1.1/parcel_network_assignments.txt'
networkdef = np.loadtxt(networkdef, delimiter=',')
reorderednetworkaffil = networkdef[networkorder]

# For every network, record the last (largest) reordered position that
# belongs to it, keyed by position -> network name.
xticks = {}
for net, netNum in networkmappings.items():
    netind = np.where(reorderednetworkaffil == netNum)[0]
    tick = netind.max()
    xticks[tick] = net

# Load in Glasser parcels (label value per vertex, singleton dims removed)
glasserfile = '/projects3/StroopActFlow/data/Q1-Q6_RelatedParcellation210.LR.CorticalAreas_dil_Colors.32k_fs_LR.dlabel.nii'
glasser = np.squeeze(nib.load(glasserfile).get_data())

## 2.0 Run task activation analysis on 64k Surface for 2 rule encodings

In [30]:
def loadStimBetas(subj,
                  datadir='/projects3/StroopActFlow/data/results/glm_ruleStimBetaSeries/',
                  nruleBetas=60,
                  nskipBetas=18):
    """
    Load one subject's beta-series CSV and return only the trailing columns.

    The file ``<subj>_RuleAndStimBetaSeries_taskBetas_Surface64k.csv`` is read
    from ``datadir`` and the first ``nskipBetas + nruleBetas`` columns are
    dropped.

    Parameters
    ----------
    subj : str
        Subject identifier used as the file-name prefix.
    datadir : str, optional
        Directory holding the beta-series CSV files. Defaults to the
        project's glm_ruleStimBetaSeries results directory.
    nruleBetas : int, optional
        Number of rule beta columns to drop (60 by default).
    nskipBetas : int, optional
        Number of leading columns that precede the rule betas (18 by
        default). NOTE(review): presumably non-task regressors -- confirm
        against the GLM design.

    Returns
    -------
    numpy.ndarray
        2-D array (rows x remaining columns) of the betas that follow the
        skipped columns.
    """
    filename = subj + '_RuleAndStimBetaSeries_taskBetas_Surface64k.csv'
    # ndmin=2 keeps the column slice valid even for a single-row file.
    betas = np.loadtxt(os.path.join(datadir, filename), delimiter=',', ndmin=2)

    # Keep only the columns after the leading regressors and the rule betas.
    return betas[:, nskipBetas + nruleBetas:]

## 2.1 Run MVPA analysis between left and right motor responses (neutral trials only)
* Class 0: left response (`response == 'y'`)
* Class 1: right response (`response == 'g'`)

##### Load in data as dicts

In [31]:
# Number of classes stored along the last axis of each subject's beta array
nrules = 2

beta_dict = {}
behavdata = {}

# Build, per subject, a (vertex x trial x class) array of betas with the same
# number of trials in each class.
for subj in subjNums:
    print('Loading betas for subject ' + subj)
    betas = loadStimBetas(subj)
    behavdata[subj] = utils.loadBehavData(subj)

    # Trial indices for each response, restricted to neutral-condition trials
    neutral_ind = np.where(behavdata[subj]['condition']=='neutral')[0]
    left_ind = np.intersect1d(np.where(behavdata[subj]['response']=='y')[0], neutral_ind)
    right_ind = np.intersect1d(np.where(behavdata[subj]['response']=='g')[0], neutral_ind)

    # Truncate both classes to the size of the smaller one
    nminsamples = min(len(left_ind), len(right_ind))

    # Slot 0 holds the 'y' (left) trials, slot 1 the 'g' (right) trials
    beta_dict[subj] = np.zeros((betas.shape[0], nminsamples, nrules))
    for slot, trial_ind in enumerate((left_ind, right_ind)):
        beta_dict[subj][:, :, slot] = betas[:, trial_ind[:nminsamples]]

        

Loading betas for subject 101
Loading betas for subject 102


##### Run 2 classifications

Function for cross-validation

In [36]:
def runCV(svmmat, labels, cvs=10, leaveout=5, replacement=False):
    """
    Run repeated random hold-out cross-validation with a linear SVM and test
    the pooled accuracy against chance with a one-sided binomial test.

    On each of the ``cvs`` iterations, ``leaveout`` samples of every class
    are drawn at random as the test set and all remaining samples form the
    training set. Features are z-scored using the training-set mean and
    standard deviation before fitting.

    Parameters
    ----------
    svmmat : numpy.ndarray
        Sample x feature matrix.
    labels : array-like
        Class label of each row of ``svmmat``.
    cvs : int, optional
        Number of random train/test splits.
    leaveout : int, optional
        Number of test samples drawn per class on each split.
    replacement : bool, optional
        Whether the test samples of a class are drawn with replacement
        (passed to ``np.random.choice``).

    Returns
    -------
    avg_acc : float
        Mean test accuracy across splits.
    p : float
        One-sided binomial p-value, P(X >= number of correct predictions)
        with success probability 1/nclasses.

    Notes
    -----
    The p-value is the exact upper tail of the binomial distribution. For two
    balanced classes and above-chance accuracy this equals the previous
    "two-sided p / 2" value; unlike that shortcut it is also valid when
    chance is not 0.5 (more than two classes).
    Spatial demeaning across features is not applied (it was commented out in
    the original analysis).
    """
    labels = np.asarray(labels)

    # Get number of classes
    classes = np.unique(labels)

    # Total number of test predictions per split
    leaveout_total = leaveout*len(classes)

    # Start cross-validation
    nsamples = svmmat.shape[0]
    acc = []
    for cv in range(cvs):
        # Draw `leaveout` test samples from each class
        test_ind = []
        for stim in classes:
            stimlabs = np.where(labels==stim)[0]
            test_ind.extend(np.random.choice(stimlabs,leaveout,replace=replacement))

        test_ind = np.asarray(test_ind)
        train_ind = np.delete(np.arange(nsamples),test_ind)

        trainset = svmmat[train_ind,:]
        testset = svmmat[test_ind,:]

        # Normalize train and test set according to train set mean & std.
        # Constant features (std == 0) are left centered but unscaled so they
        # do not turn into NaN/inf and break the classifier.
        mean = np.mean(trainset,axis=0)
        std = np.std(trainset,axis=0)
        std = np.where(std == 0, 1.0, std)
        trainset = np.divide((trainset - mean),std)
        testset = np.divide((testset - mean),std)

        # Construct classifier and fit
        svc = SVC(kernel='linear')
        svc.fit(trainset,labels[train_ind])
        # Get acc
        acc.append(svc.score(testset,labels[test_ind]))

    # Get average accuracy
    avg_acc = np.mean(acc)

    # Perform within-subject one-sided binomial test.
    # avg_acc*ntotal is the number of correct predictions; round it because
    # the floating-point product can land just below the integer.
    ntotal = leaveout_total*cvs
    nsuccess = int(round(avg_acc*ntotal))
    chance = 1.0/len(classes)
    # sf(k - 1) = P(X > k - 1) = P(X >= k)
    p = stats.binom.sf(nsuccess - 1, ntotal, chance)

    return avg_acc, p


#### Run actual code/cross-validations

In [37]:
accmat = {}
pmat = {}
qmat = {}
accmat_thresh = {}
nparcels = 360

# Cross-validation settings (identical for every subject and ROI):
# 10 random train/test splits, 20 test samples held out per class on each.
crossvalidations = 10
leaveout = 20

for subj in subjNums:
    print('Running classifications for subject ' + subj)
    # Vertex x 2 matrix: column 0 = accuracy, column 1 = thresholded accuracy
    # (column 1 is filled in by the FDR-correction cell).
    accmat[subj] = np.zeros((glasser.shape[0],2))
    # One p-value per parcel
    pmat[subj] = np.zeros((nparcels,))

    for roi in range(nparcels):
        # Progress report every 100 ROIs. (The original tested
        # nparcels%100, which is never 0 for 360 parcels, so nothing printed.)
        if roi%100==0:
            print('Running classifications on ROI, ' + str(roi + 1))
        # Glasser labels are 1-based
        roi_ind = np.where(glasser==roi+1)[0]

        ####
        ## Two-way classification between the classes stored in beta_dict
        # Set up SVM matrix: samples x vertices, classes stacked row-wise
        mat_tmp = []
        labels_tmp = []
        for rule in range(nrules):
            mat_tmp.append(beta_dict[subj][roi_ind,:,rule].T)
            labels_tmp.append(np.ones((beta_dict[subj].shape[1],1))*rule)
        svmmat_color = np.vstack(mat_tmp)
        labels_color = np.squeeze(np.vstack(labels_tmp))

        # Run cross-validation using Linear SVMs
        acc, p = runCV(svmmat_color, labels_color, cvs=crossvalidations, leaveout=leaveout, replacement=False)
        # Every vertex of the parcel gets the parcel's accuracy
        accmat[subj][roi_ind,0] = acc
        pmat[subj][roi] = p
        
   

Running classifications for subject 101
Running classifications for subject 102


#### Run FDR correction for each test

In [38]:
qmat = {}
mvpa_mask = {}

for subj in subjNums:
    # Vertex x 2 mask; only column 0 is written here
    mvpa_mask[subj] = np.zeros(accmat[subj].shape)

    # FDR-correct the per-parcel p-values
    h0,qs = mc.fdrcorrection0(pmat[subj])

    # Visit only the parcels that survive correction (q < 0.05): flag their
    # vertices in the mask and copy their accuracy into the thresholded column.
    for roi in np.where(qs < 0.05)[0]:
        roi_ind = np.where(glasser==roi+1)[0]
        mvpa_mask[subj][roi_ind,0] = 1.0
        accmat[subj][roi_ind,1] = accmat[subj][roi_ind,0]
            


### 2.2 Save MVPA maps to CSV and dscalar files

In [39]:
# Output directory for the motor-response localizer maps
basedir = '/projects3/StroopActFlow/data/results/glm_ruleStimBetaSeries/motorResponse_localizer/'
for subj in subjNums:
    # Common file stem for the CSV and the dscalar outputs
    filename_mvpa = basedir + subj + '_motorResponse_MVPA_LeftVRight_v2'
    csvfile = filename_mvpa + '.csv'
    ciftifile = filename_mvpa + '.dscalar.nii'

    # Write the accuracy matrix as CSV, then convert that CSV to CIFTI
    np.savetxt(csvfile, accmat[subj], delimiter=',')
    convertCSVToCIFTI64k(csvfile, ciftifile)


## 3.0 Run GLM analysis on MVPA masks for Word V Color Rule separately

### 3.1 Perform paired t-test (left vs. right response) on SMN vertices

In [21]:
ttest_loc = {}

for subj in subjNums:
    # Vertex X stats matrix.
    # Column 0: paired t-value; column 1: t-value where the (uncorrected)
    # p < 0.05, and 0 otherwise.
    ttest_loc[subj] = np.zeros((mvpa_mask[subj].shape[0],2))

    # NOTE(review): the MVPA mask is only used for its shape here; the
    # original code computed MVPA-significant vertices and then overwrote
    # them, so the test is restricted to SMN vertices only. Confirm whether
    # an intersection with mvpa_mask was intended.

    # Only include SMN vertices.
    # np.where gives 0-based parcel indices while the Glasser labels are
    # 1-based, hence roi+1 (the original compared against roi, selecting the
    # wrong parcels).
    rois = np.where(networkdef==networkmappings['smn'])[0]
    roi_ind = []
    for roi in rois:
        roi_ind.extend(np.where(glasser==roi+1)[0])
    # dtype=int keeps this a valid index array even when it is empty
    rule_ind = np.reshape(np.asarray(roi_ind, dtype=int),-1)

    if rule_ind.size:
        # Paired t-test across trials (axis 1) for all selected vertices at
        # once: class 0 versus class 1 of beta_dict.
        t, p = stats.ttest_rel(beta_dict[subj][rule_ind,:,0],
                               beta_dict[subj][rule_ind,:,1],
                               axis=1)
        ttest_loc[subj][rule_ind,0] = t

        # Threshold at uncorrected p < 0.05 (FDR correction is not applied)
        h0 = p < 0.05
        ttest_loc[subj][rule_ind,1] = np.multiply(h0,t)
        
        
    
        
        

### 3.2 Save statistical maps to CSV and dscalar files

In [22]:
# Output directory for the t-test contrast maps
basedir = '/projects3/StroopActFlow/data/results/glm_ruleStimBetaSeries/taskRule_localizer/'
for subj in subjNums:
    # Common file stem for the CSV and the dscalar outputs
    filerule = basedir+subj+'_MVPAconstrained_ttestContrast_rules_v2'
    csvfile = filerule + '.csv'
    ciftifile = filerule + '.dscalar.nii'

    # Write the statistics matrix as CSV, then convert that CSV to CIFTI
    np.savetxt(csvfile, ttest_loc[subj], delimiter=',')
    convertCSVToCIFTI64k(csvfile, ciftifile)
