# ActFlow 1 -- Compute the baseline decodability of Motor responses (LINDEX v. LMID and RINDEX v. RMID)
## Using ActFlow, all to one, via ridge FC

## Use SVM classifications to decode hand-specific responses
## Using Ciric-style postprocessing

## Takuya Ito
#### 12/12/2018

In [1]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import os
import multiprocessing as mp
import scipy.stats as stats
import nibabel as nib
import os
os.environ['OMP_NUM_THREADS'] = str(1)
import statsmodels.api as sm
import sklearn.svm as svm
import statsmodels.sandbox.stats.multicomp as mc
import sklearn
from sklearn.feature_selection import f_classif
import seaborn as sns
import h5py
os.sys.path.append('glmScripts/')
import taskGLMPipeline as tgp
import statsmodels.api as sm

# Figure styling used by every plot in this notebook: seaborn's white-grid
# theme and the FreeSans font family.
sns.set_style("whitegrid")
plt.rcParams.update({"font.family": "FreeSans"})


In [2]:
# Subject identifiers (as strings) included in the analysis.
# Excluding 084
subjNums = ['013','014','016','017','018','021','023','024','026','027','028','030','031','032','033',
            '034','035','037','038','039','040','041','042','043','045','046','047','048','049','050',
            '053','055','056','057','058','062','063','066','067','068','069','070','072','074','075',
            '076','077','081','085','086','087','088','090','092','093','094','095','097','098','099',
            '101','102','103','104','105','106','108','109','110','111','112','114','115','117','119',
            '120','121','122','123','124','125','126','127','128','129','130','131','132','134','135',
            '136','137','138','139','140','141']



# Root directory of the project; data paths below are built from it.
basedir = '/projects3/SRActFlow/'

# Using final partition
# networkdef holds one network label per entry of the partition file
# (presumably one per parcel -- confirm against the file).
networkdef = np.loadtxt('/projects3/NetworkDiversity/data/network_partition.txt')
# Indices that sort the entries by network label, stored as a column vector.
networkorder = np.asarray(sorted(range(len(networkdef)), key=lambda k: networkdef[k]))
networkorder.shape = (len(networkorder),1)
# network mappings for final partition set
# (network name -> integer label used in networkdef)
networkmappings = {'fpn':7, 'vis1':1, 'vis2':2, 'smn':3, 'aud':8, 'lan':6, 'dan':5, 'con':4, 'dmn':9, 
                   'pmulti':10, 'none1':11, 'none2':12}
networks = networkmappings.keys()

# xticks maps, for every network, the last position it occupies in the
# network-sorted ordering to the network's name.
xticks = {}
reorderednetworkaffil = networkdef[networkorder]
for net in networks:
    netNum = networkmappings[net]
    netind = np.where(reorderednetworkaffil==netNum)[0]
    tick = np.max(netind)
    xticks[tick] = net

## General parameters/variables
nParcels = 360
nSubjs = len(subjNums)

# Parcellation label file; glasser2 is its data array with singleton
# dimensions removed.  It is compared against parcel numbers throughout the
# notebook to select the entries belonging to a parcel (presumably one label
# per surface vertex -- confirm).
glasserfile2 = '/projects/AnalysisTools/ParcelsGlasser2016/Q1-Q6_RelatedParcellation210.LR.CorticalAreas_dil_Colors.32k_fs_RL.dlabel.nii'
glasser2 = nib.load(glasserfile2).get_data()
glasser2 = np.squeeze(glasser2)

# Network names listed in the order of their positions in the sorted partition.
sortednets = np.sort(xticks.keys())
orderednetworks = []
for net in sortednets: orderednetworks.append(xticks[net])
    
# One color name per network, stored as an array so it can be fancy-indexed.
networkpalette = ['royalblue','slateblue','paleturquoise','darkorchid','limegreen',
                  'lightseagreen','yellow','orchid','r','peru','orange','olivedrab']
networkpalette = np.asarray(networkpalette)

# Upper-case display labels for the networks.
OrderedNetworks = ['VIS1','VIS2','SMN','CON','DAN','LAN','FPN','AUD','DMN','PMM','VMM','ORA']

# 0.0 Define functions for loading data

In [3]:
def loadMotorResponses(subj):
    """
    Load the motor-response task betas for one subject.

    Parameters
    ----------
    subj : str
        Subject identifier (e.g. '013').  Used to look up the task timing
        and to build the name of the subject's GLM output file.

    Returns
    -------
    numpy.ndarray
        The columns of the stored task-regression beta matrix whose regressor
        is labelled 'motorResponse' in the task timing; rows are returned as
        stored in the file.
    """
    timing = tgp.loadTaskTiming(subj,'ALL')
    stimIndex = np.asarray(timing['stimIndex'])
    # Column positions of the motor-response regressors
    ind = np.where(stimIndex=='motorResponse')[0]

    datadir = basedir + 'data/postProcessing/hcpPostProcCiric/'
    # The context manager closes the file even if the dataset read fails
    with h5py.File(datadir + subj + '_glmOutput_data.h5','r') as h5f:
        betas = h5f['taskRegression/ALL_24pXaCompCorXVolterra_taskReg_betas_canonical'][:]

    # Indexing with an index array already yields an independent copy
    return betas[:,ind]

def loadRSFCMapping(subj,roi):
    """
    Load one subject's ridge-FC source-to-target mapping for a target parcel.

    Parameters
    ----------
    subj : str
        Subject identifier; the name of the group inside the HDF5 file.
    roi : int
        Target parcel number; selects the file 'TargetParcel<roi>_RidgeFC.h5'.

    Returns
    -------
    numpy.ndarray
        Contents of the 'sourceToTargetMapping' dataset for this subject.
    """
    fcdir = '/projects3/SRActFlow/data/results/ridgeFC/'
    filename = fcdir + 'TargetParcel' + str(roi) + '_RidgeFC.h5'
    # The context manager closes the file even if the read raises
    with h5py.File(filename,'r') as h5f:
        fcmapping = h5f[subj]['sourceToTargetMapping'][:]
    return fcmapping
        

## 0.1 Load data

In [4]:
# gsr = True
# Number of motor-response regressors stored per subject
nResponses = 4

# Task activations indexed as [vertex, response, subject]
data_task = np.zeros((glasser2.shape[0],nResponses,len(subjNums)))
for scount, subj in enumerate(subjNums):
    data_task[:,:,scount] = loadMotorResponses(subj)

## 0.2 Generate actflow data

In [12]:
# Target parcels (parcel numbers in glasser2) and the entries they occupy
roi_lh = 9
roi_rh = 189
target_rh_ind = np.where(glasser2==roi_rh)[0]
target_lh_ind = np.where(glasser2==roi_lh)[0]

# Running sums of the per-subject source-to-target mappings
fcmapping_rh = np.zeros((len(glasser2),len(target_rh_ind)))
fcmapping_lh = np.zeros((len(glasser2),len(target_lh_ind)))
for subj in subjNums:
    fcmapping_rh += loadRSFCMapping(subj,roi_rh)
    fcmapping_lh += loadRSFCMapping(subj,roi_lh)

# Turn the sums into group averages
fcmapping_rh = fcmapping_rh / float(len(subjNums))
fcmapping_lh = fcmapping_lh / float(len(subjNums))

In [33]:

# Actflow-predicted activations, indexed like data_task: [vertex, response, subject].
# Only the entries of the two target parcels are filled; everything else stays 0.
actflow_data = np.zeros((len(glasser2),nResponses,len(subjNums)))

# Each target parcel paired with its group-averaged source-to-target weights
# (right-hemisphere target first, then left)
targets = [(roi_rh,fcmapping_rh),(roi_lh,fcmapping_lh)]

for scount, subj in enumerate(subjNums):
    # Responses: 0 = Left Finger 1, 1 = Left Finger 2,
    #            2 = Right Finger 1, 3 = Right Finger 2
    for resp in range(4):
        # z-score the subject's activation map for this response, then
        # project it through the mapping to predict the target's activity
        source_act = stats.zscore(data_task[:,resp,scount],axis=0)
        for roi, fcmapping in targets:
            target_ind = np.where(glasser2==roi)[0]
            actflow_data[target_ind,resp,scount] = np.dot(source_act,fcmapping)
    

# 1.0 Define functions for motor response decodings

In [75]:
from scipy.spatial.distance import cdist

def motorResponseDecodings(data, actflow_data, ncvs=100, nproc=5):
    """
    Across-subject decoding of finger responses, separately for each hand.

    For every hand and every target parcel, one (samples x vertices) matrix is
    built from the actual activations and one from the actflow-predicted
    activations.  Both are passed to randomSplitLOOBaselineCV, which trains on
    the predicted patterns and tests on the actual patterns.

    Parameters
    ----------
    data : numpy.ndarray, indexed as [vertex, response, subject]
        Actual task activations.
    actflow_data : numpy.ndarray, indexed like `data`
        Actflow-predicted activations.
    ncvs : int
        Number of bootstrap resamples per cross-validation fold.
    nproc : int
        Number of worker processes used for the classifications.

    Returns
    -------
    smnStats : numpy.ndarray, shape (n_rois, nSubjs*2*ncvs, n_hands)
        One entry per classified test sample (1 = correct, 0 = incorrect).
        ROIs follow the order of `rois`; hands follow the iteration order of
        the `hands` dict, so callers must iterate an identical dict to match
        the last axis.

    Notes
    -----
    Reads the module-level `glasser2` array to find the vertices of a parcel.
    """

    # Response columns belonging to each hand
    hands = {'Left':[0,1],'Right':[2,3]}

    nSubjs = data.shape[2]
    nHands = len(hands)

    #### Hack ####
    # Decoding is restricted to these two target parcels (the same ones for
    # which actflow predictions were generated) rather than all SMN parcels.
    rois = [9,189]

    # The cross-validation returns one score per test sample: every subject is
    # tested once per resample, contributing one sample per finger.
    nfing_max = max(len(cols) for cols in hands.values())
    smnStats = np.zeros((len(rois),nSubjs*nfing_max*ncvs,nHands))

    for taskcount, hand in enumerate(hands):
        print('Computing SVM classification for task %s' % hand)

        hand_ind = hands[hand]
        nfing = len(hand_ind)
        nsamples = nSubjs * nfing

        # Label array for supervised learning (finger index, cycling per subject)
        labels = np.tile(np.arange(nfing),nSubjs)
        # Subject index of every sample, aligned with `labels`
        subjarray = np.repeat(np.arange(nSubjs),nfing)

        # Run SVM classifications on parcel-level activation patterns across subjects
        for roicount, roi in enumerate(rois):
            roi_ind = np.where(glasser2==roi)[0]
            # Column vector so that it broadcasts against hand_ind to a
            # (vertices x fingers) selection
            roi_ind.shape = (len(roi_ind),1)

            svm_mat = np.zeros((nsamples,roi_ind.shape[0]))
            actflow_svm_mat = np.zeros((nsamples,roi_ind.shape[0]))
            # Use the subject count of `data` itself, not the global subject list
            for scount in range(nSubjs):
                rows = slice(scount*nfing,(scount+1)*nfing)
                # (vertices x fingers) -> (fingers x vertices); no squeeze, so
                # a single-vertex parcel keeps its 2-D shape
                svm_mat[rows,:] = data[roi_ind,hand_ind,scount].T
                actflow_svm_mat[rows,:] = actflow_data[roi_ind,hand_ind,scount].T

            scores = randomSplitLOOBaselineCV(ncvs, svm_mat, actflow_svm_mat, labels, subjarray, nproc=nproc)
            smnStats[roicount,:,taskcount] = scores

    return smnStats

def randomSplitLOOBaselineCV(ncvs, svm_mat, actflow_svm_mat, labels, subjarray, nproc=5):
    """
    Runs cross validation for an across-subject SVM analysis.

    Subjects are shuffled and split into disjoint folds of 4 test subjects, so
    every subject is held out exactly once.  For each fold, `ncvs` bootstrap
    training sets are drawn from the remaining subjects.  The classifier is
    trained on rows of `actflow_svm_mat` and tested on rows of `svm_mat`.

    Parameters
    ----------
    ncvs : int
        Number of bootstrap training sets per fold.
    svm_mat : numpy.ndarray, shape (n_samples, n_features)
        Actual activation patterns (used for testing).
    actflow_svm_mat : numpy.ndarray, shape (n_samples, n_features)
        Actflow-predicted activation patterns (used for training).
    labels : numpy.ndarray, shape (n_samples,)
        Class label of every sample.
    subjarray : numpy.ndarray, shape (n_samples,)
        Subject identifier of every sample.
    nproc : int
        Number of worker processes.

    Returns
    -------
    list
        One entry per classified test sample (True = correct), ordered by
        fold, then resample, then test sample.

    Raises
    ------
    Exception
        If the number of subjects is not a multiple of the fold size (4).
    """

    subjects = np.unique(subjarray)
    nsubjs = len(subjects)

    numsubjs_perfold = 4
    if nsubjs%numsubjs_perfold!=0:
        raise Exception("Error: Folds don't match number of subjects")

    nfolds = nsubjs//numsubjs_perfold

    # Row indices of each subject's samples, computed once
    rows_by_subj = dict((subj, np.where(subjarray==subj)[0]) for subj in subjects)

    # Shuffle the *unique* subjects and cut them into folds: no subject can
    # appear twice in a fold, and no subject is tested more than once.
    test_folds = np.random.permutation(subjects).reshape(nfolds,numsubjs_perfold)

    inputs = []
    for test_subjs in test_folds:
        train_subjs_all = np.setdiff1d(subjects,test_subjs)

        test_ind = np.concatenate([rows_by_subj[subj] for subj in test_subjs])
        testset_raw = svm_mat[test_ind,:]

        for cv in range(ncvs):
            # Bootstrap the training subjects: draw twice as many subjects as
            # are available, with replacement (repeats are kept on purpose)
            train_subjs = np.random.choice(train_subjs_all,
                                           int(np.floor(len(train_subjs_all)*(2.0))),
                                           replace=True)
            train_ind = np.concatenate([rows_by_subj[subj] for subj in train_subjs])

            # Training set: predicted patterns, z-scored per feature with
            # their own statistics
            trainset = actflow_svm_mat[train_ind,:]
            trainset = (trainset - np.mean(trainset,axis=0))/np.std(trainset,axis=0)

            # Test set: actual patterns, z-scored per feature with the
            # actual-data statistics of the training subjects only, so no
            # information from the held-out subjects enters the scaling
            actual_train = svm_mat[train_ind,:]
            testset = (testset_raw - np.mean(actual_train,axis=0))/np.std(actual_train,axis=0)

            inputs.append((trainset,testset,labels[train_ind],labels[test_ind]))

    pool = mp.Pool(processes=nproc)
    try:
        scores = pool.map_async(_decoding,inputs).get()
    finally:
        # Always release the worker processes, even if a classification failed
        pool.close()
        pool.join()

    # Flatten the per-classification outcome arrays into a single list
    acc = []
    for score in scores:
        acc.extend(score)
    return acc

def _decoding(args):
    """
    Fit a linear SVM on the training set and score it on the test set.

    Parameters
    ----------
    args : tuple
        (trainset, testset, trainlabels, testlabels), packed into a single
        argument so the function can be used with Pool.map.

    Returns
    -------
    numpy.ndarray of bool
        True where the predicted label equals the test label.
    """
    # Unpack inside the body: tuple parameters in the signature are
    # Python 2-only syntax
    trainset, testset, trainlabels, testlabels = args

    clf = svm.SVC(C=1.0, kernel='linear')

    clf.fit(trainset,trainlabels)
    predictions = clf.predict(testset)
    acc = predictions==testlabels

    return acc

## 1.1 Run across subject decoding on hand-specific motor responses

In [76]:
# Number of worker processes for the parallel classifications
nproc = 30
# Number of bootstrap resamples per fold
# ncvs = 50
ncvs = 1

# Actual activations are z-scored along the first (vertex) axis before
# decoding; the actflow predictions are passed in unchanged.
distances_baseline = motorResponseDecodings(stats.zscore(data_task,axis=0),
                                            actflow_data,
                                            ncvs=ncvs,
                                            nproc=nproc)


Computing SVM classification for task Right
Computing SVM classification for task Left


## 1.2 Compute statistics

In [77]:
# Must be the same dict literal as in motorResponseDecodings: the last axis of
# distances_baseline follows the iteration order of that dict.
hands = {'Left':[0,1],'Right':[2,3]}

#### Hack ####
# Only the two target parcels that were decoded are analysed
smnROIs = [9,189]

# statistics[hand] has one row per ROI and three columns:
#   0: mean decoding accuracy
#   1: one-sided binomial p-value, replaced by its FDR-corrected q-value
#   2: accuracy where the FDR test rejects the null, 0 elsewhere
statistics = {}
taskcount = 0
for hand in hands:
    nfing = len(hands[hand])
    chance = 1/float(nfing)
    ntrials = len(subjNums)*nfing

    statistics[hand] = np.zeros((len(smnROIs),3)) # acc, q, acc_thresh
    for roicount in range(len(smnROIs)):
        acc = np.mean(distances_baseline[roicount,:,taskcount])
        # The binomial test needs an integer success count; mean*ntrials can
        # land a hair below the true integer, so round instead of truncating
        ncorrect = int(round(acc*ntrials))
        p = stats.binom_test(ncorrect,n=ntrials,p=chance)
        # Convert the two-sided p-value into a one-sided (above chance) one
        if acc>chance:
            p = p/2.0
        else:
            p = 1.0-p/2.0

        statistics[hand][roicount,0] = acc
        statistics[hand][roicount,1] = p

    # FDR-correct across ROIs within this hand
    h0, qs = mc.fdrcorrection0(statistics[hand][:,1])
    statistics[hand][:,1] = qs
    statistics[hand][:,2] = h0*statistics[hand][:,0]

    taskcount += 1
    

In [78]:
statistics

{'Left': array([[ 0.50520833,  0.64084057,  0.        ],
        [ 0.484375  ,  0.64084057,  0.        ]]),
 'Right': array([[ 0.53125   ,  0.21367729,  0.        ],
        [ 0.54166667,  0.21367729,  0.        ]])}

In [72]:
def _reportSignificantROIs(hand, handlabel):
    """
    Print how many ROIs have an FDR-corrected q-value below 0.05 for `hand`,
    their parcel numbers (if any) and their accuracies.

    Parameters
    ----------
    hand : str
        Key into `statistics` ('Left' or 'Right').
    handlabel : str
        Lower-case hand name used in the printed message.

    Returns
    -------
    numpy.ndarray
        Row indices (into `statistics[hand]`) of the significant ROIs.
    """
    sig_ind = np.where(statistics[hand][:,1]<0.05)[0]
    print('Number of ROIs significant for %s hand responses: %s' % (handlabel, sig_ind.shape[0]))
    if sig_ind.shape[0]>0:
        # smnROIs may be a plain list, which cannot be indexed with an index
        # array, so convert it first
        print('Significant ROIs: %s' % (np.asarray(smnROIs)[sig_ind],))
    print('Accuracies: %s' % (statistics[hand][sig_ind,0],))
    return sig_ind

# Count number of significant ROIs for LH decoding
sigLH_ind = _reportSignificantROIs('Left','left')

print('')
# Count number of significant ROIs for RH decoding
sigRH_ind = _reportSignificantROIs('Right','right')

Number of ROIs significant for left hand responses: 0
Accuracies: []

Number of ROIs significant for right hand responses: 0
Accuracies: []


## 1.3 Map accuracies back to cortical surface

In [127]:
# Put all data into a single matrix (since we only run a single classification)
# Columns follow the statistics arrays: accuracy, q-value, thresholded accuracy
nVertices = glasser2.shape[0]
lefthand = np.zeros((nVertices,3))
righthand = np.zeros((nVertices,3))

lh_ind = [0,1]
rh_ind = [0,1]
for roicount, roi in enumerate(smnROIs):
    # Every entry of the parcel receives that parcel's row of statistics
    vertex_ind = np.where(glasser2==roi)[0]
    lefthand[vertex_ind,:] = statistics['Left'][roicount,:3]
    righthand[vertex_ind,:] = statistics['Right'][roicount,:3]


####
# Write file to csv and run wb_command
outdir = '/projects3/SRActFlow/data/results/Decoding_MotorResponse/'

# Left-hand decoding map: text file first, then conversion to a dscalar
# CIFTI using the parcellation file as the template
filename = 'smnDecodingsLH_v2'
csv_file = outdir + filename + '.csv'
np.savetxt(csv_file, lefthand,fmt='%s')
wb_file = filename + '.dscalar.nii'
wb_command = ' '.join(['wb_command -cifti-convert -from-text', csv_file, glasserfile2, outdir + wb_file, '-reset-scalars'])
os.system(wb_command)

# Right-hand decoding map, same procedure
filename = 'smnDecodingsRH_v2'
csv_file = outdir + filename + '.csv'
np.savetxt(csv_file, righthand,fmt='%s')
wb_file = filename + '.dscalar.nii'
wb_command = ' '.join(['wb_command -cifti-convert -from-text', csv_file, glasserfile2, outdir + wb_file, '-reset-scalars'])
os.system(wb_command)




0