# fMRI1 -- Compute the baseline decodability (within-subject!) of Motor rule response (LINDEX v. LMID and RINDEX v. RMID)

## Decode hand-specific responses with a correlation-based minimum-distance classifier
## Using Ciric-style postprocessing

## Takuya Ito
#### 01/15/2019

In [1]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import os
import multiprocessing as mp
import scipy.stats as stats
import nibabel as nib
import os
os.environ['OMP_NUM_THREADS'] = str(1)
import statsmodels.api as sm
import sklearn.svm as svm
import statsmodels.sandbox.stats.multicomp as mc
import sklearn
from sklearn.feature_selection import f_classif
import seaborn as sns
import h5py
os.sys.path.append('glmScripts/')
import taskGLMPipeline as tgp
os.sys.path.append('utils/')
import loadExperimentalData as led
sns.set_style("whitegrid")
plt.rcParams["font.family"] = "FreeSans"


  from pandas.core import datetools


In [2]:
# Subject IDs entering the analysis (subject 084 is excluded)
subjNums = ['013','014','016','017','018','021','023','024','026','027','028','030','031','032','033',
            '034','035','037','038','039','040','041','042','043','045','046','047','048','049','050',
            '053','055','056','057','058','062','063','066','067','068','069','070','072','074','075',
            '076','077','081','085','086','087','088','090','092','093','094','095','097','098','099',
            '101','102','103','104','105','106','108','109','110','111','112','114','115','117','119',
            '120','121','122','123','124','125','126','127','128','129','130','131','132','134','135',
            '136','137','138','139','140','141']

basedir = '/projects3/SRActFlow/'

# Final network partition: one network number per parcel
networkdef = np.loadtxt('/projects3/NetworkDiversity/data/network_partition.txt')
# Parcel indices sorted by network number (stable sort), kept as a column vector
networkorder = np.asarray(
    sorted(range(len(networkdef)), key=lambda k: networkdef[k])
).reshape(-1, 1)
# Network name -> network number used in the partition file
networkmappings = {'fpn':7, 'vis1':1, 'vis2':2, 'smn':3, 'aud':8, 'lan':6, 'dan':5, 'con':4, 'dmn':9, 
                   'pmulti':10, 'none1':11, 'none2':12}
networks = networkmappings.keys()

# For every network, the last position it occupies in the network-sorted parcel order
reorderednetworkaffil = networkdef[networkorder]
xticks = {
    np.max(np.where(reorderednetworkaffil == networkmappings[name])[0]): name
    for name in networks
}

## General parameters/variables
nParcels = 360
nSubjs = len(subjNums)

# Glasser parcel label for every vertex of the dlabel file
glasserfile2 = '/projects/AnalysisTools/ParcelsGlasser2016/Q1-Q6_RelatedParcellation210.LR.CorticalAreas_dil_Colors.32k_fs_RL.dlabel.nii'
glasser2 = np.squeeze(nib.load(glasserfile2).get_data())

# Network names listed in the order of their tick positions
sortednets = np.sort(xticks.keys())
orderednetworks = [xticks[position] for position in sortednets]

networkpalette = np.asarray(['royalblue','slateblue','paleturquoise','darkorchid','limegreen',
                             'lightseagreen','yellow','orchid','r','peru','orange','olivedrab'])

OrderedNetworks = ['VIS1','VIS2','SMN','CON','DAN','LAN','FPN','AUD','DMN','PMM','VMM','ORA']

# 0.0 Define functions for loading data

In [3]:
def loadMotorResponses(subj):
    """
    Load the probe-period beta series of one subject.

    Reads '<subj>_glmOutput_data.h5' from the Ciric-style post-processing
    directory below the module-level `basedir` and returns the
    'betaSeries_24pXaCompCorXVolterra_taskReg_betas_canonical' dataset
    without its first 128 columns.

    Parameters:
        subj : subject ID string used as the file-name prefix (e.g. '013')

    Returns:
        numpy array holding every row of the dataset and the columns from
        index 128 onwards (the caller stores it as vertices x trials --
        TODO confirm against the GLM output layout).
    """
    datadir = basedir + 'data/postProcessing/hcpPostProcCiric/'
    # The context manager closes the file even if the dataset is missing or the read fails
    with h5py.File(datadir + subj + '_glmOutput_data.h5', 'r') as h5f:
        dset = h5f['taskRegression/betaSeries_24pXaCompCorXVolterra_taskReg_betas_canonical']
        # Probe activations are starting from index 128 (first 128 are encoding activations).
        # Slicing the dataset reads only those columns and already yields an in-memory array.
        data = dset[:, 128:]
    return data
        

## 0.1 Load data

In [4]:
# Number of beta-series columns kept per subject; must match the second
# dimension of what loadMotorResponses returns
nTrials = 384
# First axis follows the vertex labels in glasser2, last axis follows subjNums
data_task = np.zeros((len(glasser2),nTrials,len(subjNums)))

for scount, subj in enumerate(subjNums):
    # Progress message for every 4th subject
    if scount%4==0:
        print('Loading subject %d / %d' % (scount, len(subjNums)))
    data_task[:,:,scount] = loadMotorResponses(subj)

Loading subject 0 / 89
Loading subject 4 / 89
Loading subject 8 / 89
Loading subject 12 / 89
Loading subject 16 / 89
Loading subject 20 / 89
Loading subject 24 / 89
Loading subject 28 / 89
Loading subject 32 / 89
Loading subject 36 / 89
Loading subject 40 / 89
Loading subject 44 / 89
Loading subject 48 / 89
Loading subject 52 / 89
Loading subject 56 / 89
Loading subject 60 / 89
Loading subject 64 / 89
Loading subject 68 / 89
Loading subject 72 / 89
Loading subject 76 / 89
Loading subject 80 / 89
Loading subject 84 / 89
Loading subject 88 / 89


# 1.0 Define functions for motor response decodings

In [5]:
from scipy.spatial.distance import cdist

def motorResponseDecodings(args):
    """
    Run a within-subject classification of finger responses for one hand.

    Takes a single tuple so that it can be mapped directly with
    multiprocessing.Pool.map.

    Parameters (packed into `args`, in this order):
        data : 2D array whose columns are indexed by the positions of the
               subject's motor responses (features x trials)
        subj : subject ID handed to led.loadExperimentalData
        hand : 'left'  -> decode 'b' (LMID) vs. 'y' (LIND)
               'right' -> decode 'g' (RIND) vs. 'r' (RMID)
        ncvs : number of random train/test splits

    Returns:
        Mean decoding accuracy as returned by randomSplitLOOBaselineCV

    Raises:
        ValueError if `hand` is neither 'left' nor 'right'
    """
    data, subj, hand, ncvs = args

    # Motor responses are 'b (LMID), y (LIND), g (RIND), r (RMID)'
    finger_codes = {'left': ('b', 'y'), 'right': ('g', 'r')}
    # Validate before any file access so a bad argument fails fast and clearly
    if hand not in finger_codes:
        raise ValueError("hand must be 'left' or 'right', got %r" % (hand,))
    fing1_code, fing2_code = finger_codes[hand]

    df_task = led.loadExperimentalData(subj)
    motor_responses = df_task['MotorResponses'].values
    fing1_ind = np.where(motor_responses==fing1_code)[0]
    fing2_ind = np.where(motor_responses==fing2_code)[0]

    # Label 0 for the first finger, 1 for the second, in the same order as the stacked rows below
    labels = np.concatenate((np.repeat(0,len(fing1_ind)),
                             np.repeat(1,len(fing2_ind))))

    # Samples (trials) x features matrix: finger-1 trials first, then finger-2 trials
    svm_mat = np.vstack((data[:,fing1_ind].T, data[:,fing2_ind].T))

    # Spatially demean: remove each sample's mean across features
    svm_mat = svm_mat - np.mean(svm_mat,axis=1,keepdims=True)

    scores = randomSplitLOOBaselineCV(ncvs, svm_mat, labels)

    return scores

def randomSplitLOOBaselineCV(ncvs, svm_mat, labels):
    """
    Runs cross validation for a within-subject decoding analysis
    using repeated random train/test splits.

    For every split, the same number of training samples (approx. 80% of the
    rarest class) is drawn without replacement from each class; all remaining
    samples form the test set. Features are z-scored with the training-set
    mean and standard deviation before calling _decoding.

    Parameters:
        ncvs    : number of random splits
        svm_mat : samples x features matrix
        labels  : class label of every row of svm_mat (list or array)

    Returns:
        Mean of the accuracies returned by _decoding over all splits
    """
    # Work on an array so that comparisons such as `labels == lab` are element-wise
    labels = np.asarray(labels)
    unique_labels = np.unique(labels)

    # Data set might be unbalanced, so find the minimum number of samples per class
    counts = [np.sum(labels==lab) for lab in unique_labels]
    min_unique_samples = min(counts) if counts else 0
    # Train set is approximately 80%; must be an integer for np.random.choice / np.repeat
    n_trainset_per_cond = int(np.floor(min_unique_samples*.8))

    accuracies = []
    for cv in range(ncvs):
        # Define training and test set labels
        train_ind = []
        trainlabels = []
        for lab in unique_labels:
            ind = np.where(labels==lab)[0]
            train_ind.extend(np.random.choice(ind,n_trainset_per_cond,replace=False))
            trainlabels.extend(np.repeat(lab,n_trainset_per_cond))
        train_ind = np.asarray(train_ind,dtype=int)
        trainlabels = np.asarray(trainlabels)
        # Test set is every sample that was not drawn for training
        test_ind = np.delete(np.arange(len(labels)),train_ind)
        testlabels = labels[test_ind]

        # Define train set and test set matrices
        trainset = svm_mat[train_ind,:]
        testset = svm_mat[test_ind,:]

        # Normalize trainset and testset using trainset stats
        mean = np.mean(trainset,axis=0,keepdims=True)
        std = np.std(trainset,axis=0,keepdims=True)
        # A feature that is constant in the training set would give 0/0 or x/0;
        # dividing by 1 keeps it finite (0 after centering for the train set)
        std[std==0] = 1.0

        trainset = np.divide((trainset - mean),std)
        testset = np.divide((testset - mean),std)

        accuracies.append(_decoding((trainset,testset,trainlabels,testlabels)))

    return np.mean(accuracies)

def _decoding((trainset,testset,trainlabels,testlabels)):

# #     clf = sklearn.linear_model.LogisticRegression()
#     clf = svm.SVC(C=1.0, kernel='linear')

#     clf.fit(trainset,trainlabels)
#     predictions = clf.predict(testset)
#     acc = predictions==testlabels
#     acc = np.mean(acc)

    unique_cond = np.unique(trainlabels)
    rdm = np.zeros((len(unique_cond),len(unique_cond)))
    acc = []
    for cond1 in unique_cond:
        mismatches = []
        prototype_ind = np.where(trainlabels==cond1)[0]
        prototype = np.mean(trainset[prototype_ind,:],axis=0)
        for cond2 in unique_cond:
            test_ind = np.where(testlabels==cond2)[0]
            test = np.mean(testset[test_ind,:],axis=0)
            if cond1 == cond2: 
                correct = stats.spearmanr(prototype,test)[0]
            else:
                mismatches.append(stats.spearmanr(prototype,test)[0])
        
        if correct > np.max(mismatches): 
            acc.append(1.0)
        else:
            acc.append(0.0)
    
    return acc

## 1.1 Run within-subject decoding on left hand motor responses

In [6]:
nproc = 20
# 0-based parcel indices assigned to the somatomotor network
rois = np.where(networkdef==networkmappings['smn'])[0]
# ROIs x subjects matrix of decoding accuracies
statistics_lh = np.zeros((len(rois),len(subjNums)))
hand = 'left'
nCVs = 10 # Number of random train/test splits per subject

for roicount, roi in enumerate(rois):
    # The message shows the 0-based parcel index (the vertex label is roi+1)
    print('Running within-subject decoding of %s motor responses on ROI %s' % (hand, roi))
    # Vertex labels in glasser2 are 1-based
    roi_ind = np.where(glasser2==roi+1)[0]
    # One job per subject: (ROI data, subject ID, hand, number of splits)
    inputs = [(data_task[roi_ind,:,scount],subj,hand,nCVs)
              for scount, subj in enumerate(subjNums)]

    # Run in parallel; always release the worker processes, even if a job raises
    pool = mp.Pool(processes=nproc)
    try:
        results = pool.map(motorResponseDecodings,inputs)
    finally:
        pool.close()
        pool.join()

    # Store in array
    statistics_lh[roicount,:] = np.asarray(results)

Running within-subject decoding of left motor responses on ROI 7
Running within-subject decoding of left motor responses on ROI 8
Running within-subject decoding of left motor responses on ROI 35
Running within-subject decoding of left motor responses on ROI 38
Running within-subject decoding of left motor responses on ROI 39
Running within-subject decoding of left motor responses on ROI 40
Running within-subject decoding of left motor responses on ROI 41
Running within-subject decoding of left motor responses on ROI 46
Running within-subject decoding of left motor responses on ROI 50
Running within-subject decoding of left motor responses on ROI 51
Running within-subject decoding of left motor responses on ROI 52
Running within-subject decoding of left motor responses on ROI 53
Running within-subject decoding of left motor responses on ROI 54
Running within-subject decoding of left motor responses on ROI 55
Running within-subject decoding of left motor responses on ROI 99
Running with

# Compute statistics

In [7]:
# One-sample t-test of the subjects' accuracies against chance (0.5), per ROI
ts, ps = stats.ttest_1samp(statistics_lh,0.5,axis=1)

# Convert the two-sided p-values to one-sided (accuracy greater than chance)
ps = np.where(ts > 0, ps/2.0, 1.0 - ps/2.0)

# FDR correction across ROIs
qs = mc.fdrcorrection0(ps)[1]

# Report ROIs surviving correction (1-based parcel number).
# The value printed after 'p =' is the FDR-corrected p-value.
for roi, roi_accuracies, q in zip(rois, statistics_lh, qs):
    if q < 0.05:
        print('ROI %s Accuracy: %s | p = %s' % (roi + 1, np.mean(roi_accuracies), q))
    

ROI 47 Accuracy: 0.580898876404 | p = 0.0265417356019
ROI 52 Accuracy: 0.57191011236 | p = 0.0310004801457
ROI 188 Accuracy: 0.604494382022 | p = 0.00400456719306
ROI 189 Accuracy: 0.689887640449 | p = 6.34762804495e-08
ROI 227 Accuracy: 0.596629213483 | p = 0.00414887516979
ROI 231 Accuracy: 0.596629213483 | p = 0.00414887516979
ROI 232 Accuracy: 0.574157303371 | p = 0.0310004801457
ROI 233 Accuracy: 0.603370786517 | p = 0.00496044678858


## 2.1 Run within-subject decoding on right hand motor responses

In [9]:
nproc = 10
# 0-based parcel indices assigned to the somatomotor network
rois = np.where(networkdef==networkmappings['smn'])[0]
# ROIs x subjects matrix of decoding accuracies
statistics_rh = np.zeros((len(rois),len(subjNums)))
hand = 'right'
nCVs = 10 # Number of random train/test splits per subject

for roicount, roi in enumerate(rois):
    # The message shows the 0-based parcel index (the vertex label is roi+1)
    print('Running within-subject decoding of %s motor responses on ROI %s' % (hand, roi))
    # Vertex labels in glasser2 are 1-based
    roi_ind = np.where(glasser2==roi+1)[0]
    # One job per subject: (ROI data, subject ID, hand, number of splits)
    inputs = [(data_task[roi_ind,:,scount],subj,hand,nCVs)
              for scount, subj in enumerate(subjNums)]

    # Run in parallel; always release the worker processes, even if a job raises
    pool = mp.Pool(processes=nproc)
    try:
        results = pool.map(motorResponseDecodings,inputs)
    finally:
        pool.close()
        pool.join()

    # Store in array
    statistics_rh[roicount,:] = np.asarray(results)

Running within-subject decoding of right motor responses on ROI 7
Running within-subject decoding of right motor responses on ROI 8
Running within-subject decoding of right motor responses on ROI 35
Running within-subject decoding of right motor responses on ROI 38
Running within-subject decoding of right motor responses on ROI 39
Running within-subject decoding of right motor responses on ROI 40
Running within-subject decoding of right motor responses on ROI 41
Running within-subject decoding of right motor responses on ROI 46
Running within-subject decoding of right motor responses on ROI 50
Running within-subject decoding of right motor responses on ROI 51
Running within-subject decoding of right motor responses on ROI 52
Running within-subject decoding of right motor responses on ROI 53
Running within-subject decoding of right motor responses on ROI 54
Running within-subject decoding of right motor responses on ROI 55
Running within-subject decoding of right motor responses on ROI 

# Compute statistics

In [13]:
# One-sample t-test of the subjects' accuracies against chance (0.5), per ROI
ts, ps = stats.ttest_1samp(statistics_rh,0.5,axis=1)

# Convert the two-sided p-values to one-sided (accuracy greater than chance)
ps = np.where(ts > 0, ps/2.0, 1.0 - ps/2.0)

# FDR correction across ROIs
qs = mc.fdrcorrection0(ps)[1]

# Report ROIs surviving correction (1-based parcel number).
# The value printed after 'p =' is the FDR-corrected p-value.
for roi, roi_accuracies, q in zip(rois, statistics_rh, qs):
    if q < 0.05:
        print('ROI %s Accuracy: %s | p = %s' % (roi + 1, np.mean(roi_accuracies), q))
    

ROI 8 Accuracy: 0.603370786517 | p = 0.00168990719807
ROI 9 Accuracy: 0.701123595506 | p = 1.86576169434e-08
ROI 41 Accuracy: 0.565168539326 | p = 0.0454587970772
ROI 42 Accuracy: 0.576404494382 | p = 0.0197641861417
ROI 47 Accuracy: 0.602247191011 | p = 0.000705113000091
ROI 51 Accuracy: 0.650561797753 | p = 2.13391837867e-05
ROI 52 Accuracy: 0.65393258427 | p = 3.62674940115e-06
ROI 53 Accuracy: 0.642696629213 | p = 8.33874162334e-06
ROI 54 Accuracy: 0.632584269663 | p = 0.000110517716004
ROI 56 Accuracy: 0.595505617978 | p = 0.00117615840676
ROI 231 Accuracy: 0.576404494382 | p = 0.0286368656299
ROI 232 Accuracy: 0.601123595506 | p = 0.00168990719807
ROI 234 Accuracy: 0.565168539326 | p = 0.0439978704038


## 1.3 Map accuracies back to cortical surface

In [11]:
# Per-vertex maps for each hand; every column becomes one scalar map in the output file
lefthand = np.zeros((glasser2.shape[0],3))
righthand = np.zeros((glasser2.shape[0],3))

for roicount, roi in enumerate(rois):
    # `rois` holds 0-based parcel indices while the vertex labels in glasser2
    # are 1-based (the decoding cells select vertices with roi+1 as well)
    vertex_ind = np.where(glasser2==roi+1)[0]
    # NOTE(review): statistics_lh / statistics_rh are ROIs x subjects, so these
    # three columns are the accuracies of the first three subjects, not
    # group-level statistics -- confirm this is what should be mapped.
    lefthand[vertex_ind,:] = statistics_lh[roicount,:3]
    righthand[vertex_ind,:] = statistics_rh[roicount,:3]

####
# Write each map to csv and convert it to a dscalar file with wb_command
outdir = '/projects3/SRActFlow/data/results/WithinSubject_MotorDecoding/'
for filename, surface_map in (('smnDecodingsLH_v2', lefthand),
                              ('smnDecodingsRH_v2', righthand)):
    np.savetxt(outdir + filename + '.csv', surface_map,fmt='%s')
    wb_file = filename + '.dscalar.nii'
    wb_command = 'wb_command -cifti-convert -from-text ' + outdir + filename + '.csv ' + glasserfile2 + ' ' + outdir + wb_file + ' -reset-scalars'
    status = os.system(wb_command)
    # os.system does not raise on failure, so report a non-zero exit status explicitly
    if status != 0:
        print('wb_command exited with status %s while writing %s' % (status, wb_file))




0