# GroupfMRI6a - Use motor rule regions to predict Motor response regions
#### Using ActFlow

#### Takuya Ito
#### 03/01/2019

In [2]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import multiprocessing as mp
import scipy.stats as stats
import os
os.environ['OMP_NUM_THREADS'] = str(1)
import statsmodels.sandbox.stats.multicomp as mc
import seaborn as sns
import h5py
import tools_group
import nibabel as nib
sns.set_style("whitegrid")
plt.rcParams["font.family"] = "FreeSans"


In [3]:
# Excluding 084
subjNums = ['013','014','016','017','018','021','023','024','026','027','028','030','031','032','033',
            '034','035','037','038','039','040','041','042','043','045','046','047','048','049','050',
            '053','055','056','057','058','062','063','066','067','068','069','070','072','074','075',
            '076','077','081','085','086','087','088','090','092','093','094','095','097','098','099',
            '101','102','103','104','105','106','108','109','110','111','112','114','115','117','119',
            '120','121','122','123','124','125','126','127','128','129','130','131','132','134','135',
            '136','137','138','139','140','141']



basedir = '/projects3/SRActFlow/'

# Using final partition
networkdef = np.loadtxt('/projects3/NetworkDiversity/data/network_partition.txt')
networkorder = np.asarray(sorted(range(len(networkdef)), key=lambda k: networkdef[k]))
networkorder.shape = (len(networkorder),1)
# network mappings for final partition set
networkmappings = {'fpn':7, 'vis1':1, 'vis2':2, 'smn':3, 'aud':8, 'lan':6, 'dan':5, 'con':4, 'dmn':9, 
                   'pmulti':10, 'none1':11, 'none2':12}
networks = networkmappings.keys()

xticks = {}
reorderednetworkaffil = networkdef[networkorder]
for net in networks:
    netNum = networkmappings[net]
    netind = np.where(reorderednetworkaffil==netNum)[0]
    tick = np.max(netind)
    xticks[tick] = net

## General parameters/variables
nParcels = 360
nSubjs = len(subjNums)

glasserfile2 = '/projects/AnalysisTools/ParcelsGlasser2016/Q1-Q6_RelatedParcellation210.LR.CorticalAreas_dil_Colors.32k_fs_RL.dlabel.nii'
glasser2 = nib.load(glasserfile2).get_data()
glasser2 = np.squeeze(glasser2)

sortednets = np.sort(xticks.keys())
orderednetworks = []
for net in sortednets: orderednetworks.append(xticks[net])
    
networkpalette = ['royalblue','slateblue','paleturquoise','darkorchid','limegreen',
                  'lightseagreen','yellow','orchid','r','peru','orange','olivedrab']
networkpalette = np.asarray(networkpalette)

OrderedNetworks = ['VIS1','VIS2','SMN','CON','DAN','LAN','FPN','AUD','DMN','PMM','VMM','ORA']

# 1.0 Define functions for motor response decodings

In [74]:
from scipy.spatial.distance import cdist

def motorResponseDecodings(data, actflow_data, ncvs=1, nproc=5):
    """
    Run an across-subject classification
    Decode responses on each hand separately from CPRO data
    """

    nSubjs = data.shape[2]
    stats = np.zeros((len(rois),))
    
    nfing = data.shape[1]

    nsamples = nSubjs * nfing
    nfeatures = data.shape[0]

    # Label array for supervised learning
    labels = np.tile(range(nfing),nSubjs)
    subjarray = np.repeat(range(nSubjs),nfing)

    svm_mat = np.zeros((nsamples,nfeatures))
    actflow_svm_mat = np.zeros((nsamples,nfeatures))
    samplecount = 0
    scount = 0
    for subj in range(len(subjNums)):
        roidata = data[:,:,scount]
        actflow_roidata = actflow_data[:,:,scount]
        svm_mat[samplecount:(samplecount+nfing),:] = roidata.T
        actflow_svm_mat[samplecount:(samplecount+nfing),:] = actflow_roidata.T

        scount += 1
        samplecount += nfing

        # Spatially demean matrix across features
        samplemean = np.mean(svm_mat,axis=1)
        samplemean.shape = (len(samplemean),1)
        svm_mat = svm_mat - samplemean

        samplemean = np.mean(actflow_svm_mat,axis=1)
        samplemean.shape = (len(samplemean),1)
        actflow_svm_mat = actflow_svm_mat - samplemean

    scores = randomSplitLOOBaselineCV(ncvs, svm_mat, actflow_svm_mat, labels, subjarray, nproc=nproc)
#     stats = np.mean(scores)
    stats = scores 
        
    return stats

def randomSplitLOOBaselineCV(ncvs, svm_mat, actflow_svm_mat, labels, subjarray, nproc=5):
    """
    Runs cross validation for an across-subject SVM analysis
    """
    
    ntasks = len(np.unique(labels))
    nsamples = svm_mat.shape[0]
    nsubjs = nsamples/ntasks

    subjects = np.unique(subjarray)
    indices = np.arange(nsamples)
    
    numsubjs_perfold = 1
    if nsubjs%numsubjs_perfold!=0: 
        raise Exception("Error: Folds don't match number of subjects")
        
    nfolds = nsubjs/numsubjs_perfold
    subj_array_folds = subjarray.copy()
    
    inputs = [] 
    
    for fold in range(nfolds):
        test_subjs = np.random.choice(subj_array_folds,numsubjs_perfold,replace=False)
        train_subjs_all = np.delete(subjects,test_subjs)
        for cv in range(ncvs):
            # Randomly sample half of train set subjects for each cv (CV bootstrapping)
            train_subjs = np.random.choice(train_subjs_all,
                                         int(np.floor(len(train_subjs_all)*(10.0))),
                                         replace=True)

            train_ind = []
            for subj in train_subjs:
                train_ind.extend(np.where(subjarray==subj)[0])

            test_ind = []
            for subj in test_subjs:
                test_ind.extend(np.where(subjarray==subj)[0])
            
            train_ind = np.asarray(train_ind)
            test_ind = np.asarray(test_ind)

            trainset = actflow_svm_mat[train_ind,:]
            testset = svm_mat[test_ind,:]
            orig_training = svm_mat[train_ind,:]

            # Normalize trainset and testset
            trainmean = np.mean(actflow_svm_mat[train_ind,:],axis=0)
            trainmean.shape = (1,len(trainmean))
            trainstd = np.std(actflow_svm_mat[train_ind,:],axis=0)
            trainstd.shape = (1,len(trainstd))
            
            # Normalize trainset and testset
            testmean = np.mean(svm_mat[train_ind,:],axis=0)
            testmean.shape = (1,len(testmean))
            teststd = np.std(svm_mat[train_ind,:],axis=0)
            teststd.shape = (1,len(teststd))

            trainset = np.divide((trainset - trainmean),trainstd)
            testset = np.divide((testset - testmean),teststd)

#             ######## FEATURE SELECTION & REDUCTION
#             ## Feature selection and downsampling
#             trainlabels = labels[train_ind]
#             testlabels = labels[test_ind]
#             unique_labels = np.unique(labels)
#             feat1_labs = np.where(trainlabels==0)[0]
#             feat2_labs = np.where(trainlabels==1)[0]
#             # Perform t-test
#             t, p = stats.ttest_rel(orig_training[feat1_labs,:],orig_training[feat2_labs,:],axis=0)
#             h0, qs = mc.fdrcorrection0(p)
#             # Construct feature masks
#             feat1_mask = np.multiply(t<0,h0)
#             feat2_mask = np.multiply(t>0,h0)
# #             feat1_mask = t>0
# #             feat2_mask = t<0
#             # Downsample training set into original vertices into 2 ROI signals
#             trainset_downsampled = np.zeros((trainset.shape[0],2))
#             trainset_downsampled[:,0] = np.nanmean(trainset[:,feat1_mask],axis=1)
#             trainset_downsampled[:,1] = np.nanmean(trainset[:,feat2_mask],axis=1)
#             trainset_downsampled = trainset[:,h0]
#             # Downsample test set into original vertices
#             testset_downsampled = np.zeros((testset.shape[0],2))
#             testset_downsampled[:,0] = np.nanmean(testset[:,feat1_mask],axis=1)
#             testset_downsampled[:,1] = np.nanmean(testset[:,feat2_mask],axis=1)
#             testset_downsampled = testset[:,h0]
# #             print 'feat1_mask', np.sum(feat1_mask), '| feat2_mask', np.sum(feat2_mask)

#             if np.sum(feat1_mask)==0 or np.sum(feat2_mask)==0:
#                 print 'not running feature selection'
#                 inputs.append((trainset,testset,labels[train_ind],labels[test_ind]))
#             else:
#                 inputs.append((trainset_downsampled,testset_downsampled,labels[train_ind],labels[test_ind]))

            inputs.append((trainset,testset,labels[train_ind],labels[test_ind]))         
    
        subj_array_folds = np.delete(subj_array_folds,test_subjs)
        
    pool = mp.Pool(processes=nproc)
    scores = pool.map_async(_decoding,inputs).get()
    pool.close()
    pool.join()

    acc = []
    for score in scores:
        acc.extend(score)
    return acc

def _decoding((trainset,testset,trainlabels,testlabels)):

#     clf = sklearn.linear_model.LogisticRegression()
#     clf = svm.SVC(C=1.0, kernel='linear')

#     clf.fit(trainset,trainlabels)
#     predictions = clf.predict(testset)
#     acc = predictions==testlabels
    
    unique_cond = np.unique(trainlabels)
    acc = []
    for cond1 in unique_cond:
        mismatches = []
        prototype_ind = np.where(trainlabels==cond1)[0]
        prototype = np.mean(trainset[prototype_ind,:],axis=0)
        for cond2 in unique_cond:
            test_ind = np.where(testlabels==cond2)[0]
            test = np.mean(testset[test_ind,:],axis=0)
            if cond1 == cond2: 
                correct = stats.spearmanr(prototype,test)[0]
            else:
                mismatches.append(stats.spearmanr(prototype,test)[0])
#         print correct, mismatches
        if correct > np.max(mismatches): 
            acc.append(1.0)
        else:
            acc.append(0.0)
    
    return acc

## 1.01 Load data for RH responses

In [4]:
# gsr = True
nResponses = 2
data_task_rh = np.zeros((len(glasser2),nResponses,len(subjNums)))

scount = 0
for subj in subjNums:
    data_task_rh[:,:,scount] = tools_group.loadMotorResponses(subj,hand='Right')
    scount += 1

## 1.02 Generate actflow data for RH responses

In [9]:
sourcedir = '/projects3/SRActFlow/data/results/GroupfMRI/RuleDecoding/'
sourceROIs = np.loadtxt(sourcedir + 'MotorRule_Regions.csv',delimiter=',')
targetROIs = [9,189]

overlappingROIs = np.intersect1d(sourceROIs,targetROIs)
unique_sources = []
for roi in sourceROIs:
    if roi in overlappingROIs:
        continue
    else:
        unique_sources.append(roi)

unique_targets = []
for roi in targetROIs:
    #if roi in overlappingROIs:
    #    continue
    #else:
    unique_targets.append(roi)

####
# Step 2 - concatenate indices for unique sources/targets
source_ind = []
for roi in unique_sources:
    roi_ind = np.where(glasser2==roi+1)[0]
    source_ind.extend(roi_ind)
source_ind = np.asarray(source_ind)

target_ind = []
for roi in unique_targets:
    roi_ind = np.where(glasser2==roi+1)[0]
    target_ind.extend(roi_ind)
target_ind = np.asarray(target_ind)

fcmapping = np.zeros((len(source_ind),len(target_ind)))

scount = 0
for subj in subjNums:
    fcmapping[:,:] = fcmapping[:,:] + tools_group.loadMotorRuleToMotorOutputFC(subj)
    scount += 1

fcmapping = np.divide(fcmapping,len(subjNums))

In [110]:
roi_rh = 9 # left s1
roi_lh = 189 # right s1
actflow_data = np.zeros((len(glasser2),nResponses,len(subjNums)))

bad_rois = []
scount = 0
for subj in subjNums:
#     print 'Subject', subj, '(', scount+1, '/', len(subjNums), ')'
        
#     # Right Finger 1
#     actflow_data[target_ind,0,scount] = np.dot(stats.zscore(data_task_rh[source_ind,0,scount],axis=0),fcmapping)
#     # Right Finger 2
#     actflow_data[target_ind,1,scount] = np.dot(stats.zscore(data_task_rh[source_ind,1,scount],axis=0),fcmapping)
    # Right Finger 1
    actflow_data[target_ind,0,scount] = np.dot(data_task_rh[source_ind,0,scount],fcmapping)
    # Right Finger 2
    actflow_data[target_ind,1,scount] = np.dot(data_task_rh[source_ind,1,scount],fcmapping)

    scount += 1
    

## 1.1 Run across subject decoding on right-hand motor responses

In [129]:
nproc = 20
ncvs = 1

data_task_rh[target_ind,:,:] = stats.zscore(data_task_rh[target_ind,:,:],axis=0)
actflow_data[target_ind,:,:] = stats.zscore(actflow_data[target_ind,:,:],axis=0)

distances_baseline_rh = np.zeros((1,len(subjNums)*nResponses))
distances_baseline_rh[0,:] = motorResponseDecodings(data_task_rh[target_ind,:,:],
                                               actflow_data[target_ind,:,:],
                                               ncvs=ncvs, nproc=nproc)
    

## 1.2 Compute statistics

In [130]:
statistics_rh = np.zeros((distances_baseline_rh.shape[0],2))
ntrials = len(subjNums)*2
p = stats.binom_test(np.mean(distances_baseline_rh[0,:])*ntrials,n=ntrials,p=0.5)
if np.mean(distances_baseline_rh[0,:])>0.5:
    p = p/2.0
else:
    p = 1.0-p/2.0

    
statistics_rh[0,0] = np.mean(distances_baseline_rh[0,:])
statistics_rh[0,1] = p

print 'Activity flow accuracy =', statistics_rh[0,0]
print 'p =', statistics_rh[0,1]



Activity flow accuracy = 0.614583333333
p = 0.000919430398124


## 2.01 Load data for LH responses

In [49]:
# gsr = True
nResponses = 2
data_task_lh = np.zeros((len(glasser2),nResponses,len(subjNums)))

scount = 0
for subj in subjNums:
    data_task_lh[:,:,scount] = tools_group.loadMotorResponses(subj,hand='Left')
    scount += 1

## 2.02 Generate actflow data for LH responses

In [50]:
sourcedir = '/projects3/SRActFlow/data/results/GroupfMRI/RuleDecoding/'
sourceROIs = np.loadtxt(sourcedir + 'MotorRule_Regions.csv',delimiter=',')
targetROIs = [9,189]

overlappingROIs = np.intersect1d(sourceROIs,targetROIs)
unique_sources = []
for roi in sourceROIs:
    if roi in overlappingROIs:
        continue
    else:
        unique_sources.append(roi)

unique_targets = []
for roi in targetROIs:
    #if roi in overlappingROIs:
    #    continue
    #else:
    unique_targets.append(roi)

####
# Step 2 - concatenate indices for unique sources/targets
source_ind = []
for roi in unique_sources:
    roi_ind = np.where(glasser2==roi+1)[0]
    source_ind.extend(roi_ind)
source_ind = np.asarray(source_ind)

target_ind = []
for roi in unique_targets:
    roi_ind = np.where(glasser2==roi+1)[0]
    target_ind.extend(roi_ind)
target_ind = np.asarray(target_ind)

fcmapping = np.zeros((len(source_ind),len(target_ind)))

scount = 0
for subj in subjNums:
    fcmapping[:,:] = fcmapping[:,:] + tools_group.loadMotorRuleToMotorOutputFC(subj)
    scount += 1

fcmapping = np.divide(fcmapping,len(subjNums))

In [144]:
roi_rh = 9 # left s1
roi_lh = 189 # right s1
actflow_data = np.zeros((len(glasser2),nResponses,len(subjNums)))

bad_rois = []
scount = 0
for subj in subjNums:
#     print 'Subject', subj, '(', scount+1, '/', len(subjNums), ')'
        
#     # Right Finger 1
#     actflow_data[target_ind,0,scount] = np.dot(stats.zscore(data_task_lh[source_ind,0,scount],axis=0),fcmapping)
#     # Right Finger 2
#     actflow_data[target_ind,1,scount] = np.dot(stats.zscore(data_task_lh[source_ind,1,scount],axis=0),fcmapping)
    # Right Finger 1
    actflow_data[target_ind,0,scount] = np.dot(data_task_lh[source_ind,0,scount],fcmapping)
    # Right Finger 2
    actflow_data[target_ind,1,scount] = np.dot(data_task_lh[source_ind,1,scount],fcmapping)

    scount += 1
    

## 1.1 Run across subject decoding on right-hand motor responses

In [145]:
nproc = 20
ncvs = 1

data_task_lh[target_ind,:,:] = stats.zscore(data_task_lh[target_ind,:,:],axis=0)
actflow_data[target_ind,:,:] = stats.zscore(actflow_data[target_ind,:,:],axis=0)

distances_baseline_lh = np.zeros((1,len(subjNums)*nResponses))
distances_baseline_lh[0,:] = motorResponseDecodings(data_task_lh[target_ind,:,:],
                                               actflow_data[target_ind,:,:],
                                               ncvs=ncvs, nproc=nproc)
    

## 1.2 Compute statistics

In [146]:
statistics_lh = np.zeros((distances_baseline_lh.shape[0],2))
ntrials = len(subjNums)*2
p = stats.binom_test(np.mean(distances_baseline_lh[0,:])*ntrials,n=ntrials,p=0.5)
if np.mean(distances_baseline_lh[0,:])>0.5:
    p = p/2.0
else:
    p = 1.0-p/2.0

    
statistics_lh[0,0] = np.mean(distances_baseline_lh[0,:])
statistics_lh[0,1] = p

print 'Activity flow accuracy =', statistics_lh[0,0]
print 'p =', statistics_lh[0,1]



Activity flow accuracy = 0.458333333333
p = 0.860509115482
