# Load Libraries

In [1]:
%matplotlib inline
import scipy.io
from scipy.stats import stats
from sklearn.cross_validation import LeaveOneLabelOut
from sklearn.cross_validation import cross_val_score
from sklearn.metrics import confusion_matrix
from sklearn.svm import NuSVC
import numpy as np

In [2]:
import sys

# NOTE(review): machine-specific absolute path (presumably a local BrainIAK
# checkout, given the `brainiak` import further down) -- adjust before running
# on another machine.
BRAINIAK_DIR = '/Users/ChimatChen/brainiak'
# Guard the append so that re-running this cell does not pile up duplicates.
if BRAINIAK_DIR not in sys.path:
    sys.path.append(BRAINIAK_DIR)

# Load Data 

In [3]:
# Read the MATLAB file (path is relative to the current working directory).
# The result is indexed by variable name in the next cell
# ('movie_data_lh' and 'movie_data_rh').
movie_file = scipy.io.loadmat('data/movie_data.mat')

In [10]:
# Per-hemisphere arrays, indexed below as [row, TR, subject].
movie_data_left = movie_file['movie_data_lh']
movie_data_right = movie_file['movie_data_rh']
subjects = movie_data_left.shape[2]
nTR = movie_data_right.shape[1]

# First half of the TRs is used for training, the remainder for testing.
half_TR = nTR // 2
hemispheres = (movie_data_left, movie_data_right)

# For every subject, stack left on top of right along the row axis.
train_data = [
    np.concatenate([hemi[:, :half_TR, s] for hemi in hemispheres], axis=0)
    for s in range(subjects)
]
test_data = [
    np.concatenate([hemi[:, half_TR:, s] for hemi in hemispheres], axis=0)
    for s in range(subjects)
]

# Preprocess the data (skip if already preprocessed)

Z-score each row (feature) across time, separately for every subject and for the training and test halves.

In [14]:
# Normalise each row across time (axis=1 is the TR axis) using the sample
# standard deviation (ddof=1). Entries are replaced in place, one subject at
# a time, first for the training half and then for the test half.
for dataset in (train_data, test_data):
    for idx in range(subjects):
        dataset[idx] = stats.zscore(dataset[idx], axis=1, ddof=1)

# Fit model to Data

In [15]:
import brainiak.funcalign.srm

In [16]:
# Configure the model with 50 features and 10 iterations, then fit it on the
# training half only; the test half is held out for the evaluation below.
srm = brainiak.funcalign.srm.SRM(n_iter=10, features=50)
srm.fit(train_data)

SRM(features=50, n_iter=10, rand_seed=0)

# Evaluation

In [33]:
def timesegmentmatching_accuracy_evaluation_loo_cv(data, win_size=6):
    """Leave-one-subject-out time-segment matching accuracy.

    Every subject's time series is cut into ``nseg = nsample - win_size``
    overlapping segments of ``win_size`` consecutive samples. For each held-out
    subject, every one of its segments is correlated with all segments of the
    summed data of the remaining subjects; a segment counts as correctly
    matched when the best-correlated candidate is the segment at the same time
    index. Candidates that overlap the true segment in time (but are not the
    segment itself) are excluded from the competition.

    Parameters
    ----------
    data : sequence of 2-D arrays
        One ``(ndim, nsample)`` array per subject; all subjects must share
        the same shape.
    win_size : int, optional
        Number of consecutive samples per segment (default 6).

    Returns
    -------
    numpy.ndarray
        Array of length ``len(data)`` holding the matching accuracy per
        held-out subject. The per-subject accuracies and their mean / standard
        deviation are also printed.

    Raises
    ------
    ValueError
        If ``data`` is empty, ``win_size`` is smaller than 1, or there are not
        more samples than ``win_size`` (no segment could be formed).
    """
    nsubjs = len(data)
    if nsubjs == 0:
        raise ValueError("data must contain at least one subject")
    if win_size < 1:
        raise ValueError("win_size must be a positive integer")

    (ndim, nsample) = data[0].shape
    nseg = nsample - win_size
    if nseg <= 0:
        raise ValueError(
            "need more than win_size={0} samples per subject, got {1}".format(
                win_size, nsample))

    accu = np.zeros(shape=nsubjs)

    # Sum of all subjects' windowed data. It still includes the held-out
    # subject; that subject's contribution is subtracted when building A below.
    trn_data = np.zeros((ndim * win_size, nseg))
    for m in range(nsubjs):
        trn_data += _stack_time_windows(data[m], ndim, win_size, nseg)

    # Mask of candidate segments that overlap the true segment in time without
    # being the segment itself. It does not depend on the subject, so it is
    # built once, vectorised, instead of with a Python double loop per subject.
    seg_idx = np.arange(nseg)
    lag = np.abs(seg_idx[:, np.newaxis] - seg_idx[np.newaxis, :])
    overlapping = (lag > 0) & (lag < win_size)

    for tst_subj in range(nsubjs):
        tst_data = _stack_time_windows(data[tst_subj], ndim, win_size, nseg)

        # Column-wise z-scoring turns the dot product below into a (scaled)
        # Pearson correlation between test segments and training segments.
        A = stats.zscore((trn_data - tst_data), axis=0, ddof=1)
        B = stats.zscore(tst_data, axis=0, ddof=1)

        corr_mtx = B.T.dot(A)
        corr_mtx[overlapping] = -np.inf

        # Best-matching training segment for every test segment.
        rank = np.argmax(corr_mtx, axis=1)
        accu[tst_subj] = np.mean(rank == seg_idx)

    print(accu)
    print("The average accuracy among all subjects is {0:f} +/- {1:f}".format(np.mean(accu), np.std(accu)))
    return accu


def _stack_time_windows(subject_data, ndim, win_size, nseg):
    """Return a (ndim * win_size, nseg) array whose column t holds samples t..t+win_size-1."""
    stacked = np.zeros((ndim * win_size, nseg))
    for w in range(win_size):
        stacked[w * ndim:(w + 1) * ndim, :] = subject_data[:, w:(w + nseg)]
    return stacked

In [34]:
# Project the held-out half of every subject through the fitted model.
data_shared = srm.transform(test_data)

# Re-normalise each row of the transformed data across time (axis=1),
# replacing the entries in place, subject by subject.
for idx, transformed in enumerate(data_shared):
    data_shared[idx] = stats.zscore(transformed, axis=1, ddof=1)

# The trailing semicolon keeps the notebook from echoing a return value.
timesegmentmatching_accuracy_evaluation_loo_cv(data_shared, win_size=6);

[ 0.82025547  0.74270073  0.68430657  0.92062044  0.75729927  0.76733577
  0.84032847  0.8020073   0.79288321  0.89963504]
The average accuracy among all subjects is 0.802737 +/- 0.067861
