# Organize and preprocessing data from matlab files

In [None]:
import numpy as np
import scipy.io

In [None]:
data_structure_file = 'D:\data_structure_ANM210862.tar\data_structure_ANM210862\data_structure_ANM210862_20130628.mat'
label_to_save = 'ANM210862_20130628_labels.npy'
trace_dir = 'D:\\voltage_traces_ANM210862_20130628\\voltage_traces_ANM210862_20130628\\raw_trace_958_trial_'
sparse_epoch_to_save = 'sparse_ANM210862_20130628.npy'

In [None]:
mat_dict = scipy.io.loadmat(data_structure_file)


data = mat_dict['obj']
data = data['trialTypeMat'][0,0]

valid_trials = []
labels = []
for i in range(len(data[0,:])):
    if data[1,i] == 1:
        labels.append(1)
        valid_trials.append(i+1)
    elif data[0,i] == 1:
        labels.append(0)
        valid_trials.append(i+1)
        
print(valid_trials)
print(labels)
# 1 means right, 0 menas left


file = open(label_to_save, 'wb')
np.save(file, labels)
file.close()


In [None]:
# raw_trace_127_trial_5.mat

epochs = []
epochs = np.asarray(epochs)
min_timepoint = 1000000

for t in valid_trials:
    print(t)
    trial = scipy.io.loadmat(trace_dir + str(t) + '.mat')['ch_MUA']
    timepoint, chan = trial.shape
    if min_timepoint > timepoint:
        min_timepoint = timepoint
    
    trial = trial[:min_timepoint, :]
    trial = trial.reshape((1, min_timepoint, chan))
    print(trial.shape)
    if epochs.size == 0:
        epochs = trial
    else:
        epochs = epochs[:, :min_timepoint, :]
        epochs = np.concatenate((epochs, trial), axis = 0)
    print(epochs.shape)




In [None]:
trial, timepoint, channel = epochs.shape
sparse_ind = [i*100 for i in range(timepoint//100) ]
print(len(sparse_ind))
sparse_epochs = epochs[:, sparse_ind, :]
sparse_epochs = np.swapaxes(sparse_epochs,1,2)


file = open(sparse_epoch_to_save, 'wb')
np.save(file, sparse_epochs)
file.close()

# Loading Data

In [61]:
sparse_epochs = np.load('sparse_ANM210861_20130701.npy')
labels = np.load('ANM210861_20130701_labels.npy')
print(sparse_epochs.shape)

ind = np.linspace(0,3443-1, num = 1033)
ind = [int(np.floor(i)) for i in ind]
sparse_epochs = sparse_epochs[:,:, ind]
print(sparse_epochs.shape)



#get rid of flat channel

# trial, channel,timepoint = sparse_epochs.shape
# flat_chan = [i for i in range(channel) if sparse_epochs[0,i,0] == 0]
# good_chan = [i for i in range(0,32) if i not in flat_chan]
# print(flat_chan)
# sparse_epochs = sparse_epochs[:,good_chan,:]
# print(sparse_epochs.shape)


#standardize signal based on each channel

# from sklearn.preprocessing import StandardScaler
# import copy

# epochs_std = copy.copy(sparse_epochs)
# sample_num, chan_num, timepoint = sparse_epochs.shape
# for c in range(chan_num):
#     original_timepoints = sparse_epochs[:,c,:]
#     scaler = StandardScaler()
#     scaler.fit(original_timepoints)
#     chan_std = scaler.transform(original_timepoints)
#     epochs_std[:,c,:] = chan_std

(328, 32, 3443)
(328, 32, 1033)


# Prediction on single session data

In [18]:
import sklearn 
import pyriemann.estimation
import pyriemann.classification
import sklearn.linear_model
from sklearn.model_selection import ShuffleSplit  
from scipy.fftpack import fft, ifft

cv = ShuffleSplit(n_splits=10, test_size=0.2, random_state=42)
scores = []
epochs_data = sparse_epochs

for train_idx, test_idx in cv.split(epochs_data):
    y_train, y_test = np.asarray(labels)[train_idx], np.asarray(labels)[test_idx]
    X_train = epochs_data[train_idx]
    X_test = epochs_data[test_idx]
    print(X_train.shape)

    
    cov =  pyriemann.estimation.Covariances('lwf')
    cov_X_train = cov.transform(X_train)
    cov_X_test = cov.transform(X_test)
    
    TSclassifier = pyriemann.classification.TSclassifier(metric='riemann')
    TSclassifier.fit(cov_X_train, y_train)
    
    
    y_predict = TSclassifier.predict(cov_X_test)
    print(sklearn.metrics.accuracy_score(TSclassifier.predict(cov_X_train), y_train))
    scores.append(sklearn.metrics.accuracy_score(y_predict, y_test))


class_balance = np.mean(np.asarray(labels) == np.asarray(labels)[0])
print("Classification accuracy: %f / Chance level: %f" % (np.mean(scores),
                                                          class_balance))

(262, 32, 1033)
0.8969465648854962
(262, 32, 1033)
0.8969465648854962
(262, 32, 1033)
0.9045801526717557
(262, 32, 1033)
0.8969465648854962
(262, 32, 1033)
0.8893129770992366
(262, 32, 1033)
0.8854961832061069
(262, 32, 1033)
0.9083969465648855
(262, 32, 1033)
0.8969465648854962
(262, 32, 1033)
0.8816793893129771
(262, 32, 1033)
0.9045801526717557
Classification accuracy: 0.860606 / Chance level: 0.509146


In [None]:
import sklearn
from sklearn.decomposition import PCA
from sklearn import linear_model, decomposition, datasets
from sklearn.metrics import accuracy_score
from sklearn.model_selection import ShuffleSplit  


for n_component in range(30,31):
    sparse_epochs = sparse_epochs.reshape((trial, -1))
    pca = PCA(n_components=n_component)
    pca.fit(sparse_epochs)
    total_variance = np.asarray(pca.explained_variance_ratio_)
    print(sum(total_variance))
    new_X = pca.transform(sparse_epochs)

    samples = new_X

    scores = []
    cv = ShuffleSplit(n_splits=10, test_size=0.2, random_state=42)
    for train_idx, test_idx in cv.split(samples):
        y_train, y_test = np.asarray(labels)[train_idx], np.asarray(labels)[test_idx]
        X_train = samples[train_idx]
        X_test = samples[test_idx]



#         logistic = linear_model.LogisticRegression(C = 1e-5)
#         logistic.fit(X_train, y_train)
        lda = LinearDiscriminantAnalysis()
        lda.fit(X_train, y_train)



#         y_predict = logistic.predict(X_test)
        y_predict = lda.predict(X_test)
        scores.append(sklearn.metrics.accuracy_score(y_predict, y_test))


    class_balance = np.mean(np.asarray(labels) == np.asarray(labels)[0])
    print("Classification accuracy: %f / Chance level: %f" % (np.mean(scores),
                                                              class_balance))

In [66]:
for n_components in range(3,12):
    samples = []
    for s in range(sparse_epochs.shape[0]):
        X = sparse_epochs[s,:,:]
        pca = PCA(n_components=n_components)
        pca.fit(X)
        new_X = pca.transform(X)
        samples.append(new_X)
    samples = np.asarray(samples)
    print(samples.shape)
    samples = samples.reshape((sparse_epochs.shape[0], -1))
    print(samples.shape)


    labels = agg_labels
    scores = []
    cv = ShuffleSplit(n_splits=10, test_size=0.2, random_state=42)
    for train_idx, test_idx in cv.split(samples):
        y_train, y_test = np.asarray(labels)[train_idx], np.asarray(labels)[test_idx]
        X_train = samples[train_idx]
        X_test = samples[test_idx]



    #         logistic = linear_model.LogisticRegression(C = 1e-5)
    #         logistic.fit(X_train, y_train)
        lda = LinearDiscriminantAnalysis()
        lda.fit(X_train, y_train)



    #         y_predict = logistic.predict(X_test)
        y_predict = lda.predict(X_test)
        scores.append(sklearn.metrics.accuracy_score(y_predict, y_test))


    class_balance = np.mean(np.asarray(labels) == np.asarray(labels)[0])
    print("Classification accuracy: %f / Chance level: %f" % (np.mean(scores),
                                                              class_balance))

(328, 32, 3)
(328, 96)
Classification accuracy: 0.750000 / Chance level: 0.484907




(328, 32, 4)
(328, 128)
Classification accuracy: 0.769697 / Chance level: 0.484907
(328, 32, 5)
(328, 160)
Classification accuracy: 0.780303 / Chance level: 0.484907
(328, 32, 6)
(328, 192)
Classification accuracy: 0.769697 / Chance level: 0.484907
(328, 32, 7)
(328, 224)
Classification accuracy: 0.751515 / Chance level: 0.484907
(328, 32, 8)
(328, 256)
Classification accuracy: 0.792424 / Chance level: 0.484907
(328, 32, 9)
(328, 288)
Classification accuracy: 0.742424 / Chance level: 0.484907
(328, 32, 10)
(328, 320)
Classification accuracy: 0.713636 / Chance level: 0.484907
(328, 32, 11)
(328, 352)
Classification accuracy: 0.681818 / Chance level: 0.484907


In [None]:
import sklearn
from sklearn.pipeline import Pipeline
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.model_selection import ShuffleSplit, cross_val_score

from mne import Epochs, pick_types, find_events
from mne.channels import read_layout
from mne.io import concatenate_raws, read_raw_edf
from mne.datasets import eegbci
from mne.decoding import CSP

#you need to make sure there is no flat channel! otherwise covariance matrix is not positive definite

scores = []
cv = ShuffleSplit(n_splits=10, test_size=0.2, random_state=42)
for train_idx, test_idx in cv.split(epochs_std):
    y_train, y_test = np.asarray(labels)[train_idx], np.asarray(labels)[test_idx]
    X_train = epochs_std[train_idx]
    X_test = epochs_std[test_idx]
    print(X_train.shape)
    
    csp = CSP(n_components=4, reg=None, log=True, norm_trace=False, cov_est = 'epoch', cov_method_params= 'shrinkage')
    new_epochs = csp.fit_transform(X_train,  y_train)
    print(new_epochs.shape)
    lda = LinearDiscriminantAnalysis()
    lda.fit(new_epochs, y_train)
    
#     logistic = linear_model.LogisticRegression(C = 1e-5)
#     logistic.fit(new_epochs, y_train)
    

    y_pred_csp = csp.transform(X_test)
    y_pred = lda.predict(y_pred_csp)
#     y_predict = logistic.predict(y_pred_csp)
    
    print(y_pred)
    print(y_test)
    print(sklearn.metrics.accuracy_score(y_pred, y_test))
    scores.append(sklearn.metrics.accuracy_score(y_pred, y_test))


class_balance = np.mean(np.asarray(labels) == np.asarray(labels)[0])
print("Classification accuracy: %f / Chance level: %f" % (np.mean(scores),
                                                          class_balance))

#  Aggregate data across session

In [62]:
sparse_files = ['sparse_ANM210861_20130701.npy', 'sparse_ANM210861_20130702.npy', 
               'sparse_ANM210861_20130703.npy']

label_files = ['ANM210861_20130701_labels.npy', 'ANM210861_20130702_labels.npy',
              'ANM210861_20130703_labels.npy']


agg_spares_epochs = np.load(sparse_files[0])
agg_labels = np.load(label_files[0])
ind = np.linspace(0,3443-1, num = 1033)
ind = [int(np.floor(i)) for i in ind]
agg_spares_epochs = agg_spares_epochs[:,:, ind]


for i in range(1,3):
    new_epochs =  np.load(sparse_files[i])
    agg_spares_epochs = np.concatenate((agg_spares_epochs, new_epochs), axis = 0)
    new_label = np.load(label_files[i])
    agg_labels = np.concatenate((agg_labels, new_label), axis = 0)

print(agg_spares_epochs.shape)
print(agg_labels.shape)




(1027, 32, 1033)
(1027,)


#  Prediction using Riemannian based classifier on mix dataset across session from same subject

In [21]:
import sklearn 
import pyriemann.estimation
import pyriemann.classification
import sklearn.linear_model
from sklearn.model_selection import ShuffleSplit  
from scipy.fftpack import fft, ifft

cv = ShuffleSplit(n_splits=10, test_size=0.2, random_state=42)
scores = []
epochs_data = agg_spares_epochs
labels = agg_labels

for train_idx, test_idx in cv.split(epochs_data):
    y_train, y_test = np.asarray(labels)[train_idx], np.asarray(labels)[test_idx]
    X_train = epochs_data[train_idx]
    X_test = epochs_data[test_idx]
    print(X_train.shape)

    
    cov =  pyriemann.estimation.Covariances('lwf')
    cov_X_train = cov.transform(X_train)
    cov_X_test = cov.transform(X_test)
    
    TSclassifier = pyriemann.classification.TSclassifier(metric='riemann')
    TSclassifier.fit(cov_X_train, y_train)
    
    
    y_predict = TSclassifier.predict(cov_X_test)
    print(sklearn.metrics.accuracy_score(TSclassifier.predict(cov_X_train), y_train))
    scores.append(sklearn.metrics.accuracy_score(y_predict, y_test))


class_balance = np.mean(np.asarray(labels) == np.asarray(labels)[0])
print("Classification accuracy: %f / Chance level: %f" % (np.mean(scores),
                                                          class_balance))

(821, 32, 1033)
0.7990255785627284
(821, 32, 1033)
0.7917174177831913
(821, 32, 1033)
0.7892813641900122
(821, 32, 1033)
0.8087697929354446
(821, 32, 1033)
0.7929354445797807
(821, 32, 1033)
0.7941534713763703
(821, 32, 1033)
0.784409257003654
(821, 32, 1033)
0.7953714981729598
(821, 32, 1033)
0.8063337393422655
(821, 32, 1033)
0.7917174177831913
Classification accuracy: 0.736893 / Chance level: 0.484907


#  Prediction using PCA + Logistic regression

In [24]:
import sklearn
from sklearn.decomposition import PCA
from sklearn import linear_model, decomposition, datasets
from sklearn.metrics import accuracy_score
from sklearn.model_selection import ShuffleSplit  

trial,_,_ = agg_spares_epochs.shape
for n_component in range(10,30):
    epochs_reshaped = agg_spares_epochs.reshape((trial, -1))
    pca = PCA(n_components=n_component)
    samples = pca.fit_transform(epochs_reshaped)
    total_variance = np.asarray(pca.explained_variance_ratio_)
    print(sum(total_variance))
    

    scores = []
    cv = ShuffleSplit(n_splits=10, test_size=0.2, random_state=42)
    for train_idx, test_idx in cv.split(samples):
        y_train, y_test = np.asarray(labels)[train_idx], np.asarray(labels)[test_idx]
        X_train = samples[train_idx]
        X_test = samples[test_idx]



#         logistic = linear_model.LogisticRegression(C = 1e-5)
#         logistic.fit(X_train, y_train)
        lda = LinearDiscriminantAnalysis()
        lda.fit(X_train, y_train)



#         y_predict = logistic.predict(X_test)
        y_predict = lda.predict(X_test)
        scores.append(sklearn.metrics.accuracy_score(y_predict, y_test))


    class_balance = np.mean(np.asarray(labels) == np.asarray(labels)[0])
    print("Classification accuracy: %f / Chance level: %f" % (np.mean(scores),
                                                              class_balance))

0.052429577348040116
Classification accuracy: 0.515049 / Chance level: 0.484907
0.05701270750361295
Classification accuracy: 0.514078 / Chance level: 0.484907
0.061464911647578814
Classification accuracy: 0.513107 / Chance level: 0.484907
0.06556584584671517
Classification accuracy: 0.514563 / Chance level: 0.484907
0.06986368611392207
Classification accuracy: 0.515534 / Chance level: 0.484907
0.07394506213444246
Classification accuracy: 0.514078 / Chance level: 0.484907
0.0779305953120939
Classification accuracy: 0.505340 / Chance level: 0.484907
0.08199828336941267
Classification accuracy: 0.506796 / Chance level: 0.484907
0.08557103062888351
Classification accuracy: 0.512621 / Chance level: 0.484907
0.08958147689395413
Classification accuracy: 0.512136 / Chance level: 0.484907
0.09321724253893537
Classification accuracy: 0.504369 / Chance level: 0.484907
0.09683000091046615
Classification accuracy: 0.523786 / Chance level: 0.484907
0.1005017045261865
Classification accuracy: 0.50485

#  Prediction Using PCA + logistic across channels on aggregated dataset

In [64]:
for n_components in range(3,30):
    samples = []
    for s in range(agg_spares_epochs.shape[0]):
        X = agg_spares_epochs[s,:,:]
        pca = PCA(n_components=n_components)
        pca.fit(X)
        new_X = pca.transform(X)
        samples.append(new_X)
    samples = np.asarray(samples)
    print(samples.shape)
    samples = samples.reshape((agg_spares_epochs.shape[0], -1))

    print(samples.shape)


    labels = agg_labels
    scores = []
    cv = ShuffleSplit(n_splits=10, test_size=0.2, random_state=42)
    for train_idx, test_idx in cv.split(samples):
        y_train, y_test = np.asarray(labels)[train_idx], np.asarray(labels)[test_idx]
        X_train = samples[train_idx]
        X_test = samples[test_idx]



    #         logistic = linear_model.LogisticRegression(C = 1e-5)
    #         logistic.fit(X_train, y_train)
        lda = LinearDiscriminantAnalysis()
        lda.fit(X_train, y_train)



    #         y_predict = logistic.predict(X_test)
        y_predict = lda.predict(X_test)
        scores.append(sklearn.metrics.accuracy_score(y_predict, y_test))


    class_balance = np.mean(np.asarray(labels) == np.asarray(labels)[0])
    print("Classification accuracy: %f / Chance level: %f" % (np.mean(scores),
                                                              class_balance))

(1027, 32, 3)
(1027, 96)
Classification accuracy: 0.573301 / Chance level: 0.484907




(1027, 32, 4)
(1027, 128)
Classification accuracy: 0.604854 / Chance level: 0.484907
(1027, 32, 5)
(1027, 160)
Classification accuracy: 0.634951 / Chance level: 0.484907
(1027, 32, 6)
(1027, 192)
Classification accuracy: 0.654369 / Chance level: 0.484907
(1027, 32, 7)
(1027, 224)
Classification accuracy: 0.650485 / Chance level: 0.484907
(1027, 32, 8)
(1027, 256)
Classification accuracy: 0.662136 / Chance level: 0.484907
(1027, 32, 9)
(1027, 288)
Classification accuracy: 0.658252 / Chance level: 0.484907
(1027, 32, 10)
(1027, 320)
Classification accuracy: 0.655825 / Chance level: 0.484907
(1027, 32, 11)
(1027, 352)
Classification accuracy: 0.655340 / Chance level: 0.484907
(1027, 32, 12)
(1027, 384)
Classification accuracy: 0.643689 / Chance level: 0.484907
(1027, 32, 13)
(1027, 416)
Classification accuracy: 0.650000 / Chance level: 0.484907
(1027, 32, 14)
(1027, 448)
Classification accuracy: 0.652913 / Chance level: 0.484907
(1027, 32, 15)
(1027, 480)
Classification accuracy: 0.647573