In [None]:
import mne
import scipy
import numpy as np
import matplotlib.pyplot as plt
import utils
import os
import glob
from numpy import linalg as LA
from scipy.stats import zscore, pearsonr
from scipy.io import savemat, loadmat
from scipy import signal
%matplotlib widget

## Data loading

In [None]:
%%capture
# Recording / stimulus selection.
subjects = ['AS', 'YY']
folder = 'EOG'
videos = ['Dr', 'Parra', 'Safety', 'Spring']  # alternative subsets: ['Parra', 'Dr'] or ['Safety']
feature_type = ['muFlow']  # alternative: muSqTemporalContrast

features_list, handcrafted_list, eeg_multisub_list = [], [], []
for video in videos:
    features, eeg_multisub, fs, _ = utils.multisub_data_org(
        subjects, video, folder,
        feature_type=feature_type, bads=['B25'],
        eog=True, regression=True, normalize=True,
    )

    # Union, over all feature columns, of the sample indices where the
    # unit-norm column has a peak with prominence >= 0.05 and width <= 1.5.
    # NOTE: `peaks` is reset for every video, so after the loop it only holds
    # the indices found in the last video.
    peaks = np.array([])
    for col in range(features.shape[1]):
        column_unit = features[:, col] / LA.norm(features[:, col])
        col_peaks, _ = signal.find_peaks(np.squeeze(column_unit), prominence=(0.05, None), width=(None, 1.5))
        peaks = np.union1d(peaks, col_peaks)
    peaks = peaks.astype(int)  # union1d with the empty float seed yields floats

    # Binary indicator with the same shape as `features`: column 0 is set to 1
    # at every detected peak, everything else stays 0.
    handcrafted = np.zeros_like(features)
    handcrafted[peaks, 0] = 1

    features_list.append(features)  # a per-video zscore normalization could be applied here
    handcrafted_list.append(handcrafted)
    eeg_multisub_list.append(eeg_multisub)

# Stack all videos along axis 0; the stimulus feature is then scaled by the
# norm of the whole concatenated array.
feature_concat = np.concatenate(features_list, axis=0)
feature_concat = feature_concat / LA.norm(feature_concat)
handcrafted_concat = np.concatenate(handcrafted_list, axis=0)
eeg_multisub_concat = np.concatenate(eeg_multisub_list, axis=0)

# Time axis of the concatenated data (sample index divided by fs).
T = feature_concat.shape[0]
times = np.arange(T) / fs

In [None]:
# Number of samples (rows) in the concatenated stimulus feature.
feature_concat.shape[0]

In [None]:
# Number of samples (rows) in the concatenated peak-indicator feature.
handcrafted_concat.shape[0]

In [None]:
# Plot the peak-indicator feature over the concatenated timeline.
# (Peak detection on feature_concat itself, with the peaks overlaid as "x"
# markers, was an earlier variant of this cell.)
plt.close()
fig, ax = plt.subplots()
ax.plot(handcrafted_concat)
plt.show()

In [None]:
# Peak positions converted from sample indices to seconds, using the sampling
# rate returned by the loader instead of a hard-coded 30.
# NOTE: `peaks` is the variable left behind by the data-loading loop, so these
# are the peaks of the last video only, relative to the start of that video
# (not positions on the concatenated timeline).
peaks/fs

## CCA

In [None]:
# Cross-validated CCA between one subject's EEG and the stimulus feature.
# The settings below are passed through to the call, so changing them here
# actually changes the analysis (they used to be shadowed by literals).
L_timefilter = int(fs)  # feature-side filter length (int(fs) samples), passed as L_feat
n_components = 5
fold = 10
eeg_onesub = eeg_multisub_concat[:,:,0]  # index 0 on the last axis: subject 1
corr_train, corr_test, V_A_train, V_B_train = utils.cross_val_CCA(
    eeg_onesub, feature_concat, fs,
    L_EEG=1, L_feat=L_timefilter,
    causalx=False, causaly=True,
    fold=fold, n_components=n_components,
    regularization='lwcov', K_regu=None,
    message=True, signifi_level=True,
)


In [None]:
# Visualization: topographic maps of the CCA forward model, one panel per
# component (five panels, titled CC 1 .. CC 5).
forward_model = utils.forward_model(eeg_multisub_concat[:,:,0], V_A_train, regularization='lwcov')

# Channel info for the topomaps: names come from the 'biosemi' layout and the
# 'biosemi64' montage is attached to it.
biosemi_layout = mne.channels.read_layout('biosemi')
create_info = mne.create_info(biosemi_layout.names, ch_types='eeg', sfreq=30)
create_info.set_montage('biosemi64')

plt.close()
fig = plt.figure()
for cc_rank in range(1, 6):
    ax = fig.add_subplot(2, 3, cc_rank)
    mne.viz.plot_topomap(forward_model[:, cc_rank - 1], create_info, ch_type='eeg', axes=ax)
    ax.set_title('CC ' + str(cc_rank))
plt.show()

In [None]:
# Note: GCCA with one subject + stimulus is equivalent to CCA.
# Two views: subject 1 EEG (filter length 1, non-causal) and the stimulus
# feature (filter length fs, causal), each weighted with rho = 1.
n_components = 5
datalist = [eeg_multisub_concat[:,:,0], feature_concat]
Llist = [1, fs]
causal_list = [False, True]
rhos = [1, 1]
corr_train, corr_test, Wlist_train, Flist_train = utils.cross_val_GCCA_multi_mod(
    datalist, Llist, causal_list, rhos, fs,
    fold=10, n_components=n_components, regularization='lwcov',
    message=True, signifi_level=True, ISC=True,
)

In [None]:
# GCCA on the EEG of all subjects (single modality, no stimulus view).
n_components = 5
datalist = [eeg_multisub_concat]
Llist = [1]
causal_list = [False]
rhos = [1]
corr_train, corr_test, Wlist_train, Flist_train = utils.cross_val_GCCA_multi_mod(
    datalist, Llist, causal_list, rhos, fs,
    fold=10, n_components=n_components, regularization='lwcov',
    message=True, signifi_level=True, ISC=True,
)

In [None]:
# GCCA on all subjects + stimulus. The per-view weights `rhos` are not fixed
# here: they are chosen by utils.rho_sweep over 11 evenly spaced values in
# [-2, 3] and then used for the cross-validated fit.
n_components = 5
datalist = [eeg_multisub_concat, feature_concat]
Llist = [1, fs]
causal_list = [False, True]
rhos = utils.rho_sweep(
    datalist, np.linspace(-2,3,11), Llist, causal_list, fs,
    fold=10, n_components=n_components, message=True,
)
corr_train, corr_test, Wlist_train, Flist_train = utils.cross_val_GCCA_multi_mod(
    datalist, Llist, causal_list, rhos, fs,
    fold=10, n_components=n_components, regularization='lwcov',
    message=True, signifi_level=True, ISC=True,
)

In [None]:
# Forward model calculated using the shared subspace of all subjects/views.
# The per-subject slices Flist_train[0][:, n, :] are averaged over subjects;
# use Flist_train[0][:, 0, :] on its own to look at subject 1 only.
nb_sub = Flist_train[0].shape[1]
forward_model = sum(Flist_train[0][:, n, :] for n in range(nb_sub)) / nb_sub

# Channel info for the topomaps: names come from the 'biosemi' layout and the
# 'biosemi64' montage is attached to it.
biosemi_layout = mne.channels.read_layout('biosemi')
create_info = mne.create_info(biosemi_layout.names, ch_types='eeg', sfreq=30)
create_info.set_montage('biosemi64')

# One topomap per component, five panels on a 2x3 grid.
plt.close()
fig = plt.figure()
for cc_rank in range(1, 6):
    ax = fig.add_subplot(2, 3, cc_rank)
    mne.viz.plot_topomap(forward_model[:, cc_rank - 1], create_info, ch_type='eeg', axes=ax)
    ax.set_title('CC ' + str(cc_rank))
plt.show()

In [None]:
# Forward model calculated using the projected subspace of a single subject.
# Subject 1 is shown; for subject 2 use eeg_multisub_concat[:,:,1] together
# with Wlist_train[0][:,1,:].
forward_model = utils.forward_model(eeg_multisub_concat[:,:,0], Wlist_train[0][:,0,:], regularization=None)

# Channel info for the topomaps: names come from the 'biosemi' layout and the
# 'biosemi64' montage is attached to it.
biosemi_layout = mne.channels.read_layout('biosemi')
create_info = mne.create_info(biosemi_layout.names, ch_types='eeg', sfreq=30)
create_info.set_montage('biosemi64')

# One topomap per component, five panels on a 2x3 grid.
plt.close()
fig = plt.figure()
for cc_rank in range(1, 6):
    ax = fig.add_subplot(2, 3, cc_rank)
    mne.viz.plot_topomap(forward_model[:, cc_rank - 1], create_info, ch_type='eeg', axes=ax)
    ax.set_title('CC ' + str(cc_rank))
plt.show()

## Find the appropriate amount of data (recording length in minutes)

Old version. Do not run.

In [None]:
# CCA as a function of data length: for every t in t_list only the first
# t*fs*60 samples (t minutes) are used, and the test-set correlations are
# averaged over axis 0 into one row of corr_test_t per t.
# NOTE: this loop overwrites corr_train, corr_test, V_A_train and V_B_train
# from the full-data CCA cell.
L_timefilter = int(fs)
n_components = 5
fold = 10
t_list = range(1, 30)
mean_corr_rows = [np.empty((0, n_components))]
for t in t_list:
    n_samples = t*fs*60
    eeg = eeg_multisub_concat[:n_samples, :, 0]
    feature = feature_concat[:n_samples, :]
    corr_train, corr_test, V_A_train, V_B_train = utils.cross_val_CCA(
        eeg, feature, fs,
        L_EEG=1, L_feat=fs, causalx=False, causaly=True,
        fold=fold, n_components=n_components,
        regularization='lwcov', K_regu=None,
        message=True, signifi_level=True,
    )
    mean_corr_rows.append(np.mean(corr_test, axis=0, keepdims=True))
corr_test_t = np.concatenate(mean_corr_rows, axis=0)


In [None]:
# Mean test-set correlation of the first two CCA components versus data
# length, with a fixed reference line at 0.0431 labelled as the significance level.
plt.close()
fig, ax = plt.subplots()
for comp, comp_label in enumerate(['1st component', '2nd component']):
    ax.plot(t_list, corr_test_t[:, comp], label=comp_label)
ax.axhline(y=0.0431, linestyle='-.', color='c', label='significance level')
ax.legend(loc='best')
ax.set_xlabel('data length (min)')
ax.set_title('Average correlation coefficient on the test sets (CCA)')
plt.show()

In [None]:
# GCCA (all subjects + stimulus) as a function of data length: for every t in
# t_list only the first t*fs*60 samples (t minutes) are used, rhos are picked
# by a sweep, and the test-set correlations are averaged over axis 0 into one
# row of corr_test_t per t.
#
# The utils calls follow the signatures used by the other GCCA cells of this
# notebook (datalist, Llist, causal_list, rhos, fs, ...; four return values);
# the previous calls used an older positional signature, a misspelled
# `regularizaion` keyword and a three-value unpack.
# NOTE(review): confirm the signatures and the defaults of the keywords not
# passed here (e.g. signifi_level) against utils before relying on this cell.
L_timefilter = int(fs)
n_components = 5
fold = 10
Llist = [1, L_timefilter]    # EEG view: filter length 1; stimulus view: int(fs) samples
causal_list = [False, True]  # same per-view causality flags as the other GCCA cells
sweep_list = np.linspace(-1,3,9) # sweep values -1, -0.5, ..., 3 (presumably log10(rho) exponents; confirm in utils.rho_sweep)
t_list = range(1, 30)
corr_test_t = np.empty((0, n_components))
for t in t_list:
    eeg = eeg_multisub_concat[:t*fs*60, :, :]
    feature = feature_concat[:t*fs*60, :]
    rhos = utils.rho_sweep([eeg, feature], sweep_list, Llist, causal_list, fs, fold=fold, n_components=n_components, message=False)
    corr_train, corr_test, Wlist, Flist = utils.cross_val_GCCA_multi_mod([eeg, feature], Llist, causal_list, rhos, fs, fold=fold, n_components=n_components, regularization='lwcov', message=False, ISC=True)
    print('ISC of the top {} components on the test sets when rhos={}: {}'.format(n_components, rhos, np.average(corr_test, axis=0)))
    corr_test_t = np.append(corr_test_t, np.mean(corr_test, axis=0, keepdims=True), axis=0)

In [None]:
# Mean test-set correlation of the first three GCCA components versus data
# length, with a fixed reference line at 0.0191 labelled as the significance level.
plt.close()
fig, ax = plt.subplots()
for comp, comp_label in enumerate(['1st component', '2nd component', '3rd component']):
    ax.plot(t_list, corr_test_t[:, comp], label=comp_label)
ax.axhline(y=0.0191, linestyle='-.', color='c', label='significance level')
ax.legend(loc='best')
ax.set_xlabel('data length (min)')
ax.set_title('Average correlation coefficient on the test sets (GCCA)')
plt.show()