In [1]:
import scipy.io
import numpy as np
import utils
import matplotlib.pyplot as plt
import os
from numpy import linalg as LA
from scipy import signal
from scipy.linalg import toeplitz
from scipy.stats import zscore, pearsonr
from sklearn.covariance import LedoitWolf
from tqdm import tqdm

In [2]:
EEG_data = scipy.io.loadmat('../Correlated Component Analysis/data/Prepro_EEG.mat')
X_prepro = EEG_data['X']
fsEEG = 256
T, D, N = X_prepro.shape

In [3]:
features_data = scipy.io.loadmat('../Correlated Component Analysis/data/features.mat')
fsStim = int(features_data['fsVideo'])
features = np.nan_to_num(features_data['muFlow'])

In [4]:
downsampledEEG = signal.resample_poly(X_prepro, fsStim, fsEEG)
features = features[:downsampledEEG.shape[0]]
normalized_features = zscore(features)

###  Only use the data of one subject and perform CCA

In [5]:
EEG_of_one_sub = downsampledEEG[:,:,0]
L_timefilter = fsStim

In [6]:
n_components = 5
conv_mtx = utils.convolution_mtx(L_timefilter, normalized_features)
corr_coe, p_value, V_A, V_B = utils.cano_corr(EEG_of_one_sub, conv_mtx, n_components=n_components)
corr_coe

array([0.2989372 , 0.2285284 , 0.18205523, 0.17242502, 0.14464959])

#### Equivalence of CCA and GCCA when there are only two datasets

In [7]:
datalist = [EEG_of_one_sub, conv_mtx]
_, W = utils.GCCA_multi_modal(datalist, n_components, regularization=None)
W_EEG = W[:D,:]
W_Stim = W[D:D+L_timefilter,:]

In [8]:
EEG_trans = EEG_of_one_sub@W_EEG
Stim_trans = conv_mtx@W_Stim
corr_pvalue = [pearsonr(EEG_trans[:,k], Stim_trans[:,k]) for k in range(n_components)]
corr_coe = np.array([corr_pvalue[k][0] for k in range(n_components)])
corr_coe

array([0.2989372 , 0.2285284 , 0.18205523, 0.17242502, 0.14464959])

#### Cross-Validation

In [9]:
fold = 10
corr_train_avg = np.zeros(n_components)
corr_test_avg = np.zeros(n_components)
for idx in range(fold):
    EEG_train, EEG_test, Sti_train, Sti_test = utils.split(EEG_of_one_sub, normalized_features, fold=fold, fold_idx=idx+1)
    conv_mtx_train = utils.convolution_mtx(L_timefilter, Sti_train)
    corr_coe_train, p_value_train, V_A_train, V_B_train = utils.cano_corr(EEG_train, conv_mtx_train, n_components=n_components)
    conv_mtx_test = utils.convolution_mtx(L_timefilter, Sti_test)
    corr_coe_test, p_value_test, _, _ = utils.cano_corr(EEG_test, conv_mtx_test, n_components=n_components, V_A=V_A_train, V_B=V_B_train)
    corr_train_avg = corr_train_avg + corr_coe_train
    corr_test_avg = corr_test_avg + corr_coe_test
corr_train_avg = corr_train_avg / fold
corr_test_avg = corr_test_avg / fold

In [10]:
corr_train_avg

array([0.3091124 , 0.24412388, 0.19284348, 0.18006853, 0.15290927])

In [11]:
corr_test_avg

array([ 0.10005108,  0.00236058, -0.0071686 ,  0.0207658 ,  0.00207027])

### Make use of all data using GCCA

In [12]:
datalist = [downsampledEEG, conv_mtx]
_, W = utils.GCCA_multi_modal(datalist, n_components, regularization='lwcov')
W_EEG = W[:N*D,:]
W_EEG_stack = np.reshape(W_EEG, (N,D,-1))
W_EEG_stack = np.transpose(W_EEG_stack, [1,0,2]) # W: D*N*n_components
W_Stim = W[N*D:N*D+L_timefilter,:]
Wlist = [W_EEG_stack, W_Stim]

In [13]:
avg_corr = utils.avg_corr_coe_multi_modal(datalist, Wlist, n_components=5)
avg_corr

array([0.1772279 , 0.15980386, 0.15726257, 0.15421245, 0.15289233])

#### Cross-Validation

In [14]:
fold = 10
cv_n_components = 5
corr_train = np.zeros((fold, cv_n_components))
corr_test = np.zeros_like(corr_train)
for i in range(fold):
    EEG_train, EEG_test, Sti_train, Sti_test = utils.split(downsampledEEG, normalized_features, fold=fold, fold_idx=i+1)
    _, W_train = utils.GCCA_multi_modal([EEG_train, Sti_train], cv_n_components, regularization='lwcov')
    W_EEG_train = W_train[:N*D,:]
    W_EEG_stack_train = np.reshape(W_EEG_train, (N,D,-1))
    W_EEG_stack_train = np.transpose(W_EEG_stack_train, [1,0,2]) # W: D*N*n_components
    W_Stim_train = W_train[N*D:N*D+L_timefilter,:]
    Wlist_train = [W_EEG_stack_train, W_Stim_train]
    corr_train[i,:] = utils.avg_corr_coe_multi_modal([EEG_train, Sti_train], Wlist_train, n_components=5)
    corr_test[i,:] = utils.avg_corr_coe_multi_modal([EEG_test, Sti_test], Wlist_train, n_components=5)


In [15]:
np.average(corr_train, axis=0)

array([0.17237445, 0.16662443, 0.1625196 , 0.15965791, 0.15506266])

In [16]:
np.average(corr_test, axis=0)

array([0.03224798, 0.01250905, 0.01286681, 0.00900797, 0.01145189])

#### If we do not include features

In [25]:
fold = 10
cv_n_components = 5
corr_train = np.zeros((fold, cv_n_components))
corr_test = np.zeros_like(corr_train)
X_prepro = downsampledEEG
T, D, N = X_prepro.shape
for i in range(fold):
    len_test = T // fold
    X_test = X_prepro[len_test*i:len_test*(i+1),:,:]
    X_train = np.delete(X_prepro, range(len_test*i, len_test*(i+1)), axis=0)
    _, W_train, _ = utils.GCCA(X_train, n_components=cv_n_components, regularization='lwcov')
    corr_train[i,:] = utils.avg_corr_coe(X_train, W_train, N, n_components=cv_n_components)
    corr_test[i,:] = utils.avg_corr_coe(X_test, W_train, N, n_components=cv_n_components)

In [26]:
np.average(corr_train, axis=0)

array([0.19235087, 0.18572353, 0.18103591, 0.17796426, 0.17295483])

In [27]:
np.average(corr_test, axis=0)

array([0.03680234, 0.01379785, 0.01454057, 0.01068417, 0.01319305])

Higher than including stimulus. Quite different when EEG signals are not down sampled.