In [None]:
import mne
import scipy
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import utils
import algo
import os
import random
import glob
import copy
import itertools
from findpeaks import findpeaks
from tqdm import tqdm
from numpy import linalg as LA
from scipy.stats import zscore, pearsonr
from scipy.io import savemat, loadmat
from scipy import signal
%matplotlib widget

## Data Loading

In [None]:
def multisub_data_org(subjects, video_id, fsStim, bads, band=None, eog=False, regression=False, normalize=True, smooth=True):
    """Collect the stimulus features and every subject's recording for one video.

    Parameters
    ----------
    subjects : sequence of str
        Folder names under '../../Experiments/data/'; one recording
        '<video_id>_eeg.set' is read per subject.
    video_id : str
        Prefix shared by the feature files and the recording file name.
    fsStim : int
        Forwarded to utils.preprocessing as ``resamp_freqs``; also the number
        of samples dropped at both ends of every returned array.
    bads : sequence
        ``bads[n]`` is forwarded to utils.preprocessing for ``subjects[n]``.
    band, eog, regression, normalize
        Forwarded unchanged to utils.preprocessing.
    smooth
        Forwarded to utils.clean_features.

    Returns
    -------
    feats : ndarray
        Cleaned feature matrix, columns ordered as mask, tcmask, flow[:, 8],
        tempctra, clipped to the common length.
    eeg_multisub, eog_multisub, hf_multisub : ndarray
        Per-subject data stacked along a new last axis (axis 2).
    fs
        Sampling rate reported by utils.preprocessing for the last subject.
    times : ndarray
        ``range(T) / fs`` where T is the shortest length found.
        NOTE(review): T is the length *before* clipping, so ``times`` is
        longer than the clipped arrays -- confirm this is intended.
    """
    # ---- stimulus features ----
    feats_path_folder = '../Feature extraction/features/'
    feat_prefix = feats_path_folder + video_id
    objf = np.load(feat_prefix + '_mask.npy')
    flow = np.load(feat_prefix + '_flow.npy')
    tempctra = np.load(feat_prefix + '_tempctra.npy')
    tc_mask = np.load(feat_prefix + '_tcmask.npy')
    # Column 8 of the flow features, kept as its own column.
    flow_mag = flow[:, 8][:, np.newaxis]
    feats = utils.clean_features(
        np.concatenate((objf, tc_mask, flow_mag, tempctra), axis=1),
        smooth=smooth,
    )

    # ---- per-subject recordings ----
    n_samples = feats.shape[0]  # shrinks to the shortest signal seen so far
    eeg_per_sub, eog_per_sub, hf_per_sub = [], [], []
    for idx, subject in enumerate(subjects):
        eeg_path = '../../Experiments/data/' + subject + '/' + video_id + '_eeg.set'
        eeg_prepro, fs, high_freq = utils.preprocessing(
            eeg_path, HP_cutoff=0.5, AC_freqs=50, band=band, resamp_freqs=fsStim,
            bads=bads[idx], eog=eog, regression=regression, normalize=normalize,
        )
        eeg_picks = mne.pick_types(eeg_prepro.info, eeg=True)
        eog_picks = mne.pick_types(eeg_prepro.info, eog=True)
        eeg_data, _ = eeg_prepro[eeg_picks]
        eog_data, _ = eeg_prepro[eog_picks]
        hf_data, _ = high_freq[mne.pick_types(high_freq.info, eeg=True)]
        # Transpose so that axis 0 is time and axis 1 is channels.
        eeg_data, eog_data, hf_data = eeg_data.T, eog_data.T, hf_data.T
        eeg_per_sub.append(eeg_data)
        eog_per_sub.append(eog_data)
        hf_per_sub.append(hf_data)
        n_samples = min(n_samples, eeg_data.shape[0], hf_data.shape[0])

    # ---- clip everything to the common window and stack subjects on axis 2 ----
    keep = slice(fsStim, n_samples - fsStim)
    feats = feats[keep, :]
    eeg_multisub = np.concatenate([sig[keep, :, np.newaxis] for sig in eeg_per_sub], axis=2)
    eog_multisub = np.concatenate([sig[keep, :, np.newaxis] for sig in eog_per_sub], axis=2)
    hf_multisub = np.concatenate([sig[keep, :, np.newaxis] for sig in hf_per_sub], axis=2)
    times = np.array(range(n_samples)) / fs
    return feats, eeg_multisub, eog_multisub, hf_multisub, fs, times

In [None]:
%%capture
subjects = ['AS', 'YY', 'CM', 'SUB1', 'IR', 'SUB2', 'YZ', 'WD', 'CC', 'CW', 'WS', 'VC','HV','JC','DV','CD','JV','KY']
bads = [[], ['B25'], ['A24'], ['B25'], ['B25'], ['B25'], ['A11','B7','B25'], ['B25','B31'], ['B25'], [], ['A1','A30','B25'], ['B25','B31'], ['B25','B31','A20','A21','A26','A31'], ['B25','B31','B32','A28','A29','A30'],['A25','A30','B25','B29'],['A30','B25','B31'],['A30','B25'],[]]
eeg_path_folder = "../../Experiments/data/CD/"
video_ids = [dataset[0:2] for dataset in os.listdir(eeg_path_folder) if dataset.endswith('.set')]
video_ids.remove('Mr')
features_list = []
eeg_multisub_list = []
eog_multisub_list = []
hf_multisub_list = []
for video_id in video_ids:
    features, eeg_multisub, eog_multisub, hf_multisub, fs, _ = multisub_data_org(subjects, video_id, fsStim=30, bads=bads, band=[25,35], eog=True, regression=True, normalize=True, smooth=True)
    features_list.append(features) 
    eeg_multisub_list.append(eeg_multisub)
    eog_multisub_list.append(eog_multisub)
    hf_multisub_list.append(hf_multisub)
T = sum([F.shape[0] for F in features_list])
times = np.array(range(T))/fs

In [None]:
# Number of subjects, passed to the per-subject pipelines in later cells.
n_sub = len(subjects)
# Total number of retained samples divided by the sampling rate and by 60,
# i.e. the amount of data in minutes (assuming fs is expressed in Hz).
T/fs/60

In [None]:
# Standard BioSemi 32- and 64-channel montages shipped with MNE.
bio32 = mne.channels.make_standard_montage('biosemi32')
bio64 = mne.channels.make_standard_montage('biosemi64')
# Boolean mask over the 64-channel montage: True where the channel name is
# also part of the 32-channel montage.
downsample_ind = [ch in bio32.ch_names for ch in bio64.ch_names]
# complem_ind = [ch not in bio32.ch_names for ch in bio64.ch_names]

In [None]:
# Apply the 32-channel mask on axis 1 of every per-video array.
# NOTE(review): assumes axis 1 follows the biosemi64 channel order -- confirm
# against the output of utils.preprocessing.
eeg_32_list = [eeg64[:,downsample_ind,:] for eeg64 in eeg_multisub_list]
hf_32_list = [hf64[:,downsample_ind,:] for hf64 in hf_multisub_list]
# Concatenate both channel subsets along axis 1, per video.
mix_64_list = [np.concatenate((eeg32, hf32), axis=1) for eeg32, hf32 in zip(eeg_32_list, hf_32_list)]

## CCA

In [None]:
def pip_CCA(eeg_multisub_list, feature_list, fs, n_sub, fig_name, tab_name, L_EEG=3, L_Stim=None, offset_EEG=1, offset_Stim=0, signifi_level=True, n_components=5, ifEOG=False):
    """Run a cross-validated CCA per subject and save the results.

    For every subject index in ``range(n_sub)`` the subject's slice
    ``[:, :, id_sub]`` is taken from each array in ``eeg_multisub_list`` and
    passed, together with ``feature_list``, to
    ``algo.CanonicalCorrelationAnalysis``; ``cross_val()`` is then called.

    Parameters
    ----------
    eeg_multisub_list : list of ndarray
        One 3-D array per video; the last axis indexes subjects.
    feature_list : list
        One stimulus feature array per video.
    fs
        Sampling rate forwarded to the CCA class.
    n_sub : int
        Number of subjects to process.
    fig_name : str
        File-name prefix of the per-subject figures (only used when
        ``ifEOG`` is False).
    tab_name : str
        Base name of the CSV written to 'tables/CCA/OneShot/'.
    L_EEG, L_Stim, offset_EEG, offset_Stim, signifi_level, n_components
        Forwarded to ``algo.CanonicalCorrelationAnalysis``. ``L_Stim=None``
        (the default) resolves to ``int(fs/2)`` using the ``fs`` argument.
    ifEOG : bool
        When True no forward model is computed and no figure is saved.

    Returns
    -------
    None
        Side effects only: one CSV with columns ID, CC1..CCn, TSC(top2),
        Sig_corr and, unless ``ifEOG``, one PNG per subject.
    """
    # Resolve the default from the fs that was actually passed in, rather
    # than from whatever global fs existed when the function was defined.
    if L_Stim is None:
        L_Stim = int(fs/2)
    fig_dir = 'figures/CCA/OneShot'
    tab_dir = 'tables/CCA/OneShot'
    if not ifEOG:
        # Created once up front instead of being re-checked for every subject.
        os.makedirs(fig_dir, exist_ok=True)
    cc = np.zeros((n_sub, n_components+3))
    for id_sub in range(n_sub):
        print('subject: ', id_sub+1)
        eeg_onesub_list = [eeg[:,:,id_sub] for eeg in eeg_multisub_list]
        CCA = algo.CanonicalCorrelationAnalysis(eeg_onesub_list, feature_list, fs, L_EEG, L_Stim, offset_EEG, offset_Stim, signifi_level=signifi_level, n_components=n_components)
        _, corr_test, sig_corr, _, tsc_test, _, _, V_A_train, V_B_train = CCA.cross_val()
        # Row layout: [1-based subject id, mean test correlation per component, mean TSC, sig_corr]
        cc[id_sub,0] = int(id_sub+1)
        cc[id_sub,1:n_components+1] = np.average(corr_test, axis=0)
        cc[id_sub,-2] = np.average(tsc_test)
        cc[id_sub,-1] = sig_corr
        if not ifEOG:
            # The forward model is only needed for the spatial-response figure.
            eeg_onesub = np.concatenate(tuple(eeg_onesub_list), axis=0)
            forward_model = CCA.forward_model(eeg_onesub, V_A_train)
            utils.plot_spatial_resp(forward_model, corr_test, fig_dir+'/'+fig_name+str(id_sub+1)+'.png')
    columns = ['ID'] + ['CC'+str(i+1) for i in range(n_components)] + ['TSC(top2)','Sig_corr']
    df_cca = pd.DataFrame(cc, columns = columns)
    os.makedirs(tab_dir, exist_ok=True)
    df_cca.to_csv(tab_dir+'/'+tab_name+'.csv', index=False)

In [None]:
# Single feature columns picked out of each video's feature matrix.
# NOTE(review): the meaning of the column indices (20/21 vs. 8/17) is only
# implied by the variable names; verify against the column layout assembled
# in multisub_data_org.
avgflow_list = [feats[:,20] for feats in features_list]
avgtempctr_list = [feats[:,21] for feats in features_list]
objflow_list = [feats[:,8] for feats in features_list]
objtempctr_list = [feats[:,17] for feats in features_list]

In [None]:
# Per-subject CCA of EEG vs. avgflow_list; figures are prefixed 'SR_avgflow_'
# and the table is written as 'avgflow_eeg.csv'.
pip_CCA(eeg_multisub_list, avgflow_list, fs, n_sub, 'SR_avgflow_', 'avgflow_eeg')

In [None]:
# Per-subject CCA of EEG vs. avgtempctr_list.
# NOTE(review): the table name 'avgtpempctr_eeg' looks like a typo of
# 'avgtempctr_eeg'; left unchanged because other code may read this file name.
pip_CCA(eeg_multisub_list, avgtempctr_list, fs, n_sub, 'SR_avgtempctr_', 'avgtpempctr_eeg')

In [None]:
# Per-subject CCA of EEG vs. objflow_list; table written as 'objflow_eeg.csv'.
pip_CCA(eeg_multisub_list, objflow_list, fs, n_sub, 'SR_objflow_', 'objflow_eeg')

In [None]:
# Per-subject CCA of EEG vs. objtempctr_list; table written as 'objtempctr_eeg.csv'.
pip_CCA(eeg_multisub_list, objtempctr_list, fs, n_sub, 'SR_objtempctr_', 'objtempctr_eeg')

In [None]:
# Same pipeline on the EOG channels. With ifEOG=True no figure is saved, so
# the fig_name argument is a placeholder.
pip_CCA(eog_multisub_list, objflow_list, fs, n_sub, 'DoesNotMatter', 'objflow_eog', ifEOG=True)

In [None]:
# Same pipeline on the EOG channels. With ifEOG=True no figure is saved, so
# the fig_name argument is a placeholder.
pip_CCA(eog_multisub_list, objtempctr_list, fs, n_sub, 'DoesNotMatter', 'objtempctr_eog', ifEOG=True)

## CorrCA 

In [None]:
# CorrCA on all subjects at once, followed by a plot of the returned F_train.
n_sub = len(subjects)
eeg = [eeg[:, :, :n_sub] for eeg in eeg_multisub_list]
corr_CA = algo.CorrelatedComponentAnalysis(
    eeg, fs, L=5, offset=2, signifi_level=True, n_components=10
)
(corr_train, corr_test,
 tsc_train, tsc_test,
 isc_train, isc_test,
 W_train, F_train) = corr_CA.cross_val()
# eeg_concat = np.concatenate(eeg, axis=0)
# F = corr_CA.forward_model(eeg_concat, W_train)
if not os.path.exists('figures/CorrCA/Oneshot'):
    os.makedirs('figures/CorrCA/Oneshot')
# The figure is named after the number of subjects used.
utils.plot_spatial_resp(F_train, corr_test, f'figures/CorrCA/Oneshot/{n_sub}.png')

In [None]:
# CorrCA as a function of the number of subjects: for every group size, up to
# 30 randomly ordered subject combinations are cross-validated and the test
# correlations / TSC values are summarised by mean and standard deviation.
subject_list = range(len(subjects))
corr_mean, tsc_mean = [], []
corr_std, tsc_std = [], []
for n_sub in range(2, len(subjects)+1):
    print('number of subjects: ', n_sub)
    corr_list, tsc_list = [], []
    count = 0
    comb_list = list(itertools.combinations(subject_list, n_sub))
    random.shuffle(comb_list)
    for count, item in enumerate(comb_list, start=1):
        if count > 30:
            # Only the first 30 shuffled combinations are evaluated.
            break
        eeg = [eeg[:, :, list(item)] for eeg in eeg_multisub_list]
        corr_CA = algo.CorrelatedComponentAnalysis(
            eeg, fs, L=5, offset=2, n_components=10, signifi_level=False, message=False
        )
        _, corr_test, _, tsc_test, _, _, _, _ = corr_CA.cross_val()
        corr_list.append(corr_test)
        tsc_list.append(tsc_test)
    # Pool the results of all evaluated combinations before summarising.
    corr_all = np.concatenate(corr_list, axis=0)
    tsc_all = np.concatenate(tsc_list, axis=0)
    corr_mean.append(np.mean(corr_all, axis=0))
    tsc_mean.append(np.mean(tsc_all))
    print('tsc_mean: ', tsc_mean[-1])
    corr_std.append(np.std(corr_all, axis=0))
    tsc_std.append(np.std(tsc_all))
    print('tsc_std: ', tsc_std[-1])


In [None]:
# np.savez('tables/CorrCA_18.npz', tsc_mean=tsc_mean, tsc_std=tsc_std, corr_mean=corr_mean, corr_std=corr_std)

In [None]:
# CorrCA as a function of the amount of data: for every duration in len_list
# (minutes), 30 randomly placed contiguous blocks are cross-validated.
# All videos are concatenated along the time axis (axis 0). Built here so the
# cell does not rely on a variable that no other cell of the notebook defines.
eeg_multisub_concat = np.concatenate(eeg_multisub_list, axis=0)
nb_samples = eeg_multisub_concat.shape[0]
len_list = np.linspace(10, 60, 11).astype(int)
corr_mean = []
tsc_mean = []
corr_std = []
tsc_std = []
for length in len_list:
    print('amount of data (min): ', length)
    # Cast to int: slicing and random.randint need integers even if fs is a float.
    len_block = int(length*fs*60)
    idx_end = nb_samples-len_block
    if idx_end < 0:
        raise ValueError('requested block of %d samples exceeds the %d available samples' % (len_block, nb_samples))
    corr_list = []
    tsc_list = []
    for count in range(30):
        # randint is inclusive on both ends, so the block always fits.
        start_point = random.randint(0, idx_end)
        eeg = [eeg_multisub_concat[start_point:start_point+len_block,:,:]]
        corr_CA = algo.CorrelatedComponentAnalysis(eeg, fs, L=5, offset=2, n_components=10, signifi_level=False, message=False, trials=True)
        _, corr_test, _, tsc_test, _, isc_test, _, _ = corr_CA.cross_val()
        corr_list.append(corr_test)
        tsc_list.append(tsc_test)
    corr_all = np.concatenate(tuple(corr_list), axis=0)
    tsc_all = np.concatenate(tuple(tsc_list), axis=0)
    corr_mean.append(np.mean(corr_all, axis=0))
    tsc_mean.append(np.mean(tsc_all))
    print('tsc_mean: ', np.mean(tsc_all))
    corr_std.append(np.std(corr_all, axis=0))
    tsc_std.append(np.std(tsc_all))
    print('tsc_std: ', np.std(tsc_all))

### Stimulus-informed version

In [None]:
# Stimulus-informed CorrCA on all subjects with the objflow feature.
L_EEG = 5 # correlations can be higher if include time lags
offset_EEG = 2
L_Stim = int(fs/2)
offset_Stim = 0
# Per-video EEG of all subjects. Defined here because no other cell of the
# notebook creates eeg_sub_list.
# NOTE(review): assumed to be the all-subject list, mirroring the
# stimulus-informed GCCA cell -- confirm this was the intended input.
eeg_sub_list = [eeg[:,:,0:len(subjects)] for eeg in eeg_multisub_list]
SICorrCA = algo.StimulusInformedCorrCA([eeg_sub_list, objflow_list], fs, [L_EEG, L_Stim], [offset_EEG, offset_Stim])
_, corr_test, _, tsc_test, _, dist_test, _, _, rho = SICorrCA.cross_val()

In [None]:
# Stimulus-informed CorrCA as a function of the number of subjects: for every
# group size, up to 30 randomly ordered subject combinations are evaluated.
# The stimulus feature is defined here (same expression as in the
# stimulus-informed GCCA cell further down) so that this cell runs top to
# bottom on a fresh kernel.
mag_avg_list = [feats[:,8] for feats in features_list]
# Lags / offsets for the [EEG, stimulus] pair; identical for every run.
Llist = [5, int(fs/2)]
offsetlist = [2, 0]
subject_list = range(len(subjects))
corr_mean = []
tsc_mean = []
corr_std = []
tsc_std = []
for n_sub in range(2, len(subjects)+1):
    print('number of subjects: ', n_sub)
    corr_list = []
    tsc_list = []
    count = 0
    comb_list = list(itertools.combinations(subject_list, n_sub))
    random.shuffle(comb_list)
    for item in comb_list:
        count += 1
        if count > 30:
            # Only the first 30 shuffled combinations are evaluated.
            break
        eeg = [eeg[:,:,list(item)] for eeg in eeg_multisub_list]
        nested_datalist = [eeg, mag_avg_list]
        SI_CorrCA = algo.StimulusInformedCorrCA(nested_datalist, fs, Llist, offsetlist, n_components=10, signifi_level=False, message=False)
        _, corr_test, _, tsc_test, _, _, _, _, _ = SI_CorrCA.cross_val()
        corr_list.append(corr_test)
        tsc_list.append(tsc_test)
    corr_all = np.concatenate(tuple(corr_list), axis=0)
    tsc_all = np.concatenate(tuple(tsc_list), axis=0)
    corr_mean.append(np.mean(corr_all, axis=0))
    tsc_mean.append(np.mean(tsc_all))
    print('tsc_mean: ', np.mean(tsc_all))
    corr_std.append(np.std(corr_all, axis=0))
    tsc_std.append(np.std(tsc_all))
    print('tsc_std: ', np.std(tsc_all))


In [None]:
# np.savez('tables/SICorrCA_md.npz', tsc_mean=tsc_mean, tsc_std=tsc_std, corr_mean=corr_mean, corr_std=corr_std, isc_mean=isc_mean, isc_std=isc_std)

## GCCA

In [None]:
# GCCA on all subjects at once.
eeg = [eeg[:, :, :len(subjects)] for eeg in eeg_multisub_list]
GCCA = algo.GeneralizedCCA(
    eeg, fs, L=5, offset=2, n_components=10, signifi_level=True
)
(corr_train, corr_test,
 tsc_train, tsc_test,
 dist_train, dist_test,
 W_train, F_train) = GCCA.cross_val()

In [None]:
# GCCA as a function of the number of subjects: for every group size, up to
# 30 randomly ordered subject combinations are cross-validated and the test
# correlations / TSC values are summarised by mean and standard deviation.
subject_list = range(len(subjects))
corr_mean, tsc_mean = [], []
corr_std, tsc_std = [], []
for n_sub in range(2, len(subjects)+1):
    print('number of subjects: ', n_sub)
    corr_list, tsc_list = [], []
    isc_list = []  # never filled in this cell
    count = 0
    comb_list = list(itertools.combinations(subject_list, n_sub))
    random.shuffle(comb_list)
    for count, item in enumerate(comb_list, start=1):
        if count > 30:
            # Only the first 30 shuffled combinations are evaluated.
            break
        eeg = [eeg[:, :, list(item)] for eeg in eeg_multisub_list]
        GCCA = algo.GeneralizedCCA(
            eeg, fs, L=5, offset=2, n_components=10, signifi_level=False, message=False
        )
        _, corr_test, _, tsc_test, _, _, _, _ = GCCA.cross_val()
        corr_list.append(corr_test)
        tsc_list.append(tsc_test)
    # Pool the results of all evaluated combinations before summarising.
    corr_all = np.concatenate(corr_list, axis=0)
    tsc_all = np.concatenate(tsc_list, axis=0)
    corr_mean.append(np.mean(corr_all, axis=0))
    tsc_mean.append(np.mean(tsc_all))
    print('tsc_mean: ', tsc_mean[-1])
    corr_std.append(np.std(corr_all, axis=0))
    tsc_std.append(np.std(tsc_all))
    print('tsc_std: ', tsc_std[-1])


In [None]:
# np.savez('tables/GCCA_18.npz', tsc_mean=tsc_mean, tsc_std=tsc_std, corr_mean=corr_mean, corr_std=corr_std)

In [None]:
# GCCA as a function of the amount of data: for every duration in len_list
# (minutes), 30 randomly placed contiguous blocks are cross-validated.
# All videos are concatenated along the time axis (axis 0). Built here so the
# cell does not rely on a variable that no other cell of the notebook defines.
eeg_multisub_concat = np.concatenate(eeg_multisub_list, axis=0)
nb_samples = eeg_multisub_concat.shape[0]
len_list = np.linspace(10, 60, 11).astype(int)
corr_mean = []
tsc_mean = []
corr_std = []
tsc_std = []
for length in len_list:
    print('amount of data (min): ', length)
    # Cast to int: slicing and random.randint need integers even if fs is a float.
    len_block = int(length*fs*60)
    idx_end = nb_samples-len_block
    if idx_end < 0:
        raise ValueError('requested block of %d samples exceeds the %d available samples' % (len_block, nb_samples))
    corr_list = []
    tsc_list = []
    for count in range(30):
        # randint is inclusive on both ends, so the block always fits.
        start_point = random.randint(0, idx_end)
        eeg = [eeg_multisub_concat[start_point:start_point+len_block,:,:]]
        GCCA = algo.GeneralizedCCA(eeg, fs, L=5, offset=2, n_components=10, signifi_level=False, message=False, trials=True)
        _, corr_test, _, tsc_test, _, _, _, _ = GCCA.cross_val()
        corr_list.append(corr_test)
        tsc_list.append(tsc_test)
    corr_all = np.concatenate(tuple(corr_list), axis=0)
    tsc_all = np.concatenate(tuple(tsc_list), axis=0)
    corr_mean.append(np.mean(corr_all, axis=0))
    tsc_mean.append(np.mean(tsc_all))
    print('tsc_mean: ', np.mean(tsc_all))
    corr_std.append(np.std(corr_all, axis=0))
    tsc_std.append(np.std(tsc_all))
    print('tsc_std: ', np.std(tsc_all))

### Stimulus-informed version

In [None]:
# Stimulus-informed GCCA on all subjects; the stimulus is column 8 of every
# video's feature matrix.
eeg = [eeg[:, :, :len(subjects)] for eeg in eeg_multisub_list]
mag_avg_list = [feats[:, 8] for feats in features_list]
nested_datalist = [eeg, mag_avg_list]
# First entry applies to the EEG, second to the stimulus.
Llist = [5, int(fs / 2)]
offsetlist = [2, 0]
SI_GCCA = algo.StimulusInformedGCCA(
    nested_datalist, fs, Llist, offsetlist,
    n_components=10, message=True, signifi_level=False,
)
(_, corr_test, _, tsc_test, _,
 dist_test, _, F_train, rho) = SI_GCCA.cross_val(rho=1)

In [None]:
# Stimulus-informed GCCA as a function of the number of subjects: for every
# group size, up to 30 randomly ordered subject combinations are evaluated
# against mag_avg_list.
subject_list = range(len(subjects))
corr_mean, tsc_mean = [], []
corr_std, tsc_std = [], []
for n_sub in range(2, len(subjects)+1):
    print('number of subjects: ', n_sub)
    corr_list, tsc_list = [], []
    isc_list = []  # never filled in this cell
    count = 0
    comb_list = list(itertools.combinations(subject_list, n_sub))
    random.shuffle(comb_list)
    for count, item in enumerate(comb_list, start=1):
        if count > 30:
            # Only the first 30 shuffled combinations are evaluated.
            break
        eeg = [eeg[:, :, list(item)] for eeg in eeg_multisub_list]
        nested_datalist = [eeg, mag_avg_list]
        Llist = [5, int(fs / 2)]
        offsetlist = [2, 0]
        SI_GCCA = algo.StimulusInformedGCCA(
            nested_datalist, fs, Llist, offsetlist,
            n_components=10, signifi_level=False, message=False,
        )
        _, corr_test, _, tsc_test, _, _, _, _, _ = SI_GCCA.cross_val()
        corr_list.append(corr_test)
        tsc_list.append(tsc_test)
    # Pool the results of all evaluated combinations before summarising.
    corr_all = np.concatenate(corr_list, axis=0)
    tsc_all = np.concatenate(tsc_list, axis=0)
    corr_mean.append(np.mean(corr_all, axis=0))
    tsc_mean.append(np.mean(tsc_all))
    print('tsc_mean: ', tsc_mean[-1])
    corr_std.append(np.std(corr_all, axis=0))
    tsc_std.append(np.std(tsc_all))
    print('tsc_std: ', tsc_std[-1])


In [None]:
# np.savez('tables/SIGCCA_md.npz', tsc_mean=tsc_mean, tsc_std=tsc_std, corr_mean=corr_mean, corr_std=corr_std)

## CorrCA/GCCA + LS

In [None]:
def pip_GCCA_LS(eeg_multisub_list, feature_list, n_sub, tab_name, ifcorrca, L_EEG=5, L_Stim=None, offset_EEG=2, offset_Stim=0, id_sub=0, n_components=10):
    """Run the GCCA/CorrCA + least-squares pipeline for every subject and save a table.

    An ``algo.LSGCCA`` object is built once and ``to_latent_space()`` is
    called on it; then, for each subject index in ``range(n_sub)``, the
    object's ``id_sub`` attribute is set and ``cross_val()`` is run.

    Parameters
    ----------
    eeg_multisub_list : list of ndarray
        Per-video multi-subject EEG arrays, forwarded to ``algo.LSGCCA``.
    feature_list : list
        Per-video stimulus features, forwarded to ``algo.LSGCCA``.
    n_sub : int
        Number of subjects to evaluate.
    tab_name : str
        Base name of the CSV written to 'tables/GCCA_LS/OneShot/'.
    ifcorrca : bool
        Forwarded to ``algo.LSGCCA`` as ``corrca``.
    L_EEG, L_Stim, offset_EEG, offset_Stim, n_components
        Forwarded to ``algo.LSGCCA``. ``L_Stim=None`` (the default) resolves
        to ``int(fs/2)`` at call time.
    id_sub : int
        Subject index the ``algo.LSGCCA`` object is constructed with.

    Returns
    -------
    None
        Side effect only: a CSV with columns ID, CC1..CCn, Sig_corr.

    Notes
    -----
    ``fs`` is read from the notebook's global namespace; it is not a parameter.
    """
    # Resolve the default when the function is called, not when it is defined.
    if L_Stim is None:
        L_Stim = int(fs/2)
    # Forward the caller's id_sub instead of a hard-coded 0 (same value by default).
    LSGCCA = algo.LSGCCA(eeg_multisub_list, feature_list, fs, L_EEG, L_Stim, offset_EEG, offset_Stim, id_sub=id_sub, corrca=ifcorrca, n_components=n_components)
    LSGCCA.to_latent_space()
    cc = np.zeros((n_sub, n_components+2))
    # A separate loop variable keeps the id_sub parameter from being shadowed.
    for sub_idx in range(n_sub):
        print('subject: ', sub_idx+1)
        LSGCCA.id_sub = sub_idx
        _, corr_test, sig_corr, We_train, Ws_train, F_train = LSGCCA.cross_val()
        # Row layout: [1-based subject id, mean test correlation per component, sig_corr]
        cc[sub_idx,0] = int(sub_idx+1)
        cc[sub_idx,1:n_components+1] = np.average(corr_test, axis=0)
        cc[sub_idx,-1] = sig_corr
    columns = ['ID'] + ['CC'+str(i+1) for i in range(n_components)] + ['Sig_corr']
    df_cca = pd.DataFrame(cc, columns = columns)
    os.makedirs('tables/GCCA_LS/OneShot', exist_ok=True)
    df_cca.to_csv('tables/GCCA_LS/OneShot/'+tab_name+'.csv', index=False)

In [None]:
# GCCA + LS pipeline (corrca=False) with objflow_list; table written as 'GCCA_objflow.csv'.
pip_GCCA_LS(eeg_multisub_list, objflow_list, n_sub, 'GCCA_objflow', ifcorrca=False)

In [None]:
# GCCA + LS pipeline (corrca=False) with objtempctr_list; table written as 'GCCA_objtempctr.csv'.
pip_GCCA_LS(eeg_multisub_list, objtempctr_list, n_sub, 'GCCA_objtempctr', ifcorrca=False)