In [None]:
import scipy.io
import numpy as np
import utils
import algo
import copy
import mne
import matplotlib.pyplot as plt
import pandas as pd
import os
from numpy import linalg as LA
from scipy import signal
from scipy.linalg import toeplitz
from scipy.stats import zscore, pearsonr
from sklearn.covariance import LedoitWolf
from tqdm import tqdm
%matplotlib widget

## Data Loading

In [None]:
def multisub_data_org(subjects, video_id, fsStim, bads, mask=False, band=None, eog=True, regression=True, normalize=True):
    """Load the stimulus features of one video plus the preprocessed recordings of several subjects.

    The feature matrix is read from ``<video_id>_mask.npy`` when ``mask`` is true,
    otherwise from ``<video_id>_flow.npy``. Its last four columns are dropped and
    the columns of ``<video_id>_tempctra.npy`` are appended. Each subject's
    ``<video_id>_eeg.set`` file is passed through ``utils.preprocessing``; the EEG
    and EOG channels of the first returned recording and the EEG channels of the
    ``high_freq`` recording are extracted as (samples, channels) arrays.

    All arrays are cut to the shortest length found among the features, the EEG
    data and the high-frequency data, and ``fsStim`` samples are trimmed from
    both ends before the per-subject arrays are stacked along a third axis.

    Args:
        subjects: sequence of subject folder names.
        video_id: prefix of the feature and recording file names.
        fsStim: forwarded to ``utils.preprocessing`` as ``resamp_freqs``; also the
            number of samples trimmed at each end.
        bads: per-subject ``bads`` argument, indexed like ``subjects``.
        mask: choose the ``_mask.npy`` feature file instead of ``_flow.npy``.
        band, eog, regression, normalize: forwarded unchanged to
            ``utils.preprocessing``.

    Returns:
        ``(feats, eeg_multisub, eog_multisub, hf_multisub, fs, times)`` where
        ``fs`` is the value returned by ``utils.preprocessing`` for the last
        subject and ``times`` is ``arange(T) / fs``.
        NOTE(review): ``times`` has T entries whereas the returned arrays hold
        ``T - 2*fsStim`` rows after trimming - confirm which time base callers expect.
    """
    feats_dir = '../Feature extraction/features/'
    feats_suffix = '_mask.npy' if mask else '_flow.npy'
    feats = np.load(feats_dir + video_id + feats_suffix)
    tempctra = np.load(feats_dir + video_id + '_tempctra.npy')
    # Drop the trailing box columns and append the temporal-contrast columns.
    # Layout: histogram of flow + mag/up/down/left/right + absTC/sqTC/muTC
    feats = np.concatenate((feats[:, :-4], tempctra), axis=1)

    T = feats.shape[0]
    eeg_list, eog_list, hf_list = [], [], []
    for idx, subject in enumerate(subjects):
        eeg_path = '../../Experiments/data/' + subject + '/' + video_id + '_eeg.set'
        eeg_prepro, fs, high_freq = utils.preprocessing(
            eeg_path,
            HP_cutoff=0.5,
            AC_freqs=50,
            band=band,
            resamp_freqs=fsStim,
            bads=bads[idx],
            eog=eog,
            regression=regression,
            normalize=normalize,
        )
        eeg_picks = mne.pick_types(eeg_prepro.info, eeg=True)
        eog_picks = mne.pick_types(eeg_prepro.info, eog=True)
        eeg_data, _ = eeg_prepro[eeg_picks]
        eog_data, _ = eeg_prepro[eog_picks]
        # Indexing yields (channels, samples); store as (samples, channels).
        eeg_samples = eeg_data.T
        eeg_list.append(eeg_samples)
        eog_list.append(eog_data.T)
        T = min(T, eeg_samples.shape[0])

        hf_picks = mne.pick_types(high_freq.info, eeg=True)
        hf_data, _ = high_freq[hf_picks]
        hf_samples = hf_data.T
        hf_list.append(hf_samples)
        T = min(T, hf_samples.shape[0])

    # Common length, minus fsStim samples at both ends.
    keep = slice(fsStim, T - fsStim)
    feats = feats[keep, :]
    eeg_multisub = np.concatenate([rec[keep, :, np.newaxis] for rec in eeg_list], axis=2)
    eog_multisub = np.concatenate([rec[keep, :, np.newaxis] for rec in eog_list], axis=2)
    hf_multisub = np.concatenate([rec[keep, :, np.newaxis] for rec in hf_list], axis=2)
    times = np.arange(T) / fs
    return feats, eeg_multisub, eog_multisub, hf_multisub, fs, times

In [None]:
%%capture
# Recording configuration: one entry of `bads` per entry of `subjects`, in the same order.
video_id = 'Mr'
subjects = ['VC', 'HV', 'JC', 'DV', 'CD', 'JV', 'AD', 'KY']
bads = [
    ['B25', 'B31'],
    ['B25', 'B31', 'A20', 'A21', 'A26', 'A31'],
    ['B25', 'B31', 'B32', 'A28', 'A29', 'A30'],
    ['A25', 'A30', 'B25', 'B29'],
    ['A30', 'B25', 'B31'],
    ['A30', 'B25'],
    ['B25', 'B28'],
    [],
]
# The returned time axis is not needed in this notebook, hence the throwaway `_`.
features, eeg_multisub, eog_multisub, hf_multisub, fs, _ = multisub_data_org(
    subjects,
    video_id,
    fsStim=30,
    bads=bads,
    band=[25, 35],
    eog=True,
    regression=True,
    normalize=True,
)

In [None]:
n_sub = len(subjects)
# Overwrite column 15 with its absolute value (in place, so re-running the cell is harmless),
# then derive the smoothed feature matrix used by the analyses below.
np.abs(features[:, 15], out=features[:, 15])
features_smooth = utils.clean_features(features, smooth=True)

In [None]:
# Number of feature rows -> minutes; 30 is presumably the sampling rate (fsStim=30 above) - confirm.
print('Length of the video (min):', len(features) / 30 / 60)

In [None]:
# Overlay selected smoothed feature columns, each scaled to unit L2 norm so they share one axis.
plt.close()
for column, label in ((8, 'flow'), (13, 'abs temporal contrast')):
    trace = features_smooth[:, column]
    plt.plot(trace / LA.norm(trace), label=label)
# Not plotted by default: column 14 ('Sq temporal contrast') and column 15 ('temporal contrast').
plt.legend()
plt.show()

## Keep shot cuts

In [None]:
def pip_CCA(eeg_multisub, feature, fs, n_sub, fig_name, tab_name, L_EEG=3, L_Stim=None, offset_EEG=1, offset_Stim=0, signifi_level=True, n_components=5):
    """Run a per-subject CCA between EEG and one stimulus feature and save the results.

    For every subject index in ``range(n_sub)`` an
    ``algo.CanonicalCorrelationAnalysis`` object is built from that subject's
    slice ``eeg_multisub[:, :, id_sub]`` and ``feature``, cross-validated, and its
    forward model is plotted to ``figures/CCA/MrBean/<fig_name><id>.png``.
    A summary table is written to ``tables/CCA/MrBean/<tab_name>.csv``.

    Args:
        eeg_multisub: array indexed as ``[:, :, subject]``.
        feature: stimulus feature passed (wrapped in a list) to the CCA object.
        fs: sampling rate forwarded to the CCA object.
        n_sub: number of subjects to process.
        fig_name: file-name prefix of the per-subject figures.
        tab_name: base name (without extension) of the CSV table.
        L_EEG, offset_EEG, offset_Stim, signifi_level, n_components:
            forwarded unchanged to ``algo.CanonicalCorrelationAnalysis``.
        L_Stim: forwarded likewise; ``None`` (the default) means ``int(fs/2)``
            computed from the ``fs`` argument. Previously the default was
            evaluated once at definition time from whatever global ``fs``
            existed then, which failed on a fresh kernel and ignored the
            argument.

    Returns:
        None. Side effects: prints progress, creates the output folders if
        needed, writes one PNG per subject and one CSV with columns
        ``ID, CC1..CCn, TSC(top2), Sig_corr``.
    """
    if L_Stim is None:
        L_Stim = int(fs/2)
    fig_dir = 'figures/CCA/MrBean'
    tab_dir = 'tables/CCA/MrBean'
    # Create the figure folder once up front instead of re-checking on every subject.
    os.makedirs(fig_dir, exist_ok=True)

    # One row per subject: ID, n_components test correlations, TSC, significance level.
    cc = np.zeros((n_sub, n_components+3))
    for id_sub in range(n_sub):
        print('subject: ', id_sub+1)
        eeg_onesub_list = [eeg_multisub[:,:,id_sub]]
        CCA = algo.CanonicalCorrelationAnalysis(eeg_onesub_list, [feature], fs, L_EEG, L_Stim, offset_EEG, offset_Stim, signifi_level=signifi_level, n_components=n_components)
        _, corr_test, sig_corr, _, tsc_test, _, _, V_A_train, _ = CCA.cross_val()
        cc[id_sub,0] = id_sub+1
        # Average the test correlations along axis 0 (presumably the CV folds - confirm in algo).
        cc[id_sub,1:n_components+1] = np.average(corr_test, axis=0)
        cc[id_sub,-2] = np.average(tsc_test)
        cc[id_sub,-1] = sig_corr
        eeg_onesub = np.concatenate(tuple(eeg_onesub_list), axis=0)
        forward_model = CCA.forward_model(eeg_onesub, V_A_train)
        utils.plot_spatial_resp(forward_model, corr_test, fig_dir+'/'+fig_name+str(id_sub+1)+'.png')

    columns = ['ID'] + ['CC'+str(i+1) for i in range(n_components)] + ['TSC(top2)','Sig_corr']
    df_cca = pd.DataFrame(cc, columns = columns)
    os.makedirs(tab_dir, exist_ok=True)
    df_cca.to_csv(tab_dir+'/'+tab_name+'.csv', index=False)

In [None]:
# Column 8 of the smoothed features is the trace labelled 'flow' in the plot above.
avg_flow = features_smooth[:, 8]
pip_CCA(
    eeg_multisub, avg_flow, fs, n_sub,
    fig_name='SR_avgflow_',
    tab_name='avgflow_eeg',
    L_EEG=3,
    L_Stim=int(fs/2),
    offset_EEG=1,
    offset_Stim=0,
    signifi_level=True,
    n_components=5,
)

In [None]:
# Column 13 of the smoothed features is the trace labelled 'abs temporal contrast' in the plot above.
avg_tempctr = features_smooth[:, 13]
pip_CCA(
    eeg_multisub, avg_tempctr, fs, n_sub,
    fig_name='SR_avgtempctr_',
    tab_name='avgtempctr_eeg',
    L_EEG=3,
    L_Stim=int(fs/2),
    offset_EEG=1,
    offset_Stim=0,
    signifi_level=True,
    n_components=5,
)

In [None]:
# Manuscript figure: two-component CCA between the third subject (index 2) and the
# average-flow feature, without the significance-level estimation.
eeg_onesub_list = [eeg_multisub[:, :, 2]]
CCA = algo.CanonicalCorrelationAnalysis(
    eeg_onesub_list, [avg_flow], fs,
    L_EEG=3,
    L_Stim=int(fs/2),
    offset_EEG=1,
    offset_Stim=0,
    signifi_level=False,
    n_components=2,
)
(_, corr_test, sig_corr, _, tsc_test,
 _, _, V_A_train, V_B_train) = CCA.cross_val()
eeg_onesub = np.concatenate(eeg_onesub_list, axis=0)
forward_model = CCA.forward_model(eeg_onesub, V_A_train)
utils.plot_spatial_resp(
    forward_model, corr_test,
    '../../Manuscript/1st/images/avgflow_eeg.png',
    fig_size=(6, 2.5),
)

In [None]:
# Manuscript figure: two-component CCA between the third subject (index 2) and the
# temporal-contrast feature, without the significance-level estimation.
eeg_onesub_list = [eeg_multisub[:, :, 2]]
CCA = algo.CanonicalCorrelationAnalysis(
    eeg_onesub_list, [avg_tempctr], fs,
    L_EEG=3,
    L_Stim=int(fs/2),
    offset_EEG=1,
    offset_Stim=0,
    signifi_level=False,
    n_components=2,
)
(_, corr_test, sig_corr, _, tsc_test,
 _, _, V_A_train, V_B_train) = CCA.cross_val()
eeg_onesub = np.concatenate(eeg_onesub_list, axis=0)
forward_model = CCA.forward_model(eeg_onesub, V_A_train)
utils.plot_spatial_resp(
    forward_model, corr_test,
    '../../Manuscript/1st/images/avgtempctr_eeg.png',
    fig_size=(6, 2.5),
)

In [None]:
# `binary_sc` is otherwise only created further down, in the "Remove shot cuts" section,
# so on a fresh kernel (Restart & Run All) this cell failed with a NameError.
# Build it here with the same peak-detection settings that section uses.
avgflow_norm = features[:,8]/LA.norm(features[:,8])
peak, _ = signal.find_peaks(avgflow_norm, prominence=(0.02, None), distance=fs, width=(None, 1.5))
binary_sc = np.zeros_like(avgflow_norm)
binary_sc[peak] = 1

# Manuscript figure: two-component CCA between the third subject (index 2) and the
# binary shot-cut feature, without the significance-level estimation.
eeg_onesub_list = [eeg_multisub[:,:,2]]
CCA = algo.CanonicalCorrelationAnalysis(eeg_onesub_list, [binary_sc], fs, L_EEG=3, L_Stim=int(fs/2), offset_EEG=1, offset_Stim=0, signifi_level=False, n_components=2)
_, corr_test, sig_corr, _, tsc_test, _, _, V_A_train, V_B_train = CCA.cross_val()
eeg_onesub = np.concatenate(tuple(eeg_onesub_list), axis=0)
forward_model = CCA.forward_model(eeg_onesub, V_A_train)
utils.plot_spatial_resp(forward_model, corr_test, '../../Manuscript/1st/images/binary_eeg.png', fig_size=(6,2.5))

## Remove shot cuts

In [None]:
# Shot cuts are taken to be the narrow, prominent peaks of the unit-norm average flow
# (feature column 8): prominence of at least 0.02, width of at most 1.5 samples, and
# neighbouring peaks at least `fs` samples apart.
avgflow_norm = features[:, 8] / LA.norm(features[:, 8])
peak, _ = signal.find_peaks(
    avgflow_norm,
    distance=fs,
    prominence=(0.02, None),
    width=(None, 1.5),
)
plt.close()
# Without explicit x values the sample index is used as the x axis.
plt.plot(avgflow_norm)
plt.plot(peak, avgflow_norm[peak], "x")
plt.tight_layout()
plt.show()

In [None]:
# Handcrafted binary shot-cut feature: 1 at every detected peak, 0 everywhere else.
binary_sc = np.zeros(avgflow_norm.shape, dtype=avgflow_norm.dtype)
np.put(binary_sc, peak, 1)

In [None]:
# Plot the binary shot-cut feature against time and export it for the manuscript.
plt.close()
time_axis = np.arange(len(avgflow_norm))/fs
fig, ax = plt.subplots(figsize=(6, 2.5))
ax.plot(time_axis, binary_sc)
xlabel = 'Time (s)'
ylabel = 'Binary shot cut feature'
ax.set_xlabel(xlabel)
ax.set_ylabel(ylabel)
fig.tight_layout()
# Save BEFORE showing: on non-interactive backends plt.show() releases the figure,
# so a later savefig would write an empty image.
fig.savefig('../../Manuscript/1st/images/binary.png', dpi=1200)
plt.show()

In [None]:
# Per-subject CCA between the EEG and the binary shot-cut feature.
pip_CCA(
    eeg_multisub, binary_sc, fs, n_sub,
    fig_name='SR_binary_',
    tab_name='binary_eeg',
    L_EEG=3,
    L_Stim=int(fs/2),
    offset_EEG=1,
    offset_Stim=0,
    signifi_level=True,
    n_components=5,
)

In [None]:
# Remove the samples within 1 s before and after every detected shot cut.
# The window is derived from the sampling rate instead of a hard-coded 30 samples;
# with fs == 30 (presumably the case here, since fsStim=30 - confirm) the result is unchanged.
one_second = int(round(fs))
n_samples = len(avgflow_norm)
# A set comprehension merges overlapping windows without repeated list concatenation.
nearby_idx = sorted({
    idx
    for p in peak
    for idx in range(max(0, p-one_second), min(p+one_second, n_samples))
})
# The same rows are dropped from the features and from both recordings so they stay aligned;
# smoothing is applied after the removal.
features_clean = utils.clean_features(np.delete(features, nearby_idx, axis=0), smooth=True)
EEG_clean = np.delete(eeg_multisub, nearby_idx, axis=0)
EOG_clean = np.delete(eog_multisub, nearby_idx, axis=0)

In [None]:
# Same analysis as for the average flow above, on the data with shot-cut neighbourhoods removed.
avgflow_clean = features_clean[:, 8]
pip_CCA(
    EEG_clean, avgflow_clean, fs, n_sub,
    fig_name='SR_avgflow_clean_',
    tab_name='avgflow_clean_eeg',
    L_EEG=3,
    L_Stim=int(fs/2),
    offset_EEG=1,
    offset_Stim=0,
    signifi_level=True,
    n_components=5,
)

In [None]:
# Same analysis as for the temporal contrast above, on the data with shot-cut neighbourhoods removed.
avgtempctr_clean = features_clean[:, 13]
pip_CCA(
    EEG_clean, avgtempctr_clean, fs, n_sub,
    fig_name='SR_avgtempctr_clean_',
    tab_name='avgtempctr_clean_eeg',
    L_EEG=3,
    L_Stim=int(fs/2),
    offset_EEG=1,
    offset_Stim=0,
    signifi_level=True,
    n_components=5,
)

In [None]:
# Normalize the features between two shot cuts
# features_normalized = copy.deepcopy(features)
# idx_start = 0
# for p in peak:
#     idx_end = p
#     features_normalized[idx_start+1:idx_end, :] = features[idx_start+1:idx_end, :]/LA.norm(features[idx_start+1:idx_end, :], axis=0)
#     idx_start = p
# features_normalized[idx_start+1:, :] = features[idx_start+1:, :]/LA.norm(features[idx_start+1:, :], axis=0)

In [None]:
# Alternative: Don't delete but interpolate
# features_nonan = utils.clean_features(features, smooth=False)
# nb_features = features_nonan.shape[1]
# for i in range(nb_features):
#     for p in peak:
#         x = np.array([p-2, p-1, p+1, p+2])
#         y = np.array([features_nonan[p-2,i], features_nonan[p-1,i], features_nonan[p+1,i], features_nonan[p+2,i]])
#         xnew = np.array([p-2, p-1, p, p+1, p+2])
#         ynew = np.interp(xnew, x, y)
#         features_nonan[p,i] = ynew[2]
# features_interp = utils.clean_features(features_nonan, smooth=True)

## CorrCA/GCCA

In [None]:
# CorrCA on the full multi-subject EEG; the spatial responses are saved under figures/CorrCA/MrBean.
corr_CA = algo.CorrelatedComponentAnalysis([eeg_multisub], fs, L=5, offset=2, signifi_level=True, n_components=10)
corr_train, corr_test, tsc_train, tsc_test, isc_train, isc_test, W_train, F_train = corr_CA.cross_val()
# exist_ok=True replaces the separate exists()/makedirs() pair (no check-then-create gap).
os.makedirs('figures/CorrCA/MrBean', exist_ok=True)
utils.plot_spatial_resp(F_train, corr_test, 'figures/CorrCA/MrBean/'+str(n_sub)+'.png')

In [None]:
# CorrCA on the EEG with shot-cut neighbourhoods removed.
corr_CA = algo.CorrelatedComponentAnalysis([EEG_clean], fs, L=5, offset=2, signifi_level=True, n_components=10)
corr_train, corr_test, tsc_train, tsc_test, isc_train, isc_test, W_train, F_train = corr_CA.cross_val()
# Ensure the output folder exists here too, so this cell does not rely on another cell having created it.
os.makedirs('figures/CorrCA/MrBean', exist_ok=True)
utils.plot_spatial_resp(F_train, corr_test, 'figures/CorrCA/MrBean/clean_'+str(n_sub)+'.png')

In [None]:
# GCCA on the full multi-subject EEG; the spatial responses are saved under figures/GCCA/MrBean.
GCCA = algo.GeneralizedCCA([eeg_multisub], fs, L=5, offset=2, n_components=10, signifi_level=True)
corr_train, corr_test, tsc_train, tsc_test, dist_train, dist_test, W_train, F_train = GCCA.cross_val()
# exist_ok=True replaces the separate exists()/makedirs() pair (no check-then-create gap).
os.makedirs('figures/GCCA/MrBean', exist_ok=True)
utils.plot_spatial_resp(F_train, corr_test, 'figures/GCCA/MrBean/'+str(n_sub)+'.png')

In [None]:
# GCCA on the EEG with shot-cut neighbourhoods removed.
GCCA = algo.GeneralizedCCA([EEG_clean], fs, L=5, offset=2, n_components=10, signifi_level=True)
corr_train, corr_test, tsc_train, tsc_test, dist_train, dist_test, W_train, F_train = GCCA.cross_val()
# exist_ok=True replaces the separate exists()/makedirs() pair (no check-then-create gap).
os.makedirs('figures/GCCA/MrBean', exist_ok=True)
utils.plot_spatial_resp(F_train, corr_test, 'figures/GCCA/MrBean/clean_'+str(n_sub)+'.png')

## CorrCA/GCCA + LS

In [None]:
def pip_GCCA_LS(eeg_multisub, feature, n_sub, tab_name, ifcorrca, L_EEG=5, L_Stim=None, offset_EEG=2, offset_Stim=0, id_sub=0, n_components=10):
    """Evaluate ``algo.LSGCCA`` for every subject and write a summary table.

    One ``algo.LSGCCA`` object is built from the multi-subject EEG and the
    stimulus feature, ``to_latent_space()`` is called once, and then
    ``cross_val()`` is run with ``LSGCCA.id_sub`` set to each subject index in
    turn. The averaged test correlations and the significance level are stored
    in ``tables/GCCA_LS/MrBean/<tab_name>.csv``.

    Args:
        eeg_multisub: multi-subject EEG array, passed (wrapped in a list) to ``algo.LSGCCA``.
        feature: stimulus feature, passed (wrapped in a list) to ``algo.LSGCCA``.
        n_sub: number of subjects to iterate over.
        tab_name: base name (without extension) of the CSV table.
        ifcorrca: forwarded as the ``corrca`` argument of ``algo.LSGCCA``.
        L_EEG, offset_EEG, offset_Stim, n_components: forwarded unchanged.
        L_Stim: forwarded likewise; ``None`` (the default) means ``int(fs/2)``,
            evaluated at call time. Previously the default was frozen at
            definition time, so the ``def`` itself failed before ``fs`` existed.
        id_sub: subject index handed to the ``algo.LSGCCA`` constructor. It used
            to be ignored (a literal 0 was passed) and then shadowed by the loop
            variable; the default of 0 keeps the old behaviour.

    Note:
        The sampling rate is read from the notebook-level variable ``fs``.

    Returns:
        None. Side effects: prints progress, creates the output folder if
        needed and writes a CSV with columns ``ID, CC1..CCn, Sig_corr``.
    """
    if L_Stim is None:
        L_Stim = int(fs/2)
    LSGCCA = algo.LSGCCA([eeg_multisub], [feature], fs, L_EEG, L_Stim, offset_EEG, offset_Stim, id_sub=id_sub, corrca=ifcorrca, n_components=n_components)
    LSGCCA.to_latent_space()
    # One row per subject: ID, n_components test correlations, significance level.
    cc = np.zeros((n_sub, n_components+2))
    for sub in range(n_sub):
        print('subject: ', sub+1)
        # Re-target the same object at the current subject before cross-validating.
        LSGCCA.id_sub = sub
        _, corr_test, sig_corr, _, _, _ = LSGCCA.cross_val()
        cc[sub,0] = sub+1
        cc[sub,1:n_components+1] = np.average(corr_test, axis=0)
        cc[sub,-1] = sig_corr
    columns = ['ID'] + ['CC'+str(i+1) for i in range(n_components)] + ['Sig_corr']
    df_cca = pd.DataFrame(cc, columns = columns)
    os.makedirs('tables/GCCA_LS/MrBean', exist_ok=True)
    df_cca.to_csv('tables/GCCA_LS/MrBean/'+tab_name+'.csv', index=False)

In [None]:
# Binary shot-cut feature with the CorrCA variant selected (ifcorrca=True).
pip_GCCA_LS(
    eeg_multisub,
    binary_sc,
    n_sub,
    tab_name='CorrCA_binary',
    ifcorrca=True,
)

In [None]:
# Binary shot-cut feature with the GCCA variant selected (ifcorrca=False).
pip_GCCA_LS(
    eeg_multisub,
    binary_sc,
    n_sub,
    tab_name='GCCA_binary',
    ifcorrca=False,
)