In [None]:
import numpy as np
import matplotlib.pyplot as plt
import utils
import algo
import pandas as pd
import os
%matplotlib widget

## Load data

In [None]:
# Experimental condition and whether the single-object recordings are analysed as well.
PATTERN = 'Overlay'
SINGLEOBJ = True

# Participants included in the analysis (IDs 3 and 16 are not part of the list).
subjects = [
    'Pilot_1', 'Pilot_2', 'Pilot_4', 'Pilot_5', 'Pilot_6', 'Pilot_7',
    'Pilot_8', 'Pilot_9', 'Pilot_10', 'Pilot_11', 'Pilot_12', 'Pilot_13',
    'Pilot_14', 'Pilot_15', 'Pilot_17', 'Pilot_18', 'Pilot_19',
]
nb_subj = len(subjects)
# One recording folder per participant, relative to the notebook location.
subj_path = [f'../../Experiments/data/Two_Obj/{PATTERN}/{sub}/' for sub in subjects]

# Channel labels handed to utils.load_data as `bads` (presumably bad EEG channels - confirm),
# one entry per participant; assumed to follow the order of `subjects`.
bads = [
    ['A30', 'B25'],  # Pilot_1
    ['B25'],         # Pilot_2
    ['B25'],         # Pilot_4
    [],              # Pilot_5
    ['A31', 'B31'],  # Pilot_6
    ['B25'],         # Pilot_7
    ['A30', 'B25'],  # Pilot_8
    ['A30', 'B25'],  # Pilot_9
    ['B25'],         # Pilot_10
    ['B25', 'B26'],  # Pilot_11
    ['A30', 'B25'],  # Pilot_12
    ['B31'],         # Pilot_13
    ['B25', 'A23'],  # Pilot_14
    ['A30', 'B25'],  # Pilot_15
    ['B25'],         # Pilot_17
    ['B25'],         # Pilot_18
    ['A30', 'B25'],  # Pilot_19
]

# Sampling rate of the stimulus features (presumably the video frame rate - confirm)
# and the folder holding the precomputed features.
fsStim = 30
feats_path_folder = '../Feat_Multi/features/'

In [None]:
# Loader switches, forwarded unchanged to utils.load_data (their exact effect is defined there).
LOAD_ONLY = True
ALL_NEW = False

# Overlaid-object recordings: attended and unattended feature sets are returned separately.
(
    eeg_multisubj_list,
    eog_multisubj_list,
    feat_all_att_list,
    feat_all_unatt_list,
    gaze_multisubj_list,
    fs,
    len_seg_list,
) = utils.load_data(
    subj_path, fsStim, bads, feats_path_folder, PATTERN,
    singleobj=False, LOAD_ONLY=LOAD_ONLY, ALL_NEW=ALL_NEW,
)

# Single-object recordings: only one feature set is kept, the fourth return value is discarded.
(
    eeg_multisubj_list_SO,
    eog_multisubj_list_SO,
    feat_all_list_SO,
    _,
    gaze_multisubj_list_SO,
    fs,
    len_seg_list_SO,
) = utils.load_data(
    subj_path, fsStim, bads, feats_path_folder, PATTERN,
    singleobj=True, LOAD_ONLY=LOAD_ONLY, ALL_NEW=ALL_NEW,
)

In [None]:
# Derive the eye-movement signals for the overlaid-object data.
# Gaze channels 0-1 are kept as coordinates, channel 2 as the saccade signal and channel 3 as the blink signal.
gaze_velocity_list = [utils.calcu_gaze_velocity(gaze_rec) for gaze_rec in gaze_multisubj_list]
gaze_coords_list = [gaze_rec[:, 0:2, :] for gaze_rec in gaze_multisubj_list]
saccade_multisubj_list = [gaze_rec[:, 2, :][:, np.newaxis, :] for gaze_rec in gaze_multisubj_list]
blink_multisubj_list = [gaze_rec[:, 3, :][:, np.newaxis, :] for gaze_rec in gaze_multisubj_list]
saccade_multisubj_list = utils.refine_saccades(saccade_multisubj_list, blink_multisubj_list)
eog_velocity_list = [utils.calcu_gaze_vel_from_EOG(eog_rec) for eog_rec in eog_multisubj_list]

# Interpolate over blinks in each derived signal, using the blink signal of the same recording.
gaze_velocity_list = [
    utils.interpolate_blinks(signal, blink_sig)
    for signal, blink_sig in zip(gaze_velocity_list, blink_multisubj_list)
]
gaze_coords_list = [
    utils.interpolate_blinks(signal, blink_sig)
    for signal, blink_sig in zip(gaze_coords_list, blink_multisubj_list)
]
# blinks are not removed as cleanly as in the gaze data
eog_velocity_list = [
    utils.interpolate_blinks(signal, blink_sig)
    for signal, blink_sig in zip(eog_velocity_list, blink_multisubj_list)
]

# All signal lists gathered in a fixed order so that later steps can process them uniformly.
mod_list = [
    eeg_multisubj_list,
    eog_multisubj_list,
    gaze_coords_list,
    gaze_velocity_list,
    eog_velocity_list,
    saccade_multisubj_list,
    feat_all_att_list,
    feat_all_unatt_list,
]

In [None]:
if SINGLEOBJ:
    # Same eye-signal preparation as for the overlaid data, applied to the single-object recordings.
    gaze_velocity_list_SO = [utils.calcu_gaze_velocity(gaze_rec) for gaze_rec in gaze_multisubj_list_SO]
    gaze_coords_list_SO = [gaze_rec[:, 0:2, :] for gaze_rec in gaze_multisubj_list_SO]
    saccade_multisubj_list_SO = [gaze_rec[:, 2, :][:, np.newaxis, :] for gaze_rec in gaze_multisubj_list_SO]
    blink_multisubj_list_SO = [gaze_rec[:, 3, :][:, np.newaxis, :] for gaze_rec in gaze_multisubj_list_SO]
    saccade_multisubj_list_SO = utils.refine_saccades(saccade_multisubj_list_SO, blink_multisubj_list_SO)
    eog_velocity_list_SO = [utils.calcu_gaze_vel_from_EOG(eog_rec) for eog_rec in eog_multisubj_list_SO]

    # Interpolate over blinks in each derived signal, using the blink signal of the same recording.
    gaze_velocity_list_SO = [
        utils.interpolate_blinks(signal, blink_sig)
        for signal, blink_sig in zip(gaze_velocity_list_SO, blink_multisubj_list_SO)
    ]
    gaze_coords_list_SO = [
        utils.interpolate_blinks(signal, blink_sig)
        for signal, blink_sig in zip(gaze_coords_list_SO, blink_multisubj_list_SO)
    ]
    # blinks are not removed as cleanly as in the gaze data
    eog_velocity_list_SO = [
        utils.interpolate_blinks(signal, blink_sig)
        for signal, blink_sig in zip(eog_velocity_list_SO, blink_multisubj_list_SO)
    ]

    # Rebuild the full collection: the eight overlaid-object lists followed by the seven single-object lists.
    mod_list = [
        eeg_multisubj_list,
        eog_multisubj_list,
        gaze_coords_list,
        gaze_velocity_list,
        eog_velocity_list,
        saccade_multisubj_list,
        feat_all_att_list,
        feat_all_unatt_list,
        eeg_multisubj_list_SO,
        eog_multisubj_list_SO,
        gaze_coords_list_SO,
        gaze_velocity_list_SO,
        eog_velocity_list_SO,
        saccade_multisubj_list_SO,
        feat_all_list_SO,
    ]

In [None]:
# Check the alignment between eog and gaze. The synchronization is good if the peaks of two signals (eye blinks) are aligned.
# subj_to_check = 'Pilot_19'
# subj_ID = subjects.index(subj_to_check)
# utils.check_alignment(subj_ID, eog_multisubj_list, gaze_multisubj_list, nb_points=500)

In [None]:
RemoveSC = True  # whether shot cuts are removed from every signal
if RemoveSC:
    mod_list = [
        [utils.remove_shot_cuts(segment, fs) for segment in signal_list]
        for signal_list in mod_list
    ]

# Write the (possibly cleaned) lists back to their individual names,
# in the same order in which mod_list was assembled.
if SINGLEOBJ:
    (
        eeg_multisubj_list,
        eog_multisubj_list,
        gaze_coords_list,
        gaze_velocity_list,
        eog_velocity_list,
        saccade_multisubj_list,
        feat_all_att_list,
        feat_all_unatt_list,
        eeg_multisubj_list_SO,
        eog_multisubj_list_SO,
        gaze_coords_list_SO,
        gaze_velocity_list_SO,
        eog_velocity_list_SO,
        saccade_multisubj_list_SO,
        feat_all_list_SO,
    ) = mod_list
else:
    (
        eeg_multisubj_list,
        eog_multisubj_list,
        gaze_coords_list,
        gaze_velocity_list,
        eog_velocity_list,
        saccade_multisubj_list,
        feat_all_att_list,
        feat_all_unatt_list,
    ) = mod_list

In [None]:
# Column positions of the two stimulus features inside the feature arrays.
OBJFLOW_COL = 8      # object optical flow
OBJTEMPCTR_COL = 17  # object temporal contrast

# Attended object
objflow_att_list = [feats[:, OBJFLOW_COL] for feats in feat_all_att_list]
objtempctr_att_list = [feats[:, OBJTEMPCTR_COL] for feats in feat_all_att_list]
# Unattended object
objflow_unatt_list = [feats[:, OBJFLOW_COL] for feats in feat_all_unatt_list]
objtempctr_unatt_list = [feats[:, OBJTEMPCTR_COL] for feats in feat_all_unatt_list]
# Single-object recordings (only when they are part of the analysis)
if SINGLEOBJ:
    objflow_list_SO = [feats[:, OBJFLOW_COL] for feats in feat_all_list_SO]
    objtempctr_list_SO = [feats[:, OBJTEMPCTR_COL] for feats in feat_all_list_SO]

In [None]:
# The analysis below uses the object optical flow as stimulus feature.
feat_att_list = objflow_att_list
feat_unatt_list = objflow_unatt_list

# Response modalities of the overlaid-object data, addressed by name.
modal_dict = {
    'EEG': eeg_multisubj_list,
    'EOG': eog_multisubj_list,
    'GAZE': gaze_coords_list,
    'GAZE_V': gaze_velocity_list,
    'EOG_V': eog_velocity_list,
    'SACC': saccade_multisubj_list,
}

if SINGLEOBJ:
    # Same layout for the single-object data.
    feat_list_SO = objflow_list_SO
    modal_dict_SO = {
        'EEG': eeg_multisubj_list_SO,
        'EOG': eog_multisubj_list_SO,
        'GAZE': gaze_coords_list_SO,
        'GAZE_V': gaze_velocity_list_SO,
        'EOG_V': eog_velocity_list_SO,
        'SACC': saccade_multisubj_list_SO,
    }

In [None]:
# CCA parameters. L_EEG and offset_EEG are applied to every response modality, not only to EEG.
# (L_* / offset_* are forwarded to algo.CanonicalCorrelationAnalysis; presumably the number of
#  time lags and the lag offset - confirm against algo.)
L_EEG = 3
offset_EEG = 1
L_Stim = int(fsStim / 2)
offset_Stim = 0
# Trial lengths evaluated in the decoding tasks: 5, 10, ..., 45 (presumably seconds - confirm).
trial_len_list = [5 * step for step in range(1, 10)]

In [None]:
# Output folders for figures and tables, one pair per modality.
# Figures are currently only produced for EEG, but a figure folder is created for every modality anyway.
CLEAR = False  # clear the content if set to True
figure_dirs, table_dirs = {}, {}
for data_type in modal_dict:
    figure_path = f'figures/{PATTERN}/{data_type}/'
    table_path = f'tables/{PATTERN}/{data_type}/'
    for new_dir in (figure_path, table_path):
        utils.create_dir(new_dir, CLEAR)
    figure_dirs[data_type], table_dirs[data_type] = figure_path, table_path

## Pipelines

In [None]:
def pipe_att_or_unatt_LVO(Subj_ID, eeg_multisubj_list, feat_att_list, feat_unatt_list, fs, L_EEG, L_Stim, offset_EEG, offset_Stim, TRAIN_WITH_ATT, eeg_ori_list=None, dim_list_EEG=None, dim_list_Stim=None, n_components=3, saccade_multisubj_list=None, V_eeg=None, V_Stim=None, PLOT=False, figure_dir=None, SAVERES=False, table_dir=None, OVERWRITE=False, feat_name='ObjFlow'):
    '''
    TASK: Perform CCA analysis for attended or unattended features of one subject, optionally plot the
    forward models and optionally store the correlations in a CSV table shared by all subjects.

    Inputs that need further explanation:
    Subj_ID: Zero-based index of the subject along the last axis of the data arrays. Tables and figure names use 'Subj <Subj_ID+1>'.
    eeg_multisubj_list: List of arrays that are indexed as [:, :, Subj_ID].
    feat_att_list, feat_unatt_list: Attended / unattended features. They are sliced per subject only if the first element is 3-D; otherwise they are used as given.
    TRAIN_WITH_ATT: True if training with attended features, False if training with unattended features
    eeg_ori_list: Original EEG data, necessary for calculating the forward model when the input eeg_multisubj_list is already hankelized (e.g., due to spatial-temporal regression)
    dim_list_EEG: If the input eeg_multisubj_list is actually a stack of EEG and other modalities, dim_list_EEG is a list of the dimensions of each modality. E.g., [64, 4] for stacked EEG and EOG. Always put EEG at the first place.
    dim_list_Stim: Similar to dim_list_EEG, but for the dimensions of the stimulus features.
    saccade_multisubj_list: Saccade data. A mask will be created if saccade data is provided to exclude the time points around saccades.
    V_eeg, V_Stim: If want to use pretrained filters trained from single object data, provide the filters here.
    PLOT: True if want to plot the forward models [only applicable for EEG], False otherwise. Requires figure_dir.
    SAVERES: True if want to save the results in a table, False otherwise. Requires table_dir.
    OVERWRITE: True if want to overwrite the existing results in the table, False otherwise
    feat_name: Prefix of the figure and table file names.

    Returns: None.
    Raises: ValueError if PLOT is True without figure_dir, or SAVERES is True without table_dir.
    '''
    # Fail early, before the CCA is run, instead of producing a path that starts with 'None'
    # (figures) or a TypeError after all the work is done (tables).
    if PLOT and figure_dir is None:
        raise ValueError('figure_dir must be provided when PLOT=True')
    if SAVERES and table_dir is None:
        raise ValueError('table_dir must be provided when SAVERES=True')

    # Select the data of the requested subject.
    eeg_onesubj_list = [eeg[:, :, Subj_ID] for eeg in eeg_multisubj_list]
    eeg_ori_onesubj_list = None
    if eeg_ori_list is not None:
        eeg_ori_onesubj_list = [eeg_ori[:, :, Subj_ID] for eeg_ori in eeg_ori_list]
    if np.ndim(feat_att_list[0]) == 3:
        feat_att_list = [feat_att[:, :, Subj_ID] for feat_att in feat_att_list]
    if np.ndim(feat_unatt_list[0]) == 3:
        feat_unatt_list = [feat_unatt[:, :, Subj_ID] for feat_unatt in feat_unatt_list]

    # Optional mask around saccades (window arguments as interpreted by utils.get_mask_list).
    mask_list = None
    if saccade_multisubj_list is not None:
        saccade_onesubj_list = [saccade[:, :, Subj_ID] for saccade in saccade_multisubj_list]
        mask_list = utils.get_mask_list(saccade_onesubj_list, before=10, after=20)

    CCA = algo.CanonicalCorrelationAnalysis(eeg_onesubj_list, feat_att_list, fs, L_EEG, L_Stim, offset_EEG, offset_Stim, dim_list_EEG=dim_list_EEG, dim_list_Stim=dim_list_Stim, n_components=n_components, mask_list=mask_list)
    corr_att_fold, corr_unatt_fold, sig_corr_fold, sig_corr_pool, forward_model_fold = CCA.att_or_unatt_LVO(feat_unatt_list, TRAIN_WITH_ATT, V_eeg=V_eeg, V_Stim=V_Stim, EEG_ori_list=eeg_ori_onesubj_list)

    # Both tags end up in the output file names.
    train_type = 'SO' if V_eeg is not None else 'Att' if TRAIN_WITH_ATT else 'Unatt'
    ifmask = mask_list is not None

    if PLOT:
        figure_name = f"{figure_dir}{feat_name}_Subj_{Subj_ID+1}_Train_{train_type}_Mask_{ifmask}_Folds.png"
        # NOTE(review): both calls receive the same figure_name; confirm that
        # utils.plot_spatial_resp_fold derives distinct file names for AVG=False and AVG=True.
        utils.plot_spatial_resp_fold(forward_model_fold, corr_att_fold, corr_unatt_fold, sig_corr_fold, figure_name, AVG=False)
        utils.plot_spatial_resp_fold(forward_model_fold, corr_att_fold, corr_unatt_fold, sig_corr_pool, figure_name, AVG=True)

    if not SAVERES:
        return

    table_name = table_dir + f'{feat_name}_Corr_Train_{train_type}_Mask_{ifmask}.csv'
    subj_label = 'Subj ' + str(Subj_ID + 1)
    if os.path.isfile(table_name):
        res_df = pd.read_csv(table_name, header=0, index_col=[0, 1, 2])
        already_stored = subj_label in res_df.index.get_level_values('Subject ID')
        if already_stored and not OVERWRITE:
            # Nothing changes, so the file is left untouched rather than re-serialised.
            print(f"Results for Subj {Subj_ID+1} already exist in {table_name}")
            return
        if already_stored:
            res_df = res_df.drop(subj_label, level='Subject ID')
        res_add = utils.create_corr_df(Subj_ID, sig_corr_fold, corr_att_fold, corr_unatt_fold)
        res_df = pd.concat([res_df, res_add], axis=0)
    else:
        res_df = utils.create_corr_df(Subj_ID, sig_corr_fold, corr_att_fold, corr_unatt_fold)
    # Passing the path lets pandas open the file itself with the correct newline handling.
    res_df.to_csv(table_name, header=True)

In [None]:
def pipe_compete_trials_LVO(Subj_ID, eeg_multisubj_list, feat_att_list, feat_unatt_list, fs, L_EEG, L_Stim, offset_EEG, offset_Stim, trial_len_list, table_dir, dim_list_EEG=None, dim_list_Stim=None, saccade_multisubj_list=None, BOOTSTRAP=True, V_eeg=None, V_Stim=None, n_components=3, nb_comp_into_account=3, signifi_level=False, message=True, OVERWRITE=False, feat_name='ObjFlow'):
    '''
    TASK: Determine the attended feature from the unattended feature using CCA and evaluate the performance.
    One accuracy per trial length is stored in a CSV table shared by all subjects.

    Inputs that need further explanation:
    Subj_ID: Zero-based index of the subject along the last axis of the data arrays. The table uses 'Subj <Subj_ID+1>'.
    feat_att_list, feat_unatt_list: Attended / unattended features. They are sliced per subject only if the first element is 3-D; otherwise they are used as given.
    trial_len_list: Trial lengths for which the accuracy is evaluated.
    table_dir: Folder (with trailing separator) in which the accuracy table is stored.
    dim_list_EEG: If the input eeg_multisubj_list is actually a stack of EEG and other modalities, dim_list_EEG is a list of the dimensions of each modality. E.g., [64, 4] for stacked EEG and EOG. Always put EEG at the first place.
    dim_list_Stim: Similar to dim_list_EEG, but for the dimensions of the stimulus features.
    saccade_multisubj_list: Saccade data. A mask will be created if saccade data is provided to exclude the time points around saccades.
    BOOTSTRAP: True if selecting trials with given length randomly (with overlap), False if dividing the trials without overlap
    V_eeg, V_Stim: If want to use pretrained filters trained from single object data, provide the filters here.
    nb_comp_into_account: Number of components into account when calculating the accuracy
    signifi_level, message: Forwarded unchanged to algo.CanonicalCorrelationAnalysis.
    OVERWRITE: True if want to overwrite the existing results in the table, False otherwise. If False and
               the subject is already in the table, the function returns without running the analysis.
    feat_name: Prefix of the table file name.

    Returns: None.
    '''
    # Select the data of the requested subject.
    eeg_onesubj_list = [eeg[:, :, Subj_ID] for eeg in eeg_multisubj_list]
    if np.ndim(feat_att_list[0]) == 3:
        feat_att_list = [feat_att[:, :, Subj_ID] for feat_att in feat_att_list]
    if np.ndim(feat_unatt_list[0]) == 3:
        feat_unatt_list = [feat_unatt[:, :, Subj_ID] for feat_unatt in feat_unatt_list]

    # Optional mask around saccades (window arguments as interpreted by utils.get_mask_list).
    mask_list = None
    if saccade_multisubj_list is not None:
        saccade_onesubj_list = [saccade[:, :, Subj_ID] for saccade in saccade_multisubj_list]
        mask_list = utils.get_mask_list(saccade_onesubj_list, before=10, after=20)
    ifmask = mask_list is not None

    train_type = 'SO' if V_eeg is not None else 'Att'
    table_name = table_dir + f'{feat_name}_Acc_Train_{train_type}_Mask_{ifmask}.csv'
    subj_label = 'Subj ' + str(Subj_ID + 1)

    # Look at the stored table first: if nothing is going to be written, the analysis
    # for all trial lengths does not need to be run at all.
    kept_rows = None
    if os.path.isfile(table_name):
        kept_rows = pd.read_csv(table_name, header=0)
        if subj_label in kept_rows['Subject ID'].values:
            if not OVERWRITE:
                print(f"Results for Subj {Subj_ID+1} already exist in {table_name}")
                return
            kept_rows = kept_rows[kept_rows['Subject ID'] != subj_label]

    CCA = algo.CanonicalCorrelationAnalysis(eeg_onesubj_list, feat_att_list, fs, L_EEG, L_Stim, offset_EEG, offset_Stim, dim_list_EEG=dim_list_EEG, dim_list_Stim=dim_list_Stim, n_components=n_components, mask_list=mask_list, signifi_level=signifi_level, message=message)
    res = []
    for trial_len in trial_len_list:
        print('Trial length: ', trial_len)
        corr_att_eeg, corr_unatt_eeg = CCA.att_or_unatt_LVO_trials(feat_unatt_list, trial_len=trial_len, BOOTSTRAP=BOOTSTRAP, V_eeg=V_eeg, V_Stim=V_Stim)
        acc, _, _, _, _ = utils.eval_compete(corr_att_eeg, corr_unatt_eeg, TRAIN_WITH_ATT=True, nb_comp_into_account=nb_comp_into_account)
        res.append(acc)

    # Rows of this subject, appended to the rows kept from the stored table (if any).
    res_df = utils.create_acc_df(Subj_ID, trial_len_list, res)
    if kept_rows is not None:
        res_df = pd.concat([kept_rows, res_df], axis=0)
    # Passing the path lets pandas open the file itself with the correct newline handling.
    res_df.to_csv(table_name, header=True, index=False)

In [None]:
def pipe_mm_trials_LVO(Subj_ID, eeg_multisubj_list, feat_match_list, fs, L_EEG, L_Stim, offset_EEG, offset_Stim, trial_len_list, table_dir, MATCHATT, dim_list_EEG=None, dim_list_Stim=None, saccade_multisubj_list=None, V_eeg=None, V_Stim=None, n_components=3, nb_comp_into_account=3, signifi_level=False, message=True, OVERWRITE=False, feat_name='ObjFlow'):
    '''
    TASK: Determine the matched feature from a random feature sampled from a different time point using CCA and evaluate the performance.
    One accuracy per trial length is stored in a CSV table shared by all subjects.

    Inputs that need further explanation:
    Subj_ID: Zero-based index of the subject along the last axis of the data arrays. The table uses 'Subj <Subj_ID+1>'.
    feat_match_list: Features matched to the data. They are sliced per subject only if the first element is 3-D; otherwise they are used as given.
    trial_len_list: Trial lengths for which the accuracy is evaluated.
    table_dir: Folder (with trailing separator) in which the accuracy table is stored.
    MATCHATT: Only used for the table file name: 'Att' if True, 'Unatt' if False (overridden by 'SO' when V_eeg is given).
    dim_list_EEG, dim_list_Stim: Dimensions of the stacked modalities / stimulus features, forwarded to the CCA.
    saccade_multisubj_list: Saccade data. A mask will be created if saccade data is provided to exclude the time points around saccades.
    V_eeg, V_Stim: If want to use pretrained filters trained from single object data, provide the filters here.
    nb_comp_into_account: Number of components into account when calculating the accuracy
    signifi_level, message: Forwarded unchanged to algo.CanonicalCorrelationAnalysis.
    OVERWRITE: True if want to overwrite the existing results in the table, False otherwise. If False and
               the subject is already in the table, the function returns without running the analysis.
    feat_name: Prefix of the table file name.

    Returns: None.
    '''
    # Select the data of the requested subject.
    eeg_onesubj_list = [eeg[:, :, Subj_ID] for eeg in eeg_multisubj_list]
    if np.ndim(feat_match_list[0]) == 3:
        feat_match_list = [feat_match[:, :, Subj_ID] for feat_match in feat_match_list]

    # Optional mask around saccades (window arguments as interpreted by utils.get_mask_list).
    mask_list = None
    if saccade_multisubj_list is not None:
        saccade_onesubj_list = [saccade[:, :, Subj_ID] for saccade in saccade_multisubj_list]
        mask_list = utils.get_mask_list(saccade_onesubj_list, before=10, after=20)
    ifmask = mask_list is not None

    train_type = 'SO' if V_eeg is not None else 'Att' if MATCHATT else 'Unatt'
    table_name = table_dir + f'{feat_name}_Acc_MM_Train_{train_type}_Mask_{ifmask}.csv'
    subj_label = 'Subj ' + str(Subj_ID + 1)

    # Look at the stored table first: if nothing is going to be written, the analysis
    # for all trial lengths does not need to be run at all.
    kept_rows = None
    if os.path.isfile(table_name):
        kept_rows = pd.read_csv(table_name, header=0)
        if subj_label in kept_rows['Subject ID'].values:
            if not OVERWRITE:
                print(f"Results for Subj {Subj_ID+1} already exist in {table_name}")
                return
            kept_rows = kept_rows[kept_rows['Subject ID'] != subj_label]

    CCA = algo.CanonicalCorrelationAnalysis(eeg_onesubj_list, feat_match_list, fs, L_EEG, L_Stim, offset_EEG, offset_Stim, dim_list_EEG=dim_list_EEG, dim_list_Stim=dim_list_Stim, n_components=n_components, mask_list=mask_list, signifi_level=signifi_level, message=message)
    res = []
    for trial_len in trial_len_list:
        print('Trial length: ', trial_len)
        corr_match_eeg, corr_mismatch_eeg = CCA.match_mismatch_LVO(trial_len=trial_len, V_eeg=V_eeg, V_Stim=V_Stim)
        acc, _, _ = utils.eval_mm(corr_match_eeg, corr_mismatch_eeg, nb_comp_into_account)
        res.append(acc)

    # The shared table builder names its columns for the attended/unattended task; relabel them here.
    res_df = utils.create_acc_df(Subj_ID, trial_len_list, res)
    res_df = res_df.rename(columns={'Att': 'Match', 'Unatt': 'Mismatch'})
    if kept_rows is not None:
        res_df = pd.concat([kept_rows, res_df], axis=0)
    # Passing the path lets pandas open the file itself with the correct newline handling.
    res_df.to_csv(table_name, header=True, index=False)

In [None]:
def pipe_single_obj(Subj_ID, eeg_multisubj_list, feat_list, fs, L_EEG, L_Stim, offset_EEG, offset_Stim, figure_dir, table_dir, dim_list_EEG=None, dim_list_Stim=None, saccade_multisubj_list=None, n_components=3, PLOT=False, OVERWRITE=False, feat_name='ObjFlow'):
    '''
    TASK: Train and test on the single-object data. Then use all single-object data to train the filters to be used in the overlaid-object data.

    Inputs that need further explanation:
    Subj_ID: Zero-based index of the subject along the last axis of the data arrays. Tables and figure names use 'Subj <Subj_ID+1>'.
    feat_list: Stimulus features. They are sliced per subject only if the first element is 3-D; otherwise they are used as given.
    figure_dir, table_dir: Folders (with trailing separator) for the figures and for the correlation table.
    dim_list_EEG, dim_list_Stim: Dimensions of the stacked modalities / stimulus features, forwarded to the CCA.
    saccade_multisubj_list: Saccade data. A mask will be created if saccade data is provided to exclude the time points around saccades.
    PLOT: True if want to plot the forward models, False otherwise
    OVERWRITE: True if want to overwrite the existing results in the table, False otherwise
    feat_name: Prefix of the figure and table file names.

    Returns: (V_eeg_SO, V_stim_SO), the fifth and sixth outputs of CCA.fit on the concatenated data of the subject.
    '''
    # Select the data of the requested subject.
    eeg_onesubj_list = [eeg[:, :, Subj_ID] for eeg in eeg_multisubj_list]
    if np.ndim(feat_list[0]) == 3:
        feat_list = [feat[:, :, Subj_ID] for feat in feat_list]

    # Optional mask around saccades (window arguments as interpreted by utils.get_mask_list).
    mask_list = None
    if saccade_multisubj_list is not None:
        saccade_onesubj_list = [saccade[:, :, Subj_ID] for saccade in saccade_multisubj_list]
        mask_list = utils.get_mask_list(saccade_onesubj_list, before=10, after=20)
    ifmask = mask_list is not None

    CCA = algo.CanonicalCorrelationAnalysis(eeg_onesubj_list, feat_list, fs, L_EEG, L_Stim, offset_EEG, offset_Stim, dim_list_EEG=dim_list_EEG, dim_list_Stim=dim_list_Stim, n_components=n_components, mask_list=mask_list)
    corr_train_fold, corr_test_fold, sig_corr_fold, sig_corr_pool, forward_model_fold = CCA.cross_val_LVO()

    if PLOT:
        figure_name = f"{figure_dir}{feat_name}_Subj_{Subj_ID+1}_Folds.png"
        # NOTE(review): both calls receive the same figure_name; confirm that
        # utils.plot_spatial_resp_fold derives distinct file names for AVG=False and AVG=True.
        utils.plot_spatial_resp_fold(forward_model_fold, corr_test_fold, None, sig_corr_fold, figure_name, AVG=False)
        utils.plot_spatial_resp_fold(forward_model_fold, corr_test_fold, None, sig_corr_pool, figure_name, AVG=True)

    table_name = table_dir + f'{feat_name}_Corr_SO_Mask_{ifmask}.csv'
    subj_label = 'Subj ' + str(Subj_ID + 1)
    # The shared table builder names its columns for the attended/unattended task; relabel them here.
    train_test_names = {'Att': 'Train', 'Unatt': 'Test'}
    table_changed = True
    if os.path.isfile(table_name):
        res_df = pd.read_csv(table_name, header=0, index_col=[0, 1, 2])
        already_stored = subj_label in res_df.index.get_level_values('Subject ID')
        if already_stored and not OVERWRITE:
            # Nothing changes, so the file is left untouched rather than re-serialised.
            print(f"Results for Subj {Subj_ID+1} already exist in {table_name}")
            table_changed = False
        else:
            if already_stored:
                res_df = res_df.drop(subj_label, level='Subject ID')
            res_add = utils.create_corr_df(Subj_ID, sig_corr_fold, corr_train_fold, corr_test_fold)
            res_add = res_add.rename(columns=train_test_names)
            res_df = pd.concat([res_df, res_add], axis=0)
    else:
        res_df = utils.create_corr_df(Subj_ID, sig_corr_fold, corr_train_fold, corr_test_fold)
        res_df = res_df.rename(columns=train_test_names)
    if table_changed:
        # Passing the path lets pandas open the file itself with the correct newline handling.
        res_df.to_csv(table_name, header=True)

    # Filters trained on all single-object data of this subject, for reuse on the overlaid-object data.
    EEG_all = np.concatenate(eeg_onesubj_list, axis=0)
    feat_all = np.concatenate(feat_list, axis=0)
    _, _, _, _, V_eeg_SO, V_stim_SO, _ = CCA.fit(EEG_all, feat_all)
    return V_eeg_SO, V_stim_SO

## Mode=Compete: Discriminating between attended and unattended segments

In [None]:
# Either process every subject, or only the most recently added ones.
CALCUNEWOPONLY = False
new_subj = ['Pilot_17', 'Pilot_18', 'Pilot_19']
new_subj_idx = list(map(subjects.index, new_subj))  # positions of the new subjects in `subjects`
if CALCUNEWOPONLY:
    Subj_Set = new_subj_idx
else:
    Subj_Set = range(nb_subj)

In [None]:
# For every subject in Subj_Set and every modality: correlation analysis trained on the attended
# and on the unattended features, followed by the attended-vs-unattended decoding.
for Subj_ID in Subj_Set:
    for modal, modal_data in modal_dict.items():
        PLOT = (modal == 'EEG')  # forward-model figures are only produced for EEG
        for train_with_att in (True, False):
            pipe_att_or_unatt_LVO(
                Subj_ID, modal_data, feat_att_list, feat_unatt_list,
                fs, L_EEG, L_Stim, offset_EEG, offset_Stim,
                TRAIN_WITH_ATT=train_with_att, PLOT=PLOT, figure_dir=figure_dirs[modal],
                SAVERES=True, table_dir=table_dirs[modal], OVERWRITE=True,
            )
        pipe_compete_trials_LVO(
            Subj_ID, modal_data, feat_att_list, feat_unatt_list,
            fs, L_EEG, L_Stim, offset_EEG, offset_Stim,
            trial_len_list, table_dirs[modal], OVERWRITE=True,
        )

### If the single-object dataset is included

In [None]:
# Train filters on the single-object EEG of each subject, then evaluate those pretrained
# filters on the overlaid-object EEG of the same subject.
if SINGLEOBJ:
    for Subj_ID in Subj_Set:
        V_eeg_SO, V_stim_SO = pipe_single_obj(
            Subj_ID, modal_dict_SO['EEG'], feat_list_SO,
            fs, L_EEG, L_Stim, offset_EEG, offset_Stim,
            figure_dirs['EEG'], table_dirs['EEG'],
            saccade_multisubj_list=None, n_components=3, PLOT=True, OVERWRITE=True,
        )
        pretrained = dict(V_eeg=V_eeg_SO, V_Stim=V_stim_SO)
        pipe_att_or_unatt_LVO(
            Subj_ID, modal_dict['EEG'], feat_att_list, feat_unatt_list,
            fs, L_EEG, L_Stim, offset_EEG, offset_Stim,
            TRAIN_WITH_ATT=True, PLOT=True, figure_dir=figure_dirs['EEG'],
            SAVERES=True, table_dir=table_dirs['EEG'], OVERWRITE=True, **pretrained,
        )
        pipe_compete_trials_LVO(
            Subj_ID, modal_dict['EEG'], feat_att_list, feat_unatt_list,
            fs, L_EEG, L_Stim, offset_EEG, offset_Stim,
            trial_len_list, table_dirs['EEG'], OVERWRITE=True, **pretrained,
        )

### If time points around saccades are removed

In [None]:
# Same analyses as above, with the time points around saccades masked out.
# The saccade signal itself is skipped as a modality.
for Subj_ID in Subj_Set:
    for modal, modal_data in modal_dict.items():
        PLOT = (modal == 'EEG')
        if modal == 'SACC':
            continue
        pipe_att_or_unatt_LVO(
            Subj_ID, modal_data, feat_att_list, feat_unatt_list,
            fs, L_EEG, L_Stim, offset_EEG, offset_Stim,
            TRAIN_WITH_ATT=True, saccade_multisubj_list=modal_dict['SACC'],
            PLOT=PLOT, figure_dir=figure_dirs[modal],
            SAVERES=True, table_dir=table_dirs[modal], OVERWRITE=True,
        )
        pipe_compete_trials_LVO(
            Subj_ID, modal_data, feat_att_list, feat_unatt_list,
            fs, L_EEG, L_Stim, offset_EEG, offset_Stim,
            trial_len_list, table_dirs[modal],
            saccade_multisubj_list=modal_dict['SACC'], OVERWRITE=True,
        )

## Mode=Match-Mismatch: Discriminating between match and mismatch segments

In [None]:
# Match-mismatch decoding for every subject in Subj_Set and every modality,
# first with the attended and then with the unattended features as the matched stimulus.
for Subj_ID in Subj_Set:
    for modal, modal_data in modal_dict.items():
        for feat_match_list, match_att in ((feat_att_list, True), (feat_unatt_list, False)):
            pipe_mm_trials_LVO(
                Subj_ID, modal_data, feat_match_list,
                fs, L_EEG, L_Stim, offset_EEG, offset_Stim,
                trial_len_list, table_dirs[modal], MATCHATT=match_att, OVERWRITE=True,
            )

## Making use of multi-modal data

### Stacking different modalities

In [None]:
# Stacked modality combinations; each key is the '+'-joined list of the stacked modality names.
comb_dict = {
    '+'.join(names): utils.stack_modal([modal_dict[name] for name in names])
    for names in (
        ('EEG', 'EOG'),
        ('EEG', 'GAZE_V'),
        ('EOG', 'GAZE_V'),
        ('EEG', 'EOG', 'GAZE_V'),
    )
}

In [None]:
# Run the attended-feature analysis and the decoding on every stacked combination.
# Each entry of comb_dict provides the stacked data at index 0 and the per-modality dimensions at index 1.
for comb, modal in comb_dict.items():
    table_path = f'tables/{PATTERN}/{comb}/'
    utils.create_dir(table_path, CLEAR)
    stacked_list, dim_list = modal[0], modal[1]
    for Subj_ID in Subj_Set:
        pipe_att_or_unatt_LVO(
            Subj_ID, stacked_list, feat_att_list, feat_unatt_list,
            fs, L_EEG, L_Stim, offset_EEG, offset_Stim,
            dim_list_EEG=dim_list, TRAIN_WITH_ATT=True,
            SAVERES=True, table_dir=table_path, OVERWRITE=True,
        )
        pipe_compete_trials_LVO(
            Subj_ID, stacked_list, feat_att_list, feat_unatt_list,
            fs, L_EEG, L_Stim, offset_EEG, offset_Stim,
            trial_len_list, table_path, dim_list_EEG=dim_list, OVERWRITE=True,
        )

### Regressing out other modalities from EEG (Partial CCA)

In [None]:
def regress_out_confounds(data_list, feat_att_list, feat_unatt_list, confound_list, L_data, L_Stim, offset_data, offset_Stim):
    '''
    Regress the confound (the modality to be controlled for) out of the data and out of both feature sets.

    Each signal is passed to utils.further_regress_out_list together with its own lag settings
    (L_data/offset_data for the data, L_Stim/offset_Stim for the features); the confound always
    uses L_data/offset_data.

    Returns: tuple (data_reg, feat_att_reg, feat_unatt_reg) in that order.
    '''
    signals_with_lags = (
        (data_list, L_data, offset_data),
        (feat_att_list, L_Stim, offset_Stim),
        (feat_unatt_list, L_Stim, offset_Stim),
    )
    return tuple(
        utils.further_regress_out_list(signal_list, confound_list, L_signal, L_data, offset_signal, offset_data)
        for signal_list, L_signal, offset_signal in signals_with_lags
    )

In [None]:
# Partial-CCA inputs. Key '<A>-<B>' means: modality B regressed out of modality A (and out of the features).
reg_dict = {
    reg_name: regress_out_confounds(data_list, feat_att_list, feat_unatt_list, confound_list, L_EEG, L_Stim, offset_EEG, offset_Stim)
    for reg_name, data_list, confound_list in (
        ('EEG-EOG', modal_dict['EEG'], modal_dict['EOG']),
        ('EEG-GAZE_V', modal_dict['EEG'], modal_dict['GAZE_V']),
        ('GAZE_V-EEG', modal_dict['GAZE_V'], modal_dict['EEG']),
        ('EEG-EOG&GAZE_V', modal_dict['EEG'], comb_dict['EOG+GAZE_V'][0]),
    )
}

In [None]:
# Analyses on the regressed data. The pipelines are called with a single lag and no offset here;
# the lag settings L_EEG/L_Stim were already passed to regress_out_confounds.
single_lag = dict(L_EEG=1, L_Stim=1, offset_EEG=0, offset_Stim=0)
for reg, modal in reg_dict.items():
    table_path = f'tables/{PATTERN}/{reg}/'
    utils.create_dir(table_path, CLEAR=False)
    # Figures are only made when EEG is the modality being analysed (the part before the '-').
    PLOT = reg.partition('-')[0] == 'EEG'
    figure_path = f'figures/{PATTERN}/{reg}/' if PLOT else None
    if figure_path is not None:
        utils.create_dir(figure_path, CLEAR=False)
    data_reg, feat_att_reg, feat_unatt_reg = modal[0], modal[1], modal[2]
    for Subj_ID in Subj_Set:
        pipe_att_or_unatt_LVO(
            Subj_ID, data_reg, feat_att_reg, feat_unatt_reg, fs,
            TRAIN_WITH_ATT=True, eeg_ori_list=modal_dict['EEG'],
            PLOT=PLOT, figure_dir=figure_path,
            SAVERES=True, table_dir=table_path, OVERWRITE=True, **single_lag,
        )
        pipe_compete_trials_LVO(
            Subj_ID, data_reg, feat_att_reg, feat_unatt_reg, fs,
            trial_len_list=trial_len_list, table_dir=table_path, OVERWRITE=True, **single_lag,
        )

## Training with both attended and unattended features [Not very useful]

In [None]:
# Two-column feature sets that contain both objects, in either order:
# (attended, unattended) and (unattended, attended), stacked along axis 1.
feat_att_unatt, feat_unatt_att = [], []
for att, unatt in zip(feat_att_list, feat_unatt_list):
    feat_att_unatt.append(np.stack([att, unatt], axis=1))
    feat_unatt_att.append(np.stack([unatt, att], axis=1))

In [None]:
# Combined-feature ('OF-CB') analyses for all subjects and modalities; existing table entries are kept.
for Subj_ID in range(len(subjects)):
    for modal, modal_data in modal_dict.items():
        PLOT = (modal == 'EEG')
        pipe_att_or_unatt_LVO(
            Subj_ID, modal_data, feat_att_unatt, feat_unatt_att,
            fs, L_EEG, L_Stim, offset_EEG, offset_Stim,
            TRAIN_WITH_ATT=True, PLOT=PLOT, figure_dir=figure_dirs[modal],
            SAVERES=True, table_dir=table_dirs[modal], OVERWRITE=False, feat_name='OF-CB',
        )
        pipe_compete_trials_LVO(
            Subj_ID, modal_data, feat_att_unatt, feat_unatt_att,
            fs, L_EEG, L_Stim, offset_EEG, offset_Stim,
            trial_len_list, table_dirs[modal], OVERWRITE=False, feat_name='OF-CB',
        )

In [None]:
# Combined-feature ('OF-CB') analyses on the stacked modality combinations.
# Each entry of comb_dict provides the stacked data at index 0 and the per-modality dimensions at index 1.
for comb, modal in comb_dict.items():
    table_path = f'tables/{PATTERN}/{comb}/'
    utils.create_dir(table_path, CLEAR)
    stacked_list, dim_list = modal[0], modal[1]
    for Subj_ID in range(len(subjects)):
        pipe_att_or_unatt_LVO(
            Subj_ID, stacked_list, feat_att_unatt, feat_unatt_att,
            fs, L_EEG, L_Stim, offset_EEG, offset_Stim,
            dim_list_EEG=dim_list, TRAIN_WITH_ATT=True,
            SAVERES=True, table_dir=table_path, OVERWRITE=True, feat_name='OF-CB',
        )
        pipe_compete_trials_LVO(
            Subj_ID, stacked_list, feat_att_unatt, feat_unatt_att,
            fs, L_EEG, L_Stim, offset_EEG, offset_Stim,
            trial_len_list, table_path, dim_list_EEG=dim_list, OVERWRITE=True, feat_name='OF-CB',
        )

In [None]:
# NOTE: from here on modal_dict only holds three modalities. Cells that need the other keys
# (e.g. 'SACC') have to be run before this one, or after re-running the cell that builds the full dictionary.
modal_dict = {
    'EEG': eeg_multisubj_list,
    'EOG': eog_multisubj_list,
    'GAZE_V': gaze_velocity_list,
}
# Partial-CCA inputs for the combined features. Key '<A>-<B>' means: modality B regressed out of modality A.
reg_cb_dict = {
    reg_name: regress_out_confounds(data_list, feat_att_unatt, feat_unatt_att, confound_list, L_EEG, L_Stim, offset_EEG, offset_Stim)
    for reg_name, data_list, confound_list in (
        ('EEG-EOG', modal_dict['EEG'], modal_dict['EOG']),
        ('EEG-GAZE_V', modal_dict['EEG'], modal_dict['GAZE_V']),
        ('GAZE_V-EEG', modal_dict['GAZE_V'], modal_dict['EEG']),
        ('EEG-EOG&GAZE_V', modal_dict['EEG'], comb_dict['EOG+GAZE_V'][0]),
    )
}

In [None]:
# Combined-feature ('OF-CB') analyses on the regressed data. The pipelines are called with a single lag
# and no offset here; the lag settings L_EEG/L_Stim were already passed to regress_out_confounds.
single_lag = dict(L_EEG=1, L_Stim=1, offset_EEG=0, offset_Stim=0)
for reg, modal in reg_cb_dict.items():
    table_path = f'tables/{PATTERN}/{reg}/'
    utils.create_dir(table_path, CLEAR=False)
    # Figures are only made when EEG is the modality being analysed (the part before the '-').
    PLOT = reg.partition('-')[0] == 'EEG'
    figure_path = f'figures/{PATTERN}/{reg}/' if PLOT else None
    if figure_path is not None:
        utils.create_dir(figure_path, CLEAR=False)
    data_reg, feat_att_reg, feat_unatt_reg = modal[0], modal[1], modal[2]
    for Subj_ID in range(len(subjects)):
        pipe_att_or_unatt_LVO(
            Subj_ID, data_reg, feat_att_reg, feat_unatt_reg, fs,
            TRAIN_WITH_ATT=True, eeg_ori_list=modal_dict['EEG'],
            PLOT=PLOT, figure_dir=figure_path,
            SAVERES=True, table_dir=table_path, OVERWRITE=True, feat_name='OF-CB', **single_lag,
        )
        pipe_compete_trials_LVO(
            Subj_ID, data_reg, feat_att_reg, feat_unatt_reg, fs,
            trial_len_list=trial_len_list, table_dir=table_path, OVERWRITE=True, feat_name='OF-CB', **single_lag,
        )

## GCCA

In [None]:
def pipe_GCCA(nested_datalist, fs, L_list, offset_list, mod_name_list, W_list, nested_dimlist, figure_dir, table_dir, n_components=10, SINGLEOBJ=False, OVERWRITE=False, FM_ORG=None):
    """Run cross-validated GCCA for each modality, plot it and store the results in a CSV table.

    The per-modality lists (`nested_datalist`, `L_list`, `offset_list`,
    `mod_name_list`, `W_list`, `nested_dimlist`) are zipped together, so entry
    i of each list describes the same modality.

    Parameters
    ----------
    nested_datalist : list
        One data list per modality, handed to `algo.GeneralizedCCA`.
    fs : sampling rate forwarded to `algo.GeneralizedCCA`.
    L_list, offset_list : list
        Per-modality `L` and `offset` arguments of `algo.GeneralizedCCA`.
    mod_name_list : list of str
        Modality names; used in figure file names and as the 'Modality'
        index level of the results table.
    W_list : list
        Per-modality value passed as `W_eeg` to `cross_val_LVO`. Only
        `W_list[0]` decides which table file is used; each entry decides
        the figure file name of its own modality.
    nested_dimlist : list
        Per-modality `dim_list` argument of `algo.GeneralizedCCA`.
    figure_dir, table_dir : str
        Directory prefixes; file names are appended by plain string
        concatenation, so they must end with a path separator.
    n_components : int
        Forwarded to `algo.GeneralizedCCA`.
    SINGLEOBJ : bool
        Selects the file-name prefix: 'SO_' when true, 'OL_' otherwise.
    OVERWRITE : bool
        When the table already holds rows for a modality, replace them if
        true; otherwise keep them and print a notice.
    FM_ORG : sequence of two values or None
        When given, every forward model is passed through
        `utils.F_organize(forward_model, FM_ORG[0], FM_ORG[1])`.

    Returns
    -------
    None. Figures and the CSV table are written as side effects.
    """
    prefix = 'SO_' if SINGLEOBJ else 'OL_'
    # The table file is chosen once, based on the first weight entry only.
    if W_list[0] is None:
        table_name = table_dir + f'{prefix}Single_Mod.csv'
    else:
        table_name = table_dir + 'Single_Mod_Pretrain_on_SO.csv'

    for datalist, L, offset, mod_name, W_data, dim_list in zip(nested_datalist, L_list, offset_list, mod_name_list, W_list, nested_dimlist):
        GCCA = algo.GeneralizedCCA(datalist, fs, L=L, offset=offset, dim_list=dim_list, n_components=n_components, signifi_level=True)
        _, corr_test_fold, _, cov_test_fold, _, _, sig_corr_fold, sig_corr_pool, forward_model_fold = GCCA.cross_val_LVO(W_eeg=W_data)
        if FM_ORG is not None:
            forward_model_fold = [utils.F_organize(forward_model, FM_ORG[0], FM_ORG[1]) for forward_model in forward_model_fold]

        # Spatial plots are drawn only for forward models with 64 rows
        # (presumably the 64-channel EEG layout — TODO confirm).
        if forward_model_fold[0].shape[0] == 64:
            if W_data is None:
                figure_name = figure_dir + f"{prefix}{mod_name}_Folds.png"
            else:
                figure_name = figure_dir + f"{prefix}{mod_name}_Pretrain_on_SO_Folds.png"
            utils.plot_spatial_resp_fold(forward_model_fold, corr_test_fold, None, sig_corr_fold, figure_name, AVG=False)
            utils.plot_spatial_resp_fold(forward_model_fold, corr_test_fold, None, sig_corr_pool, figure_name, AVG=True)

        if os.path.isfile(table_name):
            # Merge into the existing table.
            res_df = pd.read_csv(table_name, header=0, index_col=[0, 1, 2])
            already_stored = mod_name in res_df.index.get_level_values('Modality')
            if already_stored and not OVERWRITE:
                print(f"Results for {mod_name} already exist in {table_name}")
                # Nothing changed, so the table on disk is left untouched.
                continue
            if already_stored:
                res_df = res_df.drop(mod_name, level='Modality')
            res_add = utils.create_ISC_df(corr_test_fold, cov_test_fold, sig_corr_fold, mod_name)
            res_df = pd.concat([res_df, res_add], axis=0)
        else:
            # First modality written to this table.
            res_df = utils.create_ISC_df(corr_test_fold, cov_test_fold, sig_corr_fold, mod_name)

        # Hand pandas the path rather than a text-mode handle: a handle opened
        # without newline='' yields doubled line endings on Windows.
        res_df.to_csv(table_name, header=True)

In [None]:
# Output folders for the GCCA tables and figures of the current PATTERN.
GCCA_table_path = f'tables/{PATTERN}/GCCA/'
GCCA_figure_path = f'figures/{PATTERN}/GCCA/'
for gcca_out_dir in (GCCA_table_path, GCCA_figure_path):
    utils.create_dir(gcca_out_dir, CLEAR=False)

### Single-object dataset

In [None]:
# GCCA on every modality of the single-object dataset, one modality at a time.
mod_name_list = list(modal_dict_SO)
nested_datalist = [modal_dict_SO[name] for name in mod_name_list]
n_modalities = len(nested_datalist)

# Same lag settings for every modality; no pretrained weights, no dim lists.
L_list, offset_list = [5] * n_modalities, [2] * n_modalities
W_list, nested_dimlist = [None] * n_modalities, [None] * n_modalities

pipe_GCCA(
    nested_datalist, fs, L_list, offset_list, mod_name_list, W_list, nested_dimlist,
    figure_dir=GCCA_figure_path,
    table_dir=GCCA_table_path,
    SINGLEOBJ=True,
    OVERWRITE=True,
)

In [None]:
# Stacked modality combinations for the single-object dataset. Each value is
# the result of utils.stack_modal on the listed modalities, in the listed order.
comb_dict_SO = {
    comb_name: utils.stack_modal([modal_dict_SO[member] for member in members])
    for comb_name, members in {
        'EEG+EOG': ('EEG', 'EOG'),
        'EEG+GAZE_V': ('EEG', 'GAZE_V'),
        'EOG+GAZE_V': ('EOG', 'GAZE_V'),
        'EEG+EOG+GAZE_V': ('EEG', 'EOG', 'GAZE_V'),
    }.items()
}

In [None]:
# nested_datalist = [modal[0] for modal in comb_dict_SO.values()]
# nested_dimlist = [modal[1] for modal in comb_dict_SO.values()]
# mod_name_list = list(comb_dict_SO.keys())
# L_list = [5]*len(nested_datalist)
# offset_list = [2]*len(nested_datalist)
# W_list = [None]*len(nested_datalist)
# pipe_GCCA(nested_datalist, fs, L_list, offset_list, mod_name_list, W_list, nested_dimlist, GCCA_figure_path, GCCA_table_path, SINGLEOBJ=True,  OVERWRITE=True)

In [None]:
# Single-object EEG with each confound regressed out. feat_list_SO is passed for
# both feature arguments. Entry [0] of every result is what the GCCA cell uses.
reg_gcca_dict_SO = {
    reg_name: regress_out_confounds(
        modal_dict_SO['EEG'], feat_list_SO, feat_list_SO, confound,
        L_data=5, L_Stim=1, offset_data=2, offset_Stim=0,
    )
    for reg_name, confound in {
        'EEG-EOG': modal_dict_SO['EOG'],
        'EEG-GAZE_V': modal_dict_SO['GAZE_V'],
        'EEG-EOG&GAZE_V': comb_dict_SO['EOG+GAZE_V'][0],
    }.items()
}

In [None]:
# GCCA on the confound-regressed single-object EEG (entry [0] of each result).
mod_name_list = list(reg_gcca_dict_SO)
nested_datalist = [reg_gcca_dict_SO[name][0] for name in mod_name_list]
n_modalities = len(nested_datalist)

# NOTE(review): L=1 / offset=0 presumably because the lags (5, 2) were already
# applied during the regression — confirm. FM_ORG forwards those two values to
# utils.F_organize inside pipe_GCCA.
L_list, offset_list = [1] * n_modalities, [0] * n_modalities
W_list, nested_dimlist = [None] * n_modalities, [None] * n_modalities

pipe_GCCA(
    nested_datalist, fs, L_list, offset_list, mod_name_list, W_list, nested_dimlist,
    figure_dir=GCCA_figure_path,
    table_dir=GCCA_table_path,
    SINGLEOBJ=True,
    OVERWRITE=True,
    FM_ORG=[5, 2],
)

### Overlaid-object dataset

In [None]:
# GCCA on every modality of the overlaid-object dataset, one modality at a time.
mod_name_list = list(modal_dict)
nested_datalist = [modal_dict[name] for name in mod_name_list]
n_modalities = len(nested_datalist)

# Same lag settings for every modality; no pretrained weights, no dim lists.
L_list, offset_list = [5] * n_modalities, [2] * n_modalities
W_list, nested_dimlist = [None] * n_modalities, [None] * n_modalities

pipe_GCCA(
    nested_datalist, fs, L_list, offset_list, mod_name_list, W_list, nested_dimlist,
    figure_dir=GCCA_figure_path,
    table_dir=GCCA_table_path,
    SINGLEOBJ=False,
    OVERWRITE=True,
)

In [None]:
# nested_datalist = [modal[0] for modal in comb_dict.values()]
# nested_dimlist = [modal[1] for modal in comb_dict.values()]
# mod_name_list = list(comb_dict.keys())
# L_list = [5]*len(nested_datalist)
# offset_list = [2]*len(nested_datalist)
# W_list = [None]*len(nested_datalist)
# pipe_GCCA(nested_datalist, fs, L_list, offset_list, mod_name_list, W_list, nested_dimlist, GCCA_figure_path, GCCA_table_path, SINGLEOBJ=False,  OVERWRITE=True)

In [None]:
# Overlaid-object EEG with each confound regressed out. Entry [0] of every
# result is what the GCCA cell uses.
reg_gcca_dict = {
    reg_name: regress_out_confounds(
        modal_dict['EEG'], feat_att_list, feat_unatt_list, confound,
        L_data=5, L_Stim=1, offset_data=2, offset_Stim=0,
    )
    for reg_name, confound in {
        'EEG-EOG': modal_dict['EOG'],
        'EEG-GAZE_V': modal_dict['GAZE_V'],
        'EEG-EOG&GAZE_V': comb_dict['EOG+GAZE_V'][0],
    }.items()
}

In [None]:
# GCCA on the confound-regressed overlaid-object EEG (entry [0] of each result).
mod_name_list = list(reg_gcca_dict)
nested_datalist = [reg_gcca_dict[name][0] for name in mod_name_list]
n_modalities = len(nested_datalist)

# NOTE(review): L=1 / offset=0 presumably because the lags (5, 2) were already
# applied during the regression — confirm. FM_ORG forwards those two values to
# utils.F_organize inside pipe_GCCA.
L_list, offset_list = [1] * n_modalities, [0] * n_modalities
W_list, nested_dimlist = [None] * n_modalities, [None] * n_modalities

pipe_GCCA(
    nested_datalist, fs, L_list, offset_list, mod_name_list, W_list, nested_dimlist,
    figure_dir=GCCA_figure_path,
    table_dir=GCCA_table_path,
    SINGLEOBJ=False,
    OVERWRITE=True,
    FM_ORG=[5, 2],
)