# Internal consistency

### Imports

In [None]:
import os
import re
import glob
import os
import ast
import os.path as op
from collections import defaultdict
from copy import deepcopy
import copy

import pickle
from time import time
import pywt
import mne
import scipy
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
import pandas as pd
import cesium.featurize
from plotly.subplots import make_subplots
from ipywidgets import Dropdown, FloatRangeSlider, IntSlider, FloatSlider, interact
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import train_test_split
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.base import TransformerMixin, BaseEstimator

import sys

from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import FunctionTransformer
from sklearn.svm import SVR
from sklearn.decomposition import FastICA
from sklearn.decomposition import PCA

from rumination_experiment_transformers_averaged_CDS import *

import warnings

warnings.filterwarnings("ignore")


---
## Loading data

Loading the EEG data together with the data from the rumination questionnaire. By default, `create_df_data` loads every column of the given CSV file; to load only some of them, pass a list of the desired column labels via the `info` argument.

In [None]:
# paths TODO
# Parent directory of the current working directory: abspath("") resolves to
# the CWD and dirname() strips its last component.
dir_path = os.path.dirname(os.path.abspath(""))

In [None]:
# Epoch window handed to mne.Epochs in load_epochs_from_file
# (seconds relative to the response marker).
tmin, tmax = -0.101562, 0.5937525  # Start and end of the segments
signal_frequency = 256  # presumably the EEG sampling rate in Hz -- TODO confirm
# Values stored in the "marker" column of the epochs DataFrame.
ERROR = 0
CORRECT = 1
ALL = 2  # used when a row holds all epochs of a participant
# Seed passed to stochastic estimators (e.g. PCA) so runs are repeatable.
random_state = 0

In [None]:
# Ordered list of the 64 EEG channel labels.
# NOTE(review): presumably this is the recording montage order -- confirm.
channels_order_list = [
    "Fp1", "AF7", "AF3", "F1", "F3", "F5", "F7", "FT7",
    "FC5", "FC3", "FC1", "C1", "C3", "C5", "T7", "TP7",
    "CP5", "CP3", "CP1", "P1", "P3", "P5", "P7", "P9",
    "PO7", "PO3", "O1", "Iz", "Oz", "POz", "Pz", "CPz",
    "Fpz", "Fp2", "AF8", "AF4", "AFz", "Fz", "F2", "F4",
    "F6", "F8", "FT8", "FC6", "FC4", "FC2", "FCz", "Cz",
    "C2", "C4", "C6", "T8", "TP8", "CP6", "CP4", "CP2",
    "P2", "P4", "P6", "P8", "P10", "PO8", "PO4", "O2",
]

# Map every channel label to its 1-based position in channels_order_list.
# The upper bound is derived from the list length: the previous hard-coded
# np.arange(1, 64) produced only 63 indices, so zip() silently dropped "O2".
channels_dict = dict(
    zip(channels_order_list, np.arange(1, len(channels_order_list) + 1))
)

In [None]:
# Channel "boxes": named subsets of electrodes used for channel picking.
# Each row of a literal below groups electrodes sharing a label prefix.

box_3 = [
    "Fpz",
    "F1", "Fz", "F2",
    "FC1", "FCz", "FC2",
    "C1", "Cz", "C2",
    "CP1", "CPz", "CP2",
    "P1", "Pz", "P2",
]

box_4 = [
    "Fpz",
    "AFz",
    "F1", "Fz", "F2",
    "FC1", "FCz", "FC2",
    "C1", "Cz", "C2",
    "CP1", "CPz", "CP2",
    "P1", "Pz", "P2",
]

box_5 = [
    "F1", "Fz", "F2",
    "FC1", "FCz", "FC2",
    "C1", "Cz", "C2",
    "CP1", "CPz", "CP2",
    "P1", "Pz", "P2",
]

# Midline plus a cluster at the back.
red_box7 = [
    "Fpz", "AFz", "Fz", "FCz", "Cz", "CPz",
    "P3", "P1", "Pz", "P2", "P4",
]

red_box7_prim = [
    "Fpz", "AFz", "Fz", "FCz", "Cz", "CPz",
    "P1", "Pz", "P2",
]

# Midline plus clusters at the back and in the middle.
red_box8 = [
    "Fpz", "AFz", "Fz", "FCz",
    "C3", "C1", "Cz", "C2", "C4",
    "CPz",
    "P3", "P1", "Pz", "P2", "P4",
]

red_box8_prim = [
    "Fpz", "AFz", "Fz", "FCz",
    "C1", "Cz", "C2",
    "CPz",
    "P1", "Pz", "P2",
]

# Midline plus clusters at the back, in the middle and at the front.
red_box9 = [
    "Fpz", "AFz",
    "F3", "F1", "Fz", "F2", "F4",
    "FCz",
    "C3", "C1", "Cz", "C2", "C4",
    "CPz",
    "P3", "P1", "Pz", "P2", "P4",
]

red_box9_prim = [
    "Fpz", "AFz",
    "F1", "Fz", "F2",
    "FCz",
    "C1", "Cz", "C2",
    "CPz",
    "P1", "Pz", "P2",
]

# Boxes compared in the spatial-filter analysis.
box_list = [red_box7_prim, red_box8_prim, red_box9_prim, box_5]
# box_list = [red_box7_prim, red_box9_prim]


In [None]:
def create_df_data(
    test_participants=False,
    test_epochs=False,
    info_filename=None,
    info="all",
    personal=True,
):
    """Load the epochs of every participant of one split into a DataFrame.

    The ``*.vhdr`` header files found in ``data/responses_100_600`` (relative
    to the parent of the current working directory) are sorted and then split
    deterministically (``random_state=0``) into 70% train and 30% test
    participants. By default the train participants are loaded.

    Participants with fewer than 5 error epochs or fewer than 5 correct
    epochs are skipped.

    Parameters
    ----------
    test_participants: bool
        If True, load the test participants instead of the train ones.
    test_epochs: bool
        Accepted for backward compatibility; not used by the current
        implementation (epochs are not split within a participant).
    info_filename: String | None
        path to .csv file with additional data.
    info: array
        listed parameters from the info file to be loaded.
        if 'all', load all parameters
    personal: bool
        Accepted for backward compatibility; not used by the current
        implementation.

    Returns
    -------
    go_nogo_data_df : pandas.DataFrame
        One row per retained participant, as built by
        ``create_df_from_epochs``. Empty if no participant is retained.

    """
    print(os.path.abspath(""))
    dir_path = os.path.dirname(os.path.abspath(""))
    print(dir_path)
    header_files_glob = os.path.join(dir_path, "data/responses_100_600/*.vhdr")
    # Sort so that the split below does not depend on filesystem order.
    header_files = sorted(glob.glob(header_files_glob))

    # hold out 30% of the participants for testing
    h_train, h_test = train_test_split(header_files, test_size=0.3, random_state=0)

    print(f"train size: {len(h_train)} ; test size: {len(h_test)}")

    header_files = h_test if test_participants else h_train

    # Collect the per-participant frames and concatenate once at the end:
    # DataFrame.append was removed in pandas 2.0 and re-copied the whole
    # frame on every iteration.
    participant_frames = []
    for file in header_files:
        #  load eeg data for given participant
        participant_epochs = load_epochs_from_file(file)

        # and compute participant's id from file_name
        participant_id = re.match(r".*_(\w+).*", file).group(1)

        error = participant_epochs["error_response"]._data
        correct = participant_epochs["correct_response"]._data

        # exclude those participants who have too few samples
        if len(error) < 5 or len(correct) < 5:
            continue

        # one-row dataframe: id | epochs | marker | additional info...
        participant_frames.append(
            create_df_from_epochs(
                participant_id, participant_epochs, info_filename, info
            )
        )
        print(participant_id)

    # pd.concat raises on an empty list; keep returning an empty frame.
    if not participant_frames:
        return pd.DataFrame()
    return pd.concat(participant_frames, ignore_index=True)

In [None]:
def create_df_from_epochs(id, participant_epochs, info_filename, info):
    """Build a one-row DataFrame holding all epochs of one participant.

    The row has the columns ``id``, ``epoch`` (the whole
    ``participant_epochs`` object, not split by response type) and ``marker``
    (always ``ALL``). When ``info_filename`` is given, the columns of the
    participant's row in that CSV file (matched on the ``Demo_kod`` column)
    are joined onto the row.

    Parameters
    ----------
    id: String
        participant's id extracted from filename
    participant_epochs: object
        the participant's epochs, stored as-is in the ``epoch`` column
    info_filename: String | None
        path to .csv file with additional data; None adds no extra columns.
    info: array
        listed parameters from the info file to be loaded.
        if 'all', load all parameters

    Returns
    -------
    participant_df : pandas.DataFrame

    """
    info_df = pd.DataFrame()

    # get additional info from file
    if info_filename is not None:
        if info == "all":
            rumination_df = pd.read_csv(info_filename)
        else:
            rumination_df = pd.read_csv(info_filename, usecols=["Demo_kod", *info])
        # Re-index from 0 so the join below lines up with the single row.
        info_df = rumination_df.loc[rumination_df["Demo_kod"] == id].reset_index(
            drop=True
        )

    participant_df = pd.DataFrame(
        {"id": [id], "epoch": [participant_epochs], "marker": [ALL]}
    ).join(info_df)

    # The frame used to be appended to an empty DataFrame via
    # DataFrame.append (removed in pandas 2.0); it already is the result.
    return participant_df.reset_index(drop=True)

In [None]:
def load_epochs_from_file(file, reject_bad_segments="auto", mask=None):
    """Load response-locked epochs from a BrainVision header file.

    The eight response markers are merged into two event types,
    ``correct_response`` and ``error_response``, and the recording is cut
    into epochs spanning the module-level ``tmin``..``tmax`` window. No
    baseline correction and no amplitude-based rejection is applied; only
    segments overlapping "bad" annotations are dropped.

    Args:
        file: path to a header file (.vhdr); the marker file (.vmrk) is
            expected next to it under the same base name.
        reject_bad_segments: accepted for backward compatibility; not used
            by the current implementation.
        mask: accepted for backward compatibility; not used by the current
            implementation.

    Returns:
        mne Epochs (preloaded)

    """
    # Import the BrainVision data into an MNE Raw object
    raw = mne.io.read_raw_brainvision(file)

    # The marker file differs from the header file only in its extension.
    annot_file = os.path.splitext(file)[0] + ".vmrk"

    # Attach the event information to the raw object as MNE annotations
    raw.set_annotations(mne.read_annotations(annot_file))

    # Map with response markers only
    event_dict = {
        "Stimulus/RE*ex*1_n*1_c_1*R*FB": 10004,
        "Stimulus/RE*ex*1_n*1_c_1*R*FG": 10005,
        "Stimulus/RE*ex*1_n*1_c_2*R": 10006,
        "Stimulus/RE*ex*1_n*2_c_1*R": 10007,
        "Stimulus/RE*ex*2_n*1_c_1*R": 10008,
        "Stimulus/RE*ex*2_n*2_c_1*R*FB": 10009,
        "Stimulus/RE*ex*2_n*2_c_1*R*FG": 10010,
        "Stimulus/RE*ex*2_n*2_c_2*R": 10011,
    }

    # Map for merged correct/error response markers.
    # NOTE(review): these event codes (correct=0, error=1) are the opposite
    # of the module-level CORRECT=1 / ERROR=0 constants; epochs are selected
    # by name in this file, but confirm nothing relies on the numeric codes.
    merged_event_dict = {"correct_response": 0, "error_response": 1}

    # Reconstruct the original events from Raw object
    events, _ = mne.events_from_annotations(raw, event_id=event_dict)

    # Merge correct/error response events
    merged_events = mne.merge_events(
        events,
        [10004, 10005, 10009, 10010],
        merged_event_dict["correct_response"],
        replace_events=True,
    )
    merged_events = mne.merge_events(
        merged_events,
        [10006, 10007, 10008, 10011],
        merged_event_dict["error_response"],
        replace_events=True,
    )

    # Peak-to-peak rejection (reject=/flat=) and channel picking (picks=) are
    # intentionally not applied here.
    return mne.Epochs(
        raw=raw,
        events=merged_events,
        event_id=merged_event_dict,
        tmin=tmin,
        tmax=tmax,
        baseline=None,
        reject_by_annotation=True,
        preload=True,
    )

#### Read the data

In [None]:
# Training participants: reuse the pickled frame when it exists, otherwise
# build it from the raw recordings and cache it as a pickle.
df_name = "go_nogo_100_600_df_3-5_all_scales"
pickled_data_filename = "../data/" + df_name + ".pkl"
# info_filename = "../data/Demographic_Questionnaires_Behavioral_Results_N=163.csv"
info_filename = "../data/scales/all_scales.csv"

if not os.path.isfile(pickled_data_filename):
    print("Pickled file not found. Loading data...")
    epochs_df = create_df_data(
        test_participants=False,
        info="all",
        personal=False,
        info_filename=info_filename,
    )
    epochs_df.name = df_name
    # cache the freshly built frame for the next run
    epochs_df.to_pickle("../data/" + epochs_df.name + ".pkl")
    print("Done. Pickle file created")
else:
    print("Pickled file found. Loading pickled data...")
    epochs_df = pd.read_pickle(pickled_data_filename)
    print("Done")

In [None]:
# df_name = "go_nogo_500_300_df_3-5_all_scales"
# pickled_data_filename = "../data/" + df_name + ".pkl"
# info_filename = "../data/Demographic_Questionnaires_Behavioral_Results_N=163.csv"

# # Check if data is already loaded
# if os.path.isfile(pickled_data_filename):
#     print("Pickled file found. Loading pickled data...")
#     epochs_df_3 = pd.read_pickle(pickled_data_filename)
#     print("Done")
# else:
#     print("Pickled file not found. Loading data...")
#     epochs_df_3 = create_df_data(
#         test_participants=False, info="all", personal=False, info_filename=info_filename
#     )
#     epochs_df_3.name = df_name
#     # save loaded data into a pickle file
#     epochs_df_3.to_pickle("../data/" + epochs_df_3.name + ".pkl")
#     print("Done. Pickle file created")

#### Read data for external testing

In [None]:
# Held-out (test) participants: reuse the pickled frame when it exists,
# otherwise build it from the raw recordings and cache it as a pickle.
df_name = "go_nogo_100_600_test_df_3-5_all_scales"
pickled_data_filename = "../data/" + df_name + ".pkl"
info_filename = "../data/scales/all_scales.csv"

if not os.path.isfile(pickled_data_filename):
    print("Pickled file not found. Loading data...")
    epochs_test_df = create_df_data(
        test_participants=True,
        info="all",
        personal=False,
        info_filename=info_filename,
    )
    epochs_test_df.name = df_name
    # cache the freshly built frame for the next run
    epochs_test_df.to_pickle("../data/" + epochs_test_df.name + ".pkl")
    print("Done. Pickle file created")
else:
    print("Pickled file found. Loading pickled data...")
    epochs_test_df = pd.read_pickle(pickled_data_filename)
    print("Done")

In [None]:
# Held-out participants for the "500_300" variant of the data set.
df_name = "go_nogo_500_300_test_df_3-5_all"
pickled_data_filename = "../data/" + df_name + ".pkl"
# Note: this rebinds info_filename to the demographic questionnaire file.
info_filename = "../data/Demographic_Questionnaires_Behavioral_Results_N=163.csv"

# Check if data is already loaded
if os.path.isfile(pickled_data_filename):
    print("Pickled file found. Loading pickled data...")
    epochs_test_df_3 = pd.read_pickle(pickled_data_filename)
    print("Done")
else:
    # NOTE(review): create_df_data always reads data/responses_100_600 and
    # load_epochs_from_file uses the module-level tmin/tmax, so this branch
    # would build the frame from the 100_600 recordings and save it under
    # the 500_300 name. Confirm the pickle is always present, or make the
    # loader configurable before relying on this fallback.
    print("Pickled file not found. Loading data...")
    epochs_test_df_3 = create_df_data(
        test_participants=True, info="all", personal=False, info_filename=info_filename
    )
    epochs_test_df_3.name = df_name
    # save loaded data into a pickle file
    epochs_test_df_3.to_pickle("../data/" + epochs_test_df_3.name + ".pkl")
    print("Done. Pickle file created")

In [None]:
# Short aliases used by the analysis cells below.
X_train_df_100 = epochs_df
# Held-out participants; read by the "External" between-subject cell, which
# raised a NameError while this alias was commented out.
X_test_df_100 = epochs_test_df

# epochs_df_3 is only produced by a loading cell that is currently commented
# out, so the 500 ms-baseline aliases stay disabled.
# X_train_df_500 = epochs_df_3
# X_test_df_500 = epochs_test_df_3

In [None]:
# Component analysed below: 'pe' selects the Pe channel box and Pe features;
# any other value (here 'ern') selects the ERN ones.
condition = 'ern'

---
# Between subject variation

In [None]:
# Long-format accumulator: one row per feature value, labelled with the name
# of the pipeline variant that produced it.
between_df = pd.DataFrame({'pipeline': [], 'values': []})

In [None]:
# Channels fed into the pipelines below, chosen by the analysed component.
if condition == 'pe':
    box = [
        "Fpz", "AFz", "Fz", "FCz",
        "C1", "Cz", "C2",
        "CPz",
        "P1", "Pz", "P2",
    ]
else:
    box = [
        "Fpz", "AFz", "Fz", "FCz", "Cz", "CPz",
        "P1", "Pz", "P2",
    ]

- lowpass filters; ref:M, baseline:100, SF:no

In [None]:
# Between-subject distribution of the selected feature for several low-pass
# cutoffs; the values of each cutoff are added to between_df under "<cutoff> Hz".
for cutoff in [40, 30, 20, 15]:
    # Fresh deep copy of the training frame for every run.
    X_train_df_100copy = pd.DataFrame(copy.deepcopy(X_train_df_100.to_dict()))

    pipeline_name = str(cutoff) + ' Hz'

    ern_features = Pipeline(steps=[
        ("ern_data_extraction", ErnTransformer()),
        ("ern_amplitude", ErnAmplitude2()),
    ])

    pe_features = Pipeline(steps=[
        ("pe_data_extraction", PeTransformer(start_pe_bin=3, stop_pe_bin=8)),
        ("pe_amplitude", PeAmplitude2()),
    ])

    # Only the feature branch matching `condition` goes into the union.
    ern_pe_features = (
        FeatureUnion([("pe_features", pe_features)])
        if condition == 'pe'
        else FeatureUnion([("ern_features", ern_features)])
    )

    x_pre = Pipeline([
        ("channels_extraction", PickChannels(channels_list=box)),
        ("average", Evoked()),
        ('extract_data', ExtractData()),
        ("lowpass_filter", LowpassFilter2(cutoff=cutoff)),
        ("binning", BinTransformer(step=12)),
        ("centering", CenteredSignalAfterBaseline3()),
        ("features", ern_pe_features),
    ]).fit_transform(X_train_df_100copy)

    # Collapse the feature columns into one value per row of x_pre.
    x_feature_100_between = np.sum(x_pre, axis=1)
    x_100_std_between = np.std(x_feature_100_between, axis=0)

    values = x_feature_100_between.flatten().tolist()
    names = [pipeline_name] * len(x_feature_100_between)

    temp_df = pd.DataFrame(zip(names, values), columns=['pipeline', 'values'])

    # DataFrame.append was removed in pandas 2.0; pd.concat is equivalent.
    between_df = pd.concat([between_df, temp_df], ignore_index=True)
   

In [None]:
# Low-pass cutoff passed to LowpassFilter2 in the cells below; set explicitly
# because the preceding loop leaves `cutoff` at its last value.
cutoff = 40

- reference : Mastoids vs Average ; lowpass:40, baseline:100, SF=no

In [None]:
# Compare two reference schemes. Both variants run the same pipeline; the
# 'Average' one inserts a ReferenceToAverage step right after channel
# picking. The values are added to between_df under the variant name.
reference_variants = [
    ('Mastoids', []),
    ('Average', [("reference_to_avg", ReferenceToAverage())]),
]

for pipeline_name, reference_steps in reference_variants:
    # Fresh deep copy of the training frame for every run.
    X_train_df_100copy = pd.DataFrame(copy.deepcopy(X_train_df_100.to_dict()))

    ern_features = Pipeline(steps=[
        ("ern_data_extraction", ErnTransformer()),
        ("ern_amplitude", ErnAmplitude2()),
    ])

    pe_features = Pipeline(steps=[
        ("pe_data_extraction", PeTransformer(start_pe_bin=3, stop_pe_bin=8)),
        ("pe_amplitude", PeAmplitude2()),
    ])

    # Only the feature branch matching `condition` goes into the union.
    ern_pe_features = (
        FeatureUnion([("pe_features", pe_features)])
        if condition == 'pe'
        else FeatureUnion([("ern_features", ern_features)])
    )

    x_pre = Pipeline([
        ("channels_extraction", PickChannels(channels_list=box)),
        *reference_steps,
        ("average", Evoked()),
        ('extract_data', ExtractData()),
        ("lowpass_filter", LowpassFilter2(cutoff=cutoff)),
        ("binning", BinTransformer(step=12)),
        ("centering", CenteredSignalAfterBaseline3()),
        ("features", ern_pe_features),
    ]).fit_transform(X_train_df_100copy)

    # Collapse the feature columns into one value per row of x_pre.
    x_feature_100_between = np.sum(x_pre, axis=1)
    x_100_std_between = np.std(x_feature_100_between, axis=0)

    values = x_feature_100_between.flatten().tolist()
    names = [pipeline_name] * len(x_feature_100_between)

    temp_df = pd.DataFrame(zip(names, values), columns=['pipeline', 'values'])

    # DataFrame.append was removed in pandas 2.0; pd.concat is equivalent.
    between_df = pd.concat([between_df, temp_df], ignore_index=True)

- Baseline: -100, -500 ; ref:M, lowpass:40, SF=no

In [None]:
# Compare two baseline windows: the same pipeline is run on the frame
# aliased as X_train_df_100 (labelled '-100:0') and on X_train_df_500
# (labelled '-500:-300'). `cutoff` and `box` come from earlier cells.

X_train_df_100copy = pd.DataFrame(copy.deepcopy(X_train_df_100.to_dict()))

pipeline_name = '-100:0'

ern_features = Pipeline(steps=[
    ("ern_data_extraction", ErnTransformer()),
    ("ern_amplitude", ErnAmplitude2()),
])

pe_features = Pipeline(steps=[
    ("pe_data_extraction", PeTransformer(start_pe_bin=3, stop_pe_bin=8)),
    ("pe_amplitude", PeAmplitude2()),
])

# Only the feature branch matching `condition` goes into the union.
ern_pe_features = (
    FeatureUnion([("pe_features", pe_features)])
    if condition == 'pe'
    else FeatureUnion([("ern_features", ern_features)])
)

x_pre = Pipeline([
    ("channels_extraction", PickChannels(channels_list=box)),
    ("average", Evoked()),
    ('extract_data', ExtractData()),
    ("lowpass_filter", LowpassFilter2(cutoff=cutoff)),
    ("binning", BinTransformer(step=12)),
    ("centering", CenteredSignalAfterBaseline3()),
    ("features", ern_pe_features),
]).fit_transform(X_train_df_100copy)

# Collapse the feature columns into one value per row of x_pre.
x_feature_100_between = np.sum(x_pre, axis=1)
x_100_std_between = np.std(x_feature_100_between, axis=0)

values = x_feature_100_between.flatten().tolist()
names = [pipeline_name] * len(x_feature_100_between)

temp_df = pd.DataFrame(zip(names, values), columns=['pipeline', 'values'])

# DataFrame.append was removed in pandas 2.0; pd.concat is equivalent.
between_df = pd.concat([between_df, temp_df], ignore_index=True)

##########################################################################

# NOTE(review): X_train_df_500 is only assigned in a line that is currently
# commented out, so this half raises NameError until that frame is loaded.
X_train_df_500copy = pd.DataFrame(copy.deepcopy(X_train_df_500.to_dict()))

pipeline_name = '-500:-300'

# Rebuilt rather than reused so that no state fitted above is carried over.
ern_features = Pipeline(steps=[
    ("ern_data_extraction", ErnTransformer()),
    ("ern_amplitude", ErnAmplitude2()),
])

pe_features = Pipeline(steps=[
    ("pe_data_extraction", PeTransformer(start_pe_bin=3, stop_pe_bin=8)),
    ("pe_amplitude", PeAmplitude2()),
])

ern_pe_features = (
    FeatureUnion([("pe_features", pe_features)])
    if condition == 'pe'
    else FeatureUnion([("ern_features", ern_features)])
)

x_pre = Pipeline([
    ("channels_extraction", PickChannels(channels_list=box)),
    ("average", Evoked()),
    ('extract_data', ExtractData()),
    ("lowpass_filter", LowpassFilter2(cutoff=cutoff)),
    ("binning", BinTransformer(step=12)),
    ("centering", CenteredSignalAfterBaseline3()),
    ("features", ern_pe_features),
]).fit_transform(X_train_df_500copy)

x_feature_500_between = np.sum(x_pre, axis=1)
x_500_std_between = np.std(x_feature_500_between, axis=0)

values = x_feature_500_between.flatten().tolist()
names = [pipeline_name] * len(x_feature_500_between)

temp_df = pd.DataFrame(zip(names, values), columns=['pipeline', 'values'])

between_df = pd.concat([between_df, temp_df], ignore_index=True)

- spatial filter with different boxes

In [None]:
# Channel boxes compared by the spatial-filter loop below; their position in
# this list becomes the index in the 'SF Box-<index>' pipeline label.
box_list = [red_box7_prim, red_box8_prim, red_box9_prim, box_5]

In [None]:
# Between-subject distribution of the selected feature with a PCA spatial
# filter, once per channel box. Note that the loop rebinds the global `box`.
for index, box in enumerate(box_list):
    # Fresh deep copy of the training frame for every run.
    X_train_df_100copy = pd.DataFrame(copy.deepcopy(X_train_df_100.to_dict()))

    pipeline_name = f'SF Box-{index}'

    ern_features = Pipeline(steps=[
        ("ern_data_extraction", ErnTransformer()),
        ("ern_amplitude", ErnAmplitude2()),
    ])

    pe_features = Pipeline(steps=[
        ("pe_data_extraction", PeTransformer(start_pe_bin=3, stop_pe_bin=8)),
        ("pe_amplitude", PeAmplitude2()),
    ])

    # Only the feature branch matching `condition` goes into the union.
    ern_pe_features = (
        FeatureUnion([("pe_features", pe_features)])
        if condition == 'pe'
        else FeatureUnion([("ern_features", ern_features)])
    )

    x_pre = Pipeline([
        ("channels_extraction", PickChannels(channels_list=box)),
        ("average", Evoked()),
        ('extract_data', ExtractData()),
        ("spatial_filter_preprocessing", SpatialFilterPreprocessing()),
        ("spatial_filter", PCA(n_components=4, random_state=random_state)),
        ("spatial_filter_postprocessing", SpatialFilterPostprocessing(timepoints_count=181)),
        ("lowpass_filter", LowpassFilter2(cutoff=cutoff)),
        ("binning", BinTransformer(step=12)),
        ("centering", CenteredSignalAfterBaseline3()),
        ("features", ern_pe_features),
    ]).fit_transform(X_train_df_100copy)

    # Collapse the feature columns into one value per row of x_pre.
    x_feature_100_sf_between = np.sum(x_pre, axis=1)
    x_100_std_sf_between = np.std(x_feature_100_sf_between, axis=0)

    values = x_feature_100_sf_between.flatten().tolist()
    names = [pipeline_name] * len(x_feature_100_sf_between)

    temp_df = pd.DataFrame(zip(names, values), columns=['pipeline', 'values'])

    # DataFrame.append was removed in pandas 2.0; pd.concat is equivalent.
    between_df = pd.concat([between_df, temp_df], ignore_index=True)

------
## Internal and external between-subject variance

In [None]:
# Component analysed in this section: 'pe' selects the Pe channel box and Pe
# features; any other value (here 'ern') selects the ERN ones.
condition = 'ern'

In [None]:
# Long-format accumulator for the 'Internal' and 'External' feature values
# (one row per value, labelled with the set it came from).
between_internal_external_df = pd.DataFrame({'pipeline': [], 'values': []})

In [None]:
# Channels fed into the pipelines below, chosen by the analysed component.
if condition == 'pe':
    box = [
        "Fpz", "AFz", "Fz", "FCz",
        "C1", "Cz", "C2",
        "CPz",
        "P1", "Pz", "P2",
    ]
else:
    box = [
        "Fpz", "AFz", "Fz", "FCz", "Cz", "CPz",
        "P1", "Pz", "P2",
    ]

In [None]:
# Low-pass cutoff passed to LowpassFilter2 in the two cells below.
cutoff = 40

In [None]:
# 'Internal' set: feature values computed on the training participants with
# the PCA spatial-filter pipeline.
X_train_df_100copy = pd.DataFrame(copy.deepcopy(X_train_df_100.to_dict()))

pipeline_name = 'Internal'

ern_features = Pipeline(steps=[
    ("ern_data_extraction", ErnTransformer()),
    ("ern_amplitude", ErnAmplitude2()),
])

pe_features = Pipeline(steps=[
    ("pe_data_extraction", PeTransformer(start_pe_bin=3, stop_pe_bin=9)),
    ("pe_amplitude", PeAmplitude2()),
])

# Only the feature branch matching `condition` goes into the union.
ern_pe_features = (
    FeatureUnion([("pe_features", pe_features)])
    if condition == 'pe'
    else FeatureUnion([("ern_features", ern_features)])
)

x_pre = Pipeline([
    ("channels_extraction", PickChannels(channels_list=box)),
    ("average", Evoked()),
    ('extract_data', ExtractData()),
    ("spatial_filter_preprocessing", SpatialFilterPreprocessing()),
    ("spatial_filter", PCA(n_components=4, random_state=random_state)),
    ("spatial_filter_postprocessing", SpatialFilterPostprocessing(timepoints_count=181)),
    ("lowpass_filter", LowpassFilter2(cutoff=cutoff)),
    ("binning", BinTransformer(step=12)),
    ("centering", CenteredSignalAfterBaseline3()),
    ("features", ern_pe_features),
]).fit_transform(X_train_df_100copy)

# Collapse the feature columns into one value per row of x_pre.
x_feature_100_sf_between = np.sum(x_pre, axis=1)
x_100_std_sf_between = np.std(x_feature_100_sf_between, axis=0)

values = x_feature_100_sf_between.flatten().tolist()
names = [pipeline_name] * len(x_feature_100_sf_between)

temp_df = pd.DataFrame(zip(names, values), columns=['pipeline', 'values'])

# DataFrame.append was removed in pandas 2.0; pd.concat is equivalent.
between_internal_external_df = pd.concat(
    [between_internal_external_df, temp_df], ignore_index=True
)

In [None]:
# 'External' set: the same pipeline as for 'Internal', fitted and applied on
# the held-out participants (X_test_df_100 must be defined by an earlier cell).
X_test_df_100copy = pd.DataFrame(copy.deepcopy(X_test_df_100.to_dict()))

pipeline_name = 'External'

ern_features = Pipeline(steps=[
    ("ern_data_extraction", ErnTransformer()),
    ("ern_amplitude", ErnAmplitude2()),
])

pe_features = Pipeline(steps=[
    ("pe_data_extraction", PeTransformer(start_pe_bin=3, stop_pe_bin=9)),
    ("pe_amplitude", PeAmplitude2()),
])

# Only the feature branch matching `condition` goes into the union.
ern_pe_features = (
    FeatureUnion([("pe_features", pe_features)])
    if condition == 'pe'
    else FeatureUnion([("ern_features", ern_features)])
)

x_pre = Pipeline([
    ("channels_extraction", PickChannels(channels_list=box)),
    ("average", Evoked()),
    ('extract_data', ExtractData()),
    ("spatial_filter_preprocessing", SpatialFilterPreprocessing()),
    ("spatial_filter", PCA(n_components=4, random_state=random_state)),
    ("spatial_filter_postprocessing", SpatialFilterPostprocessing(timepoints_count=181)),
    ("lowpass_filter", LowpassFilter2(cutoff=cutoff)),
    ("binning", BinTransformer(step=12)),
    ("centering", CenteredSignalAfterBaseline3()),
    ("features", ern_pe_features),
]).fit_transform(X_test_df_100copy)

# Collapse the feature columns into one value per row of x_pre.
x_feature_100_sf_between = np.sum(x_pre, axis=1)
x_100_std_sf_between = np.std(x_feature_100_sf_between, axis=0)

values = x_feature_100_sf_between.flatten().tolist()
names = [pipeline_name] * len(x_feature_100_sf_between)

temp_df = pd.DataFrame(zip(names, values), columns=['pipeline', 'values'])

# DataFrame.append was removed in pandas 2.0; pd.concat is equivalent.
between_internal_external_df = pd.concat(
    [between_internal_external_df, temp_df], ignore_index=True
)

## Visualization

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Figure sizes are given in centimetres; matplotlib expects inches.
cm = 1/2.54
dpi = 200

sns.set_style("whitegrid")
plt.rcParams.update({
    'figure.dpi': dpi,
    'figure.figsize': [9*cm, 3.5*cm],
    "font.family": "Times New Roman",
    'ytick.labelsize': 5,
    'xtick.labelsize': 5,
    'axes.labelsize': 6,
    "axes.edgecolor": ".15",
    "axes.linewidth": 0.3,
})

plt.ticklabel_format(style='sci', axis='x', scilimits=(-2, 2))
plt.tick_params(axis='both', which='major', bottom=True, left=True)

# Use the 4th and 3rd colours of seaborn's 'deep' palette, in that order.
pal = sns.color_palette('deep')
colors = [pal.as_hex()[position] for position in (3, 2)]

sns.set_palette(sns.color_palette(colors))

# One point per pipeline label: the standard deviation of its values.
ax = sns.pointplot(
    x='values',
    y='pipeline',
    hue='pipeline',
    data=between_internal_external_df,
    orient='h',
    join=False,
    estimator=np.std,
    ci=95,
    capsize=0,
    errwidth=1,
    scale=0.4,
    legend=False,
)

# Replace whatever legend was drawn with an empty, frameless one.
plt.legend([], [], frameon=False)

for side in ('bottom', 'top', 'right', 'left'):
    ax.spines[side].set_color('black')

ax.set_xlabel("Standard Deviation")
ax.set_ylabel('')

ax.figure.savefig(f"paper_images/between_subject_internal_external_{condition}_dpi_{dpi}.png",  bbox_inches='tight')

----
# WITHIN SUBJECT
- without spatial filter

In [None]:
# Empty accumulator; the cells below add {'pipeline', 'values'} rows to it.
within_df = pd.DataFrame({column: [] for column in ('pipeline', 'values')})

In [None]:
# Channel selection used by the within-subject cells below; the 'pe'
# variant additionally contains C1 and C2.
if condition == 'pe':
    box = [
        "Fpz", "AFz", "Fz", "FCz",
        "C1", "Cz", "C2",
        "CPz",
        "P1", "Pz", "P2",
    ]
else:
    box = [
        "Fpz", "AFz", "Fz", "FCz",
        "Cz",
        "CPz",
        "P1", "Pz", "P2",
    ]

- lowpass filters; ref:M, baseline:100, SF:no

In [None]:
# Within-subject spread for several low-pass cutoffs: one standard deviation
# per row of X_train_df_100 and per cutoff.
#
# Results are gathered in a plain list and added to within_df once at the
# end: DataFrame.append was removed in pandas 2.0, and calling it inside the
# loop copied the whole frame on every iteration.
lowpass_rows = []

for cutoff in [40, 30, 20, 15]:
    # Fresh deep copy per cutoff (via a dict round-trip) so the source frame
    # stays untouched.
    X_train_df_100copy = pd.DataFrame(copy.deepcopy(X_train_df_100.to_dict()))

    pipeline_name = str(cutoff) + ' Hz'

    for i in range(0, len(X_train_df_100copy)):

        # Single-row frame: the pipeline sees one record at a time.
        X = X_train_df_100copy[i:i+1]
        x_pre = Pipeline([
            ("channels_extraction", PickChannels(channels_list=box)),
            ('extract_data', ExtractDataEpochs()),
        ]).fit_transform(X)

        # Only one row went in, so keep the single extracted entry.
        x_pre = x_pre[0]

        ern_features = Pipeline(steps=[
            ("ern_data_extraction", ErnTransformer()),
            ("ern_amplitude", ErnAmplitude2()),
        ])

        pe_features = Pipeline(steps=[
            ("pe_data_extraction", PeTransformer(start_pe_bin=3, stop_pe_bin=8)),
            ("pe_amplitude", PeAmplitude2()),
        ])

        # Only the branch selected by `condition` contributes features.
        ern_pe_features = FeatureUnion([("pe_features", pe_features)]) if condition == 'pe' else FeatureUnion([("ern_features", ern_features)])

        this_x = Pipeline([
            ("lowpass_filter", LowpassFilter2(cutoff=cutoff)),
            ("binning", BinTransformer(step=12)),
            ("centering", CenteredSignalAfterBaseline3()),
            ("features", ern_pe_features),
        ]).fit_transform(x_pre)

        # One summed feature value per entry of this_x, then their spread.
        f_vector = np.sum(this_x, axis=1)
        f_variance = np.std(f_vector)  # a standard deviation, despite the name

        data = {'pipeline': pipeline_name,
                'values': f_variance,
                }
        lowpass_rows.append(data)

new_rows = pd.DataFrame(lowpass_rows, columns=['pipeline', 'values'])
# An empty accumulator is replaced outright so that its untyped empty
# columns cannot influence the dtypes of the result.
within_df = new_rows if within_df.empty else pd.concat([within_df, new_rows], ignore_index=True)

In [None]:
# Reset the low-pass cutoff after the sweep above; the following cells read
# this module-level value through LowpassFilter2(cutoff=cutoff).
cutoff = 40

- reference : Mastoids vs Average ; lowpass:40, baseline:100, SF=no

In [None]:
# Within-subject spread without re-referencing ('Mastoids' label): one
# standard deviation per row of X_train_df_100.
X_train_df_100copy = pd.DataFrame(copy.deepcopy(X_train_df_100.to_dict()))

pipeline_name = 'Mastoids'

# Rows are collected first and added to within_df in one step:
# DataFrame.append was removed in pandas 2.0 and copied the frame per call.
mastoids_rows = []

for i in range(0, len(X_train_df_100copy)):

    # Single-row frame: the pipeline sees one record at a time.
    X = X_train_df_100copy[i:i+1]
    x_pre = Pipeline([
        ("channels_extraction", PickChannels(channels_list=box)),
        ('extract_data', ExtractDataEpochs()),
    ]).fit_transform(X)

    # Only one row went in, so keep the single extracted entry.
    x_pre = x_pre[0]

    ern_features = Pipeline(steps=[
        ("ern_data_extraction", ErnTransformer()),
        ("ern_amplitude", ErnAmplitude2()),
    ])

    pe_features = Pipeline(steps=[
        ("pe_data_extraction", PeTransformer(start_pe_bin=3, stop_pe_bin=8)),
        ("pe_amplitude", PeAmplitude2()),
    ])

    # Only the branch selected by `condition` contributes features.
    ern_pe_features = FeatureUnion([("pe_features", pe_features)]) if condition == 'pe' else FeatureUnion([("ern_features", ern_features)])

    this_x = Pipeline([
        ("lowpass_filter", LowpassFilter2(cutoff=cutoff)),
        ("binning", BinTransformer(step=12)),
        ("centering", CenteredSignalAfterBaseline3()),
        ("features", ern_pe_features),
    ]).fit_transform(x_pre)

    # One summed feature value per entry of this_x, then their spread.
    f_vector = np.sum(this_x, axis=1)
    f_variance = np.std(f_vector)  # a standard deviation, despite the name

    data = {'pipeline': pipeline_name,
            'values': f_variance,
            }
    mastoids_rows.append(data)

new_rows = pd.DataFrame(mastoids_rows, columns=['pipeline', 'values'])
# An empty accumulator is replaced outright so that its untyped empty
# columns cannot influence the dtypes of the result.
within_df = new_rows if within_df.empty else pd.concat([within_df, new_rows], ignore_index=True)

################################################################################################

# Within-subject spread after re-referencing to the average ('Average'
# label): one standard deviation per row of X_train_df_100.
X_train_df_100copy = pd.DataFrame(copy.deepcopy(X_train_df_100.to_dict()))

pipeline_name = 'Average'

# Rows are collected first and added to within_df in one step:
# DataFrame.append was removed in pandas 2.0 and copied the frame per call.
average_rows = []

for i in range(0, len(X_train_df_100copy)):

    # Single-row frame: the pipeline sees one record at a time.
    X = X_train_df_100copy[i:i+1]
    x_pre = Pipeline([
        ("channels_extraction", PickChannels(channels_list=box)),
        ("reference_to_avg", ReferenceToAverage()),
        ('extract_data', ExtractDataEpochs()),
    ]).fit_transform(X)

    # Only one row went in, so keep the single extracted entry.
    x_pre = x_pre[0]

    ern_features = Pipeline(steps=[
        ("ern_data_extraction", ErnTransformer()),
        ("ern_amplitude", ErnAmplitude2()),
    ])

    # NOTE(review): start_pe_bin=2 here, while the 'Mastoids' counterpart in
    # the same cell uses start_pe_bin=3 — confirm the difference is intended.
    pe_features = Pipeline(steps=[
        ("pe_data_extraction", PeTransformer(start_pe_bin=2, stop_pe_bin=8)),
        ("pe_amplitude", PeAmplitude2()),
    ])

    # Only the branch selected by `condition` contributes features.
    ern_pe_features = FeatureUnion([("pe_features", pe_features)]) if condition == 'pe' else FeatureUnion([("ern_features", ern_features)])

    this_x = Pipeline([
        ("lowpass_filter", LowpassFilter2(cutoff=cutoff)),
        ("binning", BinTransformer(step=12)),
        ("centering", CenteredSignalAfterBaseline3()),
        ("features", ern_pe_features),
    ]).fit_transform(x_pre)

    # One summed feature value per entry of this_x, then their spread.
    f_vector = np.sum(this_x, axis=1)
    f_variance = np.std(f_vector)  # a standard deviation, despite the name

    data = {'pipeline': pipeline_name,
            'values': f_variance,
            }
    average_rows.append(data)

new_rows = pd.DataFrame(average_rows, columns=['pipeline', 'values'])
# An empty accumulator is replaced outright so that its untyped empty
# columns cannot influence the dtypes of the result.
within_df = new_rows if within_df.empty else pd.concat([within_df, new_rows], ignore_index=True)

- Baseline: -100, -500 ; ref:M, lowpass:40, SF=no

In [None]:
# Within-subject spread for the '-100:0' baseline data set: one standard
# deviation per row of X_train_df_100.
X_train_df_100copy = pd.DataFrame(copy.deepcopy(X_train_df_100.to_dict()))

pipeline_name = '-100:0'

# Rows are collected first and added to within_df in one step:
# DataFrame.append was removed in pandas 2.0 and copied the frame per call.
baseline_100_rows = []

for i in range(0, len(X_train_df_100copy)):

    # Single-row frame: the pipeline sees one record at a time.
    X = X_train_df_100copy[i:i+1]
    x_pre = Pipeline([
        ("channels_extraction", PickChannels(channels_list=box)),
        ('extract_data', ExtractDataEpochs()),
    ]).fit_transform(X)
    # Only one row went in, so keep the single extracted entry.
    x_pre = x_pre[0]

    ern_features = Pipeline(steps=[
        ("ern_data_extraction", ErnTransformer()),
        ("ern_amplitude", ErnAmplitude2()),
    ])

    pe_features = Pipeline(steps=[
        ("pe_data_extraction", PeTransformer(start_pe_bin=3, stop_pe_bin=8)),
        ("pe_amplitude", PeAmplitude2()),
    ])

    # Only the branch selected by `condition` contributes features.
    ern_pe_features = FeatureUnion([("pe_features", pe_features)]) if condition == 'pe' else FeatureUnion([("ern_features", ern_features)])

    this_x = Pipeline([
        ("lowpass_filter", LowpassFilter2(cutoff=cutoff)),
        ("binning", BinTransformer(step=12)),
        ("centering", CenteredSignalAfterBaseline3()),
        ("features", ern_pe_features),
    ]).fit_transform(x_pre)

    # One summed feature value per entry of this_x, then their spread.
    f_vector = np.sum(this_x, axis=1)
    f_variance = np.std(f_vector)  # a standard deviation, despite the name

    data = {'pipeline': pipeline_name,
            'values': f_variance,
            }
    baseline_100_rows.append(data)

new_rows = pd.DataFrame(baseline_100_rows, columns=['pipeline', 'values'])
# An empty accumulator is replaced outright so that its untyped empty
# columns cannot influence the dtypes of the result.
within_df = new_rows if within_df.empty else pd.concat([within_df, new_rows], ignore_index=True)

##########################################################################

# Within-subject spread for the '-500:-300' baseline data set: one standard
# deviation per row of X_train_df_500.
X_train_df_500copy = pd.DataFrame(copy.deepcopy(X_train_df_500.to_dict()))

pipeline_name = '-500:-300'

# Rows are collected first and added to within_df in one step:
# DataFrame.append was removed in pandas 2.0 and copied the frame per call.
baseline_500_rows = []

for i in range(0, len(X_train_df_500copy)):

    # Single-row frame: the pipeline sees one record at a time.
    X = X_train_df_500copy[i:i+1]
    x_pre = Pipeline([
        ("channels_extraction", PickChannels(channels_list=box)),
        ('extract_data', ExtractDataEpochs()),
    ]).fit_transform(X)
    # Only one row went in, so keep the single extracted entry.
    x_pre = x_pre[0]

    ern_features = Pipeline(steps=[
        ("ern_data_extraction", ErnTransformer()),
        ("ern_amplitude", ErnAmplitude2()),
    ])

    pe_features = Pipeline(steps=[
        ("pe_data_extraction", PeTransformer(start_pe_bin=3, stop_pe_bin=8)),
        ("pe_amplitude", PeAmplitude2()),
    ])

    # Only the branch selected by `condition` contributes features.
    ern_pe_features = FeatureUnion([("pe_features", pe_features)]) if condition == 'pe' else FeatureUnion([("ern_features", ern_features)])

    this_x = Pipeline([
        ("lowpass_filter", LowpassFilter2(cutoff=cutoff)),
        ("binning", BinTransformer(step=12)),
        ("centering", CenteredSignalAfterBaseline3()),
        ("features", ern_pe_features),
    ]).fit_transform(x_pre)

    # One summed feature value per entry of this_x, then their spread.
    f_vector = np.sum(this_x, axis=1)
    f_variance = np.std(f_vector)  # a standard deviation, despite the name

    data = {'pipeline': pipeline_name,
            'values': f_variance,
            }
    baseline_500_rows.append(data)

new_rows = pd.DataFrame(baseline_500_rows, columns=['pipeline', 'values'])
# An empty accumulator is replaced outright so that its untyped empty
# columns cannot influence the dtypes of the result.
within_df = new_rows if within_df.empty else pd.concat([within_df, new_rows], ignore_index=True)

- spatial filter

In [None]:
# Channel selections iterated by the spatial-filter cell below; the position
# in this list becomes the index in the 'SF Box-<index>' pipeline label.
box_list = [red_box7_prim, red_box8_prim, red_box9_prim, box_5]

In [None]:
# Within-subject spread with a PCA spatial filter, once per channel
# selection in box_list. The filter is fitted on the averaged data of the
# whole training frame and then applied to each row separately.
#
# Rows are collected first and added to within_df in one step:
# DataFrame.append was removed in pandas 2.0 and copied the frame per call.
spatial_filter_rows = []

for index, box in enumerate(box_list):
    X_train_df_100copy = pd.DataFrame(copy.deepcopy(X_train_df_100.to_dict()))

    pre_pip_for_spatial_filter = Pipeline([
        ("channels_extraction", PickChannels(channels_list=box)),
        ("average", Evoked()),
        ('extract_averaged_data', ExtractData()),
        ("spatial_filter_preprocessing", SpatialFilterPreprocessing()),
        ("spatial_filter", PCA(n_components=4, random_state=random_state)),
    ]).fit(X_train_df_100copy)

    # Fitted PCA step, reused (without refitting) for every row below.
    spatial_filter = pre_pip_for_spatial_filter['spatial_filter']

    # Second fresh copy for the per-row pass.
    X_train_df_100copy = pd.DataFrame(copy.deepcopy(X_train_df_100.to_dict()))

    pipeline_name = f'SF Box-{index}'

    for i in range(0, len(X_train_df_100copy)):

        # Single-row frame: the pipeline sees one record at a time.
        X = X_train_df_100copy[i:i+1]
        x_pre = Pipeline([
            ("channels_extraction", PickChannels(channels_list=box)),
            ('extract_data', ExtractDataEpochs()),
        ]).fit_transform(X)
        # Only one row went in, so keep the single extracted entry.
        x_pre = x_pre[0]

        # transform() only: the spatial filter must keep the fit from above.
        x_pre_pre = Pipeline([
            ("spatial_filter_preprocessing", SpatialFilterPreprocessing()),
            ("spatial_filter", spatial_filter),
            ("spatial_filter_postprocessing", SpatialFilterPostprocessing(timepoints_count=181)),
            ("lowpass_filter", LowpassFilter2(cutoff=cutoff)),
            ("binning", BinTransformer(step=12)),
            ("centering", CenteredSignalAfterBaseline3()),
        ]).transform(x_pre)

        ern_features = Pipeline(steps=[
            ("ern_data_extraction", ErnTransformer()),
            ("ern_amplitude", ErnAmplitude2()),
        ])

        pe_features = Pipeline(steps=[
            ("pe_data_extraction", PeTransformer(start_pe_bin=3, stop_pe_bin=8)),
            ("pe_amplitude", PeAmplitude2()),
        ])

        # Only the branch selected by `condition` contributes features.
        ern_pe_features = FeatureUnion([("pe_features", pe_features)]) if condition == 'pe' else FeatureUnion([("ern_features", ern_features)])

        this_x = Pipeline([
            ("features", ern_pe_features),
        ]).fit_transform(x_pre_pre)

        # One summed feature value per entry of this_x, then their spread.
        f_vector = np.sum(this_x, axis=1)
        f_variance = np.std(f_vector)  # a standard deviation, despite the name

        data = {'pipeline': pipeline_name,
                'values': f_variance,
                }
        spatial_filter_rows.append(data)

new_rows = pd.DataFrame(spatial_filter_rows, columns=['pipeline', 'values'])
# An empty accumulator is replaced outright so that its untyped empty
# columns cannot influence the dtypes of the result.
within_df = new_rows if within_df.empty else pd.concat([within_df, new_rows], ignore_index=True)

In [None]:
# X_train_df_100copy = pd.DataFrame(copy.deepcopy(X_train_df_100.to_dict()))

# pipeline_name = 'Spatial Filter'

# for i in range(0,len(X_train_df_100copy)):

#     X = X_train_df_100copy[i:i+1]    
#     x_pre = Pipeline([
#             ("channels_extraction",PickChannels(channels_list=box)),
#             ('extract_data', ExtractDataEpochs()),
#     ]).fit_transform(X) 
#     x_pre = x_pre[0] 
    
#     x_pre_pre = Pipeline([
#             ("spatial_filter_preprocessing", SpatialFilterPreprocessing()),
#             ("spatial_filter", spatial_filter),
#             ("spatial_filter_postprocessing",SpatialFilterPostprocessing(timepoints_count=181)),
#             ("lowpass_filter", LowpassFilter2(cutoff=cutoff)),
#             ("binning", BinTransformer(step=12)),
#             ("centering", CenteredSignalAfterBaseline3_bis())]).transform(x_pre)


#     ern_features = Pipeline(steps=[
#                                     ("ern_data_extraction", ErnTransformer()),
#                                     ("ern_amplitude", ErnAmplitude2()),
#                                     # ("data_channel_swap", ChannelDataSwap()),
#                                     # ("postprocessing", PostprocessingTransformer()),
#                                     # ("scaler", StandardScaler()),
#                     ])


#     pe_features = Pipeline(steps = [
#                                     ("pe_data_extraction", PeTransformer(start_pe_bin=2, stop_pe_bin=8)),
#                                     ("pe_amplitude", PeAmplitude2()),
#                                     # ("data_channel_swap", ChannelDataSwap()),
#                                     # ("postprocessing", PostprocessingTransformer()),
#                                     # ("scaler", StandardScaler()),
#                     ])
    
#     ern_pe_features = FeatureUnion([("pe_features", pe_features)]) if condition == 'pe' else FeatureUnion([("ern_features", ern_features)])


#     this_x = Pipeline([
#             ("features", ern_pe_features),
#         ]).fit_transform(x_pre_pre)
    
    
#     f_vector = np.sum(this_x, axis=1)
#     f_variance = np.std(f_vector)

#     data = {'pipeline' : pipeline_name,
#             'values' : f_variance,
#            }

#     within_df = within_df.append(data, ignore_index = True)

----
## Internal and external within-subject variance

In [None]:
# Empty accumulator; the 'Internal' and 'External' cells below add
# {'pipeline', 'values'} rows to it.
within_internal_external_df = pd.DataFrame({column: [] for column in ('pipeline', 'values')})

In [None]:
# Channel selection for the internal/external comparison; the 'pe' variant
# additionally contains C1 and C2.
if condition == 'pe':
    box = [
        "Fpz", "AFz", "Fz", "FCz",
        "C1", "Cz", "C2",
        "CPz",
        "P1", "Pz", "P2",
    ]
else:
    box = [
        "Fpz", "AFz", "Fz", "FCz",
        "Cz",
        "CPz",
        "P1", "Pz", "P2",
    ]

In [None]:
# Deep copy of the training frame (dict round-trip); presumably guards
# X_train_df_100 against in-place changes by the transformers — TODO confirm.
X_train_df_100copy = pd.DataFrame(copy.deepcopy(X_train_df_100.to_dict()))

In [None]:
# Fit the spatial filter (4-component PCA) on the averaged training data.
pre_pip_for_spatial_filter = Pipeline(
    steps=[
        ("channels_extraction", PickChannels(channels_list=box)),
        ("average", Evoked()),
        ('extract_averaged_data', ExtractData()),
        ("spatial_filter_preprocessing", SpatialFilterPreprocessing()),
        ("spatial_filter", PCA(n_components=4, random_state=random_state)),
    ]
)
pre_pip_for_spatial_filter = pre_pip_for_spatial_filter.fit(X_train_df_100copy)

# Fitted PCA step, reused by the 'Internal' and 'External' cells below.
spatial_filter = pre_pip_for_spatial_filter.named_steps['spatial_filter']

In [None]:
# 'Internal' within-subject spread: the already fitted spatial filter is
# applied row by row to the training frame it was fitted on.
X_train_df_100copy = pd.DataFrame(copy.deepcopy(X_train_df_100.to_dict()))

pipeline_name = 'Internal'

# Rows are collected first and added to the accumulator in one step:
# DataFrame.append was removed in pandas 2.0 and copied the frame per call.
internal_rows = []

for i in range(0, len(X_train_df_100copy)):

    # Single-row frame: the pipeline sees one record at a time.
    X = X_train_df_100copy[i:i+1]
    x_pre = Pipeline([
        ("channels_extraction", PickChannels(channels_list=box)),
        ('extract_data', ExtractDataEpochs()),
    ]).fit_transform(X)
    # Only one row went in, so keep the single extracted entry.
    x_pre = x_pre[0]

    # transform() only: the spatial filter must keep its earlier fit.
    x_pre_pre = Pipeline([
        ("spatial_filter_preprocessing", SpatialFilterPreprocessing()),
        ("spatial_filter", spatial_filter),
        ("spatial_filter_postprocessing", SpatialFilterPostprocessing(timepoints_count=181)),
        ("lowpass_filter", LowpassFilter2(cutoff=cutoff)),
        ("binning", BinTransformer(step=12)),
        ("centering", CenteredSignalAfterBaseline3()),
    ]).transform(x_pre)

    ern_features = Pipeline(steps=[
        ("ern_data_extraction", ErnTransformer()),
        ("ern_amplitude", ErnAmplitude2()),
    ])

    pe_features = Pipeline(steps=[
        ("pe_data_extraction", PeTransformer(start_pe_bin=3, stop_pe_bin=8)),
        ("pe_amplitude", PeAmplitude2()),
    ])

    # Only the branch selected by `condition` contributes features.
    ern_pe_features = FeatureUnion([("pe_features", pe_features)]) if condition == 'pe' else FeatureUnion([("ern_features", ern_features)])

    this_x = Pipeline([
        ("features", ern_pe_features),
    ]).fit_transform(x_pre_pre)

    # One summed feature value per entry of this_x, then their spread.
    f_vector = np.sum(this_x, axis=1)
    f_variance = np.std(f_vector)  # a standard deviation, despite the name

    data = {'pipeline': pipeline_name,
            'values': f_variance,
            }
    internal_rows.append(data)

new_rows = pd.DataFrame(internal_rows, columns=['pipeline', 'values'])
# An empty accumulator is replaced outright so that its untyped empty
# columns cannot influence the dtypes of the result.
if within_internal_external_df.empty:
    within_internal_external_df = new_rows
else:
    within_internal_external_df = pd.concat(
        [within_internal_external_df, new_rows], ignore_index=True
    )

In [None]:
# 'External' within-subject spread: the spatial filter fitted on the
# training frame is applied row by row to the test frame.
X_test_df_100copy = pd.DataFrame(copy.deepcopy(X_test_df_100.to_dict()))

pipeline_name = 'External'

# Rows are collected first and added to the accumulator in one step:
# DataFrame.append was removed in pandas 2.0 and copied the frame per call.
external_rows = []

for i in range(0, len(X_test_df_100copy)):

    # Single-row frame: the pipeline sees one record at a time.
    X = X_test_df_100copy[i:i+1]
    x_pre = Pipeline([
        ("channels_extraction", PickChannels(channels_list=box)),
        ('extract_data', ExtractDataEpochs()),
    ]).fit_transform(X)
    # Only one row went in, so keep the single extracted entry.
    x_pre = x_pre[0]

    # transform() only: the spatial filter must keep its earlier fit.
    x_pre_pre = Pipeline([
        ("spatial_filter_preprocessing", SpatialFilterPreprocessing()),
        ("spatial_filter", spatial_filter),
        ("spatial_filter_postprocessing", SpatialFilterPostprocessing(timepoints_count=181)),
        ("lowpass_filter", LowpassFilter2(cutoff=cutoff)),
        ("binning", BinTransformer(step=12)),
        ("centering", CenteredSignalAfterBaseline3()),
    ]).transform(x_pre)

    ern_features = Pipeline(steps=[
        ("ern_data_extraction", ErnTransformer()),
        ("ern_amplitude", ErnAmplitude2()),
    ])

    # NOTE(review): start_pe_bin=2 here, while the 'Internal' cell that this
    # is compared against uses start_pe_bin=3 — confirm the difference is
    # intended.
    pe_features = Pipeline(steps=[
        ("pe_data_extraction", PeTransformer(start_pe_bin=2, stop_pe_bin=8)),
        ("pe_amplitude", PeAmplitude2()),
    ])

    # Only the branch selected by `condition` contributes features.
    ern_pe_features = FeatureUnion([("pe_features", pe_features)]) if condition == 'pe' else FeatureUnion([("ern_features", ern_features)])

    this_x = Pipeline([
        ("features", ern_pe_features),
    ]).fit_transform(x_pre_pre)

    # One summed feature value per entry of this_x, then their spread.
    f_vector = np.sum(this_x, axis=1)
    f_variance = np.std(f_vector)  # a standard deviation, despite the name

    data = {'pipeline': pipeline_name,
            'values': f_variance,
            }
    external_rows.append(data)

new_rows = pd.DataFrame(external_rows, columns=['pipeline', 'values'])
# An empty accumulator is replaced outright so that its untyped empty
# columns cannot influence the dtypes of the result.
if within_internal_external_df.empty:
    within_internal_external_df = new_rows
else:
    within_internal_external_df = pd.concat(
        [within_internal_external_df, new_rows], ignore_index=True
    )

## Visualization

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Figure sizes are written in centimetres and converted to inches via `cm`.
cm = 1/2.54
dpi = 200

sns.set_style("whitegrid")

# Applied after set_style so these values take precedence over the theme.
plt.rcParams.update({
    'figure.dpi': dpi,
    'figure.figsize': [9*cm, 3.5*cm],
    "font.family": "Times New Roman",
    'ytick.labelsize': 5,
    'xtick.labelsize': 5,
    'axes.labelsize': 6,
    "axes.edgecolor": ".15",
    "axes.linewidth": 0.3,
})

plt.ticklabel_format(style='sci', axis='x', scilimits=(-2, 2))
plt.tick_params(axis='both', which='major', bottom=True, left=True)

# Use entries 3 and 2 of the 'deep' palette, in that order.
pal = sns.color_palette('deep')
colors = [pal.as_hex()[position] for position in (3, 2)]
sns.set_palette(sns.color_palette(colors))

# One violin per pipeline label, showing the distribution of 'values'.
violin_options = dict(
    x='values',
    y='pipeline',
    data=within_internal_external_df,
    orient='h',
    inner="quartile",
    cut=2,
    scale='count',
    linewidth=0.4,
)
ax = sns.violinplot(**violin_options)

for side in ('bottom', 'top', 'right', 'left'):
    ax.spines[side].set_color('black')

ax.set_xlabel("Standard Deviation")
ax.set_ylabel('')

ax.figure.savefig(f"paper_images/within_subject_internal_external_{condition}_dpi_{dpi}.png",  bbox_inches='tight')

## Internal consistency

consistency = between_person / (between_person + within_person)

In [None]:
# Internal consistency for the 'Internal' / 'External' pipelines:
# between / (between + within), with one row per within-subject value.
#
# Rows are collected in a list and the frame is built once at the end:
# DataFrame.append was removed in pandas 2.0 and copied the frame per call.
consistency_rows = []

for pipeline in between_internal_external_df['pipeline'].unique().tolist():

    between_mask = between_internal_external_df['pipeline'] == pipeline
    within_mask = within_internal_external_df['pipeline'] == pipeline

    # Spread of the between-subject values for this pipeline.
    between_std = np.std(np.array(between_internal_external_df.loc[between_mask, 'values'].tolist()))
    # Per-row within-subject values for the same pipeline.
    within_list = np.array(within_internal_external_df.loc[within_mask, 'values'].tolist())

    for person_variance in within_list:

        internal = between_std/(between_std + person_variance)
        data = {'pipeline': pipeline,
                'internal_variance': internal,
                }
        consistency_rows.append(data)

consistency_internal_external_df = pd.DataFrame(
    consistency_rows, columns=['pipeline', 'internal_variance']
)

In [None]:
# Internal consistency for every pipeline label in between_df:
# between / (between + within), with one row per within-subject value.
#
# Rows are collected in a list and the frame is built once at the end:
# DataFrame.append was removed in pandas 2.0 and copied the frame per call.
internal_rows = []

for pipeline in between_df['pipeline'].unique().tolist():

    between_mask = between_df['pipeline'] == pipeline
    within_mask = within_df['pipeline'] == pipeline

    # Spread of the between-subject values for this pipeline.
    between_std = np.std(np.array(between_df.loc[between_mask, 'values'].tolist()))
    # Per-row within-subject values for the same pipeline.
    within_list = np.array(within_df.loc[within_mask, 'values'].tolist())

    for person_variance in within_list:

        internal = between_std/(between_std + person_variance)
        data = {'pipeline': pipeline,
                'internal_variance': internal,
                }
        internal_rows.append(data)

internal_df = pd.DataFrame(internal_rows, columns=['pipeline', 'internal_variance'])

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Figure sizes are written in centimetres and converted to inches via `cm`.
cm = 1/2.54
dpi = 200

sns.set_style("whitegrid")

# Applied after set_style so these values take precedence over the theme.
plt.rcParams.update({
    'figure.dpi': dpi,
    'figure.figsize': [9*cm, 3.5*cm],
    "font.family": "Times New Roman",
    'ytick.labelsize': 5,
    'xtick.labelsize': 5,
    'axes.labelsize': 6,
    "axes.edgecolor": ".15",
    "axes.linewidth": 0.3,
})

plt.ticklabel_format(style='sci', axis='x', scilimits=(-2, 2))
plt.tick_params(axis='both', which='major', bottom=True, left=True)

# Use entries 3 and 2 of the 'deep' palette, in that order.
pal = sns.color_palette('deep')
colors = [pal.as_hex()[position] for position in (3, 2)]
sns.set_palette(sns.color_palette(colors))

# One violin per pipeline label, showing the distribution of the
# consistency values.
violin_options = dict(
    x='internal_variance',
    y='pipeline',
    data=consistency_internal_external_df,
    orient='h',
    inner="quartile",
    scale='count',
    linewidth=0.4,
)
ax = sns.violinplot(**violin_options)

for side in ('bottom', 'top', 'right', 'left'):
    ax.spines[side].set_color('black')

ax.set_xlabel("Internal Consistency")
ax.set_ylabel('')

ax.figure.savefig(f"paper_images/internal_consistency_internal_external_{condition}_dpi_{dpi}.png",  bbox_inches='tight')

---
# Visualization of Components

In [None]:
import copy
# Deep copy of the training frame (dict round-trip); presumably guards
# X_train_df_100 against in-place changes by the transformers — TODO confirm.
X_train_df_copy = pd.DataFrame(copy.deepcopy(X_train_df_100.to_dict()))
# X_test_df_copy = pd.DataFrame(copy.deepcopy(X_test_df.to_dict()))

In [None]:
# Three nested channel selections, from smallest to largest.

# 9 channels.
red_box7_prim = [
    "Fpz", "AFz", "Fz", "FCz", "Cz", "CPz",
    "P1", "Pz", "P2",
]

# 11 channels: red_box7_prim plus C1 and C2.
box = [
    'Fpz', 'AFz', 'Fz', 'FCz',
    'C1', 'Cz', 'C2',
    'CPz',
    'P1', 'Pz', 'P2',
]

# 13 channels: box plus F1 and F2.
red_box8_prim = [
    "Fpz", "AFz",
    "F1", "Fz", "F2",
    "FCz",
    "C1", "Cz", "C2",
    "CPz",
    "P1", "Pz", "P2",
]

In [None]:
X_train_df_copy = pd.DataFrame(copy.deepcopy(X_train_df_100.to_dict()))

# Channel picking -> averaging -> 3-component PCA spatial filter, fitted on
# the training copy.
pre_pip = Pipeline(
    steps=[
        ("channels_extraction", PickChannels(channels_list=red_box8_prim)),
        ("average", Evoked()),
        ('extract_averaged_data', ExtractData()),
        ("spatial_filter_preprocessing", SpatialFilterPreprocessing()),
        ("spatial_filter", PCA(n_components=3, random_state=random_state)),
        ("spatial_filter_postprocessing", SpatialFilterPostprocessing(timepoints_count=181)),
    ]
)
pre_pip = pre_pip.fit(X_train_df_copy)

# Spatially filtered data and its mean over the first axis.
X = pre_pip.transform(X_train_df_copy)
X_mean = np.mean(X, axis=0)

In [None]:
# Inspect the dimensions of the averaged, spatially filtered data.
X_mean.shape

In [None]:
# Pull the PCA step out of the fitted pipeline.
sf = pre_pip['spatial_filter']

In [None]:
# Component weight matrix of the PCA step (its components_ attribute).
components = sf.components_

In [None]:
# Work on a copy so the weights stored in the PCA object are not modified.
components_copy = components.copy()

In [None]:
# Display the copied component weights.
components_copy

In [None]:
# Wrap each of the three component weight vectors in a one-element list;
# the cells below iterate over these lists.
components_0 = [components_copy[0]]
components_1 = [components_copy[1]]
components_2 = [components_copy[2]]

In [None]:
# Display the first component's weights.
components_0

In [None]:
X_train_df_copy = pd.DataFrame(copy.deepcopy(X_train_df_100.to_dict()))

# Same front end as the spatial-filter pipeline, but stopping after data
# extraction: X holds the averaged data of the selected channels without
# any PCA applied.
pre_pip = Pipeline(
    steps=[
        ("channels_extraction", PickChannels(channels_list=red_box8_prim)),
        ("average", Evoked()),
        ('extract_averaged_data', ExtractData()),
    ]
)
pre_pip = pre_pip.fit(X_train_df_copy)

# Extracted data and its mean over the first axis.
X = pre_pip.transform(X_train_df_copy)
X_mean = np.mean(X, axis=0)

In [None]:
# Split the first component's weights by sign and project the averaged
# signal onto each part.
for component in components_0:
    print(component)

    # Positive weights go to `positive`, all others to `negative`; the other
    # list gets a 0 at that position so both stay aligned with the weights.
    positive = [weight if weight > 0 else 0 for weight in component]
    negative = [0 if weight > 0 else weight for weight in component]
    print(positive)
    print(negative)

# Column vectors of shape (n_weights, 1). The row count is inferred rather
# than hard-coded to 13, so channel selections of any size work.
positive = np.array(positive).reshape(-1, 1)
negative = np.array(negative).reshape(-1, 1)

# assumes X_mean is (channels, timepoints), so each weight scales one
# channel's time course — TODO confirm
positive_compo = X_mean * positive
negative_compo = X_mean * negative

# Sum the weighted rows into one trace per sign.
positive_signal_0 = np.sum(positive_compo, axis=0)
negative_signal_0 = np.sum(negative_compo, axis=0)

In [None]:
# Split the second component's weights by sign and project the averaged
# signal onto each part.
for component in components_1:
    print(component)

    # Positive weights go to `positive`, all others to `negative`; the other
    # list gets a 0 at that position so both stay aligned with the weights.
    positive = [weight if weight > 0 else 0 for weight in component]
    negative = [0 if weight > 0 else weight for weight in component]
    print(positive)
    print(negative)

# Column vectors of shape (n_weights, 1). The row count is inferred rather
# than hard-coded to 13, so channel selections of any size work.
positive = np.array(positive).reshape(-1, 1)
negative = np.array(negative).reshape(-1, 1)

# assumes X_mean is (channels, timepoints), so each weight scales one
# channel's time course — TODO confirm
positive_compo = X_mean * positive
negative_compo = X_mean * negative

# Sum the weighted rows into one trace per sign.
positive_signal_1 = np.sum(positive_compo, axis=0)
negative_signal_1 = np.sum(negative_compo, axis=0)

In [None]:
# Split the third component's weights by sign and project the averaged
# signal onto each part.
for component in components_2:
    print(component)

    # Positive weights go to `positive`, all others to `negative`; the other
    # list gets a 0 at that position so both stay aligned with the weights.
    positive = [weight if weight > 0 else 0 for weight in component]
    negative = [0 if weight > 0 else weight for weight in component]
    print(positive)
    print(negative)

# Column vectors of shape (n_weights, 1). The row count is inferred rather
# than hard-coded to 13, so channel selections of any size work.
positive = np.array(positive).reshape(-1, 1)
negative = np.array(negative).reshape(-1, 1)

# assumes X_mean is (channels, timepoints), so each weight scales one
# channel's time course — TODO confirm
positive_compo = X_mean * positive
negative_compo = X_mean * negative

# Sum the weighted rows into one trace per sign.
positive_signal_2 = np.sum(positive_compo, axis=0)
negative_signal_2 = np.sum(negative_compo, axis=0)

In [None]:
# Full component trace = positive-weight part + negative-weight part.
c_0 = np.add(positive_signal_0, negative_signal_0)
c_1 = np.add(positive_signal_1, negative_signal_1)
c_2 = np.add(positive_signal_2, negative_signal_2)

In [None]:
import seaborn as sns

# Named colours for the component plots: entries 0, 3 and 2 of the 'deep'
# palette as hex strings.
pal = sns.color_palette('deep')
blue, red, green = (pal.as_hex()[position] for position in (0, 3, 2))

In [None]:
# Display the palette.
pal

In [None]:
import matplotlib.pyplot as plt

# Figure sizes are written in centimetres and converted to inches via `cm`.
cm = 1/2.54
dpi = 200

plt.rcParams.update({
    'figure.dpi': dpi,
    'figure.figsize': [9*cm, 5*cm],
    "font.family": "Times New Roman",
    'ytick.labelsize': 5,
    'xtick.labelsize': 5,
    'axes.labelsize': 6,
    "axes.edgecolor": ".15",
    "axes.linewidth": 0.3,
})
plt.rc('legend', fontsize=5)

fig, ax = plt.subplots()

# First component: positive-weight part and the sign-flipped full trace.
ax.plot(positive_signal_0, lw=1, color=blue)
ax.plot(-c_0, lw=3, color=green)

# Sample indices relabelled as milliseconds.
ax.set_xticks([0, 26, 52, 78, 104, 130, 156, 181])
ax.set_xticklabels(['-100', '0', '100', '200', '300', '400', '500', '600'])

# Two shaded windows on the sample axis.
ax.axvspan(37-11, 37+15, alpha=0.2, color='grey', lw=0)
ax.axvspan(37+30, 37+65, alpha=0.2, color='grey', lw=0)
ax.axhline(y=0, color='black', linestyle='--', lw=1)

ax.set_xlabel("Time (ms)")
ax.set_ylabel("Amplitude (μV)")

# NOTE(review): the first label, "Negative part", is attached to
# positive_signal_0 — confirm this is intended given the sign flip of c_0.
ax.legend(["Negative part", "PCA component"], loc=1)

fig.savefig(f'first_components_dpi_{dpi}.png', bbox_inches='tight')

In [None]:
# Figure sizes are written in centimetres and converted to inches via `cm`.
cm = 1/2.54
dpi = 200

plt.rcParams['figure.dpi'] = dpi
plt.rcParams['figure.figsize'] = [9*cm, 5*cm]
plt.rcParams["font.family"] = "Times New Roman"
plt.rcParams['ytick.labelsize'] = 5
plt.rcParams['xtick.labelsize'] = 5
plt.rcParams['axes.labelsize'] = 6
plt.rcParams["axes.edgecolor"] = ".15"
plt.rc('legend', fontsize=5)  # using a size in points

fig, ax = plt.subplots()

# Second component: positive-weight part and sign-flipped negative part.
positive_line = ax.plot(positive_signal_1, color=red, lw=1)[0]
negative_line = ax.plot(-negative_signal_1, color=blue, lw=1)[0]

# Sample indices relabelled as milliseconds.
ax.set_xticks([0, 26, 52, 78, 104, 130, 156, 181])
ax.set_xticklabels(['-100', '0', '100', '200', '300', '400', '500', '600'])

# Two shaded windows on the sample axis.
ax.axvspan(37-11, 37+15, alpha=0.2, color='grey', lw=0)
ax.axvspan(37+55, 37+85, alpha=0.2, color='grey', lw=0)

component_line = ax.plot(c_1, lw=3, color=green)[0]

ax.axhline(y=0, color='black', linestyle='--', lw=1)

ax.set_xlabel("Time (ms)")
ax.set_ylabel("Amplitude (μV)")

# Handles are passed explicitly. With labels alone, matplotlib pairs them
# with artists positionally, and the shaded spans are drawn before the
# component line, so "PCA component" could end up on a grey span.
ax.legend(
    handles=[positive_line, negative_line, component_line],
    labels=["Positive part", "Negative part", "PCA component"],
    loc=1,
)

fig.savefig(f'second_components_dpi_{dpi}.png', bbox_inches='tight')

In [None]:
# Figure sizes are written in centimetres and converted to inches via `cm`.
cm = 1/2.54
dpi = 200

plt.rcParams['figure.dpi'] = dpi
plt.rcParams['figure.figsize'] = [9*cm, 5*cm]
plt.rcParams["font.family"] = "Times New Roman"
plt.rcParams['ytick.labelsize'] = 5
plt.rcParams['xtick.labelsize'] = 5
plt.rcParams['axes.labelsize'] = 6
plt.rcParams["axes.edgecolor"] = ".15"
plt.rc('legend', fontsize=5)  # using a size in points

fig, ax = plt.subplots()

# Third component: positive-weight part and sign-flipped negative part.
positive_line = ax.plot(positive_signal_2, color=red, lw=1)[0]
negative_line = ax.plot(-negative_signal_2, color=blue, lw=1)[0]

# Sample indices relabelled as milliseconds.
ax.set_xticks([0, 26, 52, 78, 104, 130, 156, 181])
ax.set_xticklabels(['-100', '0', '100', '200', '300', '400', '500', '600'])

# Two shaded windows on the sample axis.
ax.axvspan(37-11, 37+15, alpha=0.2, color='grey', lw=0)
ax.axvspan(37+23, 37+55, alpha=0.2, color='grey', lw=0)

component_line = ax.plot(c_2, lw=3, color=green)[0]

ax.axhline(y=0, color='black', linestyle='--', lw=1)

ax.set_xlabel("Time (ms)")
ax.set_ylabel("Amplitude (μV)")

# Handles are passed explicitly. With labels alone, matplotlib pairs them
# with artists positionally, and the shaded spans are drawn before the
# component line, so "PCA component" could end up on a grey span.
ax.legend(
    handles=[positive_line, negative_line, component_line],
    labels=["Positive part", "Negative part", "PCA component"],
    loc=1,
)

fig.savefig(f'third_components_dpi_{dpi}.png', bbox_inches='tight')

In [None]:
import matplotlib.pyplot as plt

# Quick look at one trace: element [1][0] of X. The alternatives below are
# kept disabled.
# plt.plot(positive_signal_0)
# plt.plot(-negative_signal_0)

# plt.plot(X_mean[0], lw=4)
plt.plot(X[1][0], lw = 2)

In [None]:
import matplotlib.pyplot as plt

# Quick look at the second component: both signed parts with default
# styling, then the full trace drawn thicker on top.
for trace in (positive_signal_1, -negative_signal_1):
    plt.plot(trace)

plt.plot(c_1, lw=2)

In [None]:
import matplotlib.pyplot as plt

# Quick look at the third component: both signed parts with default
# styling, then the full trace drawn thicker on top.
for trace in (positive_signal_2, -negative_signal_2):
    plt.plot(trace)

plt.plot(c_2, lw=2)

In [None]:
import matplotlib.pyplot as plt

# Overlay rows 1 and 2 of the averaged data.
for i in (1, 2):
    plt.plot(X_mean[i])

In [None]:
import matplotlib.pyplot as plt

# Overlay row 0 of the first ten entries of X.
n_entries = 10
for i in range(n_entries):
    plt.plot(X[i][0])

In [None]:
import matplotlib.pyplot as plt

# Overlay row 1 of the first ten entries of X.
n_entries = 10
for i in range(n_entries):
    plt.plot(X[i][1])

In [None]:
import matplotlib.pyplot as plt

# Overlay row 2 of the first ten entries of X.
n_entries = 10
for i in range(n_entries):
    plt.plot(X[i][2])

In [None]:
# Load previously saved results from a pickle file.
# NOTE(review): unpickling can execute arbitrary code — only load files
# produced by this project.
df = pd.read_pickle("../data/results/regression_union_100-600_cached_ern_amplitude_various_scales_with_external_p.pkl")

In [None]:
# Columns of the results frame shown in the cell below.
columns = ['scale', 'mean_cv_r2', 'external_score', 'p-value', 'external_p-value']

In [None]:
# Show only rows where both the external score and the mean CV R² are
# non-negative, restricted to the selected columns.
non_negative_scores = (df['external_score'] >= 0) & (df['mean_cv_r2'] >= 0)
df.loc[non_negative_scores, columns]