# Rumination prediction

### Imports

In [None]:
import os
import re
import glob
import os
import ast
import os.path as op
from collections import defaultdict
from copy import deepcopy
import copy

import pickle
from time import time
import pywt
import mne
import scipy
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
import pandas as pd
import cesium.featurize
from plotly.subplots import make_subplots
from ipywidgets import Dropdown, FloatRangeSlider, IntSlider, FloatSlider, interact
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import train_test_split
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.base import TransformerMixin, BaseEstimator


import sys

# sys.path.append("..")
# from utils import *

---
## Loading data

Load the EEG data together with the data from the rumination questionnaire. By default, `create_df_data` loads every column of the given CSV file; to load only some of them, pass a list of the desired column labels.

In [None]:
# paths TODO
# Parent of the current working directory: abspath("") resolves to the cwd,
# and the head of os.path.split() is that directory's parent.
dir_path = os.path.split(os.path.abspath(""))[0]

In [None]:
# Start and end of the segments (passed as tmin/tmax to mne.Epochs below).
tmin = -0.1
tmax = 0.6

# NOTE(review): presumably the EEG sampling rate in Hz — confirm.
signal_frequency = 256

# Integer codes stored in the "marker" column / used to pick a condition.
ERROR, CORRECT, ALL = 0, 1, 2

# Seed shared by the estimators that accept a random_state.
random_state = 0

In [None]:
# Electrode labels in the order used to number them in ``channels_dict``.
# NOTE(review): presumably the recording/montage order — confirm.
channels_order_list = [
    "Fp1",
    "AF7",
    "AF3",
    "F1",
    "F3",
    "F5",
    "F7",
    "FT7",
    "FC5",
    "FC3",
    "FC1",
    "C1",
    "C3",
    "C5",
    "T7",
    "TP7",
    "CP5",
    "CP3",
    "CP1",
    "P1",
    "P3",
    "P5",
    "P7",
    "P9",
    "PO7",
    "PO3",
    "O1",
    "Iz",
    "Oz",
    "POz",
    "Pz",
    "CPz",
    "Fpz",
    "Fp2",
    "AF8",
    "AF4",
    "AFz",
    "Fz",
    "F2",
    "F4",
    "F6",
    "F8",
    "FT8",
    "FC6",
    "FC4",
    "FC2",
    "FCz",
    "Cz",
    "C2",
    "C4",
    "C6",
    "T8",
    "TP8",
    "CP6",
    "CP4",
    "CP2",
    "P2",
    "P4",
    "P6",
    "P8",
    "P10",
    "PO8",
    "PO4",
    "O2",
]

# Map every label to its 1-based position in the list. The upper bound is
# derived from the list length: the previous fixed ``np.arange(1, 64, 1)``
# produced only 63 numbers, so ``zip`` silently dropped the last label ("O2").
channels_dict = dict(
    zip(channels_order_list, np.arange(1, len(channels_order_list) + 1, 1))
)

In [None]:
def create_df_data(
    test_participants=False,
    test_epochs=False,
    info_filename=None,
    info="all",
    personal=True,
    epochs_dir="data/responses_100_600",
):
    """Load every participant's epochs into one DataFrame (one row per participant).

    Header files (``*.vhdr``) are collected from ``epochs_dir`` (resolved against
    the parent of the current working directory), sorted, and split
    deterministically (``random_state=0``) into 70% train / 30% test participants.
    By default the train participants are loaded.

    Participants with fewer than 3 error epochs or fewer than 3 correct epochs
    are skipped.

    Parameters
    ----------
    test_participants: bool
        If True, load the held-out 30% of participants instead of the train 70%.
    test_epochs: bool
        Accepted for backward compatibility; not used by this function.
    info_filename: String | None
        path to .csv file with additional data.
    info: array
        listed parameters from the info file to be loaded.
        if 'all', load all parameters
    personal: bool
        Accepted for backward compatibility; not used by this function.
    epochs_dir: String
        Folder with the BrainVision header files, relative to the parent of the
        current working directory. Defaults to the folder that used to be
        hard-coded.

    Returns
    -------
    go_nogo_data_df : pandas.DataFrame
        Concatenation of the per-participant frames built by
        ``create_df_from_epochs``; an empty DataFrame if nobody qualified.

    Raises
    ------
    ValueError
        If a participant id cannot be extracted from a header file name.

    """
    print(os.path.abspath(""))
    dir_path = os.path.dirname(os.path.abspath(""))
    print(dir_path)

    header_files = sorted(glob.glob(os.path.join(dir_path, epochs_dir, "*.vhdr")))

    # hold out 30% of the participants for testing
    h_train, h_test = train_test_split(header_files, test_size=0.3, random_state=0)
    header_files = h_test if test_participants else h_train

    # Collect the per-participant frames and concatenate once at the end:
    # DataFrame.append was removed in pandas 2.0 and copies the whole frame
    # on every call.
    participant_frames = []

    for file in header_files:
        #  load eeg data for given participant
        participant_epochs = load_epochs_from_file(file)

        # Compute the participant's id from the file name only, so that an
        # underscore in a directory name can never be picked up by the pattern.
        id_match = re.match(r".*_(\w+).*", os.path.basename(file))
        if id_match is None:
            raise ValueError(f"Cannot extract a participant id from {file!r}")
        participant_id = id_match.group(1)

        error = participant_epochs["error_response"]._data
        correct = participant_epochs["correct_response"]._data

        # exclude those participants who have too few samples
        if len(error) < 3 or len(correct) < 3:
            # not enough data for this participant
            continue

        # construct dataframe for participant with: id|epoch_data|response_type|additional info...
        participant_df = create_df_from_epochs(
            participant_id, participant_epochs, info_filename, info
        )
        print(participant_id)
        participant_frames.append(participant_df)

    if not participant_frames:
        # pd.concat raises on an empty list; keep the original "empty frame" result.
        return pd.DataFrame()

    return pd.concat(participant_frames, ignore_index=True)

In [None]:
def create_df_from_epochs(id, participant_epochs, info_filename, info):
    """Create a one-row DataFrame for a single participant.

    The row holds the participant id, the whole epochs object and the marker
    ``ALL``: {id: String ; epoch: epochs object ; marker: ALL}.
    When ``info_filename`` is given, the columns of the questionnaire row whose
    ``File`` value equals ``id`` are joined onto that row (NaN-filled if no
    row matches).

    Parameters
    ----------
    id: String
        participant's id extracted from filename
    participant_epochs: object
        the participant's epochs (as returned by ``load_epochs_from_file``);
        stored as-is in the ``epoch`` column
    info_filename: String | None
        path to .csv file with additional data; None skips the lookup.
    info: array
        listed parameters from the info file to be loaded.
        if 'all', load all parameters

    Returns
    -------
    participant_df : pandas.DataFrame

    """
    info_df = pd.DataFrame()

    # get additional info from file
    if info_filename is not None:
        if isinstance(info, str) and info == "all":
            rumination_df = pd.read_csv(info_filename)
        else:
            # "File" is always needed to look the participant up.
            rumination_df = pd.read_csv(info_filename, usecols=["File", *info])
        # drop=True discards the old index directly instead of materialising
        # it as an "index" column and dropping it afterwards.
        info_df = rumination_df.loc[rumination_df["File"] == id].reset_index(
            drop=True
        )

    epoch_df = pd.DataFrame(
        {"id": [id], "epoch": [participant_epochs], "marker": [ALL]}
    ).join(info_df)

    # The frame used to be appended to an empty DataFrame with
    # DataFrame.append (removed in pandas 2.0); resetting the index gives the
    # same single-row result.
    participant_df = epoch_df.reset_index(drop=True)

    return participant_df

In [None]:
def load_epochs_from_file(file, reject_bad_segments="auto", mask=None):
    """Read a BrainVision recording and cut it into response-locked epochs.

    Args:
        file: path to a header file (.vhdr); the marker file is expected at the
            same path with the last four characters replaced by "vmrk".
        reject_bad_segments: accepted for compatibility; not used by the body.
        mask: accepted for compatibility; not used by the body.

    Returns:
        mne Epochs with the event ids "correct_response" (0) and
        "error_response" (1), spanning the module-level ``tmin``..``tmax``
        window, without baseline correction, preloaded.

    """
    # Response markers of interest and the integer code assigned to each
    response_markers = {
        "Stimulus/RE*ex*1_n*1_c_1*R*FB": 10004,
        "Stimulus/RE*ex*1_n*1_c_1*R*FG": 10005,
        "Stimulus/RE*ex*1_n*1_c_2*R": 10006,
        "Stimulus/RE*ex*1_n*2_c_1*R": 10007,
        "Stimulus/RE*ex*2_n*1_c_1*R": 10008,
        "Stimulus/RE*ex*2_n*2_c_1*R*FB": 10009,
        "Stimulus/RE*ex*2_n*2_c_1*R*FG": 10010,
        "Stimulus/RE*ex*2_n*2_c_2*R": 10011,
    }
    # Codes collapsed into each merged condition, in merge order
    codes_per_condition = (
        ("correct_response", [10004, 10005, 10009, 10010]),
        ("error_response", [10006, 10007, 10008, 10011]),
    )
    merged_event_dict = {"correct_response": 0, "error_response": 1}

    # Raw recording plus the annotations read from the sibling marker file
    raw = mne.io.read_raw_brainvision(file)
    raw.set_annotations(mne.read_annotations(file[:-4] + "vmrk"))

    # Events for the response markers only, then merged condition by condition
    merged_events, _ = mne.events_from_annotations(raw, event_id=response_markers)
    for condition, codes in codes_per_condition:
        merged_events = mne.merge_events(
            merged_events,
            codes,
            merged_event_dict[condition],
            replace_events=True,
        )

    return mne.Epochs(
        raw=raw,
        events=merged_events,
        event_id=merged_event_dict,
        tmin=tmin,
        tmax=tmax,
        baseline=None,
        reject_by_annotation=True,
        preload=True,
    )

#### Read the data

In [None]:
# Train participants for the "100_600" dataset: reuse the cached pickle when it
# exists, otherwise build the frame from the raw files and cache it.
df_name = "go_nogo_100_600_df_3-5_all"
pickled_data_filename = f"../data/{df_name}.pkl"
info_filename = "../data/Demographic_Questionnaires_Behavioral_Results_N=163.csv"

if not os.path.isfile(pickled_data_filename):
    print("Pickled file not found. Loading data...")
    epochs_df = create_df_data(
        test_participants=False,
        info="all",
        personal=False,
        info_filename=info_filename,
    )
    # the frame carries its dataset name, which also determines the pickle path
    epochs_df.name = df_name
    epochs_df.to_pickle("../data/" + epochs_df.name + ".pkl")
    print("Done. Pickle file created")
else:
    print("Pickled file found. Loading pickled data...")
    epochs_df = pd.read_pickle(pickled_data_filename)
    print("Done")

In [None]:
# Train participants for the "500_300" dataset: reuse the cached pickle when it
# exists, otherwise build the frame from the raw files and cache it.
# NOTE(review): this call does not select a data folder, and create_df_data
# reads data/responses_100_600/*.vhdr — confirm that a rebuilt frame would
# really contain the 500_300 data.
df_name = "go_nogo_500_300_df_3-5_all"
pickled_data_filename = f"../data/{df_name}.pkl"
info_filename = "../data/Demographic_Questionnaires_Behavioral_Results_N=163.csv"

if not os.path.isfile(pickled_data_filename):
    print("Pickled file not found. Loading data...")
    epochs_df_3 = create_df_data(
        test_participants=False,
        info="all",
        personal=False,
        info_filename=info_filename,
    )
    # the frame carries its dataset name, which also determines the pickle path
    epochs_df_3.name = df_name
    epochs_df_3.to_pickle("../data/" + epochs_df_3.name + ".pkl")
    print("Done. Pickle file created")
else:
    print("Pickled file found. Loading pickled data...")
    epochs_df_3 = pd.read_pickle(pickled_data_filename)
    print("Done")

In [None]:
# Train participants for the "400_600" dataset: reuse the cached pickle when it
# exists, otherwise build the frame from the raw files and cache it.
# NOTE(review): this call does not select a data folder, and create_df_data
# reads data/responses_100_600/*.vhdr — confirm that a rebuilt frame would
# really contain the 400_600 data.
df_name = "go_nogo_400_600_df_3-5_all"
pickled_data_filename = f"../data/{df_name}.pkl"
info_filename = "../data/Demographic_Questionnaires_Behavioral_Results_N=163.csv"

if not os.path.isfile(pickled_data_filename):
    print("Pickled file not found. Loading data...")
    epochs_df2 = create_df_data(
        test_participants=False,
        info="all",
        personal=False,
        info_filename=info_filename,
    )
    # the frame carries its dataset name, which also determines the pickle path
    epochs_df2.name = df_name
    epochs_df2.to_pickle("../data/" + epochs_df2.name + ".pkl")
    print("Done. Pickle file created")
else:
    print("Pickled file found. Loading pickled data...")
    epochs_df2 = pd.read_pickle(pickled_data_filename)
    print("Done")

#### Read data for external testing

In [None]:
# Held-out (test) participants for the "100_600" dataset: reuse the cached
# pickle when it exists, otherwise build the frame and cache it.
df_name = "go_nogo_100_600_test_df_3-5_all"
pickled_data_filename = f"../data/{df_name}.pkl"
info_filename = "../data/Demographic_Questionnaires_Behavioral_Results_N=163.csv"

if not os.path.isfile(pickled_data_filename):
    print("Pickled file not found. Loading data...")
    epochs_test_df = create_df_data(
        test_participants=True,
        info="all",
        personal=False,
        info_filename=info_filename,
    )
    # the frame carries its dataset name, which also determines the pickle path
    epochs_test_df.name = df_name
    epochs_test_df.to_pickle("../data/" + epochs_test_df.name + ".pkl")
    print("Done. Pickle file created")
else:
    print("Pickled file found. Loading pickled data...")
    epochs_test_df = pd.read_pickle(pickled_data_filename)
    print("Done")

In [None]:
# Held-out (test) participants for the "500_300" dataset: reuse the cached
# pickle when it exists, otherwise build the frame and cache it.
# NOTE(review): this call does not select a data folder, and create_df_data
# reads data/responses_100_600/*.vhdr — confirm that a rebuilt frame would
# really contain the 500_300 data.
df_name = "go_nogo_500_300_test_df_3-5_all"
pickled_data_filename = f"../data/{df_name}.pkl"
info_filename = "../data/Demographic_Questionnaires_Behavioral_Results_N=163.csv"

if not os.path.isfile(pickled_data_filename):
    print("Pickled file not found. Loading data...")
    epochs_test_df_3 = create_df_data(
        test_participants=True,
        info="all",
        personal=False,
        info_filename=info_filename,
    )
    # the frame carries its dataset name, which also determines the pickle path
    epochs_test_df_3.name = df_name
    epochs_test_df_3.to_pickle("../data/" + epochs_test_df_3.name + ".pkl")
    print("Done. Pickle file created")
else:
    print("Pickled file found. Loading pickled data...")
    epochs_test_df_3 = pd.read_pickle(pickled_data_filename)
    print("Done")

---
## Training and prediction

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso
from sklearn.linear_model import ElasticNet
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import permutation_test_score
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import FunctionTransformer
from sklearn.dummy import DummyRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVR
from tempfile import mkdtemp
from sklearn.linear_model import Ridge
from sklearn.kernel_ridge import KernelRidge


from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score


from sklearn.decomposition import FastICA
from sklearn.decomposition import PCA

from rumination_experiment_transformers_averaged_CDS import *

import warnings

# Suppress every warning for the rest of the session (this also hides
# deprecation and convergence warnings raised by the libraries used below).
warnings.filterwarnings("ignore")

#### Create X train and y train sets

In [None]:
# selection of the analysed condition: erroneous responses or correct responses

dataset = ERROR

# Any value other than CORRECT maps to the error-response label.
if dataset == CORRECT:
    dataset_name = "correct_response"
else:
    dataset_name = "error_response"

In [None]:
# Train-set aliases named after the dataset each frame was loaded as.
X_train_df_100, X_train_df_500 = epochs_df, epochs_df_3
# X_train_df_400 = epochs_df2

In [None]:
# Test-set aliases named after the dataset each frame was loaded as.
X_test_df_500, X_test_df_100 = epochs_test_df_3, epochs_test_df

In [None]:
# Frames used by the cells below: the "100" train/test pair.
X_train_df, X_test_df = X_train_df_100, X_test_df_100

In [None]:
# shape 1-D: scores
# One array per questionnaire column of the train frame, in this order.
rumination, deppression, anxiety, stress = (
    np.array(X_train_df[scale_column].to_list())
    for scale_column in (
        "Rumination Full Scale",
        "DASS-21 Depression scale",
        "DASS-21 Anxiety scale",
        "DASS-21 Stress scale",
    )
)

# The rumination score is the regression target.
y_train = rumination
y_train.shape

In [None]:
# Rumination scores of the test frame (passed to the experiments as the
# external-validation target).
rumination_test_scores = X_test_df["Rumination Full Scale"]
y_rum_test = np.array(rumination_test_scores.to_list())
del rumination_test_scores
y_rum_test.shape

In [None]:
# Empty placeholders for the X_test / y_test arguments of the experiment helpers.
X_test, y_test = [], []

---
### Experiments 

Parameters of experiments:
- regressors
- hyperparameters
- preprocessing pipelines

#### Prepare experiment estimating 
____

In [None]:
# Rating model with grid search


def rate_regressor(
    X_train, y_train, X_test, y_test, regressor, regressor_params, base_steps, cv=3
):
    """Grid-search a two-step pipeline (``base_steps`` + ``regressor``) on the train data.

    Parameters
    ----------
    X_train, y_train
        data the grid search is fitted on.
    X_test, y_test
        accepted for interface compatibility; not used by this function.
    regressor: tuple
        ``(name, estimator)`` pipeline step holding the model.
    regressor_params: dict | list of dict
        parameter grid handed to ``GridSearchCV``.
    base_steps: tuple
        a single ``(name, transformer)`` pipeline step run before the regressor.
    cv: int | cross-validation splitter
        an int is the number of unshuffled ``KFold`` splits; anything else is
        passed to ``GridSearchCV`` unchanged.

    Returns
    -------
    grid_search : sklearn.model_selection.GridSearchCV
        the fitted search, refitted on the best ``r2`` configuration.

    """
    # Honour the ``cv`` argument: it used to be ignored in favour of a
    # hard-coded KFold(n_splits=3). The default (3) keeps the old behaviour.
    cv_kf = KFold(n_splits=cv) if isinstance(cv, int) else cv

    pipeline = Pipeline([base_steps, regressor])

    grid_search = GridSearchCV(
        pipeline,
        regressor_params,
        cv=cv_kf,
        # A list rather than a set: GridSearchCV documents list/tuple/dict for
        # multi-metric scoring, and a list keeps the metric order deterministic.
        scoring=["r2", "neg_mean_absolute_error", "neg_mean_squared_error"],
        refit="r2",
        return_train_score=True,
        n_jobs=10,
        verbose=1,
        # fail loudly instead of scoring a failed fit as NaN
        error_score="raise",
    )
    grid_search.fit(X_train, y_train)

    return grid_search

In [None]:
# Calculating p-value with permutation test


def calculate_p_permutations(estimator, X, y, cv=3, n_permutations=1000, n_jobs=10):
    """Run ``permutation_test_score`` and report the score and its p-value.

    All arguments are forwarded unchanged to
    ``sklearn.model_selection.permutation_test_score``; no ``scoring`` is
    passed, so the estimator's default scorer is used.

    Returns
    -------
    (score, pvalue) : tuple
        the score on the original targets and the permutation p-value
        (the per-permutation scores are discarded).

    """
    permutation_result = permutation_test_score(
        estimator, X, y, cv=cv, n_permutations=n_permutations, n_jobs=n_jobs
    )
    true_score = permutation_result[0]
    p_value = permutation_result[2]

    # summarize
    print(f"     The permutation P-value is = {p_value:.4f}")
    print(f"     The permutation score is = {true_score:.4f}\n")

    return true_score, p_value

In [None]:
# conducting experiment and saving selected info to result df


def run_experiment(
    tested_regressors,
    regressor_params,
    pipeline_name,
    X_train,
    X_test,
    y_train,
    y_test,
    dataset_name,
    base_steps,
    preprocessed_pipeline,
    X_test_df,
    y_rum,
    results_df,
):
    """Grid-search every regressor, test it and add one result row per regressor.

    For each ``(regressor, params)`` pair the function
    1. merges ``regressor_params`` with ``params`` (the latter wins on clashes)
       and runs ``rate_regressor`` with 3-fold cross-validation,
    2. reads the CV/train metrics of the best configuration,
    3. runs a permutation test of the best estimator on the train data,
    4. scores the best estimator on ``preprocessed_pipeline.transform(X_test_df)``
       against ``y_rum`` (external validation).

    Parameters
    ----------
    tested_regressors: list of tuple
        ``((name, estimator), param_grid)`` pairs.
    regressor_params: dict
        grid entries shared by all regressors (e.g. preprocessing parameters).
    pipeline_name, dataset_name: String
        labels stored in the result rows.
    X_train, y_train
        training data for the grid search and the permutation test.
    X_test, y_test
        only forwarded to ``rate_regressor``.
    base_steps: tuple
        ``(name, transformer)`` step placed before the regressor.
    preprocessed_pipeline
        fitted transformer applied to ``X_test_df`` for external validation.
    X_test_df, y_rum
        external validation inputs and targets.
    results_df: pandas.DataFrame
        frame the new rows are added to; it is not modified in place.

    Returns
    -------
    results_df : pandas.DataFrame
        ``results_df`` followed by one row per tested regressor.

    """
    result_rows = []

    for (regressor, params) in tested_regressors:
        print(f"Rating {regressor} \n")
        tested_params = {**regressor_params, **params}

        # enter to grid search
        grid_result = rate_regressor(
            X_train,
            y_train,
            X_test,
            y_test,
            regressor,
            tested_params,
            base_steps,
            cv=3,
        )

        cv_results = grid_result.cv_results_
        best_estimator_index = grid_result.best_index_

        # metrics of the best configuration (the one refit on r2)
        mean_cv_r2 = cv_results["mean_test_r2"][best_estimator_index]
        std_cv_r2 = cv_results["std_test_r2"][best_estimator_index]
        mean_cv_neg_mean_absolute_error = cv_results[
            "mean_test_neg_mean_absolute_error"
        ][best_estimator_index]
        std_cv_neg_mean_absolute_error = cv_results[
            "std_test_neg_mean_absolute_error"
        ][best_estimator_index]
        mean_cv_neg_mean_squared_error = cv_results[
            "mean_test_neg_mean_squared_error"
        ][best_estimator_index]
        std_cv_neg_mean_squared_error = cv_results[
            "std_test_neg_mean_squared_error"
        ][best_estimator_index]

        mean_train_r2 = cv_results["mean_train_r2"][best_estimator_index]
        mean_train_mae = cv_results["mean_train_neg_mean_absolute_error"][
            best_estimator_index
        ]
        mean_train_mse = cv_results["mean_train_neg_mean_squared_error"][
            best_estimator_index
        ]

        print(f"     Best parameters: {grid_result.best_params_}")
        print(f"     mean r2: {mean_cv_r2}           ± {round(std_cv_r2,3)}")
        print(f"     mean r2 train: {mean_train_r2}")

        # calculate p-value
        scores_, pvalue_ = calculate_p_permutations(
            grid_result.best_estimator_, X_train, y_train
        )

        # external validation on the held-out participants
        pre_processed_test_X = preprocessed_pipeline.transform(X_test_df)
        estimator = grid_result.best_estimator_
        score = estimator.score(pre_processed_test_X, y_rum)

        # Single print: the former print(print(...)) also emitted a "None" line.
        print(f"     external validation r2: {score}")

        # insert selected info to df
        # NOTE: the *_mae / *_mse values are sklearn's negated errors
        # (neg_mean_absolute_error / neg_mean_squared_error), stored as-is.
        result_rows.append(
            {
                "data_set": dataset_name,
                "pipeline_name": pipeline_name,
                "model": regressor[0],
                "parameters": grid_result.best_params_,
                "mean_cv_r2": mean_cv_r2,
                "std_cv_r2": std_cv_r2,
                "mean_cv_mae": mean_cv_neg_mean_absolute_error,
                "std_cv_mae": std_cv_neg_mean_absolute_error,
                "mean_cv_mse": mean_cv_neg_mean_squared_error,
                "std_cv_mse": std_cv_neg_mean_squared_error,
                "cv_results": cv_results,
                "mean_train_r2": mean_train_r2,
                "mean_train_mae": mean_train_mae,
                "mean_train_mse": mean_train_mse,
                "p-value": pvalue_,
                "best_estimator": grid_result.best_estimator_,
                "pre_processed_pipeline": preprocessed_pipeline,
                "external_score": score,
            }
        )

    if not result_rows:
        # nothing was tested: hand the input back unchanged
        return results_df

    # DataFrame.append was removed in pandas 2.0; concatenate the new rows once.
    return pd.concat([results_df, pd.DataFrame(result_rows)], ignore_index=True)

### Perform Experiments
___

#### Global parameters common for each experiment

In [None]:
# define estimators and their hyperparameters
# Each estimator is a (step_name, estimator) pipeline step; grid keys use the
# "<step_name>__<parameter>" convention so they address that step.

en = ("en", ElasticNet(random_state=random_state))
en_params = {
    "en__alpha": np.logspace(-7, 3, num=20, base=10),
    "en__l1_ratio": np.logspace(-8, 0, num=17, base=10),
}

kr = ("kr", KernelRidge(kernel="rbf"))
kr_params = {
    "kr__alpha": np.logspace(-5, 3, num=20, base=10),
    "kr__gamma": np.logspace(-5, 3, num=20, base=10),
}

svr = ("svr", SVR())
svr_params = {
    "svr__kernel": ["linear", "rbf"],
    "svr__C": [1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1, 10],
    "svr__gamma": ["scale"],
    "svr__epsilon": [1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1],
}

# (estimator step, grid) pairs in the order they are evaluated
tested_regressors = [(svr, svr_params), (kr, kr_params), (en, en_params)]

----
# Check within-subject and between-subject variance of feature vector

# Between subject variation
- without spatial filter

In [None]:
# results_df_rum = pd.read_pickle("../data/split0.3/regression_union_100-600_baselined_centered-2_diff_boxes_diff_pe-ind_diff_models.pkl")

# ern_fex = results_df_rum[results_df_rum['model'] == 'en'].best_estimator[11]['features'].transformer_list[0][1]['feature_extraction']
# pe_fex = results_df_rum[results_df_rum['model'] == 'en'].best_estimator[11]['features'].transformer_list[1][1]['feature_extraction']

In [None]:
# Empty accumulator: one row per (pipeline label, feature value) is added below.
between_df = pd.DataFrame({column: [] for column in ("pipeline", "values")})

In [None]:
# Channel labels handed to PickChannels in the pipelines below.
box = "Fpz AFz Fz FCz Cz CPz Pz".split()

In [None]:
# X_train_df_100copy = pd.DataFrame(copy.deepcopy(X_train_df_100.to_dict()))

In [None]:
# pipeline_name = '-100:0'

# ern_features = Pipeline(steps=[
#                                 ("ern_data_extraction", ErnTransformer()),
#                                 ("ern_amplitude", ErnAmplitude2()),
#                 ])


# pe_features = Pipeline(steps = [
#                                 ("pe_data_extraction", PeTransformer(start_pe_bin=3, stop_pe_bin=8)),
#                                 ("pe_amplitude", PeAmplitude2()),
#                 ])

# ern_pe_features = FeatureUnion([("ern_features", ern_features), ("pe_features", pe_features)], n_jobs = 10)

# x_pre = Pipeline([
#             ("channels_extraction",PickChannels(channels_list=box)),
#             ("average", Evoked()),
#             ('extract_data', ExtractData()),
#             ("lowpass_filter", LowpassFilter()),
#             ("binning", BinTransformer(step=12)),
#             ("baseline", ErnBaselined()),
#             ("centering", CenteredSignalAfterBaseline3()),
#             ("features", ern_pe_features),
# ]).fit_transform(X_train_df_100copy)

# x_feature_100_between = np.sum(x_pre, axis=1)
# x_100_std_between = np.std(x_feature_100_between, axis=0)

# values = x_feature_100_between.flatten().tolist()
# names = [pipeline_name] * len(x_feature_100_between)

# temp_df = pd.DataFrame(zip(names, values), columns=['pipeline', 'values'])

# between_df = between_df.append(temp_df, ignore_index=True)

In [None]:
# X_train_df_500copy = pd.DataFrame(copy.deepcopy(X_train_df_500.to_dict()))

In [None]:
# pipeline_name = '-500:-300'

# ern_features = Pipeline(steps=[
#                                 ("ern_data_extraction", ErnTransformer()),
#                                 ("ern_amplitude", ErnAmplitude2()),
#                 ])


# pe_features = Pipeline(steps = [
#                                 ("pe_data_extraction", PeTransformer(start_pe_bin=3, stop_pe_bin=8)),
#                                 ("pe_amplitude", PeAmplitude2()),
#                 ])

# ern_pe_features = FeatureUnion([("ern_features", ern_features), ("pe_features", pe_features)], n_jobs = 10)

# x_pre = Pipeline([
#             ("channels_extraction",PickChannels(channels_list=box)),
#             ("average", Evoked()),
#             ('extract_data', ExtractData()),
#             ("lowpass_filter", LowpassFilter()),
#             ("binning", BinTransformer(step=12)),
#             ("baseline", ErnBaselined()),
#             ("centering", CenteredSignalAfterBaseline3()),
#             ("features", ern_pe_features),
# ]).fit_transform(X_train_df_500copy)

# x_feature_500_between = np.sum(x_pre, axis=1)
# x_500_std_between = np.std(x_feature_500_between, axis=0)

# values = x_feature_500_between.flatten().tolist()
# names = [pipeline_name] * len(x_feature_500_between)

# temp_df = pd.DataFrame(zip(names, values), columns=['pipeline', 'values'])

# between_df = between_df.append(temp_df, ignore_index=True)

- different lowpass filters

In [None]:
# Between-subject feature values for the "-100:0" frame, one pass per low-pass
# cutoff. The transformer classes are project-specific (presumably provided by
# the star import of rumination_experiment_transformers_averaged_CDS).
for cutoff in [40]:
    # Rebuild the frame from a deep-copied dict so the pipeline works on its
    # own copy (presumably the transformers modify their input — TODO confirm).
    X_train_df_100copy = pd.DataFrame(copy.deepcopy(X_train_df_100.to_dict()))

    # label under which the values are stored in between_df
    pipeline_name = '-100:0 ' + str(cutoff)

    ern_features = Pipeline(steps=[
        ("ern_data_extraction", ErnTransformer()),
        ("ern_amplitude", ErnAmplitude2_prim()),
    ])

    pe_features = Pipeline(steps=[
        ("pe_data_extraction", PeTransformer(start_pe_bin=2, stop_pe_bin=7)),
        ("pe_amplitude", PeAmplitude2()),
    ])

    # ERN and Pe features computed side by side and concatenated column-wise
    ern_pe_features = FeatureUnion(
        [("ern_features", ern_features), ("pe_features", pe_features)], n_jobs=10
    )

    x_pre = Pipeline([
        ("channels_extraction", PickChannels(channels_list=box)),
        ("average", Evoked()),
        ('extract_data', ExtractData()),
        ("lowpass_filter", LowpassFilter(cutoff=cutoff)),
        ('neg', ReverseSignal()),
        ("binning", BinTransformer(step=12)),
        ("baseline", ErnBaselined()),
        ("centering", CenteredSignalAfterBaseline3()),
        ("features", ern_pe_features),
    ]).fit_transform(X_train_df_100copy)

    # one value per row: the sum over that row's feature columns
    x_feature_100_between = np.sum(x_pre, axis=1)
    x_100_std_between = np.std(x_feature_100_between, axis=0)

    values = x_feature_100_between.flatten().tolist()
    names = [pipeline_name] * len(x_feature_100_between)

    temp_df = pd.DataFrame(zip(names, values), columns=['pipeline', 'values'])

    # DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
    between_df = pd.concat([between_df, temp_df], ignore_index=True)
    
    ###########################################################################################
   

In [None]:
# Between-subject feature values for the "-100:0" frame with the baseline step
# left out ("no BS" label), one pass per low-pass cutoff. The transformer
# classes are project-specific (presumably provided by the star import of
# rumination_experiment_transformers_averaged_CDS).
for cutoff in [15, 20, 30, 40]:
    # Rebuild the frame from a deep-copied dict so the pipeline works on its
    # own copy (presumably the transformers modify their input — TODO confirm).
    X_train_df_100copy = pd.DataFrame(copy.deepcopy(X_train_df_100.to_dict()))

    # label under which the values are stored in between_df
    pipeline_name = '-100:0 ' + str(cutoff) + ' no BS'

    ern_features = Pipeline(steps=[
        ("ern_data_extraction", ErnTransformer()),
        ("ern_amplitude", ErnAmplitude2()),
    ])

    pe_features = Pipeline(steps=[
        ("pe_data_extraction", PeTransformer(start_pe_bin=3, stop_pe_bin=8)),
        ("pe_amplitude", PeAmplitude2()),
    ])

    # ERN and Pe features computed side by side and concatenated column-wise
    ern_pe_features = FeatureUnion(
        [("ern_features", ern_features), ("pe_features", pe_features)], n_jobs=10
    )

    x_pre = Pipeline([
        ("channels_extraction", PickChannels(channels_list=box)),
        ("average", Evoked()),
        ('extract_data', ExtractData()),
        ("lowpass_filter", LowpassFilter(cutoff=cutoff)),
        ("binning", BinTransformer(step=12)),
        # ("baseline", ErnBaselined()),
        ("centering", CenteredSignalAfterBaseline3()),
        ("features", ern_pe_features),
    ]).fit_transform(X_train_df_100copy)

    # one value per row: the sum over that row's feature columns
    # (the x_* names are overwritten on every pass; only the last cutoff survives)
    x_feature_100_between = np.sum(x_pre, axis=1)
    x_100_std_between = np.std(x_feature_100_between, axis=0)

    values = x_feature_100_between.flatten().tolist()
    names = [pipeline_name] * len(x_feature_100_between)

    temp_df = pd.DataFrame(zip(names, values), columns=['pipeline', 'values'])

    # DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
    between_df = pd.concat([between_df, temp_df], ignore_index=True)
    

In [None]:
# Between-subject feature values for the "-500:-300" frame: first with the
# baseline step, then without it ("no BS" label). The transformer classes are
# project-specific (presumably provided by the star import of
# rumination_experiment_transformers_averaged_CDS).
for cutoff in [40]:
    # Rebuild the frame from a deep-copied dict so the pipeline works on its
    # own copy (presumably the transformers modify their input — TODO confirm).
    X_train_df_500copy = pd.DataFrame(copy.deepcopy(X_train_df_500.to_dict()))

    # label under which the values are stored in between_df
    pipeline_name = '-500:-300 ' + str(cutoff)

    ern_features = Pipeline(steps=[
        ("ern_data_extraction", ErnTransformer()),
        ("ern_amplitude", ErnAmplitude2()),
    ])

    pe_features = Pipeline(steps=[
        ("pe_data_extraction", PeTransformer(start_pe_bin=3, stop_pe_bin=8)),
        ("pe_amplitude", PeAmplitude2()),
    ])

    # ERN and Pe features computed side by side and concatenated column-wise
    ern_pe_features = FeatureUnion(
        [("ern_features", ern_features), ("pe_features", pe_features)], n_jobs=10
    )

    x_pre = Pipeline([
        ("channels_extraction", PickChannels(channels_list=box)),
        ("average", Evoked()),
        ('extract_data', ExtractData()),
        ("lowpass_filter", LowpassFilter(cutoff=cutoff)),
        ("binning", BinTransformer(step=12)),
        ("baseline", ErnBaselined()),
        ("centering", CenteredSignalAfterBaseline3()),
        ("features", ern_pe_features),
    ]).fit_transform(X_train_df_500copy)

    # one value per row: the sum over that row's feature columns
    x_feature_500_between = np.sum(x_pre, axis=1)
    x_500_std_between = np.std(x_feature_500_between, axis=0)

    values = x_feature_500_between.flatten().tolist()
    names = [pipeline_name] * len(x_feature_500_between)

    temp_df = pd.DataFrame(zip(names, values), columns=['pipeline', 'values'])

    # DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
    between_df = pd.concat([between_df, temp_df], ignore_index=True)

    ##########################################################################

    # Same data and steps again, but without the baseline step.
    X_train_df_500copy = pd.DataFrame(copy.deepcopy(X_train_df_500.to_dict()))

    pipeline_name = '-500:-300 ' + str(cutoff) + ' no BS'

    ern_features = Pipeline(steps=[
        ("ern_data_extraction", ErnTransformer()),
        ("ern_amplitude", ErnAmplitude2()),
    ])

    pe_features = Pipeline(steps=[
        ("pe_data_extraction", PeTransformer(start_pe_bin=3, stop_pe_bin=8)),
        ("pe_amplitude", PeAmplitude2()),
    ])

    ern_pe_features = FeatureUnion(
        [("ern_features", ern_features), ("pe_features", pe_features)], n_jobs=10
    )

    x_pre = Pipeline([
        ("channels_extraction", PickChannels(channels_list=box)),
        ("average", Evoked()),
        ('extract_data', ExtractData()),
        ("lowpass_filter", LowpassFilter(cutoff=cutoff)),
        ("binning", BinTransformer(step=12)),
        # ("baseline", ErnBaselined()),
        ("centering", CenteredSignalAfterBaseline3()),
        ("features", ern_pe_features),
    ]).fit_transform(X_train_df_500copy)

    # overwrites the values computed for the baselined variant above
    x_feature_500_between = np.sum(x_pre, axis=1)
    x_500_std_between = np.std(x_feature_500_between, axis=0)

    values = x_feature_500_between.flatten().tolist()
    names = [pipeline_name] * len(x_feature_500_between)

    temp_df = pd.DataFrame(zip(names, values), columns=['pipeline', 'values'])

    between_df = pd.concat([between_df, temp_df], ignore_index=True)

- with spatial filter

In [None]:
# X_train_df_100copy = pd.DataFrame(copy.deepcopy(X_train_df_100.to_dict()))

In [None]:
# Between-subject values for the -100:0 data with the PCA spatial filter.
# For every low-pass cutoff the pipeline is run twice -- with the ErnBaselined
# step and without it ("no BS") -- and the per-row sums of the ERN + Pe
# features are appended to `between_df` under the pipeline's name.
for cutoff in [40]:
    # (pipeline-name suffix, ERN amplitude transformer, include ErnBaselined?)
    # NOTE(review): the baselined variant uses ErnAmplitude2_prim while the
    # "no BS" variant uses ErnAmplitude2. This is kept exactly as it was in
    # the original copy-pasted halves -- confirm the difference is intended.
    variants = [
        ('', ErnAmplitude2_prim, True),
        (' no BS', ErnAmplitude2, False),
    ]

    for name_suffix, ern_amplitude_cls, use_baseline in variants:
        # Every run gets its own deep copy of X_train_df_100.
        X_train_df_100copy = pd.DataFrame(copy.deepcopy(X_train_df_100.to_dict()))

        pipeline_name = '-100:0 SF ' + str(cutoff) + name_suffix

        ern_features = Pipeline(steps=[
            ("ern_data_extraction", ErnTransformer()),
            ("ern_amplitude", ern_amplitude_cls()),
        ])

        pe_features = Pipeline(steps=[
            ("pe_data_extraction", PeTransformer(start_pe_bin=2, stop_pe_bin=8)),
            ("pe_amplitude", PeAmplitude2()),
        ])

        ern_pe_features = FeatureUnion(
            [("ern_features", ern_features), ("pe_features", pe_features)],
            n_jobs=10,
        )

        steps = [
            ("channels_extraction", PickChannels(channels_list=box)),
            ("average", Evoked()),
            ('extract_data', ExtractData()),
            ("spatial_filter_preprocessing", SpatialFilterPreprocessing()),
            ("spatial_filter", PCA(n_components=3, random_state=random_state)),
            ("spatial_filter_postprocessing", SpatialFilterPostprocessing(timepoints_count=181)),
            ("lowpass_filter", LowpassFilter(cutoff=cutoff)),
            ('neg', ReverseComponent3()),
            ("binning", BinTransformer(step=12)),
        ]
        # The only structural difference between the two variants.
        if use_baseline:
            steps.append(("baseline", ErnBaselined()))
        steps += [
            ("centering", CenteredSignalAfterBaseline3()),
            ("features", ern_pe_features),
        ]

        x_pre = Pipeline(steps).fit_transform(X_train_df_100copy)

        # Collapse axis 1 of the pipeline output into a single value per row.
        x_feature_100_sf_between = np.sum(x_pre, axis=1)
        x_100_std_sf_between = np.std(x_feature_100_sf_between, axis=0)

        values = x_feature_100_sf_between.flatten().tolist()
        names = [pipeline_name] * len(x_feature_100_sf_between)
        temp_df = pd.DataFrame(zip(names, values), columns=['pipeline', 'values'])

        # DataFrame.append was removed in pandas 2.0; pd.concat does the same job.
        between_df = pd.concat([between_df, temp_df], ignore_index=True)

In [None]:
# X_train_df_500copy = pd.DataFrame(copy.deepcopy(X_train_df_500.to_dict()))

In [None]:
# Between-subject values for the -500:-300 data with the PCA spatial filter.
# For every low-pass cutoff the pipeline is run with the ErnBaselined step and
# without it ("no BS"); the per-row sums of the ERN + Pe features are appended
# to `between_df` under the pipeline's name.
for cutoff in [15, 20, 30, 40]:
    # (pipeline-name suffix, include ErnBaselined?)
    for name_suffix, use_baseline in [('', True), (' no BS', False)]:
        # Every run gets its own deep copy of X_train_df_500.
        X_train_df_500copy = pd.DataFrame(copy.deepcopy(X_train_df_500.to_dict()))

        pipeline_name = '-500:-300 SF ' + str(cutoff) + name_suffix

        ern_features = Pipeline(steps=[
            ("ern_data_extraction", ErnTransformer()),
            ("ern_amplitude", ErnAmplitude2()),
        ])

        pe_features = Pipeline(steps=[
            ("pe_data_extraction", PeTransformer(start_pe_bin=2, stop_pe_bin=8)),
            ("pe_amplitude", PeAmplitude2()),
        ])

        ern_pe_features = FeatureUnion(
            [("ern_features", ern_features), ("pe_features", pe_features)],
            n_jobs=10,
        )

        steps = [
            ("channels_extraction", PickChannels(channels_list=box)),
            ("average", Evoked()),
            ('extract_data', ExtractData()),
            ("spatial_filter_preprocessing", SpatialFilterPreprocessing()),
            ("spatial_filter", PCA(n_components=3, random_state=random_state)),
            ("spatial_filter_postprocessing", SpatialFilterPostprocessing(timepoints_count=181)),
            ("lowpass_filter", LowpassFilter(cutoff=cutoff)),
            ('neg', ReverseComponent3()),
            ("binning", BinTransformer(step=12)),
        ]
        # The only difference between the two variants.
        if use_baseline:
            steps.append(("baseline", ErnBaselined()))
        steps += [
            ("centering", CenteredSignalAfterBaseline3()),
            ("features", ern_pe_features),
        ]

        x_pre = Pipeline(steps).fit_transform(X_train_df_500copy)

        # Collapse axis 1 of the pipeline output into a single value per row.
        x_feature_500_sf_between = np.sum(x_pre, axis=1)
        x_500_std_sf_between = np.std(x_feature_500_sf_between, axis=0)

        values = x_feature_500_sf_between.flatten().tolist()
        names = [pipeline_name] * len(x_feature_500_sf_between)
        temp_df = pd.DataFrame(zip(names, values), columns=['pipeline', 'values'])

        # DataFrame.append was removed in pandas 2.0; pd.concat does the same job.
        between_df = pd.concat([between_df, temp_df], ignore_index=True)

- with spatial filter and feature extraction

In [None]:
# X_train_df_100copy = pd.DataFrame(copy.deepcopy(X_train_df_100.to_dict()))

In [None]:
# pipeline_name = '-100:0 SF EX'

# x_pre = Pipeline([
#             ("channels_extraction",PickChannels(channels_list=box)),
#             ("average", Evoked()),
#             ('extract_data', ExtractData()),
#             ("spatial_filter_preprocessing", SpatialFilterPreprocessing()),
#             ("spatial_filter",PCA(n_components=3, random_state=random_state)),
#             ("spatial_filter_postprocessing",SpatialFilterPostprocessing(timepoints_count=181)),
#             ("lowpass_filter", LowpassFilter()),
#             ('neg', ReverseComponent3()),
#             ("binning", BinTransformer(step=12)),
#             ("baseline", ErnBaselined()),
#             ("centering", CenteredSignalAfterBaseline3()),
#             # ("features", ern_pe_features),
#             # ("ern_amplitude", ErnAmplitude2())
# ]).fit_transform(X_train_df_100copy)


# ern_features_pre = Pipeline(steps=[
#                                 ("ern_data_extraction", ErnTransformer()),
#                                 ("ern_amplitude", ErnAmplitude2()),
#                                 ("data_channel_swap", ChannelDataSwap()),
#                                 ("postprocessing", PostprocessingTransformer()),
#                                 # ("scaler", StandardScaler()),
#                                 # ("feature_extraction", FastICA(random_state=random_state, n_components=3))
#                 ]).fit_transform(x_pre)


# ern_features = Pipeline([("feature_extraction", ern_fex)]).transform(ern_features_pre)


# pe_features_pre = Pipeline(steps = [
#                                 ("pe_data_extraction", PeTransformer(start_pe_bin=2, stop_pe_bin=8)),
#                                 ("pe_amplitude", PeAmplitude2()),
#                                 ("data_channel_swap", ChannelDataSwap()),
#                                 ("postprocessing", PostprocessingTransformer()),
#                                 # ("scaler", StandardScaler()),
#                                 # ("feature_extraction", FastICA(random_state=random_state, n_components=3))

#                 ]).fit_transform(x_pre)

# pe_features = Pipeline([("feature_extraction", pe_fex)]).transform(pe_features_pre)

# # ern_pe_features = FeatureUnion([("ern_features", ern_features), ("pe_features", pe_features)], n_jobs = 10).fit_transform()

# x_pre2 = zip(ern_features, pe_features)
# x_pre2 = np.array(list(x_pre2)).reshape(x_pre.shape[0],-1)
        
# x_feature_100_sf_between = np.sum(x_pre2, axis=1)
# x_100_std_sf_between = np.std(x_feature_100_sf_between, axis=0)

# values = x_feature_100_sf_between.flatten().tolist()
# names = [pipeline_name] * len(x_feature_100_sf_between)

# temp_df = pd.DataFrame(zip(names, values), columns=['pipeline', 'values'])

# between_df = between_df.append(temp_df, ignore_index=True)

In [None]:
# X_train_df_500copy = pd.DataFrame(copy.deepcopy(X_train_df_500.to_dict()))

In [None]:
# pipeline_name = '-500:-300 SF EX'

# # ern_features = Pipeline(steps=[
# #                                 ("ern_data_extraction", ErnTransformer()),
# #                                 ("ern_amplitude", ErnAmplitude2()),
# #                                 ("data_channel_swap", ChannelDataSwap()),
# #                                 ("postprocessing", PostprocessingTransformer()),
# #                                 ("scaler", StandardScaler()),
# #                                 ("feature_extraction", FastICA(random_state=random_state, n_components=3))
# #                 ])


# # pe_features = Pipeline(steps = [
# #                                 ("pe_data_extraction", PeTransformer(start_pe_bin=2, stop_pe_bin=8)),
# #                                 ("pe_amplitude", PeAmplitude2()),
# #                                 ("data_channel_swap", ChannelDataSwap()),
# #                                 ("postprocessing", PostprocessingTransformer()),
# #                                 ("scaler", StandardScaler()),
# #                                 ("feature_extraction", FastICA(random_state=random_state, n_components=3))
# #                 ])

# # ern_pe_features = FeatureUnion([("ern_features", ern_features), ("pe_features", pe_features)], n_jobs = 10)


# # x_pre = Pipeline([
# #             ("channels_extraction",PickChannels(channels_list=box)),
# #             ("average", Evoked()),
# #             ('extract_data', ExtractData()),
# #             ("spatial_filter_preprocessing", SpatialFilterPreprocessing()),
# #             ("spatial_filter",PCA(n_components=3, random_state=random_state)),
# #             ("spatial_filter_postprocessing",SpatialFilterPostprocessing(timepoints_count=181)),
# #             ("lowpass_filter", LowpassFilter()),
# #             ('neg', ReverseComponent3()),
# #             ("binning", BinTransformer(step=12)),
# #             ("baseline", ErnBaselined()),
# #             ("centering", CenteredSignalAfterBaseline3()),
# #             ("features", ern_pe_features),
# #             # ("ern_amplitude", ErnAmplitude2())
# # ]).fit_transform(X_train_df_500copy)

# x_pre = Pipeline([
#             ("channels_extraction",PickChannels(channels_list=box)),
#             ("average", Evoked()),
#             ('extract_data', ExtractData()),
#             ("spatial_filter_preprocessing", SpatialFilterPreprocessing()),
#             ("spatial_filter",PCA(n_components=3, random_state=random_state)),
#             ("spatial_filter_postprocessing",SpatialFilterPostprocessing(timepoints_count=181)),
#             ("lowpass_filter", LowpassFilter()),
#             ('neg', ReverseComponent3()),
#             ("binning", BinTransformer(step=12)),
#             ("baseline", ErnBaselined()),
#             ("centering", CenteredSignalAfterBaseline3()),
#             # ("features", ern_pe_features),
#             # ("ern_amplitude", ErnAmplitude2())
# ]).fit_transform(X_train_df_500copy)


# ern_features_pre = Pipeline(steps=[
#                                 ("ern_data_extraction", ErnTransformer()),
#                                 ("ern_amplitude", ErnAmplitude2()),
#                                 ("data_channel_swap", ChannelDataSwap()),
#                                 ("postprocessing", PostprocessingTransformer()),
#                                 # ("scaler", StandardScaler()),
#                                 # ("feature_extraction", FastICA(random_state=random_state, n_components=3))
#                 ]).fit_transform(x_pre)


# ern_features = Pipeline([("feature_extraction", ern_fex)]).transform(ern_features_pre)


# pe_features_pre = Pipeline(steps = [
#                                 ("pe_data_extraction", PeTransformer(start_pe_bin=2, stop_pe_bin=8)),
#                                 ("pe_amplitude", PeAmplitude2()),
#                                 ("data_channel_swap", ChannelDataSwap()),
#                                 ("postprocessing", PostprocessingTransformer()),
#                                 # ("scaler", StandardScaler()),
#                                 # ("feature_extraction", FastICA(random_state=random_state, n_components=3))

#                 ]).fit_transform(x_pre)

# pe_features = Pipeline([("feature_extraction", pe_fex)]).transform(pe_features_pre)

# # ern_pe_features = FeatureUnion([("ern_features", ern_features), ("pe_features", pe_features)], n_jobs = 10).fit_transform()

# x_pre2 = zip(ern_features, pe_features)
# x_pre2 = np.array(list(x_pre2)).reshape(x_pre.shape[0],-1)

# x_feature_500_sf_between = np.sum(x_pre2, axis=1)
# x_500_std_sf_between = np.std(x_feature_500_sf_between, axis=0)

# values = x_feature_500_sf_between.flatten().tolist()
# names = [pipeline_name] * len(x_feature_500_sf_between)

# temp_df = pd.DataFrame(zip(names, values), columns=['pipeline', 'values'])

# between_df = between_df.append(temp_df, ignore_index=True)

### Visualization

In [None]:
import seaborn as sns

# Global figure look: 10x8 figures, slightly enlarged fonts, white grid.
sns.set(rc={'figure.figsize': (10, 8)}, font_scale=1.2)
sns.set_style("whitegrid")

# One horizontal point per pipeline; the plotted statistic is the standard
# deviation (np.std) of that pipeline's rows in `between_df`.
pointplot_options = dict(
    orient='h',
    join=False,
    estimator=np.std,
    ci=95,
    capsize=.05,
)
ax = sns.pointplot(x='values', y='pipeline', data=between_df, **pointplot_options)
# ax.figure.savefig("between_subject_std_ern_pe_lowpass.png")

In [None]:
import seaborn as sns

# Same figure settings as the previous plotting cell: 10x8, font scale 1.2,
# white grid.
sns.set(rc={'figure.figsize': (10, 8)}, font_scale=1.2)
sns.set_style("whitegrid")

# Horizontal point plot of np.std of `values` per pipeline in `between_df`.
pointplot_options = dict(
    orient='h',
    join=False,
    estimator=np.std,
    ci=95,
    capsize=.05,
)
ax = sns.pointplot(x='values', y='pipeline', data=between_df, **pointplot_options)
# ax.figure.savefig("between_subject_std_ern_pe_lowpass.png")

----
# WITHIN SUBJECT
- without spatial filter

In [None]:
# Empty accumulator for the within-subject results: the cells below add one
# row per subject and pipeline, with the pipeline's name in 'pipeline' and
# the computed spread in 'values'.
within_df = pd.DataFrame({'pipeline': [], 'values': []})

## -100 to 0

In [None]:
# X_train_df_100copy = pd.DataFrame(copy.deepcopy(X_train_df_100.to_dict()))

In [None]:
# Within-subject spread for the -100:0 data, no spatial filter, with the
# ErnBaselined step. For every subject (row of X_train_df_100) the ERN + Pe
# features are computed from ExtractDataEpochs output, summed over axis 1,
# and the standard deviation of those sums is stored as one row of
# `within_df`.
for cutoff in [40]:
    X_train_df_100copy = pd.DataFrame(copy.deepcopy(X_train_df_100.to_dict()))

    pipeline_name = '-100:0 ' + str(cutoff)

    # Rows are collected here and concatenated once per cutoff:
    # DataFrame.append was removed in pandas 2.0, and growing a frame one row
    # at a time copies it on every iteration.
    rows = []

    for i in range(len(X_train_df_100copy)):
        # One-row frame for the current subject (positional slice).
        X = X_train_df_100copy.iloc[i:i + 1]
        x_pre = Pipeline([
            ("channels_extraction", PickChannels(channels_list=box)),
            ('extract_data', ExtractDataEpochs()),
        ]).fit_transform(X)

        # Only one subject went in, so take its single entry.
        x_pre = x_pre[0]

        # NOTE(review): this cell differs from the other within-subject cells
        # without spatial filter in three ways -- ErnAmplitude2_prim instead
        # of ErnAmplitude2, PeTransformer bins 2..7 instead of 3..8, and an
        # extra ReverseSignal step. All three are kept as in the original;
        # confirm they are intended.
        ern_features = Pipeline(steps=[
            ("ern_data_extraction", ErnTransformer()),
            ("ern_amplitude", ErnAmplitude2_prim()),
        ])

        pe_features = Pipeline(steps=[
            ("pe_data_extraction", PeTransformer(start_pe_bin=2, stop_pe_bin=7)),
            ("pe_amplitude", PeAmplitude2()),
        ])

        ern_pe_features = FeatureUnion(
            [("ern_features", ern_features), ("pe_features", pe_features)],
            n_jobs=10,
        )

        x_pre2 = Pipeline([
            ("lowpass_filter", LowpassFilter(cutoff=cutoff)),
            ('neg', ReverseSignal()),
            ("binning", BinTransformer(step=12)),
            ("baseline", ErnBaselined()),
            ("centering", CenteredSignalAfterBaseline3()),
            ("features", ern_pe_features),
        ]).fit_transform(x_pre)

        # Sum over axis 1, then take the spread of those sums for this subject.
        f_vector = np.sum(x_pre2, axis=1)
        f_variance = np.std(f_vector)  # standard deviation, despite the name

        data = {'pipeline': pipeline_name,
                'values': f_variance,
                }
        rows.append(data)

    within_df = pd.concat(
        [within_df, pd.DataFrame(rows, columns=['pipeline', 'values'])],
        ignore_index=True,
    )

In [None]:
# Within-subject spread for the -100:0 data, no spatial filter and no
# ErnBaselined step ("no BS"). For every subject (row of X_train_df_100) the
# ERN + Pe features are computed from ExtractDataEpochs output, summed over
# axis 1, and the standard deviation of those sums is stored as one row of
# `within_df`.
for cutoff in [15, 20, 30, 40]:
    X_train_df_100copy = pd.DataFrame(copy.deepcopy(X_train_df_100.to_dict()))

    pipeline_name = '-100:0 ' + str(cutoff) + ' no BS'

    # Rows are collected here and concatenated once per cutoff:
    # DataFrame.append was removed in pandas 2.0, and growing a frame one row
    # at a time copies it on every iteration.
    rows = []

    for i in range(len(X_train_df_100copy)):
        # One-row frame for the current subject (positional slice).
        X = X_train_df_100copy.iloc[i:i + 1]
        x_pre = Pipeline([
            ("channels_extraction", PickChannels(channels_list=box)),
            ('extract_data', ExtractDataEpochs()),
        ]).fit_transform(X)

        # Only one subject went in, so take its single entry.
        x_pre = x_pre[0]

        ern_features = Pipeline(steps=[
            ("ern_data_extraction", ErnTransformer()),
            ("ern_amplitude", ErnAmplitude2()),
        ])

        pe_features = Pipeline(steps=[
            ("pe_data_extraction", PeTransformer(start_pe_bin=3, stop_pe_bin=8)),
            ("pe_amplitude", PeAmplitude2()),
        ])

        ern_pe_features = FeatureUnion(
            [("ern_features", ern_features), ("pe_features", pe_features)],
            n_jobs=10,
        )

        x_pre2 = Pipeline([
            ("lowpass_filter", LowpassFilter(cutoff=cutoff)),
            ("binning", BinTransformer(step=12)),
            ("centering", CenteredSignalAfterBaseline3()),
            ("features", ern_pe_features),
        ]).fit_transform(x_pre)

        # Sum over axis 1, then take the spread of those sums for this subject.
        f_vector = np.sum(x_pre2, axis=1)
        f_variance = np.std(f_vector)  # standard deviation, despite the name

        data = {'pipeline': pipeline_name,
                'values': f_variance,
                }
        rows.append(data)

    within_df = pd.concat(
        [within_df, pd.DataFrame(rows, columns=['pipeline', 'values'])],
        ignore_index=True,
    )

## -500 to -300

In [None]:
# X_train_df_500copy = pd.DataFrame(copy.deepcopy(X_train_df_500.to_dict()))

In [None]:
# Within-subject spread for the -500:-300 data, no spatial filter.
# For every cutoff the per-subject computation is run with the ErnBaselined
# step and without it ("no BS"). For every subject (row of X_train_df_500)
# the ERN + Pe features are summed over axis 1 and the standard deviation of
# those sums is stored as one row of `within_df`.
for cutoff in [15, 20, 30, 40]:
    # (pipeline-name suffix, include ErnBaselined?)
    for name_suffix, use_baseline in [('', True), (' no BS', False)]:
        X_train_df_500copy = pd.DataFrame(copy.deepcopy(X_train_df_500.to_dict()))

        pipeline_name = '-500:-300 ' + str(cutoff) + name_suffix

        # Rows are collected here and concatenated once per variant:
        # DataFrame.append was removed in pandas 2.0, and growing a frame one
        # row at a time copies it on every iteration.
        rows = []

        for i in range(len(X_train_df_500copy)):
            # One-row frame for the current subject (positional slice).
            X = X_train_df_500copy.iloc[i:i + 1]
            x_pre = Pipeline([
                ("channels_extraction", PickChannels(channels_list=box)),
                ('extract_data', ExtractDataEpochs()),
            ]).fit_transform(X)
            # Only one subject went in, so take its single entry.
            x_pre = x_pre[0]

            ern_features = Pipeline(steps=[
                ("ern_data_extraction", ErnTransformer()),
                ("ern_amplitude", ErnAmplitude2()),
            ])

            pe_features = Pipeline(steps=[
                ("pe_data_extraction", PeTransformer(start_pe_bin=3, stop_pe_bin=8)),
                ("pe_amplitude", PeAmplitude2()),
            ])

            ern_pe_features = FeatureUnion(
                [("ern_features", ern_features), ("pe_features", pe_features)],
                n_jobs=10,
            )

            steps = [
                ("lowpass_filter", LowpassFilter(cutoff=cutoff)),
                ("binning", BinTransformer(step=12)),
            ]
            # The only difference between the two variants.
            if use_baseline:
                steps.append(("baseline", ErnBaselined()))
            steps += [
                ("centering", CenteredSignalAfterBaseline3()),
                ("features", ern_pe_features),
            ]

            x_pre2 = Pipeline(steps).fit_transform(x_pre)

            # Sum over axis 1, then take the spread of those sums.
            f_vector = np.sum(x_pre2, axis=1)
            f_variance = np.std(f_vector)  # standard deviation, despite the name

            data = {'pipeline': pipeline_name,
                    'values': f_variance,
                    }
            rows.append(data)

        within_df = pd.concat(
            [within_df, pd.DataFrame(rows, columns=['pipeline', 'values'])],
            ignore_index=True,
        )

- With spatial filter

### -100 to 0

In [None]:
# Rebuild X_train_df_100 from a deep copy of its dict form, presumably so the
# spatial-filter fit below cannot modify the objects stored in the original
# frame (verify against the transformers).
X_train_df_100copy = pd.DataFrame(copy.deepcopy(X_train_df_100.to_dict()))

In [None]:
# Fit the PCA spatial filter on the channel-picked, averaged -100:0 data.
spatial_filter_fit_steps = [
    ("channels_extraction", PickChannels(channels_list=box)),
    ("average", Evoked()),
    ('extract_averaged_data', ExtractData()),
    # ("narrow_indices", NarrowIndices(start=76, stop=257)),
    ("spatial_filter_preprocessing", SpatialFilterPreprocessing()),
    ("spatial_filter", PCA(n_components=3, random_state=random_state)),
]
pre_pip_for_spatial_filter = Pipeline(spatial_filter_fit_steps)
pre_pip_for_spatial_filter = pre_pip_for_spatial_filter.fit(X_train_df_100copy)

# Keep a handle on the fitted PCA step so later cells can reuse it.
spatial_filter = pre_pip_for_spatial_filter.named_steps['spatial_filter']

In [None]:
# X_train_df_100copy = pd.DataFrame(copy.deepcopy(X_train_df_100.to_dict()))

In [None]:
# Within-subject spread for the -100:0 data with the spatial filter.
# For every cutoff the per-subject computation is run with the ErnBaselined
# step and without it ("no BS"). The already-fitted `spatial_filter` is
# reused: the preprocessing pipeline is only `.transform`-ed, never fitted,
# so the filter is not re-estimated per subject.
for cutoff in [40]:
    # (pipeline-name suffix, ERN amplitude transformer, include ErnBaselined?)
    # NOTE(review): the baselined variant uses ErnAmplitude2_prim while the
    # "no BS" variant uses ErnAmplitude2. This is kept exactly as it was in
    # the original copy-pasted halves -- confirm the difference is intended.
    variants = [
        ('', ErnAmplitude2_prim, True),
        (' no BS', ErnAmplitude2, False),
    ]

    for name_suffix, ern_amplitude_cls, use_baseline in variants:
        X_train_df_100copy = pd.DataFrame(copy.deepcopy(X_train_df_100.to_dict()))

        pipeline_name = '-100:0 SF ' + str(cutoff) + name_suffix

        # Rows are collected here and concatenated once per variant:
        # DataFrame.append was removed in pandas 2.0, and growing a frame one
        # row at a time copies it on every iteration.
        rows = []

        for i in range(len(X_train_df_100copy)):
            # One-row frame for the current subject (positional slice).
            X = X_train_df_100copy.iloc[i:i + 1]
            x_pre = Pipeline([
                ("channels_extraction", PickChannels(channels_list=box)),
                ('extract_data', ExtractDataEpochs()),
            ]).fit_transform(X)

            # Only one subject went in, so take its single entry.
            x_pre = x_pre[0]

            filter_steps = [
                ("spatial_filter_preprocessing", SpatialFilterPreprocessing()),
                ("spatial_filter", spatial_filter),
                ("spatial_filter_postprocessing", SpatialFilterPostprocessing(timepoints_count=181)),
                ("lowpass_filter", LowpassFilter(cutoff=cutoff)),
                ('neg', ReverseComponent3()),
                ("binning", BinTransformer(step=12)),
            ]
            # The only structural difference between the two variants.
            if use_baseline:
                filter_steps.append(("baseline", ErnBaselined()))
            filter_steps.append(("centering", CenteredSignalAfterBaseline3()))

            # transform(), not fit_transform(): keeps the pre-fitted PCA as is.
            x_pre_pre = Pipeline(filter_steps).transform(x_pre)

            ern_features_pre = Pipeline(steps=[
                ("ern_data_extraction", ErnTransformer()),
                ("ern_amplitude", ern_amplitude_cls()),
            ])

            pe_features_pre = Pipeline(steps=[
                ("pe_data_extraction", PeTransformer(start_pe_bin=3, stop_pe_bin=8)),
                ("pe_amplitude", PeAmplitude2()),
            ])

            ern_pe_features = FeatureUnion(
                [("ern_features", ern_features_pre), ("pe_features", pe_features_pre)],
                n_jobs=10,
            )

            x_pre2 = Pipeline([
                ("features", ern_pe_features),
            ]).fit_transform(x_pre_pre)

            # Sum over axis 1, then take the spread of those sums.
            f_vector = np.sum(x_pre2, axis=1)
            f_variance = np.std(f_vector)  # standard deviation, despite the name

            data = {'pipeline': pipeline_name,
                    'values': f_variance,
                    }
            rows.append(data)

        within_df = pd.concat(
            [within_df, pd.DataFrame(rows, columns=['pipeline', 'values'])],
            ignore_index=True,
        )

### -500 to -300

In [None]:
# Independent copy of X_train_df_500: the frame is turned into a dict,
# deep-copied (so the objects stored in its cells are duplicated too) and
# wrapped in a new DataFrame.
X_train_df_500copy = pd.DataFrame(copy.deepcopy(X_train_df_500.to_dict()))

In [None]:
# Fit the spatial filter (a 3-component PCA) on the -500:-300 training copy.
# The steps in front of the PCA prepare its input: channel selection,
# averaging, data extraction and spatial-filter preprocessing.
spatial_filter_steps = [
    ("channels_extraction", PickChannels(channels_list=box)),
    ("average", Evoked()),
    ("extract_averaged_data", ExtractData()),
    ("spatial_filter_preprocessing", SpatialFilterPreprocessing()),
    ("spatial_filter", PCA(n_components=3, random_state=random_state)),
]
pre_pip_for_spatial_filter = Pipeline(spatial_filter_steps)
pre_pip_for_spatial_filter.fit(X_train_df_500copy)

# Keep the fitted PCA step so later pipelines can reuse it as-is.
spatial_filter = pre_pip_for_spatial_filter['spatial_filter']

In [None]:
# X_train_df_500copy = pd.DataFrame(copy.deepcopy(X_train_df_500.to_dict()))

In [None]:
# Spread of the summed ERN + Pe features for the -500:-300 data, computed
# row by row of the training frame (presumably one participant per row), for
# every low-pass cutoff and both with and without the ErnBaselined step.
#
# Rows are gathered in a list and added to within_df with a single pd.concat:
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, and it
# copied the whole frame on every call.
within_rows_500 = []

for cutoff in [15, 20, 30, 40]:
    # Same order as before: the baselined variant first, then ' no BS'.
    for use_baseline in (True, False):
        # Fresh deep copy for every pass, as in the original cell.
        X_train_df_500copy = pd.DataFrame(copy.deepcopy(X_train_df_500.to_dict()))

        pipeline_name = '-500:-300 SF ' + str(cutoff)
        if not use_baseline:
            pipeline_name += ' no BS'

        for i in range(len(X_train_df_500copy)):
            # One-row slice, so the pipeline output has a single entry.
            X = X_train_df_500copy[i:i+1]
            x_pre = Pipeline([
                ("channels_extraction", PickChannels(channels_list=box)),
                ('extract_data', ExtractDataEpochs()),
            ]).fit_transform(X)
            x_pre = x_pre[0]

            # New transformer instances for every row; only the already
            # fitted `spatial_filter` is shared between rows.
            preprocessing_steps = [
                ("spatial_filter_preprocessing", SpatialFilterPreprocessing()),
                ("spatial_filter", spatial_filter),
                ("spatial_filter_postprocessing", SpatialFilterPostprocessing(timepoints_count=181)),
                ("lowpass_filter", LowpassFilter(cutoff=cutoff)),
                ('neg', ReverseComponent3()),
                ("binning", BinTransformer(step=12)),
            ]
            if use_baseline:
                preprocessing_steps.append(("baseline", ErnBaselined()))
            preprocessing_steps.append(("centering", CenteredSignalAfterBaseline3()))

            # transform() only: nothing is re-fitted on this row's data.
            x_pre_pre = Pipeline(preprocessing_steps).transform(x_pre)

            ern_features = Pipeline(steps=[
                ("ern_data_extraction", ErnTransformer()),
                ("ern_amplitude", ErnAmplitude2()),
            ])

            pe_features = Pipeline(steps=[
                ("pe_data_extraction", PeTransformer(start_pe_bin=3, stop_pe_bin=8)),
                ("pe_amplitude", PeAmplitude2()),
            ])

            ern_pe_features = FeatureUnion(
                [("ern_features", ern_features), ("pe_features", pe_features)],
                n_jobs=10,
            )

            x_pre2 = Pipeline([("features", ern_pe_features)]).fit_transform(x_pre_pre)

            # Sum (not mean) over axis 1, then the standard deviation of the sums.
            f_vector = np.sum(x_pre2, axis=1)
            f_variance = np.std(f_vector)

            within_rows_500.append({'pipeline': pipeline_name, 'values': f_variance})

if within_rows_500:
    within_df = pd.concat(
        [within_df, pd.DataFrame(within_rows_500)],
        ignore_index=True,
    )

- with SF and FEX

### -100 to 0

In [None]:
# X_train_df_100copy = pd.DataFrame(copy.deepcopy(X_train_df_100.to_dict()))

In [None]:
# pre_pip_for_spatial_filter = Pipeline([
#         ("channels_extraction",PickChannels(channels_list = box)),
#         ("average", Evoked()),
#         ('extract_averaged_data', ExtractData()),
#         # ("narrow_indices", NarrowIndices(start=76, stop=257)),
#         ("spatial_filter_preprocessing", SpatialFilterPreprocessing()),
#         ("spatial_filter",PCA(n_components=3, random_state=random_state)),
# ]).fit(X_train_df_100copy)

# spatial_filter = pre_pip_for_spatial_filter['spatial_filter']

In [None]:
# X_train_df_100copy = pd.DataFrame(copy.deepcopy(X_train_df_100.to_dict()))

In [None]:
# pipeline_name = '-100:0 SF EX'

# for i in range(0,len(X_train_df_100copy)):
    
#     X = X_train_df_100copy[i:i+1]    
#     x_pre = Pipeline([
#             ("channels_extraction",PickChannels(channels_list=box)),
#             ('extract_data', ExtractDataEpochs()),
#     ]).fit_transform(X)  
    
#     x_pre = x_pre[0] 
    
#     x_pre_pre = Pipeline([
#         ("spatial_filter_preprocessing", SpatialFilterPreprocessing()),
#         ("spatial_filter", spatial_filter),
#         ("spatial_filter_postprocessing",SpatialFilterPostprocessing(timepoints_count=181)),
#         ("lowpass_filter", LowpassFilter()),
#         ('neg', ReverseComponent3()),
#         ("binning", BinTransformer(step=12)),
#         ("baseline", ErnBaselined()),
#         ("centering", CenteredSignalAfterBaseline3())]).transform(x_pre)

    
#     ern_features_pre = Pipeline(steps=[
#                                 ("ern_data_extraction", ErnTransformer()),
#                                 ("ern_amplitude", ErnAmplitude2()),
#                                 ("data_channel_swap", ChannelDataSwap()),
#                                 ("postprocessing", PostprocessingTransformer()),
#                                 # ("scaler", StandardScaler()),
#                 ]).fit_transform(x_pre_pre)
        
#     ern_features = Pipeline([("feature_extraction", ern_fex)]).transform(ern_features_pre)


#     pe_features_pre = Pipeline(steps = [
#                                 ("pe_data_extraction", PeTransformer(start_pe_bin=3, stop_pe_bin=8)),
#                                 ("pe_amplitude", PeAmplitude2()),
#                                 ("data_channel_swap", ChannelDataSwap()),
#                                 ("postprocessing", PostprocessingTransformer()),
#                                 # ("scaler", StandardScaler()),
#                 ]).fit_transform(x_pre_pre)
    
#     pe_features = Pipeline([("feature_extraction", pe_fex)]).transform(pe_features_pre)
        
#     x_pre2 = zip(ern_features, pe_features)
#     x_pre2 = np.array(list(x_pre2)).reshape(x_pre.shape[0],-1)
    
#     # print(x_pre2.shape)

#     # ern_pe_features = FeatureUnion([("ern_features", ern_features), ("pe_features", pe_features)], n_jobs = 10)
    
#     # x_pre2 = Pipeline([
#     #     ("features", ern_pe_features),
#     # ]).fit_transform(x_pre_pre)
    
#     # f_vector = np.mean(x_pre2, axis=1)
#     f_vector = np.sum(x_pre2, axis=1)
    
#     f_variance = np.std(f_vector)
    
#     data = {'pipeline' : pipeline_name,
#             'values' : f_variance,
#            }
    
#     within_df = within_df.append(data, ignore_index = True)
    
    
# #     variances_100_sf.append(f_variance)
    
# # variances_100_sf = np.array(variances_100_sf)

### -500 to -300

In [None]:
# X_train_df_500copy = pd.DataFrame(copy.deepcopy(X_train_df_500.to_dict()))

In [None]:
# pre_pip_for_spatial_filter = Pipeline([
#         ("channels_extraction",PickChannels(channels_list = box)),
#         ("average", Evoked()),
#         ('extract_averaged_data', ExtractData()),
#         ("spatial_filter_preprocessing", SpatialFilterPreprocessing()),
#         ("spatial_filter",PCA(n_components=3, random_state=random_state)),
# ]).fit(X_train_df_500copy)

# spatial_filter = pre_pip_for_spatial_filter['spatial_filter']

In [None]:
# X_train_df_500copy = pd.DataFrame(copy.deepcopy(X_train_df_500.to_dict()))

In [None]:
# pipeline_name = '-500:-300 SF EX'

# for i in range(0,len(X_train_df_500copy)):
    
#     X = X_train_df_500copy[i:i+1]    
#     x_pre = Pipeline([
#             ("channels_extraction",PickChannels(channels_list=box)),
#             ('extract_data', ExtractDataEpochs()),
#     ]).fit_transform(X)  
#     x_pre = x_pre[0] 

    
    
# #     x_pre_pre = Pipeline([
# #         ("spatial_filter_preprocessing", SpatialFilterPreprocessing()),
# #         ("spatial_filter", spatial_filter),
# #         ("spatial_filter_postprocessing",SpatialFilterPostprocessing(timepoints_count=181)),
# #         ("lowpass_filter", LowpassFilter()),
# #         ('neg', ReverseComponent3()),
# #         ("binning", BinTransformer(step=12)),
# #         ("baseline", ErnBaselined()),
# #         ("centering", CenteredSignalAfterBaseline3()),
# #     ]).transform(x_pre)
    
    
# #     ern_features = Pipeline(steps=[
# #                                 ("ern_data_extraction", ErnTransformer()),
# #                                 ("ern_amplitude", ErnAmplitude2()),
# #                                 ("data_channel_swap", ChannelDataSwap()),
# #                                 ("postprocessing", PostprocessingTransformer()),
# #                                 ("scaler", StandardScaler()),
# #                                 ("feature_extraction", FastICA(random_state=random_state, n_components=3))
# #                 ])


# #     pe_features = Pipeline(steps = [
# #                                 ("pe_data_extraction", PeTransformer(start_pe_bin=3, stop_pe_bin=8)),
# #                                 ("pe_amplitude", PeAmplitude2()),
# #                                 ("data_channel_swap", ChannelDataSwap()),
# #                                 ("postprocessing", PostprocessingTransformer()),
# #                                 ("scaler", StandardScaler()),
# #                                 ("feature_extraction", FastICA(random_state=random_state, n_components=3))
# #                 ])

# #     ern_pe_features = FeatureUnion([("ern_features", ern_features), ("pe_features", pe_features)], n_jobs = 10)
    
# #     x_pre2 = Pipeline([("features", ern_pe_features)]).fit_transform(x_pre_pre)


#     x_pre_pre = Pipeline([
#         ("spatial_filter_preprocessing", SpatialFilterPreprocessing()),
#         ("spatial_filter", spatial_filter),
#         ("spatial_filter_postprocessing",SpatialFilterPostprocessing(timepoints_count=181)),
#         ("lowpass_filter", LowpassFilter()),
#         ('neg', ReverseComponent3()),
#         ("binning", BinTransformer(step=12)),
#         ("baseline", ErnBaselined()),
#         ("centering", CenteredSignalAfterBaseline3())]).transform(x_pre)

    
#     ern_features_pre = Pipeline(steps=[
#                                 ("ern_data_extraction", ErnTransformer()),
#                                 ("ern_amplitude", ErnAmplitude2()),
#                                 ("data_channel_swap", ChannelDataSwap()),
#                                 ("postprocessing", PostprocessingTransformer()),
#                                 # ("scaler", StandardScaler()),
#                 ]).fit_transform(x_pre_pre)
    
    
#     ern_features = Pipeline([("feature_extraction", ern_fex)]).transform(ern_features_pre)


#     pe_features_pre = Pipeline(steps = [
#                                 ("pe_data_extraction", PeTransformer(start_pe_bin=3, stop_pe_bin=8)),
#                                 ("pe_amplitude", PeAmplitude2()),
#                                 ("data_channel_swap", ChannelDataSwap()),
#                                 ("postprocessing", PostprocessingTransformer()),
#                                 # ("scaler", StandardScaler()),
#                 ]).fit_transform(x_pre_pre)
    
#     pe_features = Pipeline([("feature_extraction", pe_fex)]).transform(pe_features_pre)
    
    
#     x_pre2 = zip(ern_features, pe_features)
#     x_pre2 = np.array(list(x_pre2)).reshape(x_pre.shape[0],-1)
    

#     # f_vector = np.mean(x_pre2, axis=1)
#     f_vector = np.sum(x_pre2, axis=1)     
#     f_variance = np.std(f_vector)
    
#     data = {'pipeline' : pipeline_name,
#             'values' : f_variance,
#            }
    
#     within_df = within_df.append(data, ignore_index = True)
# #     variances_500_sf.append(f_variance)
    
# # variances_500_sf = np.array(variances_500_sf)

### Visualization

In [None]:
import seaborn as sns

# Figure size first, then the whitegrid theme (same call order as before).
sns.set(rc={"figure.figsize": (10, 12)})
sns.set_theme(style="whitegrid")

# One horizontal violin per pipeline: distribution of within_df['values'].
ax = sns.violinplot(data=within_df, x="values", y="pipeline", orient="h")
# ax.figure.savefig("within_subject_std_ern_pe_with_lowpass_BS.png")

In [None]:
import seaborn as sns

# Figure size first, then the whitegrid theme (same call order as before).
sns.set(rc={"figure.figsize": (10, 12)})
sns.set_theme(style="whitegrid")

# One horizontal violin per pipeline: distribution of within_df['values'].
ax = sns.violinplot(data=within_df, x="values", y="pipeline", orient="h")
# ax.figure.savefig("within_subject_std_ern_pe_with_lowpass_BS.png")

## Internal consistency

consistency = between_person / (between_person + within_person)

In [None]:
# Internal consistency per pipeline and per within_df entry:
#     between_std / (between_std + person_variance)
# where between_std is the standard deviation of that pipeline's between_df
# values and person_variance is one 'values' entry from within_df.
#
# Rows are collected in a list and the frame is built once at the end:
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, and it
# copied the whole frame on every call.
internal_rows = []

for pipeline in between_df['pipeline'].unique().tolist():

    # dtype=float keeps np.std working even if the column is stored as object.
    between_std = np.std(
        between_df.loc[between_df['pipeline'] == pipeline, 'values'].to_numpy(dtype=float)
    )
    within_list = within_df.loc[within_df['pipeline'] == pipeline, 'values'].to_numpy(dtype=float)

    internal_rows.extend(
        {
            'pipeline': pipeline,
            'internal_variance': between_std / (between_std + person_variance),
        }
        for person_variance in within_list
    )

# Explicit columns keep the frame layout even when no rows were produced.
internal_df = pd.DataFrame(internal_rows, columns=['pipeline', 'internal_variance'])

In [None]:
import seaborn as sns

# Figure size first, then the whitegrid theme (same call order as before).
sns.set(rc={"figure.figsize": (10, 8)})
sns.set_theme(style="whitegrid")

# One horizontal violin per pipeline with quartile lines drawn inside.
ax = sns.violinplot(
    data=internal_df,
    x="internal_variance",
    y="pipeline",
    orient="h",
    inner="quartile",
)
# ax.figure.savefig("internal_consistency_ern_pe_lowpass_BS.png")

In [None]:
# Row labels of the '-100:0 SF' entries of within_df whose 'values' exceed 0.00005.
within_df[(within_df['pipeline'] == '-100:0 SF') & (within_df['values'] > 0.00005)].index

In [None]:
# 182 == 0
# NOTE(review): the note above mentions 182, but the code below subtracts 196 —
# confirm which offset maps within_df row labels back to positions.

# Row labels of the '-100:0 SF' entries whose 'values' exceed 0.00005.
indexes = within_df[(within_df['pipeline'] == '-100:0 SF') & (within_df['values'] > 0.00005)].index
indexes = np.array(indexes.tolist())

# Shift every label by the fixed offset of 196.
indexes_new = [index - 196 for index in indexes]
indexes_new
# index 8, 19, 33, 

---
### Visualization

In [None]:
import copy
# Independent copy of X_train_df_100: the frame is turned into a dict,
# deep-copied and wrapped in a new DataFrame.
X_train_df_copy = pd.DataFrame(copy.deepcopy(X_train_df_100.to_dict()))
# X_test_df_copy = pd.DataFrame(copy.deepcopy(X_test_df.to_dict()))

In [None]:
# Channel subset used for the visualisation: the z-line electrodes from Fpz
# to Pz plus the C1/C2 and P1/P2 neighbours.
red_box8_prim = [
    "Fpz", "AFz", "Fz", "FCz",
    "C1", "Cz", "C2",
    "CPz",
    "P1", "Pz", "P2",
]

# `box` holds the same channels in a separate list object.
box = list(red_box8_prim)

In [None]:
# Fit the whole preprocessing chain on the training copy. The narrow-indices
# and baseline steps stay switched off, and LowpassFilter runs with its
# default arguments.
visualization_steps = [
    ("channels_extraction", PickChannels(channels_list=red_box8_prim)),
    ("average", Evoked()),
    ("extract_averaged_data", ExtractData()),
    # ("narrow_indices", NarrowIndices(start=76, stop=257)),
    ("spatial_filter_preprocessing", SpatialFilterPreprocessing()),
    ("spatial_filter", PCA(n_components=3, random_state=random_state)),
    ("spatial_filter_postprocessing", SpatialFilterPostprocessing(timepoints_count=181)),
    ("lowpass_filter", LowpassFilter()),
    ("neg", ReverseComponent3()),
    ("binning", BinTransformer(step=12)),
    # ("baseline", ErnBaselined()),
    ("centering", CenteredSignalAfterBaseline3()),
]
pre_pip = Pipeline(visualization_steps)
pre_pip.fit(X_train_df_copy)

# Transform the same data and average the result over axis 0.
X = pre_pip.transform(X_train_df_copy)
X_mean = np.mean(X, axis=0)

In [None]:
# Show the shape of X_mean (the axis-0 mean of the transformed data).
X_mean.shape

In [None]:
# The fitted PCA step of pre_pip.
sf = pre_pip['spatial_filter']

In [None]:
# Component matrix of the fitted PCA (its `components_` attribute).
components = sf.components_

In [None]:
def check_positive(number):
    """Return True when *number* is strictly greater than zero, else False."""
    return bool(number > 0)

def check_negative(number):
    """Return True when *number* is strictly less than zero, else False."""
    return bool(number < 0)

In [None]:
# Work on a copy so the PCA's own component matrix is left untouched.
components_copy = components.copy()

In [None]:
# Display the copied component matrix.
components_copy

In [None]:
# Single-element lists holding row 1 and row 2 of components_copy, so the
# cells below can loop over them.
components_1 = [components_copy[1]]
components_2 = [components_copy[2]]

In [None]:
# Display the wrapped row 2 of the component matrix.
components_2

In [None]:
# Split the weights of the component in components_1 by sign and build one
# signal from the positive weights and one from the remaining weights.
for component in components_1:
    print(component)

    # Same length as `component`: the weight where it is > 0 (resp. <= 0),
    # and 0 everywhere else.
    positive = [item if item > 0 else 0 for item in component]
    negative = [0 if item > 0 else item for item in component]
    print(positive)
    print(negative)

# Column vectors with one weight per row; -1 infers the number of weights
# instead of hard-coding 11.
positive = np.array(positive).reshape(-1, 1)
negative = np.array(negative).reshape(-1, 1)

# NOTE(review): these products rely on NumPy broadcasting against X_mean —
# confirm that X_mean has the shape this expects.
positive_compo = X_mean * positive
negative_compo = X_mean * negative

positive_signal_1 = np.sum(positive_compo, axis=0)
negative_signal_1 = np.sum(negative_compo, axis=0)

In [None]:
# Split the weights of the component in components_2 by sign and build one
# signal from the positive weights and one from the remaining weights.
for component in components_2:
    print(component)

    # Same length as `component`: the weight where it is > 0 (resp. <= 0),
    # and 0 everywhere else.
    positive = [item if item > 0 else 0 for item in component]
    negative = [0 if item > 0 else item for item in component]
    print(positive)
    print(negative)

# Column vectors with one weight per row; -1 infers the number of weights
# instead of hard-coding 11.
positive = np.array(positive).reshape(-1, 1)
negative = np.array(negative).reshape(-1, 1)

# NOTE(review): these products rely on NumPy broadcasting against X_mean —
# confirm that X_mean has the shape this expects.
positive_compo = X_mean * positive
negative_compo = X_mean * negative

positive_signal_2 = np.sum(positive_compo, axis=0)
negative_signal_2 = np.sum(negative_compo, axis=0)

In [None]:
# Recombine the two sign-split signals of each component into one signal.
c_1 = positive_signal_1 + negative_signal_1
c_2 = positive_signal_2 + negative_signal_2

In [None]:
import matplotlib.pyplot as plt

# Positive part and sign-flipped negative part of the signal.
plt.plot(positive_signal_2)
plt.plot(-negative_signal_2)

# plt.plot(X_mean[1], lw=4)
# Their sum c_2, drawn with a thick line, then the figure is written to disk.
plt.plot(c_2, linewidth=4)
plt.savefig("differences_component_3")

In [None]:
import matplotlib.pyplot as plt

# Overlay rows 0, 1 and 2 of X_mean.
for row in range(3):
    plt.plot(X_mean[row])

In [None]:
import matplotlib.pyplot as plt

# Overlay element 0 of each of the first ten entries of X.
for entry in range(10):
    plt.plot(X[entry][0])

In [None]:
import matplotlib.pyplot as plt

# Overlay element 1 of each of the first ten entries of X.
for entry in range(10):
    plt.plot(X[entry][1])

In [None]:
import matplotlib.pyplot as plt

# Overlay element 2 of each of the first ten entries of X.
for entry in range(10):
    plt.plot(X[entry][2])

In [None]:
import matplotlib.pyplot as plt

# Overlay element 0 of each of the first twenty entries of X.
for entry in range(20):
    plt.plot(X[entry][0])

In [None]:
import matplotlib.pyplot as plt

# Overlay element 2 of each of the first four entries of X.
for entry in range(4):
    plt.plot(X[entry][2])

In [None]:
# without lowpass

import matplotlib.pyplot as plt

# Overlay element 2 of each of the first four entries of X.
for entry in range(4):
    plt.plot(X[entry][2])

In [None]:
import matplotlib.pyplot as plt

# Overlay element 0 of each of the first four entries of X.
for entry in range(4):
    plt.plot(X[entry][0])

In [None]:
import matplotlib.pyplot as plt

# Overlay element 1 of each of the first four entries of X.
for entry in range(4):
    plt.plot(X[entry][1])

---
## Experiment 1 no CDS

- channel extraction to red box
- choose error and average
- baseline to positivity peak
- center to ERN from 1 component
- split on ERN and PE
- spatial filter with PCA
- feature extraction with ICA

In [None]:
# Hyperparameters of the pipeline.
timepoints_count = 181

# Bounds and step for the number of spatial-filter components.
min_spatial_filter, max_spatial_filter, step_spatial_filter = 3, 6, 1

# Bounds and step for the number of feature-extraction components.
min_feature_selection, max_feature_selection, step_feature_selection = 2, 6, 1

# Parameters handed to training. Both feature-extraction searches are
# currently commented out, which leaves the dict empty.
regressor_params = {
    # "features__ern_features__feature_extraction__n_components": np.arange(
    #     min_feature_selection, max_feature_selection, step_feature_selection
    # ),
    # "features__pe_features__feature_extraction__n_components": np.arange(
    #     min_feature_selection, max_feature_selection, step_feature_selection
    # ),
}

In [None]:
# Candidate channel subsets ("red boxes"). The order of the names inside each
# list is kept exactly as it was.
red_box = [
    "F3", "F2", "F4", "C1", "Cz", "FCz", "C3", "C2", "C4",
    "P1", "Fpz", "P2", "P3", "CPz", "P4",
]

# without Fpz - not significant
red_box2 = [
    "F3", "F2", "F4", "C1", "Cz", "FCz", "C3", "C2", "C4",
    "P1", "P2", "P3", "CPz", "P4",
]

# without Fpz and with F1 added - not significant
red_box3 = [
    "F3", "F1", "F2", "F4", "C1", "Cz", "FCz", "C3", "C2", "C4",
    "P1", "P2", "P3", "CPz", "P4",
]

red_box4 = [
    "Fpz", "F3", "F1", "Fz", "F2", "F4", "FCz",
    "C3", "C1", "Cz", "C2", "C4", "CPz",
    "P3", "P1", "P2", "P4",
]

red_box4_prim = [
    "Fpz", "F1", "Fz", "F2", "FCz", "C1", "Cz", "C2", "CPz", "P1", "P2",
]

red_box5 = [
    "AFz", "F3", "F1", "Fz", "F2", "F4", "C1", "Cz", "FCz",
    "C3", "C2", "C4", "P1", "P2", "P3", "CPz", "P4",
]

red_box6 = ["Fpz", "AFz", "Fz", "FCz", "Cz", "CPz", "Pz"]

# line plus a cluster at the back
red_box7 = [
    "Fpz", "AFz", "Fz", "FCz", "Cz", "CPz",
    "P3", "P1", "Pz", "P2", "P4",
]

red_box7_prim = [
    "Fpz", "AFz", "Fz", "FCz", "Cz", "CPz",
    "P1", "Pz", "P2",
]

# line plus clusters at the back and in the middle
red_box8 = [
    "Fpz", "AFz", "Fz", "FCz",
    "C3", "C1", "Cz", "C2", "C4",
    "CPz",
    "P3", "P1", "Pz", "P2", "P4",
]

red_box8_prim = [
    "Fpz", "AFz", "Fz", "FCz",
    "C1", "Cz", "C2",
    "CPz",
    "P1", "Pz", "P2",
]

# line plus clusters at the back, in the middle and at the front
red_box9 = [
    "Fpz", "AFz",
    "F3", "F1", "Fz", "F2", "F4",
    "FCz",
    "C3", "C1", "Cz", "C2", "C4",
    "CPz",
    "P3", "P1", "Pz", "P2", "P4",
]

red_box9_prim = [
    "Fpz", "AFz",
    "F1", "Fz", "F2",
    "FCz",
    "C1", "Cz", "C2",
    "CPz",
    "P1", "Pz", "P2",
]

red_box10 = [
    "Fpz", "AFz",
    "F3", "F1", "Fz", "F2", "F4",
    "FCz", "Cz",
    "CP3", "CP1", "CPz", "CP2", "CP4",
    "Pz",
]

red_box10_prim = [
    "Fpz", "AFz",
    "F1", "Fz", "F2",
    "FCz", "Cz",
    "CP1", "CPz", "CP2",
    "Pz",
]

# Subsets that the experiment loop iterates over.
box_list = [red_box6, red_box7_prim, red_box8_prim, red_box9_prim, red_box10_prim]


In [None]:
# Author's label for this run: wider signal, ERN: -2,1 Pe: 1,6.
# For every electrode box the preprocessing pipeline is fitted on the
# training copy, per-component ERN and Pe features are assembled, and the
# bundle is passed to run_experiment, which returns the updated results.

import copy

results_baseline_100 = pd.DataFrame()

# Suppress warnings in this process and, through PYTHONWARNINGS, in
# subprocesses as well - but only when no warning options were supplied.
if not sys.warnoptions:
    warnings.simplefilter("ignore")
    os.environ["PYTHONWARNINGS"] = "ignore"

# Disabled sweep over Pe bin ranges and bin widths:
# pe_indices = [(3,8)]
# for this_bin in [12]:
#     for ind in pe_indices:
#         print(f"----------PE INDICES: {ind}-------------------------------------------------")

for box in box_list:
    # Deep copies of the train/test frames, rebuilt once per box.
    X_train_df_copy = pd.DataFrame(copy.deepcopy(X_train_df.to_dict()))
    X_test_df_copy = pd.DataFrame(copy.deepcopy(X_test_df.to_dict()))

    print(f"----------BOX: {box}")

    # The number of spatial-filter components is swept manually; the upper
    # bound is hard-coded to 4 in this cell.
    for n_components in range(min_spatial_filter, 4, step_spatial_filter):
        pipeline_name = f"PCA_{n_components}_baseline_100_0"

        # ------------------------------------------------------------------
        # Shared preprocessing, fitted on the training copy.
        # ------------------------------------------------------------------
        preprocessed_pipeline = Pipeline(
            steps=[
                ("channels_extraction", PickChannels(channels_list=box)),
                ("average", Evoked()),
                ("extract_averaged_data", ExtractData()),
                # ("narrow_indices", NarrowIndices(start=76, stop=257)),
                ("spatial_filter_preprocessing", SpatialFilterPreprocessing()),
                (
                    "spatial_filter",
                    PCA(n_components=n_components, random_state=random_state),
                ),
                (
                    "spatial_filter_postprocessing",
                    SpatialFilterPostprocessing(timepoints_count=timepoints_count),
                ),
                ("lowpass_filter", LowpassFilter()),
                ("neg", ReverseComponent3()),
                ("binning", BinTransformer(step=12)),
                # ("baseline", ErnBaselined()),
                ("centering", CenteredSignalAfterBaseline3()),
            ]
        ).fit(X_train_df_copy)

        preprocessed_X = preprocessed_pipeline.transform(X_train_df_copy)

        # ------------------------------------------------------------------
        # ERN branch: one sub-pipeline per spatial component.
        # ------------------------------------------------------------------
        ern_first_comp = Pipeline(
            steps=[
                ("baseline", ErnBaselined()),
                ("ern_data_extraction", ErnTransformer()),
                ("get_first", GetComponent(0)),
                ("ern_peak_to_peak", ErnAmplitude2()),
            ]
        )

        ern_second_comp = Pipeline(
            steps=[
                ("ern_data_extraction", ErnTransformer()),
                ("get_second", GetComponent(1)),
                ("diff_ern_peak", ErnAmplitude2_prim()),
            ]
        )

        # Disabled third ERN component:
        # ern_third_comp = Pipeline(steps=[
        #     ("ern_data_extraction", ErnTransformer(0,4)),
        #     ("get_third", GetComponent(2)),
        #     ("second_diff_ern_peak", ErnAmplitude2()),
        # ])

        ern_compo_features = FeatureUnion(
            [
                ("first", ern_first_comp),
                ("second", ern_second_comp),
                # ("third", ern_third_comp),
            ],
            n_jobs=10,
        )

        ern_features = Pipeline(
            steps=[
                # ("ern_data_extraction", ErnTransformer()),
                # ("ern_amplitude", ErnAmplitude2()),
                ("ern_compo_features", ern_compo_features),
                ("data_channel_swap", ChannelDataSwap()),
                ("postprocessing", PostprocessingTransformer()),
                ("scaler", StandardScaler()),
                # ("feature_extraction", FastICA(random_state=random_state))
            ]
        )

        # ------------------------------------------------------------------
        # Pe branch: three components; the first uses bins (2, 7), the
        # other two use bins (3, 8).
        # ------------------------------------------------------------------
        pe_first_comp = Pipeline(
            steps=[
                ("baseline", ErnBaselined()),
                ("ern_data_extraction", PeTransformer(2, 7)),
                ("get_first", GetComponent(0)),
                ("ern_peak_to_peak", PeAmplitude2()),
            ]
        )

        pe_second_comp = Pipeline(
            steps=[
                ("ern_data_extraction", PeTransformer(3, 8)),
                ("get_second", GetComponent(1)),
                ("diff_ern_peak", PeAmplitude2()),
            ]
        )

        pe_third_comp = Pipeline(
            steps=[
                ("ern_data_extraction", PeTransformer(3, 8)),
                ("get_third", GetComponent(2)),
                ("second_diff_ern_peak", PeAmplitude2()),
            ]
        )

        pe_compo_features = FeatureUnion(
            [
                ("first", pe_first_comp),
                ("second", pe_second_comp),
                ("third", pe_third_comp),
            ],
            n_jobs=10,
        )

        pe_features = Pipeline(
            steps=[
                # ("pe_data_extraction", PeTransformer(start_pe_bin=ind[0], stop_pe_bin=ind[1])),
                # ("pe_amplitude", PeAmplitude2()),
                ("pe_compo_features", pe_compo_features),
                ("data_channel_swap", ChannelDataSwap()),
                ("postprocessing", PostprocessingTransformer()),
                ("scaler", StandardScaler()),
                # ("feature_extraction", FastICA(random_state=random_state))
            ]
        )

        # ERN and Pe features side by side.
        ern_pe_features = FeatureUnion(
            [
                ("ern_features", ern_features),
                ("pe_features", pe_features),
            ],
            n_jobs=10,
        )

        steps = ("features", ern_pe_features)
        # steps = ('features', ern_features)

        regressor_steps = steps

        # Rate the different models; run_experiment returns the updated
        # results frame, which is carried into the next iteration.
        results_baseline_100 = run_experiment(
            tested_regressors,
            regressor_params,
            pipeline_name,
            preprocessed_X,
            X_test,
            y_train,
            y_test,
            dataset_name,
            regressor_steps,
            preprocessed_pipeline,
            X_test_df_copy,
            y_rum_test,
            results_baseline_100,
        )

In [None]:
# Persist the results frame of the baseline-100 run.
results_baseline_100.to_pickle(
    "../data/regression_union_100-600_cached_splited_compos_ern-compo2-max_pe-compo2-max_diff_models_no-fex_mne.pkl"
)

In [None]:
# # wider signal, ERN: -2,1 Pe: 1,6

# import copy

# results_baseline_100 = pd.DataFrame()

# # manually test different numbers of spatial filter components

# if not sys.warnoptions:
#     warnings.simplefilter("ignore")
#     os.environ["PYTHONWARNINGS"] = "ignore" # Also affect subprocesses

    
# pe_indices = [(3,8)]
# for this_bin in [12]:
#     for ind in pe_indices:
#         print(f"----------PE INDICES: {ind}-------------------------------------------------")

#         for box in box_list:

#             X_train_df_copy = pd.DataFrame(copy.deepcopy(X_train_df.to_dict()))
#             X_test_df_copy = pd.DataFrame(copy.deepcopy(X_test_df.to_dict()))

#             print(f"----------BOX: {box}")

#             for n_components in range(min_spatial_filter, max_spatial_filter, step_spatial_filter): 

#                 pipeline_name = f"PCA_{n_components}_baseline_500_300"

#                 ############################################################################################
#                 preprocessed_pipeline = Pipeline([
#                     ("channels_extraction",PickChannels(channels_list=box)),
#                     ("average", Evoked()),
#                     ('extract_averaged_data', ExtractData()),
#                     # ("narrow_indices", NarrowIndices(start=76, stop=257)),
#                     ("spatial_filter_preprocessing", SpatialFilterPreprocessing()),
#                     ("spatial_filter",PCA(n_components=n_components, random_state=random_state)),
#                     ("spatial_filter_postprocessing",SpatialFilterPostprocessing(timepoints_count=timepoints_count)),
#                     ("lowpass_filter", LowpassFilter()),
#                     ('neg', ReverseComponent3()),
#                     ("binning", BinTransformer(step=this_bin)),
#                     ("baseline", ErnBaselined()),
#                     ("centering", CenteredSignalAfterBaseline3()) 

#                 ]).fit(X_train_df_copy)

#                 preprocessed_X = preprocessed_pipeline.transform(X_train_df_copy)

#                 ###########################################################################################

#                 ern_features = Pipeline(steps=[
#                                 ("ern_data_extraction", ErnTransformer()),
#                                 ("ern_amplitude", ErnAmplitude2()),
#                                 ("data_channel_swap", ChannelDataSwap()),
#                                 ("postprocessing", PostprocessingTransformer()),
#                                 ("scaler", StandardScaler()),
#                                 # ("feature_extraction", FastICA(random_state=random_state))
#                 ])


#                 pe_features = Pipeline(steps = [
#                                 ("pe_data_extraction", PeTransformer(start_pe_bin=ind[0], stop_pe_bin=ind[1])),
#                                 ("pe_amplitude", PeAmplitude2()),
#                                 ("data_channel_swap", ChannelDataSwap()),
#                                 ("postprocessing", PostprocessingTransformer()),
#                                 ("scaler", StandardScaler()),
#                                 # ("feature_extraction", FastICA(random_state=random_state))
#                 ])

#                 ern_pe_features = FeatureUnion([("ern_features", ern_features), ("pe_features", pe_features)], n_jobs = 10)

#                 steps = ('features', ern_pe_features)

#                 ############################################################################################

#                 regressor_steps = steps

#                 # rate different models
#                 results_baseline_100 = run_experiment(
#                     tested_regressors,
#                     regressor_params,
#                     pipeline_name,
#                     preprocessed_X,
#                     X_test,
#                     y_train,
#                     y_test,
#                     dataset_name,
#                     regressor_steps,
#                     preprocessed_pipeline,
#                     X_test_df_copy,
#                     y_rum_test,
#                     results_baseline_100,
#                 )

In [None]:
# Persist the no-feature-extraction results frame.
# NOTE(review): `results_baseline_100_no_fex_df` is not assigned in the cells
# around this one - confirm it is still defined before running.
results_baseline_100_no_fex_df.to_pickle(
    "../data/regression_union_100-600_cached_baselined_centered_diff_boxes_diff_pe-ind_diff_models_no-fex_mne.pkl"
)

In [None]:
# Shortlist: rows with mean_cv_r2 above 0.08 AND external_score above 0.05.
results_copy_best = results_copy.loc[
    (results_copy["mean_cv_r2"] > 0.08)
    & (results_copy["external_score"] > 0.05)
]

In [None]:
# Show the shortlist ordered by the `overfit` column, largest value first.
results_copy_best.sort_values(
    by=["overfit"],
    ascending=False,
)

In [None]:
# Load a previously saved results frame from the split0.3 directory.
results_df_rum = pd.read_pickle(
    "../data/split0.3/regression_union_100-600_baselined_centered-2_diff_boxes_diff_pe-ind_diff_models.pkl"
)

In [None]:
# Load the saved results frame and display only the rows whose `model`
# column equals 'en'.
results_df_rum = pd.read_pickle(
    "../data/split0.3/regression_union_100-600_baselined_centered-2_diff_boxes_diff_pe-ind_diff_models.pkl"
)

results_df_rum.loc[results_df_rum["model"] == "en"]


In [None]:
# Display the current value of `pe_fex` as the cell output.
# NOTE(review): `pe_fex` is not assigned in the surrounding cells - confirm
# it is still defined before running this cell.
pe_fex

# Experiment 4 with CDS (current source density)

In [None]:
# Pipeline hyperparameters.
timepoints_count = 181

# Bounds of the manual sweep over spatial-filter components
# (used as range(min, max, step)).
min_spatial_filter, max_spatial_filter, step_spatial_filter = 3, 7, 1

# Bounds of the search over feature-extraction components
# (used as np.arange(min, max, step)).
min_feature_selection, max_feature_selection, step_feature_selection = 2, 6, 1

# Parameter grid for training: the candidate numbers of components for the
# "feature_extraction" step inside the "features" step.
regressor_params = {
    "features__feature_extraction__n_components": np.arange(
        min_feature_selection,
        max_feature_selection,
        step_feature_selection,
    ),
}

In [None]:
# Deep copies of the train/test frames, rebuilt from their dict form.
X_train_df_copy = pd.DataFrame(copy.deepcopy(X_train_df.to_dict()))
X_test_df_copy = pd.DataFrame(copy.deepcopy(X_test_df.to_dict()))

# Apply the CurrentSourceDensity step to each split; every split gets its
# own single-step pipeline, fitted and applied in one call.
preprocessed_X_train = Pipeline(
    steps=[("current_source_density", CurrentSourceDensity())]
).fit_transform(X_train_df_copy)

preprocessed_X_test = Pipeline(
    steps=[("current_source_density", CurrentSourceDensity())]
).fit_transform(X_test_df_copy)

In [None]:
# Author's label: whole signal centered to ERN (from -2) and abs().
# The AbsSignal step is currently commented out in the feature pipeline.
# Input here is the CurrentSourceDensity-preprocessed data.

import copy

results_df_cds = pd.DataFrame()

# Suppress warnings in this process and, through PYTHONWARNINGS, in
# subprocesses as well - but only when no warning options were supplied.
if not sys.warnoptions:
    warnings.simplefilter("ignore")
    os.environ["PYTHONWARNINGS"] = "ignore"

for box in box_list:
    # Raw-frame copies are not used in this variant:
    # X_train_df_copy = pd.DataFrame(copy.deepcopy(X_train_df.to_dict()))
    # X_test_df_copy = pd.DataFrame(copy.deepcopy(X_test_df.to_dict()))

    # Deep copies of the preprocessed frames, rebuilt once per box.
    preprocessed_X_train_copy = pd.DataFrame(
        copy.deepcopy(preprocessed_X_train.to_dict())
    )
    preprocessed_X_test_copy = pd.DataFrame(
        copy.deepcopy(preprocessed_X_test.to_dict())
    )

    print(f"BOX: {box}")

    # Manual sweep over the number of spatial-filter components.
    for n_components in range(
        min_spatial_filter, max_spatial_filter, step_spatial_filter
    ):
        pipeline_name = f"PCA_{n_components}_CDS_SF_SPLIT_FE"

        # ------------------------------------------------------------------
        # Shared preprocessing, fitted on the training copy.
        # ------------------------------------------------------------------
        preprocessed_pipeline = Pipeline(
            steps=[
                ("channels_extraction", PickChannels(channels_list=box)),
                ("average", Evoked()),
                # ("bandpass_filter",BandpassFilter()),
                ("extract_averaged_data", ExtractData()),
                ("spatial_filter_preprocessing", SpatialFilterPreprocessing()),
                (
                    "spatial_filter",
                    PCA(n_components=n_components, random_state=random_state),
                ),
                (
                    "spatial_filter_postprocessing",
                    SpatialFilterPostprocessing(timepoints_count=timepoints_count),
                ),
                ("lowpass_filter", LowpassFilter()),
                ("neg", ReverseComponent3()),
                ("binning", BinTransformer(step=12)),
                # ("baseline", ErnBaselined()),
                ("centering", CenteredSignalAfterBaseline3()),
            ]
        ).fit(preprocessed_X_train_copy)

        preprocessed_X = preprocessed_pipeline.transform(preprocessed_X_train_copy)

        # ------------------------------------------------------------------
        # Single feature pipeline over the whole preprocessed signal.
        # ------------------------------------------------------------------
        ern_pe_features = Pipeline(
            steps=[
                # ("abs", AbsSignal()),
                ("data_channel_swap", ChannelDataSwap()),
                ("postprocessing", PostprocessingTransformer()),
                ("scaler", StandardScaler()),
                ("feature_extraction", FastICA(random_state=random_state)),
            ]
        )

        steps = ("features", ern_pe_features)

        regressor_steps = steps

        # Rate the different models; run_experiment returns the updated
        # results frame, which is carried into the next iteration.
        results_df_cds = run_experiment(
            tested_regressors,
            regressor_params,
            pipeline_name,
            preprocessed_X,
            X_test,
            y_train,
            y_test,
            dataset_name,
            regressor_steps,
            preprocessed_pipeline,
            preprocessed_X_test_copy,
            y_rum_test,
            results_df_cds,
        )

# Experiment 4 without CDS (current source density)

In [None]:
# Author's label: whole signal centered to ERN (from -2) and abs().
# The AbsSignal step is currently commented out in the feature pipeline.
# Input here is the raw train/test frames (no CurrentSourceDensity step).

import copy

results_df = pd.DataFrame()

# Suppress warnings in this process and, through PYTHONWARNINGS, in
# subprocesses as well - but only when no warning options were supplied.
if not sys.warnoptions:
    warnings.simplefilter("ignore")
    os.environ["PYTHONWARNINGS"] = "ignore"

for box in box_list:
    # Deep copies of the raw train/test frames, rebuilt once per box.
    X_train_df_copy = pd.DataFrame(copy.deepcopy(X_train_df.to_dict()))
    X_test_df_copy = pd.DataFrame(copy.deepcopy(X_test_df.to_dict()))

    # Preprocessed copies are not used in this variant:
    # preprocessed_X_train_copy = pd.DataFrame(copy.deepcopy(preprocessed_X_train.to_dict()))
    # preprocessed_X_test_copy = pd.DataFrame(copy.deepcopy(preprocessed_X_test.to_dict()))

    print(f"BOX: {box}")

    # Manual sweep over the number of spatial-filter components.
    for n_components in range(
        min_spatial_filter, max_spatial_filter, step_spatial_filter
    ):
        # NOTE(review): the label still contains "CDS" although this cell
        # feeds the raw frames - confirm the name is intended.
        pipeline_name = f"PCA_{n_components}_CDS_SF_SPLIT_FE"

        # ------------------------------------------------------------------
        # Shared preprocessing, fitted on the training copy.
        # ------------------------------------------------------------------
        preprocessed_pipeline = Pipeline(
            steps=[
                ("channels_extraction", PickChannels(channels_list=box)),
                ("average", Evoked()),
                # ("bandpass_filter",BandpassFilter()),
                ("extract_averaged_data", ExtractData()),
                ("spatial_filter_preprocessing", SpatialFilterPreprocessing()),
                (
                    "spatial_filter",
                    PCA(n_components=n_components, random_state=random_state),
                ),
                (
                    "spatial_filter_postprocessing",
                    SpatialFilterPostprocessing(timepoints_count=timepoints_count),
                ),
                ("lowpass_filter", LowpassFilter()),
                ("neg", ReverseComponent3()),
                ("binning", BinTransformer(step=12)),
                # ("baseline", ErnBaselined()),
                ("centering", CenteredSignalAfterBaseline3()),
            ]
        ).fit(X_train_df_copy)

        preprocessed_X = preprocessed_pipeline.transform(X_train_df_copy)

        # ------------------------------------------------------------------
        # Single feature pipeline over the whole preprocessed signal.
        # ------------------------------------------------------------------
        ern_pe_features = Pipeline(
            steps=[
                # ("abs", AbsSignal()),
                ("data_channel_swap", ChannelDataSwap()),
                ("postprocessing", PostprocessingTransformer()),
                ("scaler", StandardScaler()),
                ("feature_extraction", FastICA(random_state=random_state)),
            ]
        )

        steps = ("features", ern_pe_features)

        regressor_steps = steps

        # Rate the different models; run_experiment returns the updated
        # results frame, which is carried into the next iteration.
        results_df = run_experiment(
            tested_regressors,
            regressor_params,
            pipeline_name,
            preprocessed_X,
            X_test,
            y_train,
            y_test,
            dataset_name,
            regressor_steps,
            preprocessed_pipeline,
            X_test_df_copy,
            y_rum_test,
            results_df,
        )

# Experiment 4
- the signal is binned
- baseline: bins (-1, 1) are extracted, where bin 0 is the argmin over bins 0-3
- ERN: after centering, bins (-1, 1) are extracted, where bin 0 is the ERN
- Pe: bins (1, 6) are extracted, where bin 0 is the ERN
- the absolute value of the whole signal is taken
- feature extraction is run separately on each extracted signal

In [None]:
# Candidate electrode sets ("boxes"). Each list holds channel names; the
# order of the names inside a list is kept exactly as originally written.

red_box = (
    "F3 F2 F4 "
    "C1 Cz FCz "
    "C3 C2 C4 "
    "P1 Fpz P2 "
    "P3 CPz P4"
).split()

# Without Fpz - not significant.
red_box2 = (
    "F3 F2 F4 "
    "C1 Cz FCz "
    "C3 C2 C4 "
    "P1 P2 "
    "P3 CPz P4"
).split()

# Without Fpz and with F1 added - not significant.
red_box3 = (
    "F3 F1 F2 F4 "
    "C1 Cz FCz "
    "C3 C2 C4 "
    "P1 P2 "
    "P3 CPz P4"
).split()

red_box4 = (
    "Fpz "
    "F3 F1 Fz F2 F4 "
    "FCz "
    "C3 C1 Cz C2 C4 "
    "CPz "
    "P3 P1 P2 P4"
).split()

red_box4_prim = "Fpz F1 Fz F2 FCz C1 Cz C2 CPz P1 P2".split()

red_box5 = (
    "AFz "
    "F3 F1 Fz F2 F4 "
    "C1 Cz FCz "
    "C3 C2 C4 "
    "P1 P2 "
    "P3 CPz P4"
).split()

red_box6 = "Fpz AFz Fz FCz Cz CPz Pz".split()

# Line plus a cluster at the back.
red_box7 = "Fpz AFz Fz FCz Cz CPz P3 P1 Pz P2 P4".split()
red_box7_prim = "Fpz AFz Fz FCz Cz CPz P1 Pz P2".split()

# Line plus clusters at the back and in the middle.
red_box8 = (
    "Fpz AFz Fz FCz "
    "C3 C1 Cz C2 C4 "
    "CPz "
    "P3 P1 Pz P2 P4"
).split()
red_box8_prim = "Fpz AFz Fz FCz C1 Cz C2 CPz P1 Pz P2".split()

# Line plus clusters at the back, in the middle and at the front.
red_box9 = (
    "Fpz AFz "
    "F3 F1 Fz F2 F4 "
    "FCz "
    "C3 C1 Cz C2 C4 "
    "CPz "
    "P3 P1 Pz P2 P4"
).split()
red_box9_prim = "Fpz AFz F1 Fz F2 FCz C1 Cz C2 CPz P1 Pz P2".split()

red_box10 = (
    "Fpz AFz "
    "F3 F1 Fz F2 F4 "
    "FCz Cz "
    "CP3 CP1 CPz CP2 CP4 "
    "Pz"
).split()
red_box10_prim = "Fpz AFz F1 Fz F2 FCz Cz CP1 CPz CP2 Pz".split()

# Boxes iterated by the experiment loops.
box_list = [
    red_box6,
    red_box7,
    red_box7_prim,
    red_box8,
    red_box8_prim,
    red_box9,
    red_box9_prim,
    red_box4,
    red_box4_prim,
    red_box10,
    red_box10_prim,
]


In [None]:
# Pipeline hyperparameters.
timepoints_count = 181

# Bounds of the manual sweep over spatial-filter components
# (used as range(min, max, step)).
min_spatial_filter, max_spatial_filter, step_spatial_filter = 3, 7, 1

# Bounds of the search over feature-extraction components
# (used as np.arange(min, max, step)).
min_feature_selection, max_feature_selection, step_feature_selection = 2, 6, 1

# Parameter grid for training: the candidate numbers of components for the
# "feature_extraction" step of the ERN branch and of the Pe branch.
regressor_params = {
    "features__ern_features__feature_extraction__n_components": np.arange(
        min_feature_selection,
        max_feature_selection,
        step_feature_selection,
    ),
    "features__pe_features__feature_extraction__n_components": np.arange(
        min_feature_selection,
        max_feature_selection,
        step_feature_selection,
    ),
}

In [None]:
# Author's label: wider signal, ERN: -2,1 Pe: 2,6; extract baseline, no
# amplitude. Three feature branches (ERN, Pe, baseline) are built on top of
# the shared preprocessing and joined in one FeatureUnion.

import copy

results_df = pd.DataFrame()

# Suppress warnings in this process and, through PYTHONWARNINGS, in
# subprocesses as well - but only when no warning options were supplied.
if not sys.warnoptions:
    warnings.simplefilter("ignore")
    os.environ["PYTHONWARNINGS"] = "ignore"

for box in box_list:
    # Deep copies of the train/test frames, rebuilt once per box.
    X_train_df_copy = pd.DataFrame(copy.deepcopy(X_train_df.to_dict()))
    X_test_df_copy = pd.DataFrame(copy.deepcopy(X_test_df.to_dict()))

    print(f"BOX: {box}")

    # Manual sweep over the number of spatial-filter components.
    for n_components in range(
        min_spatial_filter, max_spatial_filter, step_spatial_filter
    ):
        pipeline_name = f"PCA_{n_components}_CDS_SF_SPLIT_FE"

        # ------------------------------------------------------------------
        # Shared preprocessing, fitted on the training copy. Baselining and
        # centering are left out here; centering happens per branch below.
        # ------------------------------------------------------------------
        preprocessed_pipeline = Pipeline(
            steps=[
                ("channels_extraction", PickChannels(channels_list=box)),
                ("average", Evoked()),
                # ("bandpass_filter",BandpassFilter()),
                ("extract_averaged_data", ExtractData()),
                ("spatial_filter_preprocessing", SpatialFilterPreprocessing()),
                (
                    "spatial_filter",
                    PCA(n_components=n_components, random_state=random_state),
                ),
                (
                    "spatial_filter_postprocessing",
                    SpatialFilterPostprocessing(timepoints_count=timepoints_count),
                ),
                ("lowpass_filter", LowpassFilter()),
                ("neg", ReverseComponent3()),
                ("binning", BinTransformer(step=12)),
                # ("baseline", ErnBaselined()),
                # ("centering", CenteredSignalAfterBaseline3())
            ]
        ).fit(X_train_df_copy)

        preprocessed_X = preprocessed_pipeline.transform(X_train_df_copy)

        # ------------------------------------------------------------------
        # ERN branch. Author's note: "extract 1,1 where 0 is ERN"; the
        # transformer is given start_ern_bin=1, stop_ern_bin=4.
        # ------------------------------------------------------------------
        ern_features = Pipeline(
            steps=[
                ("centering", CenteredSignalAfterBaseline3()),
                (
                    "ern_data_extraction",
                    ErnTransformer(start_ern_bin=1, stop_ern_bin=4),
                ),
                # ("ern_amplitude", ErnAmplitude2()),
                ("abs", AbsSignal()),
                ("data_channel_swap", ChannelDataSwap()),
                ("postprocessing", PostprocessingTransformer()),
                ("scaler", StandardScaler()),
                ("feature_extraction", FastICA(random_state=random_state)),
            ]
        )

        # ------------------------------------------------------------------
        # Pe branch. Author's note: "extract 1,6 where 0 is ERN";
        # PeTransformer runs with its default arguments.
        # ------------------------------------------------------------------
        pe_features = Pipeline(
            steps=[
                ("centering", CenteredSignalAfterBaseline3()),
                ("pe_data_extraction", PeTransformer()),
                ("abs", AbsSignal()),
                # ("pe_amplitude", PeAmplitude2()),
                ("data_channel_swap", ChannelDataSwap()),
                ("postprocessing", PostprocessingTransformer()),
                ("scaler", StandardScaler()),
                ("feature_extraction", FastICA(random_state=random_state)),
            ]
        )

        # ------------------------------------------------------------------
        # Baseline branch. Author's note: "extract -1,1 where 0 is min (0,4)".
        # ------------------------------------------------------------------
        base_features = Pipeline(
            steps=[
                ("base_data_extraction", ExtractBaseline()),
                # ("pe_amplitude", PeAmplitude2()),
                ("abs", AbsSignal()),
                ("data_channel_swap", ChannelDataSwap()),
                ("postprocessing", PostprocessingTransformer()),
                ("scaler", StandardScaler()),
                ("feature_extraction", FastICA(random_state=random_state)),
            ]
        )

        # All three branches side by side.
        ern_pe_features = FeatureUnion(
            [
                ("ern_features", ern_features),
                ("pe_features", pe_features),
                ("base", base_features),
            ],
            n_jobs=10,
        )

        steps = ("features", ern_pe_features)

        regressor_steps = steps

        # Rate the different models; run_experiment returns the updated
        # results frame, which is carried into the next iteration.
        results_df = run_experiment(
            tested_regressors,
            regressor_params,
            pipeline_name,
            preprocessed_X,
            X_test,
            y_train,
            y_test,
            dataset_name,
            regressor_steps,
            preprocessed_pipeline,
            X_test_df_copy,
            y_rum_test,
            results_df,
        )

In [None]:
# Persist the results frame of the experiment.
results_df.to_pickle(
    "../data/regression_union_100-600_baselined_centered_diff_boxes_mne.pkl"
)

---
# With CDS (current source density)

In [None]:
# Pipeline hyperparameters.
timepoints_count = 181

# Bounds of the manual sweep over spatial-filter components
# (used as range(min, max, step)).
min_spatial_filter, max_spatial_filter, step_spatial_filter = 3, 7, 1

# Bounds of the search over feature-extraction components
# (used as np.arange(min, max, step)).
min_feature_selection, max_feature_selection, step_feature_selection = 2, 6, 1

# Parameter grid for training: the candidate numbers of components for the
# "feature_extraction" step of the ERN branch and of the Pe branch.
regressor_params = {
    "features__ern_features__feature_extraction__n_components": np.arange(
        min_feature_selection,
        max_feature_selection,
        step_feature_selection,
    ),
    "features__pe_features__feature_extraction__n_components": np.arange(
        min_feature_selection,
        max_feature_selection,
        step_feature_selection,
    ),
}

In [None]:
# Candidate electrode sets ("boxes"). Each list holds channel names; the
# order of the names inside a list is kept exactly as originally written.

red_box = (
    "F3 F2 F4 "
    "C1 Cz FCz "
    "C3 C2 C4 "
    "P1 Fpz P2 "
    "P3 CPz P4"
).split()

# Without Fpz - not significant.
red_box2 = (
    "F3 F2 F4 "
    "C1 Cz FCz "
    "C3 C2 C4 "
    "P1 P2 "
    "P3 CPz P4"
).split()

# Without Fpz and with F1 added - not significant.
red_box3 = (
    "F3 F1 F2 F4 "
    "C1 Cz FCz "
    "C3 C2 C4 "
    "P1 P2 "
    "P3 CPz P4"
).split()

red_box4 = (
    "Fpz "
    "F3 F1 Fz F2 F4 "
    "FCz "
    "C3 C1 Cz C2 C4 "
    "CPz "
    "P3 P1 P2 P4"
).split()

red_box4_prim = "Fpz F1 Fz F2 FCz C1 Cz C2 CPz P1 P2".split()

red_box5 = (
    "AFz "
    "F3 F1 Fz F2 F4 "
    "C1 Cz FCz "
    "C3 C2 C4 "
    "P1 P2 "
    "P3 CPz P4"
).split()

red_box6 = "Fpz AFz Fz FCz Cz CPz Pz".split()

# Line plus a cluster at the back.
red_box7 = "Fpz AFz Fz FCz Cz CPz P3 P1 Pz P2 P4".split()
red_box7_prim = "Fpz AFz Fz FCz Cz CPz P1 Pz P2".split()

# Line plus clusters at the back and in the middle.
red_box8 = (
    "Fpz AFz Fz FCz "
    "C3 C1 Cz C2 C4 "
    "CPz "
    "P3 P1 Pz P2 P4"
).split()
red_box8_prim = "Fpz AFz Fz FCz C1 Cz C2 CPz P1 Pz P2".split()

# Line plus clusters at the back, in the middle and at the front.
red_box9 = (
    "Fpz AFz "
    "F3 F1 Fz F2 F4 "
    "FCz "
    "C3 C1 Cz C2 C4 "
    "CPz "
    "P3 P1 Pz P2 P4"
).split()
red_box9_prim = "Fpz AFz F1 Fz F2 FCz C1 Cz C2 CPz P1 Pz P2".split()

red_box10 = (
    "Fpz AFz "
    "F3 F1 Fz F2 F4 "
    "FCz Cz "
    "CP3 CP1 CPz CP2 CP4 "
    "Pz"
).split()
red_box10_prim = "Fpz AFz F1 Fz F2 FCz Cz CP1 CPz CP2 Pz".split()

# Boxes iterated by the experiment loops.
box_list = [
    red_box6,
    red_box7,
    red_box7_prim,
    red_box8,
    red_box8_prim,
    red_box9,
    red_box9_prim,
    red_box4,
    red_box4_prim,
    red_box10,
    red_box10_prim,
]


In [None]:
import copy

# Deep copies of the train/test frames, rebuilt from their dict form.
X_train_df_copy = pd.DataFrame(copy.deepcopy(X_train_df.to_dict()))
X_test_df_copy = pd.DataFrame(copy.deepcopy(X_test_df.to_dict()))

# Apply the CurrentSourceDensity step to each split; every split gets its
# own single-step pipeline, fitted and applied in one call.
preprocessed_X_train = Pipeline(
    steps=[("current_source_density", CurrentSourceDensity())]
).fit_transform(X_train_df_copy)

preprocessed_X_test = Pipeline(
    steps=[("current_source_density", CurrentSourceDensity())]
).fit_transform(X_test_df_copy)

In [None]:
import copy
import warnings  # imported locally so this cell does not depend on another cell for it

results_df_2 = pd.DataFrame()

# Manually test different numbers of spatial filter components, for every bin
# width (`step`) and every channel selection (`box`). Each combination is
# scored by run_experiment and the returned frame replaces results_df_2.

# Silence warnings unless warning options were given explicitly.
if not sys.warnoptions:
    warnings.simplefilter("ignore")
    os.environ["PYTHONWARNINGS"] = "ignore" # Also affect subprocesses

for step in [7,12,16]:
    print(f"-----------------STEP: {step}-----------")
    for box in box_list:

        # Fresh deep copies for every channel selection.
        # NOTE(review): presumably needed because pipeline steps modify their
        # input in place — confirm.
        preprocessed_X_train_copy = pd.DataFrame(copy.deepcopy(preprocessed_X_train.to_dict()))
        preprocessed_X_test_copy = pd.DataFrame(copy.deepcopy(preprocessed_X_test.to_dict()))

        print(f"BOX: {box}")

        for n_components in range(min_spatial_filter, max_spatial_filter, step_spatial_filter):

            # NOTE(review): the name encodes only n_components, so runs with a
            # different `step` or `box` share the same pipeline_name — confirm
            # that run_experiment records enough to tell them apart.
            pipeline_name = f"PCA_{n_components}_CDS_SF_SPLIT_FE"

            ############################################################################################
            # Signal preprocessing, fitted on the training copy only.
            preprocessed_pipeline = Pipeline([
                ("channels_extraction",PickChannels(channels_list=box)),
                ("average", Evoked()),
                # ("bandpass_filter",BandpassFilter()),
                ('extract_averaged_data', ExtractData()),
                ("spatial_filter_preprocessing", SpatialFilterPreprocessing()),
                ("spatial_filter",PCA(n_components=n_components, random_state=random_state)),
                ("spatial_filter_postprocessing",SpatialFilterPostprocessing(timepoints_count=timepoints_count)),
                ("lowpass_filter", LowpassFilter()),
                ("binning", BinTransformer(step=step)),
                ("baseline", ErnBaselined()),
                ("centering", CenteredSignalAfterBaseline())

            ]).fit(preprocessed_X_train_copy)

            preprocessed_X = preprocessed_pipeline.transform(preprocessed_X_train_copy)

            ###########################################################################################
            # Feature branches: new, unfitted instances are built on every iteration.

            ern_features = Pipeline(steps=[
                            ("ern_data_extraction", ErnTransformer()),
                            ("ern_amplitude", ErnAmplitude2()),
                            ("data_channel_swap", ChannelDataSwap()),
                            ("postprocessing", PostprocessingTransformer()),
                            ("scaler", StandardScaler()),
                            ("feature_extraction", FastICA(random_state=random_state))
            ])


            pe_features = Pipeline(steps = [
                            ("pe_data_extraction", PeTransformer()),
                            ("pe_amplitude", PeAmplitude2()),
                            ("data_channel_swap", ChannelDataSwap()),
                            ("postprocessing", PostprocessingTransformer()),
                            ("scaler", StandardScaler()),
                            ("feature_extraction", FastICA(random_state=random_state))
            ])

            # Outputs of both branches are concatenated (ERN first, then Pe).
            ern_pe_features = FeatureUnion([("ern_features", ern_features), ("pe_features", pe_features)], n_jobs = 10)

            steps = ('features', ern_pe_features)

            ############################################################################################

            regressor_steps = steps

            # rate different models
            results_df_2 = run_experiment(
                tested_regressors,
                regressor_params,
                pipeline_name,
                preprocessed_X,
                X_test,
                y_train,
                y_test,
                dataset_name,
                regressor_steps,
                preprocessed_pipeline,
                preprocessed_X_test_copy,
                y_rum_test,
                results_df_2,
            )

## Visualization

In [None]:
# data_df = pd.read_pickle(
#     "../data/split0.3/regression_union_100-600_baselined_centered_ampl-2-pe-ern_0.3-5_significant.pkl"
# )
# Results table used by the visualization cells; taken from a results variable
# defined elsewhere in the notebook rather than from the pickle file above.
data_df = results_static_ICA_bin_union_100_600_baselined_peak_to_peak_components_df
# Label that is later used as the prefix of the saved figure's file name.
# NOTE(review): this is a plain attribute set on the DataFrame object;
# presumably it does not survive copies of the frame — confirm.
data_df.name = "union_100_600_baselined_centered_no_scaler"

In [None]:
# Display the selected results table.
data_df

#### Extract the coefficients of the ERN and Pe feature extraction (ICA) and the coefficients of the estimator

In [None]:
# ern_features = data_df.best_estimator[1]["features"].transformer_list[0][1]["feature_selection"].components_
# pe_features = data_df.best_estimator[1]["features"].transformer_list[1][1]["feature_selection"].components_

# without additional metric as feature
# Look the fitted estimator up once and reuse it for all three reads.
_best_estimator = data_df.best_estimator[0]
_union_branches = _best_estimator["features"]["ern_pe_features"].transformer_list

# transformer_list holds (name, transformer) pairs: entry 0 is read as the ERN
# branch, entry 1 as the Pe branch.
_ern_branch = _union_branches[0][1]
_pe_branch = _union_branches[1][1]

ern_features = _ern_branch["feature_selection"].components_
pe_features = _pe_branch["feature_selection"].components_

# Coefficients of the final "en" step of the estimator.
coeffs = _best_estimator["en"].coef_

In [None]:
# Shape of the ERN component matrix extracted above.
ern_features.shape

In [None]:
# Shape of the Pe component matrix extracted above.
pe_features.shape

In [None]:
# Display the estimator coefficients.
coeffs

#### Weigh components with coeffs from estimator and sum

In [None]:
# Multiply every component (one row of ern_features / pe_features) by the
# estimator coefficient at the matching position. The coefficient vector is
# indexed with the ERN rows first and the Pe rows directly after them.
_pe_offset = ern_features.shape[0]

ern_components_weighed = np.array(
    [component * coeffs[position] for position, component in enumerate(ern_features)]
)
pe_components_weighed = np.array(
    [component * coeffs[_pe_offset + position] for position, component in enumerate(pe_features)]
)

#### Sum all feature extraction components to obtain the direct weight of a given bin at a given spatial filter component

In [None]:
# Built-in sum() iterates over the first axis, i.e. it adds the weighted
# component rows together and yields one total per column.
components_weighed_ern_sum = sum(ern_components_weighed)
components_weighed_pe_sum = sum(pe_components_weighed)

In [None]:
# Display the summed weighted ERN components.
components_weighed_ern_sum

In [None]:
# Product of the summed weighted ERN components and the mean ERN amplitudes.
# NOTE(review): in this part of the notebook ern_ampl_mean is assigned in a
# later cell, so that cell has to be run before this one.
mean_rum_ern = components_weighed_ern_sum * ern_ampl_mean
mean_rum_ern

In [None]:
# Product of the summed weighted Pe components and the mean Pe amplitudes.
# NOTE(review): in this part of the notebook pe_ampl_mean is assigned in a
# later cell, so that cell has to be run before this one.
mean_rum_pe = components_weighed_pe_sum * pe_ampl_mean
mean_rum_pe

#### Extract components of spatial filter

In [None]:
# ERN branch: scaling is enabled, the ICA feature-selection step is left out.
_ern_branch_steps = [
    ("ern_data_extraction", ErnTransformer()),
    ("ern_amplitude", ErnAmplitude2()),
    ("data_channel_swap", ChannelDataSwap()),
    ("postprocessing", PostprocessingTransformer()),
    ("scaler", StandardScaler()),
    # ("feature_selection", FastICA(random_state=random_state))
]
ern_features = Pipeline(steps=_ern_branch_steps)

# Pe branch: centering, scaling and ICA are all left out here.
_pe_branch_steps = [
    ("pe_data_extraction", PeTransformer()),
    # ("pe_centered", CenteredPeAfterBaseline()),
    ("pe_amplitude", PeAmplitude2()),
    ("data_channel_swap", ChannelDataSwap()),
    ("postprocessing", PostprocessingTransformer()),
    # ("scaler", StandardScaler()),
    # ("feature_selection", FastICA(random_state=random_state))
]
pe_features = Pipeline(steps=_pe_branch_steps)

# Both branches side by side (ERN output first, Pe output second).
ern_pe_features = FeatureUnion(
    [("ern_features", ern_features), ("pe_features", pe_features)],
    n_jobs=10,
)

features = Pipeline([("ern_pe_features", ern_pe_features)])

# steps = ('features', features)

# Signal preprocessing only; the feature step at the end stays disabled.
_preprocessing_steps = [
    ("extract_epochs", EEGdata(dataset=dataset)),
    # ("narrow_indices", NarrowIndices(start=76, stop=257)),
    ("channels_filtering", ChannelExtraction(significant_channels)),
    ("average_epochs", AveragePerParticipant()),
    ("spatial_filter_preprocessing", SpatialFilterPreprocessing()),
    ("spatial_filter", PCA(n_components=3, random_state=random_state)),
    ("spatial_filter_postprocessing", SpatialFilterPostprocessing(timepoints_count=181)),
    ("lowpass_filter", LowpassFilter()),
    ("binning", BinTransformer(step=12)),
    ("baseline", ErnBaselined()),
    ("centering", CenteredSignalAfterBaseline()),
    # ('features', features)
]

# Fit on the training data, then apply the fitted pipeline to the test frame.
preprocessed_pipeline = Pipeline(_preprocessing_steps).fit(X_train)
preprocessed_X_test = preprocessed_pipeline.transform(X_test_df)

In [None]:
# Apply the fitted preprocessing pipeline to the training data.
preprocessed_X = preprocessed_pipeline.transform(X_train)

In [None]:
# Shape of the transformed training data.
preprocessed_X.shape

In [None]:
# Columns 3..5 of the transformed training data are taken as the Pe amplitudes
# and averaged over axis 0 (one mean per column).
# NOTE(review): the hard-coded 3:6 presumably matches the 3 spatial-filter
# components following the 3 ERN columns — confirm against the pipeline output.
pe_ampl = preprocessed_X[:,3:6]
pe_ampl_mean = np.mean(pe_ampl, axis=0)
pe_ampl_mean

In [None]:
# Columns 0..2 of the transformed training data are taken as the ERN
# amplitudes and averaged over axis 0 (one mean per column).
# NOTE(review): the hard-coded 0:3 presumably matches the 3 spatial-filter
# components — confirm against the pipeline output.
ern_ampl = preprocessed_X[:,0:3]
ern_ampl_mean = np.mean(ern_ampl, axis=0)

In [None]:
# Display the mean ERN amplitudes.
ern_ampl_mean

In [None]:
# steps = ('features', features)

# Same preprocessing chain as before, but ending with the ERN/Pe feature
# union, so the transformed output consists of the extracted features.
_preprocessing_and_feature_steps = [
    ("extract_epochs", EEGdata(dataset=dataset)),
    # ("narrow_indices", NarrowIndices(start=76, stop=257)),
    ("channels_filtering", ChannelExtraction(significant_channels)),
    ("average_epochs", AveragePerParticipant()),
    ("spatial_filter_preprocessing", SpatialFilterPreprocessing()),
    ("spatial_filter", PCA(n_components=3, random_state=random_state)),
    ("spatial_filter_postprocessing", SpatialFilterPostprocessing(timepoints_count=181)),
    ("lowpass_filter", LowpassFilter()),
    ("binning", BinTransformer(step=12)),
    ("baseline", ErnBaselined()),
    ("centering", CenteredSignalAfterBaseline()),
    ("ern_pe_features", ern_pe_features),
]
preprocessed_pipeline = Pipeline(_preprocessing_and_feature_steps).fit(X_train)

# Apply the pipeline fitted on the training data to both splits.
preprocessed_X = preprocessed_pipeline.transform(X_train)
pre_processed_test_X = preprocessed_pipeline.transform(X_test_df)

In [None]:
# ern_features = Pipeline(steps=[
#                     ("ern_extraction", CenteredERN(step=16)),
#                     ("binning", BinTransformer(step=16)),
# #                     ("data_channel_swap", ChannelDataSwap()),
# #                     ("postprocessing", PostprocessingTransformer()),
# #                     ("scaler", StandardScaler()),
# #                     ("feature_selection", FastICA(random_state=random_state))
# # 
# ])

# pe_features = Pipeline(steps = [
#                         ("pe_extraction", CenteredPe(step=16)),
#                         ("binning", BinTransformer(step=16)),
# #                         # ("data_channel_swap", ChannelDataSwap()),
# #                         # ("postprocessing", PostprocessingTransformer()),
# #                         # ("scaler", StandardScaler()),
# #                         # ("feature_selection", FastICA(random_state=random_state))
#         ])
    
# #         ern_pe_features = FeatureUnion([("ern_features", ern_features), ("pe_features", pe_features)], n_jobs = 10)

# #         features = Pipeline([
# #             ('ern_pe_features', ern_pe_features)

# #         ])

# #         steps = ('features', features)

# ern_fitted = ern_features.fit_transform(preprocessed_X)
# ern_test_fitted = ern_features.transform(pre_processed_test_X)

In [None]:
# Mean over axis 0 of the ERN outputs for the train and the test data.
# NOTE(review): in this part of the notebook ern_fitted and ern_test_fitted are
# only assigned inside the commented-out code of the preceding cell — confirm
# they are defined before running this cell.
ern_fitted_mean = np.mean(ern_fitted, axis=0)
ern_test_fitted_mean = np.mean(ern_test_fitted, axis=0)

In [None]:
# Shape of the averaged ERN output for the test data.
ern_test_fitted_mean.shape

In [None]:
import matplotlib.pyplot as plt

# Quick look at the first entry of the averaged ERN output (training data).
plt.plot(ern_fitted_mean[0])

---

In [None]:
spatial_filter_n_components = 3

# Build the step list with the spatial_filter_bins_steps helper (defined
# elsewhere in the notebook) and fit/transform the training data with it.
this_steps = spatial_filter_bins_steps(spatial_filter_n_components=spatial_filter_n_components, timepoints_count=181)
pre_processed_X = Pipeline(steps=this_steps).fit_transform(X_train)

In [None]:
# Transform the test and the training data with the fitted preprocessed_pipeline.
# NOTE: this rebinds pre_processed_X, replacing the value assigned in the
# previous cell.
pre_processed_test_X = preprocessed_pipeline.transform(X_test_df)
pre_processed_X = preprocessed_pipeline.transform(X_train)

In [None]:
# Mean over axis 0 of the transformed training data; per the original note this
# is the signal within components averaged across all participants.
mean_X_1 = np.mean(pre_processed_X, axis=0)

In [None]:
# Shape of the ERN output for the training data.
# NOTE(review): ern_fitted is only assigned in commented-out code in this part
# of the notebook — confirm it is defined.
ern_fitted.shape

In [None]:
# NOTE(review): in this part of the notebook mean_X is assigned in a later
# cell, so that cell has to be run before this one.
mean_X.shape

In [None]:
# Shape of the transformed test data.
pre_processed_test_X.shape

In [None]:
# Mean over axis 0 of the transformed data; per the original note this is the
# signal within components averaged across all participants.
# mean_X comes from the training data, mean_2_X from the test data.
mean_X = np.mean(preprocessed_X, axis=0)
mean_2_X = np.mean(pre_processed_test_X, axis=0)

-----
### Plot results

In [None]:
# Bin indices of the ERN and Pe windows

tmin, tmax = -0.1, 0.6  # Start and end of the segments (in sec)
signal_frequency = 256

step_in_ms = 50  # requested bin width, in milliseconds
# Bin width in timepoints; int() drops the fractional part.
step_tp = int(signal_frequency * step_in_ms / 1000)


def _time_to_bin(time_in_sec):
    """Map a time in seconds to a bin index.

    The offset from ``tmin`` is converted to timepoints, divided by the bin
    width ``step_tp``, truncated with ``int`` and increased by one.
    """
    return int(signal_frequency * (time_in_sec - tmin) / step_tp) + 1


# boundaries for slicing an epoch into its ERN part and its Pe part (in sec)
start_ern, stop_ern = 0, 0.15
start_pe, stop_pe = 0.15, 0.35

start_ern_bin = _time_to_bin(start_ern)
stop_ern_bin = _time_to_bin(stop_ern)
start_pe_bin = _time_to_bin(start_pe)
stop_pe_bin = _time_to_bin(stop_pe)

In [None]:
# Hard-coded bin boundaries for the ERN and Pe parts. Running this cell
# replaces whatever values these four names held before.
start_ern_bin, stop_ern_bin = 0, 3
start_pe_bin, stop_pe_bin = 3, 7

In [None]:
# Number of spatial-filter components; the same value (3) is passed as
# n_components to the PCA steps of the pipelines above.
spatial_filter_n_components = 3

In [None]:
# Index of the component to plot: 0, 1 or 2
this_component = 2

# Disabled step-size computations (kept for reference):
# pe_step = int(pe_features.shape[1]/ spatial_filter_n_components)
# ern_step = int(ern_features.shape[1]/ spatial_filter_n_components)
# spatial_filter_step = int(pre_processed_X.shape[1]/3)

In [None]:
# NOTE(review): in this part of the notebook the only assignment to
# spatial_filter_step is commented out in the previous cell, so this raises
# NameError unless the name is defined elsewhere — confirm.
spatial_filter_step

In [None]:
# Shape of the weighted ERN component matrix.
ern_components_weighed.shape

In [None]:
# First entry of the averaged ERN output, with the sign flipped.
-ern_fitted_mean[0]

In [None]:
# Shape of the first three values of the first weighted ERN component.
ern_components_weighed[0][0:3].shape

In [None]:
# Sanity check: np.arange(0, 4) has 4 elements, i.e. shape (4,).
np.arange(0,4).shape

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

sns.set(rc={'figure.figsize':(11.7,8.27)})
sns.set_style("white")

fig, ax1 = plt.subplots()

# Left axis (magenta ticks): weighted component values. The limits span from
# the smallest weighted ERN value to the largest weighted Pe value.
ax1.set(ylim=(np.min(ern_components_weighed)-0.1, np.max(pe_components_weighed)+0.05))
ax1.tick_params(axis='y', color="magenta", width=3, length=10)

# Dashed grey reference lines at y=0 and at x=2 and x=6.
ax1.axhline(y=0, color="grey", linewidth = 2, linestyle='--', alpha=0.5)
ax1.axvline(x=2, color="grey", linewidth = 2, linestyle='--', alpha=0.5)
ax1.axvline(x=6, color="grey", linewidth = 2, linestyle='--', alpha=0.5)

# Right axis (black ticks): averaged signal of the selected component.
ax2 = ax1.twinx()
ax2.set(ylim=(-1e-5,2.5e-5))
ax2.tick_params(axis='y', color="black")

# Summed weighted Pe and ERN value of the selected component, drawn as single
# points at x=4 and x=1 on the left axis.
sns.scatterplot(x=[4], y= components_weighed_pe_sum[this_component], ax=ax1, color="magenta")
sns.scatterplot(x=[1], y= components_weighed_ern_sum[this_component], ax=ax1, color="magenta")

# Sign-flipped averaged signal of the selected component on the right axis.
# x and y are passed by keyword: seaborn >= 0.12 no longer accepts them
# positionally (only `data` may be positional).
sns_plot = sns.lineplot(x=np.arange(0,10), y=-mean_X[this_component], ax=ax2, color="black", linewidth = 3)


# The file name is built from the label stored on data_df and the component index.
sns_plot.figure.savefig(f"{data_df.name}_output_{this_component}.png")

In [None]:
# Mean of the training targets.
np.mean(y_train)

In [None]:
# Mean of y_rum.
np.mean(y_rum)

# CURRENT BEST RESULTS

In [None]:
# Load a saved results table and display it (rebinds results_df).
# NOTE: read_pickle unpickles the file, which can execute arbitrary code —
# only load files from a trusted source.
results_df = pd.read_pickle(
    "../data/split0.3/regression_union_100-600_ampl_bins50_0.3_significant.pkl"
)
results_df

In [None]:
# Load a saved results table and display it (rebinds results_df).
# NOTE: read_pickle unpickles the file, which can execute arbitrary code —
# only load files from a trusted source.
results_df = pd.read_pickle(
    "../data/split0.3/regression_union_100-600_centered_signal_ampl_0.3-5_significant.pkl"
)
results_df

In [None]:
# Load a saved results table and display it (rebinds results_df).
# NOTE: read_pickle unpickles the file, which can execute arbitrary code —
# only load files from a trusted source.
results_df = pd.read_pickle(
    "../data/split0.3/regression_union_100-600_centered_signal_baselined-to-0-bin_signal_0.3-5.pkl"
)
results_df

In [None]:
# Load a saved results table and display it (rebinds results_df).
# NOTE: read_pickle unpickles the file, which can execute arbitrary code —
# only load files from a trusted source.
results_df = pd.read_pickle(
    "../data/split0.3/regression_union_100-600_baselined_centered_ampl-2-pe-ern_0.3-5_significant.pkl"
)
results_df