# Rumination prediction

### Imports

In [None]:
import os
import re
import glob
import os
import ast
import os.path as op
from collections import defaultdict
from copy import deepcopy

import pickle
from time import time
import pywt
import mne
import scipy
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
import pandas as pd
import cesium.featurize
from plotly.subplots import make_subplots
from ipywidgets import Dropdown, FloatRangeSlider, IntSlider, FloatSlider, interact
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import train_test_split
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.base import TransformerMixin, BaseEstimator


import sys

# sys.path.append("..")
# from utils import *

---
## Loading data

Load the EEG data together with the data from the rumination questionnaire. By default, `create_df_data` loads every column of the given .csv file; to load only some of them, pass a list of the desired column labels as `info`.

In [None]:
# Parent of the current working directory (abspath("") resolves to the cwd).
# TODO: replace with explicit, configurable paths.
dir_path = os.path.split(os.path.abspath(""))[0]

In [None]:
# Start and end of the segments; both are handed to mne.Epochs as tmin/tmax.
tmin = -0.1
tmax = 0.6

# presumably the EEG sampling rate in Hz -- TODO confirm
signal_frequency = 256

# Values stored in the "marker" column for the two response types.
ERROR, CORRECT = 0, 1

# Seed shared by the randomised estimators (PCA, FastICA) in the pipelines.
random_state = 0

In [None]:
# Positions in `channels_order_list` of the channels of interest.
sig = [
    4, 38, 39,
    11, 47, 46,
    12, 48, 49,
    19, 32, 56,
    20, 31, 57,
]

In [None]:
# Print the channel label behind every index stored in `sig`.
# NOTE(review): `channels_order_list` is defined in a later cell, so that cell
# has to be executed before this one.
for item in sig:
    print(channels_order_list[item])

In [None]:
# Electrode labels in channel order, eight per row.
# presumably this mirrors the channel order of the recordings -- TODO confirm
channels_order_list = [
    "Fp1", "AF7", "AF3", "F1", "F3", "F5", "F7", "FT7",
    "FC5", "FC3", "FC1", "C1", "C3", "C5", "T7", "TP7",
    "CP5", "CP3", "CP1", "P1", "P3", "P5", "P7", "P9",
    "PO7", "PO3", "O1", "Iz", "Oz", "POz", "Pz", "CPz",
    "Fpz", "Fp2", "AF8", "AF4", "AFz", "Fz", "F2", "F4",
    "F6", "F8", "FT8", "FC6", "FC4", "FC2", "FCz", "Cz",
    "C2", "C4", "C6", "T8", "TP8", "CP6", "CP4", "CP2",
    "P2", "P4", "P6", "P8", "P10", "PO8", "PO4", "O2",
]

# Label -> position lookup; the positions are numpy integers 0..63.
channels_dict = dict(zip(channels_order_list, np.arange(64)))

In [None]:
def create_df_data(
    test_participants=False,
    test_epochs=False,
    info_filename=None,
    info="all",
    personal=True,
):
    """Load the epochs of all participants into one DataFrame.

    Header files (``*.vhdr``) are searched in ``data/responses_400_600`` below
    the parent of the current working directory. Optional additional info is
    joined in from the given .csv file.

    By default a train set is loaded: 80% of the participants are chosen
    (deterministically, ``random_state=0``) and, when ``personal`` is True,
    the first 80% of each participant's epochs.

    Participants with fewer than 5 epochs in either condition are rejected.

    If test_participants is set to True, the remaining 20% of participants are loaded.
    If test_epochs is set to True, the remaining 20% of epochs are loaded.
    Test epochs are chronologically after train epochs,
    because it reflects real usage (first calibration and then classification).

    Parameters
    ----------
    test_participants: bool
        whether to load data for training or final testing.
        If true, load the participants held out for testing.
    test_epochs: bool
        whether to load data for training or final testing.
        If true, load the held-out epochs of each participant.
        Only has an effect when ``personal`` is True.
    info_filename: String | None
        path to .csv file with additional data.
    info: array
        listed parameters from the info file to be loaded.
        if 'all', load all parameters
    personal: bool
        whether a model will be both trained and tested on epochs from one person
        if false, person's epochs aren't split into test and train

    Returns
    -------
    go_nogo_data_df : pandas.DataFrame
        One row per epoch. An empty DataFrame is returned when no participant
        passes the minimum-epochs filter.

    """
    min_epochs_per_condition = 5

    print(os.path.abspath(""))
    dir_path = os.path.dirname(os.path.abspath(""))
    print(dir_path)
    header_files_glob = os.path.join(dir_path, "data/responses_400_600/*.vhdr")
    # sorted so that the participant split below is reproducible
    header_files = sorted(glob.glob(header_files_glob))

    # cut 20% of the participants for testing
    h_train, h_test = train_test_split(header_files, test_size=0.2, random_state=0)
    header_files = h_test if test_participants else h_train

    # Per-participant frames are collected and concatenated once at the end;
    # DataFrame.append was removed in pandas 2.0 and copied the whole frame on
    # every call.
    participant_frames = []

    for file in header_files:
        # load eeg data for given participant
        participant_epochs = load_epochs_from_file(file)

        # and compute participant's id from file_name
        # (the word characters following the last matching underscore of the path)
        participant_id = re.match(r".*_(\w+).*", file).group(1)

        error = participant_epochs["error_response"]._data
        correct = participant_epochs["correct_response"]._data

        # exclude those participants who have too few samples
        if (
            len(error) < min_epochs_per_condition
            or len(correct) < min_epochs_per_condition
        ):
            continue

        if personal:
            # cut 20% of each participant's epochs for testing
            # shuffling is disabled to make sure test epochs are after train epochs
            # TODO: not sure if this step is necessary
            err_train, err_test = train_test_split(error, test_size=0.2, shuffle=False)
            cor_train, cor_test = train_test_split(
                correct, test_size=0.2, shuffle=False
            )
            if test_epochs:
                error, correct = err_test, cor_test
            else:
                error, correct = err_train, cor_train

        # construct dataframe for participant with: id|epoch_data|response_type|additional info...
        participant_df = create_df_from_epochs(
            participant_id, correct, error, info_filename, info
        )
        print(participant_id)
        participant_frames.append(participant_df)

    if not participant_frames:
        # pd.concat raises on an empty list; keep returning an empty frame instead
        return pd.DataFrame()

    return pd.concat(participant_frames, ignore_index=True)

In [None]:
def create_df_from_epochs(id, correct, error, info_filename, info):
    """Create df for one participant with one row per epoch.

    DF structure is like: {id: String ; epoch: epoch_data ; marker: CORRECT|ERROR}
    Correct epochs come first, followed by error epochs.
    When an info file is given, the columns of the participant's row in that
    file (matched on its 'File' column) are joined to every epoch row, e.g.:
    {id: String ; epoch: epoch_data ; marker: CORRECT|ERROR ; File: id ; 'Rumination Full Scale': int}

    Parameters
    ----------
    id: String
        participant's id extracted from filename
    correct: array
        correct responses' data
    error: array
        error responses' data
    info_filename: String | None
        path to .csv file with additional data; None adds no extra columns.
    info: array | String
        listed parameters from the info file to be loaded.
        if 'all', load all parameters; a single column name is also accepted.

    Returns
    -------
    participant_df : pandas.DataFrame
        Empty when both ``correct`` and ``error`` are empty.

    """
    info_df = pd.DataFrame()

    # get additional info from file
    if info_filename is not None:
        if isinstance(info, str):
            # 'all' -> every column; any other string is a single column name
            usecols = None if info == "all" else ["File", info]
        else:
            usecols = ["File", *info]
        rumination_df = pd.read_csv(info_filename, usecols=usecols)
        # index restarts at 0 so that the join below lines up with the
        # single-row epoch frames
        info_df = rumination_df.loc[rumination_df["File"] == id].reset_index(drop=True)

    # Build the one-row frames first and concatenate once: DataFrame.append
    # was removed in pandas 2.0 and re-copied the frame for every epoch.
    epoch_frames = [
        pd.DataFrame({"id": [id], "epoch": [epoch], "marker": [marker]}).join(info_df)
        for marker, epochs in ((CORRECT, correct), (ERROR, error))
        for epoch in epochs
    ]

    if not epoch_frames:
        return pd.DataFrame()

    return pd.concat(epoch_frames, ignore_index=True)

In [None]:
def load_epochs_from_file(file, reject_bad_segments="auto", mask=None):
    """Read a BrainVision recording and cut it into response-locked epochs.

    Response markers are collapsed into two event types, "correct_response"
    (event id 0) and "error_response" (event id 1). Note that this numbering
    is the opposite of the module-level CORRECT/ERROR constants; the epochs
    are meant to be selected by name.

    Args:
        file: path to a header file (.vhdr); the marker file is expected next
            to it with the extension replaced by "vmrk".
        reject_bad_segments: currently not used by the function body; epochs
            overlapping bad annotations are always rejected.
        mask: currently not used by the function body.

    Returns:
        mne Epochs, preloaded, without baseline correction, spanning the
        module-level tmin..tmax window.

    """
    # Raw recording plus the annotations stored in the matching marker file.
    raw = mne.io.read_raw_brainvision(file)
    marker_path = f"{file[:-4]}vmrk"
    raw.set_annotations(mne.read_annotations(marker_path))

    # Response markers only.
    response_markers = {
        "Stimulus/RE*ex*1_n*1_c_1*R*FB": 10004,
        "Stimulus/RE*ex*1_n*1_c_1*R*FG": 10005,
        "Stimulus/RE*ex*1_n*1_c_2*R": 10006,
        "Stimulus/RE*ex*1_n*2_c_1*R": 10007,
        "Stimulus/RE*ex*2_n*1_c_1*R": 10008,
        "Stimulus/RE*ex*2_n*2_c_1*R*FB": 10009,
        "Stimulus/RE*ex*2_n*2_c_1*R*FG": 10010,
        "Stimulus/RE*ex*2_n*2_c_2*R": 10011,
    }

    # Event ids of the two merged response classes.
    merged_ids = {"correct_response": 0, "error_response": 1}

    # Rebuild the event array from the annotations.
    merged_events, _ = mne.events_from_annotations(raw, event_id=response_markers)

    # Collapse the individual marker codes into the two classes,
    # correct responses first, then error responses.
    merge_plan = (
        ([10004, 10005, 10009, 10010], merged_ids["correct_response"]),
        ([10006, 10007, 10008, 10011], merged_ids["error_response"]),
    )
    for source_codes, target_code in merge_plan:
        merged_events = mne.merge_events(
            merged_events,
            source_codes,
            target_code,
            replace_events=True,
        )

    return mne.Epochs(
        raw=raw,
        events=merged_events,
        event_id=merged_ids,
        tmin=tmin,
        tmax=tmax,
        baseline=None,
        reject_by_annotation=True,
        preload=True,
    )

#### Read the data

In [None]:
df_name = "go_nogo_100_600_df_3-5"
pickled_data_filename = f"../data/{df_name}.pkl"
info_filename = "../data/Demographic_Questionnaires_Behavioral_Results_N=163.csv"

# Build the training frame from the raw recordings only when no cached pickle exists.
# NOTE: unpickling executes arbitrary code -- only load pickle files you created yourself.
if not os.path.isfile(pickled_data_filename):
    print("Pickled file not found. Loading data...")
    epochs_df = create_df_data(
        test_participants=False,
        info="all",
        personal=False,
        info_filename=info_filename,
    )
    epochs_df.name = df_name
    # cache the loaded data in a pickle file named after the frame
    epochs_df.to_pickle(f"../data/{epochs_df.name}.pkl")
    print("Done. Pickle file created")
else:
    print("Pickled file found. Loading pickled data...")
    epochs_df = pd.read_pickle(pickled_data_filename)
    print("Done")

#### Read data for external testing

In [None]:
df_name = "go_nogo_100_600_test_df_3-5"
pickled_data_filename = f"../data/{df_name}.pkl"
info_filename = "../data/Demographic_Questionnaires_Behavioral_Results_N=163.csv"

# Build the held-out participants' frame only when no cached pickle exists.
# NOTE: unpickling executes arbitrary code -- only load pickle files you created yourself.
if not os.path.isfile(pickled_data_filename):
    print("Pickled file not found. Loading data...")
    epochs_test_df = create_df_data(
        test_participants=True,
        info="all",
        personal=False,
        info_filename=info_filename,
    )
    epochs_test_df.name = df_name
    # cache the loaded data in a pickle file named after the frame
    epochs_test_df.to_pickle(f"../data/{epochs_test_df.name}.pkl")
    print("Done. Pickle file created")
else:
    print("Pickled file found. Loading pickled data...")
    epochs_test_df = pd.read_pickle(pickled_data_filename)
    print("Done")

#### Rearrange data:  from: *one row - one epoch* to *one row - one participant* 

The `epochs` column contains the list of epochs for the given condition (`marker` = error or correct).

In [None]:
def _summarise_condition(group_df):
    """Collapse the epoch rows of one (id, marker) group into a single record."""
    return pd.Series(
        {
            # all epochs of the group stacked into one float64 array
            "epochs": np.array(group_df["epoch"].to_list(), dtype="float64"),
            # ("ern" and "pe" columns are currently not aggregated)
            # questionnaire scores, averaged over the rows of the group
            "Rumination": np.mean(group_df["Rumination Full Scale"]),
            "Anxiety": np.mean(group_df["DASS-21 Anxiety scale"]),
            "Stress": np.mean(group_df["DASS-21 Stress scale"]),
            "Depression": np.mean(group_df["DASS-21 Depression scale"]),
        }
    )


# One row per participant and condition, in order of first appearance.
data_df = (
    epochs_df.groupby(["id", "marker"], sort=False)
    .apply(_summarise_condition)
    .reset_index()
)

# Rows of the erroneous-response condition only.
X_df = data_df[data_df["marker"] == ERROR]

In [None]:
# Show the (rows, columns) shape of the error-condition training frame.
X_df.shape

In [None]:
def _summarise_test_condition(group_test_df):
    """Collapse the epoch rows of one held-out (id, marker) group into a single record."""
    return pd.Series(
        {
            # all epochs of the group stacked into one float64 array
            "epochs": np.array(group_test_df["epoch"].to_list(), dtype="float64"),
            # ("ern" and "pe" columns are currently not aggregated)
            # questionnaire scores, averaged over the rows of the group
            "Rumination": np.mean(group_test_df["Rumination Full Scale"]),
            "Anxiety": np.mean(group_test_df["DASS-21 Anxiety scale"]),
            "Stress": np.mean(group_test_df["DASS-21 Stress scale"]),
            "Depression": np.mean(group_test_df["DASS-21 Depression scale"]),
        }
    )


# One row per held-out participant and condition, in order of first appearance.
data_test_df = (
    epochs_test_df.groupby(["id", "marker"], sort=False)
    .apply(_summarise_test_condition)
    .reset_index()
)

# Rows of the erroneous-response condition only.
X_test_df = data_test_df[data_test_df["marker"] == ERROR]

In [None]:
# Show the (rows, columns) shape of the error-condition external-test frame.
X_test_df.shape

---

In [None]:
# Number of epochs per participant and condition.
summary_df = (
    epochs_df.groupby(["id", "marker"], sort=False).size().reset_index(name="counts")
)

# Epoch counts of the rows with marker 0 (the value of ERROR), one entry per participant.
participants_data_len = np.array(
    summary_df.loc[summary_df["marker"] == 0, "counts"].tolist()
)

# participant data indices for identifying participants data after spatial filtering:
# consecutive (first, last) positions, both inclusive, one pair per participant

participants_data_indices = []
index = 0

for participant_len in participants_data_len:
    participant_indices = (index, index + participant_len - 1)
    participants_data_indices.append(participant_indices)
    index += participant_len

participants_data_indices = np.array(participants_data_indices)

---
## Training and prediction

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso
from sklearn.linear_model import ElasticNet
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import permutation_test_score
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import FunctionTransformer
from sklearn.dummy import DummyRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVR
from tempfile import mkdtemp
from sklearn.linear_model import Ridge
from sklearn.kernel_ridge import KernelRidge


from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score

import warnings

warnings.filterwarnings("ignore")

#### Create X train and y train sets

In [None]:
# selection of the analysed condition: erroneous responses or correct responses

# Condition under analysis; its human-readable name is derived from it.
dataset = ERROR

if dataset == CORRECT:
    dataset_name = "correct"
else:
    dataset_name = "error"

In [None]:
# Training input: dataframe where 1 row = one participant; the pipelines read
# the EEG data from its 'epochs' column.
# NOTE(review): X_df always holds the ERROR rows, whereas the targets below
# follow `dataset` -- the two only line up while dataset == ERROR; confirm
# before switching the condition.
X_train = X_df

# 1-D arrays with one questionnaire score per participant of the selected condition
rumination = np.array(
    data_df.loc[data_df["marker"] == dataset, "Rumination"].to_list()
)
anxiety = np.array(data_df.loc[data_df["marker"] == dataset, "Anxiety"].to_list())
stress = np.array(data_df.loc[data_df["marker"] == dataset, "Stress"].to_list())
depression = np.array(
    data_df.loc[data_df["marker"] == dataset, "Depression"].to_list()
)

# the rumination score is the regression target
y_train = rumination

In [None]:
# Rumination scores of the held-out participants for the selected condition.
y_rum = np.array(
    data_test_df.loc[data_test_df["marker"] == dataset, "Rumination"].to_list()
)

In [None]:
# Empty placeholders: two separate lists.
X_test, y_test = [], []

In [None]:
# Show the (rows, columns) shape of the training frame.
X_train.shape

---
### Experiments 

Parameters of experiments:
- regressors
- hyperparameters
- preprocessing pipelines

#### Prepare experiment estimating 
____

In [None]:
# Rating model with grid search


def rate_regressor(
    X_train, y_train, X_test, y_test, regressor, regressor_params, base_steps, cv=3
):
    """Grid-search a preprocessing + regressor pipeline with cross-validation.

    Parameters
    ----------
    X_train, y_train:
        training data and targets passed to ``GridSearchCV.fit``.
    X_test, y_test:
        accepted for interface compatibility; not used here.
    regressor: tuple
        ``(name, estimator)`` pipeline step placed after ``base_steps``.
    regressor_params: dict
        parameter grid handed to ``GridSearchCV``.
    base_steps: tuple
        ``(name, transformer)`` pipeline step placed before the regressor.
    cv: int | cross-validation splitter
        an integer is the number of unshuffled K-fold splits (default 3);
        any other value is forwarded to ``GridSearchCV`` unchanged.

    Returns
    -------
    grid_search : GridSearchCV
        fitted search, refitted on the parameters with the best mean r2.
    """
    # define cross-validation method; previously the `cv` argument was ignored
    # and three folds were always used
    cv_splitter = KFold(n_splits=cv) if isinstance(cv, int) else cv

    pipeline = Pipeline([base_steps, regressor])

    grid_search = GridSearchCV(
        pipeline,
        regressor_params,
        cv=cv_splitter,
        # a list, not a set: list/tuple/dict are the documented multi-metric
        # forms, and a list keeps the metric order deterministic
        scoring=["r2", "neg_mean_absolute_error", "neg_mean_squared_error"],
        refit="r2",
        return_train_score=True,
        n_jobs=10,
        verbose=1,
        error_score="raise",
    )
    grid_search.fit(X_train, y_train)

    return grid_search

In [None]:
# conducting the experiment and saving selected info to the results df


def run_experiment(
    tested_regressors,
    regressor_params,
    pipeline_name,
    X_train,
    X_test,
    y_train,
    y_test,
    dataset_name,
    base_steps,
    preprocessed_pipeline,
    X_test_df,
    y_rum,
    results_df,
):
    """Grid-search every regressor and append one summary row per regressor.

    For each ``(regressor, params)`` pair the function runs ``rate_regressor``,
    reads the cross-validation statistics of the best parameter set, runs a
    permutation test on the best estimator and scores that estimator on the
    external data.

    Parameters
    ----------
    tested_regressors: iterable of ((name, estimator), dict)
        regressor pipeline steps together with their own parameter grids.
    regressor_params: dict
        parameter grid shared by all regressors; regressor-specific entries
        override it on key clashes.
    pipeline_name: String
        label stored in the results.
    X_train, y_train:
        training data and targets.
    X_test, y_test:
        forwarded to ``rate_regressor``.
    dataset_name: String
        label stored in the results.
    base_steps: tuple
        preprocessing pipeline step placed before each regressor.
    preprocessed_pipeline:
        transformer whose ``transform`` is applied to ``X_test_df`` before the
        external scoring (presumably already fitted -- verify against caller).
    X_test_df, y_rum:
        external validation data and targets.
    results_df: pandas.DataFrame
        frame the new rows are appended to; it is not modified in place.

    Returns
    -------
    results_df : pandas.DataFrame
        the input rows followed by one row per tested regressor.
    """
    result_rows = []

    for regressor, params in tested_regressors:
        print(f"Rating {regressor} \n")
        tested_params = {**regressor_params, **params}

        # enter to grid search
        grid_result = rate_regressor(
            X_train,
            y_train,
            X_test,
            y_test,
            regressor,
            tested_params,
            base_steps,
            cv=3,
        )

        cv_results = grid_result.cv_results_
        best = grid_result.best_index_

        # cross-validation statistics of the best parameter set
        mean_cv_r2 = cv_results["mean_test_r2"][best]
        std_cv_r2 = cv_results["std_test_r2"][best]
        mean_cv_neg_mean_absolute_error = cv_results[
            "mean_test_neg_mean_absolute_error"
        ][best]
        std_cv_neg_mean_absolute_error = cv_results[
            "std_test_neg_mean_absolute_error"
        ][best]
        mean_cv_neg_mean_squared_error = cv_results[
            "mean_test_neg_mean_squared_error"
        ][best]
        std_cv_neg_mean_squared_error = cv_results[
            "std_test_neg_mean_squared_error"
        ][best]

        mean_train_r2 = cv_results["mean_train_r2"][best]
        mean_train_mae = cv_results["mean_train_neg_mean_absolute_error"][best]
        mean_train_mse = cv_results["mean_train_neg_mean_squared_error"][best]

        print(f"     Best parameters: {grid_result.best_params_}")
        print(f"     mean r2: {mean_cv_r2}           ± {round(std_cv_r2,3)}")
        print(f"     mean r2 train: {mean_train_r2}")

        # calculate p-value (the permutation score itself is only printed by the helper)
        _, pvalue_ = calculate_p_permutations(
            grid_result.best_estimator_, X_train, y_train
        )

        # external validation on the held-out participants
        pre_processed_test_X = preprocessed_pipeline.transform(X_test_df)
        estimator = grid_result.best_estimator_
        score = estimator.score(pre_processed_test_X, y_rum)

        # single print: the former nested print also emitted a stray "None" line
        print(f"     external validation r2: {score}")

        # collect selected info for the results df
        result_rows.append(
            {
                "data_set": dataset_name,
                "pipeline_name": pipeline_name,
                "model": regressor[0],
                "parameters": grid_result.best_params_,
                "mean_cv_r2": mean_cv_r2,
                "std_cv_r2": std_cv_r2,
                "mean_cv_mae": mean_cv_neg_mean_absolute_error,
                "std_cv_mae": std_cv_neg_mean_absolute_error,
                "mean_cv_mse": mean_cv_neg_mean_squared_error,
                "std_cv_mse": std_cv_neg_mean_squared_error,
                "cv_results": cv_results,
                "mean_train_r2": mean_train_r2,
                "mean_train_mae": mean_train_mae,
                "mean_train_mse": mean_train_mse,
                "p-value": pvalue_,
                "best_estimator": grid_result.best_estimator_,
                "pre_processed_pipeline": preprocessed_pipeline,
                "external_score": score,
            }
        )

    if not result_rows:
        return results_df

    # DataFrame.append was removed in pandas 2.0; concatenate the new rows once
    return pd.concat([results_df, pd.DataFrame(result_rows)], ignore_index=True)

In [None]:
# Calculating p-value with permutation test


def calculate_p_permutations(estimator, X, y, cv=3, n_permutations=100, n_jobs=10):
    """Run a permutation test for ``estimator`` and report its outcome.

    All arguments are forwarded to sklearn's ``permutation_test_score``.
    The p-value and the score are printed with three decimals.

    Returns
    -------
    (score, pvalue) : tuple
        first and third value returned by ``permutation_test_score``; the
        per-permutation scores are discarded.
    """
    true_score, _, p_value = permutation_test_score(
        estimator,
        X,
        y,
        cv=cv,
        n_permutations=n_permutations,
        n_jobs=n_jobs,
    )

    # summarize
    print(f"     The permutation P-value is = {p_value:.3f}")
    print(f"     The permutation score is = {true_score:.3f}\n")

    return true_score, p_value

#### Define pipelines
___

In [None]:
from rumination_experiment_transformers_averaged import *

from sklearn.decomposition import FastICA
from sklearn.decomposition import PCA

In [None]:
# SPATIAL FILTER - BINS


def spatial_filter_bins_steps(spatial_filter_n_components, timepoints_count):
    """Return the list of pipeline steps for the spatial-filter + binning chain.

    Parameters
    ----------
    spatial_filter_n_components:
        ``n_components`` of the PCA used as the spatial filter.
    timepoints_count:
        forwarded to ``SpatialFilterPostprocessing``.

    Returns
    -------
    steps : list of (name, transformer) tuples

    Besides its arguments the function reads the globals ``dataset``,
    ``significant_channels``, ``step_tp`` and ``random_state``.
    """
    return [
        ("extract_epochs", EEGdata(dataset=dataset)),
        # ("narrow_indices", NarrowIndices(start=101, stop=257)),
        ("channels_filtering", ChannelExtraction(significant_channels)),
        ("average_epochs", AveragePerParticipant()),
        ("spatial_filter_preprocessing", SpatialFilterPreprocessing()),
        (
            "spatial_filter",
            PCA(n_components=spatial_filter_n_components, random_state=random_state),
        ),
        (
            "spatial_filter_postprocessing",
            SpatialFilterPostprocessing(timepoints_count=timepoints_count),
        ),
        ("lowpass_filter", LowpassFilter()),
        ("binning", BinTransformer(step=step_tp)),
        ("data_channel_swap", ChannelDataSwap()),
        ("postprocessing", PostprocessingTransformer()),
    ]

In [None]:
# SPATIAL FILTER - BINS - UNION


def spatial_filter_bins_union_features(spatial_filter_n_components, timepoints_count):
    """Build the ("features", Pipeline) step joining ERN and Pe amplitude features.

    Each branch extracts its component (``ErnTransformer`` / ``PeTransformer``),
    computes the amplitude (``ErnAmplitude2`` / ``PeAmplitude2``) and then runs
    the shared tail: channel/data swap, postprocessing, standard scaling and
    FastICA. The two branches are concatenated by a ``FeatureUnion``.

    Both parameters are currently unused: the spatial-filter stages (see
    ``spatial_filter_bins_steps``) are disabled in this variant.

    Returns
    -------
    steps : tuple
        ``("features", Pipeline)``
    """

    def _branch(*leading_steps):
        # branch-specific steps followed by the tail common to both branches
        return Pipeline(
            steps=[
                *leading_steps,
                ("data_channel_swap", ChannelDataSwap()),
                ("postprocessing", PostprocessingTransformer()),
                ("scaler", StandardScaler()),
                ("feature_selection", FastICA(random_state=random_state)),
            ]
        )

    ern_branch = _branch(
        ("ern_data_extraction", ErnTransformer()),
        ("ern_amplitude", ErnAmplitude2()),
    )
    pe_branch = _branch(
        ("pe_data_extraction", PeTransformer()),
        ("pe_amplitude", PeAmplitude2()),
    )

    union = FeatureUnion(
        [("ern_features", ern_branch), ("pe_features", pe_branch)],
        n_jobs=10,
    )

    return ("features", Pipeline([("ern_pe_features", union)]))

In [None]:
# SPATIAL FILTER - BINS - UNION


def spatial_filter_bins_union_amplitude(spatial_filter_n_components, timepoints_count):
    """Build the ("features", Pipeline) step joining ERN and Pe amplitudes.

    Each branch starts directly with its amplitude transformer
    (``ErnAmplitude`` / ``PeAmplitude``) and then runs the shared tail:
    channel/data swap, postprocessing, standard scaling and FastICA.
    The two branches are concatenated by a ``FeatureUnion``.

    Both parameters are currently unused: the spatial-filter stages (see
    ``spatial_filter_bins_steps``) are disabled in this variant.

    Returns
    -------
    steps : tuple
        ``("features", Pipeline)``
    """

    def _branch(*leading_steps):
        # branch-specific steps followed by the tail common to both branches
        return Pipeline(
            steps=[
                *leading_steps,
                ("data_channel_swap", ChannelDataSwap()),
                ("postprocessing", PostprocessingTransformer()),
                ("scaler", StandardScaler()),
                ("feature_selection", FastICA(random_state=random_state)),
            ]
        )

    ern_branch = _branch(("ern_amplitude", ErnAmplitude()))
    pe_branch = _branch(("pe_amplitude", PeAmplitude()))

    union = FeatureUnion(
        [("ern_features", ern_branch), ("pe_features", pe_branch)],
        n_jobs=10,
    )

    return ("features", Pipeline([("ern_pe_features", union)]))

In [None]:
# SPATIAL FILTER - BINS - UNION


def spatial_filter_bins_union_min_max_features(spatial_filter_n_components, timepoints_count):
    """Build the ("features", Pipeline) step joining ERN and Pe min/max features.

    Each branch extracts its component (``ErnTransformer`` / ``PeTransformer``),
    applies ``ErnMinMaxFeatures`` and then runs the shared tail: channel/data
    swap, postprocessing, standard scaling and FastICA. The two branches are
    concatenated by a ``FeatureUnion``.

    Both parameters are currently unused: the spatial-filter stages (see
    ``spatial_filter_bins_steps``) are disabled in this variant.

    Returns
    -------
    steps : tuple
        ``("features", Pipeline)``
    """

    def _branch(*leading_steps):
        # branch-specific steps followed by the tail common to both branches
        return Pipeline(
            steps=[
                *leading_steps,
                ("data_channel_swap", ChannelDataSwap()),
                ("postprocessing", PostprocessingTransformer()),
                ("scaler", StandardScaler()),
                ("feature_selection", FastICA(random_state=random_state)),
            ]
        )

    ern_branch = _branch(
        ("ern_data_extraction", ErnTransformer()),
        ("ern_min_max_features", ErnMinMaxFeatures()),
    )
    pe_branch = _branch(
        ("pe_data_extraction", PeTransformer()),
        # NOTE(review): the Pe branch uses ErnMinMaxFeatures as well; a
        # PeMinMaxFeatures may have been intended -- confirm against the
        # transformers module.
        ("pe_min_max_features", ErnMinMaxFeatures()),
    )

    union = FeatureUnion(
        [("ern_features", ern_branch), ("pe_features", pe_branch)],
        n_jobs=10,
    )

    return ("features", Pipeline([("ern_pe_features", union)]))

In [None]:
# SPATIAL FILTER - BINS - UNION


def spatial_filter_bins_union_plain_features(spatial_filter_n_components, timepoints_count):
    """Build the ("features", Pipeline) step joining plain ERN and Pe data.

    Each branch extracts its component (``ErnTransformer`` / ``PeTransformer``)
    without any amplitude or min/max step and then runs the shared tail:
    channel/data swap, postprocessing, standard scaling and FastICA.
    The two branches are concatenated by a ``FeatureUnion``.

    Both parameters are currently unused: the spatial-filter stages (see
    ``spatial_filter_bins_steps``) are disabled in this variant.

    Returns
    -------
    steps : tuple
        ``("features", Pipeline)``
    """

    def _branch(*leading_steps):
        # branch-specific steps followed by the tail common to both branches
        return Pipeline(
            steps=[
                *leading_steps,
                ("data_channel_swap", ChannelDataSwap()),
                ("postprocessing", PostprocessingTransformer()),
                ("scaler", StandardScaler()),
                ("feature_selection", FastICA(random_state=random_state)),
            ]
        )

    ern_branch = _branch(("ern_data_extraction", ErnTransformer()))
    pe_branch = _branch(("pe_data_extraction", PeTransformer()))

    union = FeatureUnion(
        [("ern_features", ern_branch), ("pe_features", pe_branch)],
        n_jobs=10,
    )

    return ("features", Pipeline([("ern_pe_features", union)]))

In [None]:
def spatial_filter_union_bins_features(spatial_filter_n_components, timepoints_count):
    """Build the ("features", Pipeline) step joining binned ERN and Pe data.

    Each branch extracts its component (``ErnTransformerTP`` /
    ``PeTransformerTP``), bins it with ``BinTransformer(step=step_tp)`` and
    then runs the shared tail: channel/data swap, postprocessing, standard
    scaling and FastICA. The two branches are concatenated by a
    ``FeatureUnion``.

    Both parameters are currently unused: the spatial-filter stages (see
    ``spatial_filter_bins_steps``) are disabled in this variant. The bin
    width comes from the global ``step_tp``.

    Returns
    -------
    steps : tuple
        ``("features", Pipeline)``
    """

    def _branch(*leading_steps):
        # branch-specific steps followed by the tail common to both branches
        return Pipeline(
            steps=[
                *leading_steps,
                ("data_channel_swap", ChannelDataSwap()),
                ("postprocessing", PostprocessingTransformer()),
                ("scaler", StandardScaler()),
                ("feature_selection", FastICA(random_state=random_state)),
            ]
        )

    ern_branch = _branch(
        ("ern_data_extraction", ErnTransformerTP()),
        ("binning", BinTransformer(step=step_tp)),
    )
    pe_branch = _branch(
        ("pe_data_extraction", PeTransformerTP()),
        ("binning", BinTransformer(step=step_tp)),
    )

    union = FeatureUnion(
        [("ern_features", ern_branch), ("pe_features", pe_branch)],
        n_jobs=10,
    )

    return ("features", Pipeline([("ern_pe_features", union)]))

In [None]:
def spatial_filter_centered_components(spatial_filter_n_components, timepoints_count):
    """Build the ``("features", Pipeline)`` step uniting centered ERN and Pe features.

    The ERN branch starts with ``CenteredERN(step=12)`` and the Pe branch with
    ``CenteredPe(step=12)``; both are then binned with ``step=12`` and passed
    through channel swap, postprocessing, standard scaling and FastICA. The
    branches are joined by a ``FeatureUnion`` (``n_jobs=10``).

    Parameters
    ----------
    spatial_filter_n_components, timepoints_count
        Accepted but not used by this builder.

    Returns
    -------
    tuple
        ``("features", Pipeline)``, i.e. a single named pipeline step.
    """

    def build_branch(extraction_step):
        # Shared tail applied after the branch-specific extraction step.
        return Pipeline(
            steps=[
                extraction_step,
                ("binning", BinTransformer(step=12)),
                ("data_channel_swap", ChannelDataSwap()),
                ("postprocessing", PostprocessingTransformer()),
                ("scaler", StandardScaler()),
                ("feature_selection", FastICA(random_state=random_state)),
            ]
        )

    ern_branch = build_branch(("ern_extraction", CenteredERN(step=12)))
    pe_branch = build_branch(("pe_extraction", CenteredPe(step=12)))

    union = FeatureUnion(
        [("ern_features", ern_branch), ("pe_features", pe_branch)],
        n_jobs=10,
    )

    return ("features", Pipeline([("ern_pe_features", union)]))

In [None]:
# SPATIAL FILTER - BINS - UNION - METRICS


def spatial_filter_bins_union_metrics_features(spatial_filter_n_components, timepoints_count, feature_name):
    """Build the ``("features", FeatureUnion)`` step combining EEG and questionnaire features.

    The EEG side is a full pipeline: epoch extraction, channel selection,
    per-participant averaging, PCA spatial filtering, low-pass filtering and
    binning, followed by a union of an ERN branch and a Pe branch. The metric
    side reads ``feature_name`` through ``GetFeature`` and standard-scales it.
    Reads the module-level ``dataset``, ``significant_channels``, ``step_tp``
    and ``random_state``.

    Parameters
    ----------
    spatial_filter_n_components
        ``n_components`` passed to the PCA spatial filter.
    timepoints_count
        Forwarded to ``SpatialFilterPostprocessing``.
    feature_name
        Name of the feature handed to ``GetFeature``. The step itself is
        always called ``"anxiety"``, whatever feature is requested.

    Returns
    -------
    tuple
        ``("features", FeatureUnion)``, i.e. a single named pipeline step.
    """

    def build_branch(extraction_step):
        # Shared tail applied after the branch-specific extraction step.
        return Pipeline(
            steps=[
                extraction_step,
                ("data_channel_swap", ChannelDataSwap()),
                ("postprocessing", PostprocessingTransformer()),
                ("scaler", StandardScaler()),
                ("feature_selection", FastICA(random_state=random_state)),
            ]
        )

    ern_branch = build_branch(("ern_data_extraction", ErnTransformer()))
    pe_branch = build_branch(("pe_data_extraction", PeTransformer()))
    eeg_union = FeatureUnion([("ern_features", ern_branch), ("pe_features", pe_branch)])

    eeg_steps = [
        ("extract_epochs", EEGdata(dataset=dataset)),
        ("channels_filtering", ChannelExtraction(significant_channels)),
        ("average_epochs", AveragePerParticipant()),
        ("spatial_filter_preprocessing", SpatialFilterPreprocessing()),
        (
            "spatial_filter",
            PCA(n_components=spatial_filter_n_components, random_state=random_state),
        ),
        (
            "spatial_filter_postprocessing",
            SpatialFilterPostprocessing(timepoints_count=timepoints_count),
        ),
        ("lowpass_filter", LowpassFilter()),
        ("binning", BinTransformer(step=step_tp)),
        ("ern_pe_features", eeg_union),
    ]
    eeg_pipeline = Pipeline(eeg_steps)

    metric_pipeline = Pipeline(
        steps=[
            ("anxiety", GetFeature(feature_name=feature_name, dataset=dataset)),
            ("scaler", StandardScaler()),
        ]
    )

    combined = FeatureUnion(
        [("eeg_features", eeg_pipeline), ("metric_features", metric_pipeline)]
    )
    return ("features", combined)

In [None]:
# BINS
def erp_bins_steps():
    """Return the pipeline steps for plain binned-ERP features.

    The steps are, in order: channel selection, low-pass filtering,
    per-participant averaging, binning with the module-level ``step_tp``,
    channel swap, postprocessing and standard scaling. No feature-selection
    step is included.

    Returns
    -------
    list of tuple
        ``(name, transformer)`` pairs usable as ``Pipeline`` steps.
    """
    return [
        ("channels_filtering", ChannelExtraction(significant_channels)),
        ("lowpass_filter", LowpassFilter()),
        ("average_epochs", AveragePerParticipant()),
        ("binning", BinTransformer(step=step_tp)),
        ("data_channel_swap", ChannelDataSwap()),
        ("postprocessing", PostprocessingTransformer()),
        ("scaler", StandardScaler()),
    ]

Generate estimator HTML representation

In [None]:
# from sklearn.utils import estimator_html_repr

# with open("my_estimator.html", "w") as f:
#     f.write(estimator_html_repr(Pipeline(this_steps)))

### Perform Experiments
___

#### Global parameters common for each experiment

In [None]:
# Channels included in the experiment: five rows (F, FC, C, CP, P) of five
# electrodes each.
#
# A narrower, three-column variant was used previously and is kept for reference:
#   F1 Fz F2 / FC1 FCz FC2 / C1 Cz C2 / CP1 CPz CP2 / P1 Pz P2
red_box = [
    "F3", "F1", "Fz", "F2", "F4",
    "FC3", "FC1", "FCz", "FC2", "FC4",
    "C3", "C1", "Cz", "C2", "C4",
    "CP3", "CP1", "CPz", "CP2", "CP4",
    "P3", "P1", "Pz", "P2", "P4",
]

# Translate channel names through channels_dict, keeping the order of red_box.
significant_channels = [channels_dict[name] for name in red_box]

In [None]:
# Spatial-filter estimators, keyed by their short name.
spatial_filters_dict = dict(
    ICA=FastICA(random_state=random_state),
    PCA=PCA(random_state=random_state),
)

In [None]:
# # bins width

# step_in_ms = 30  # in milliseconds (?)
# step_tp = int(signal_frequency * step_in_ms / 1000) # in timepoints

---
#### Experiment 1

- spatial filter
- bins
- feature selection

##### Spatial filter & binning

In [None]:
# Pipeline hyperparameters for this experiment.

spatial_filter = "PCA"

# (min, max, step) triples; they feed range()/np.arange(), so max is exclusive.
min_spatial_filter, max_spatial_filter, step_spatial_filter = 3, 6, 1
min_feature_selection, max_feature_selection, step_feature_selection = 2, 6, 1

# Search space over the feature pipeline: number of feature-selection
# components and the `step` of the amplitude steps, for the ERN and Pe branches.
regressor_params = {
    "features__ern_pe_features__ern_features__feature_selection__n_components": np.arange(
        min_feature_selection, max_feature_selection, step_feature_selection
    ),
    "features__ern_pe_features__pe_features__feature_selection__n_components": np.arange(
        min_feature_selection, max_feature_selection, step_feature_selection
    ),
    "features__ern_pe_features__ern_features__ern_amplitude__step": np.arange(5, 12, 2),
    "features__ern_pe_features__pe_features__pe_amplitude__step": np.arange(5, 12, 2),
}

In [None]:
# Candidate estimators as (step_name, estimator) pairs, each with its own
# hyperparameter grid keyed by "<step_name>__<parameter>".

en = ("en", ElasticNet(random_state=random_state))
en_params = {
    "en__alpha": np.logspace(-7, 3, num=20, base=10),
    "en__l1_ratio": np.logspace(-8, 0, num=17, base=10),
}

kr = ("kr", KernelRidge(kernel="rbf"))
kr_params = {
    "kr__alpha": np.logspace(-5, 3, num=20, base=10),
    "kr__gamma": np.logspace(-5, 3, num=20, base=10),
}

svr = ("svr", SVR())
svr_params = {
    "svr__kernel": ["linear", "rbf"],
    "svr__C": [0.00001, 0.0001, 0.001, 0.01, 0.1, 1, 10],
    "svr__gamma": ["scale"],
    "svr__epsilon": [0.00001, 0.0001, 0.001, 0.01, 0.1, 1],
}

# Only ElasticNet is evaluated at the moment; (svr, svr_params) and
# (kr, kr_params) are defined above but left out of the list.
tested_regressors = [(en, en_params)]

#### Run Experiment

---
# Centered Components

In [None]:
# Pipeline hyperparameters for the centered-components experiments.

spatial_filter = "PCA"

# (min, max, step) triples; they feed range()/np.arange(), so max is exclusive.
min_spatial_filter, max_spatial_filter, step_spatial_filter = 3, 6, 1
min_feature_selection, max_feature_selection, step_feature_selection = 2, 6, 1

# Search space over the feature pipeline: only the number of feature-selection
# components in the ERN and Pe branches is searched. The `step` of the binning
# and extraction steps (previously np.arange(5, 13, 2) and np.arange(5, 10, 2))
# is currently not part of the search.
regressor_params = {
    "features__ern_pe_features__ern_features__feature_selection__n_components": np.arange(
        min_feature_selection, max_feature_selection, step_feature_selection
    ),
    "features__ern_pe_features__pe_features__feature_selection__n_components": np.arange(
        min_feature_selection, max_feature_selection, step_feature_selection
    ),
}

In [None]:
# Start from an empty results table; the experiment loops below pass it to
# run_experiment and rebind it to the returned value.
results_static_ICA_bin_union_100_600_centered_components_df = pd.DataFrame()

In [None]:
# Manually sweep the number of spatial-filter (PCA) components.
# Feature branches: centered ERN / Pe -> binning -> amplitude -> FastICA.

timepoints_count = 181

# Silence warnings unless the interpreter was started with explicit -W options.
if not sys.warnoptions:
    warnings.simplefilter("ignore")
    os.environ["PYTHONWARNINGS"] = "ignore"  # also affects subprocesses

# range() is end-exclusive: max_spatial_filter itself is not tested.
for n_components in range(min_spatial_filter, max_spatial_filter, step_spatial_filter):
    ern_features = Pipeline(
        steps=[
            ("ern_extraction", CenteredERN(step=12)),
            ("binning", BinTransformer(step=12)),
            ("ern_amplitude", ErnAmplitude2()),
            ("data_channel_swap", ChannelDataSwap()),
            ("postprocessing", PostprocessingTransformer()),
            ("scaler", StandardScaler()),
            ("feature_selection", FastICA(random_state=random_state)),
        ]
    )

    pe_features = Pipeline(
        steps=[
            ("pe_extraction", CenteredPe(step=12)),
            ("binning", BinTransformer(step=12)),
            ("pe_amplitude", PeAmplitude2()),
            ("data_channel_swap", ChannelDataSwap()),
            ("postprocessing", PostprocessingTransformer()),
            ("scaler", StandardScaler()),
            ("feature_selection", FastICA(random_state=random_state)),
        ]
    )

    ern_pe_features = FeatureUnion(
        [("ern_features", ern_features), ("pe_features", pe_features)],
        n_jobs=10,
    )

    features = Pipeline([("ern_pe_features", ern_pe_features)])
    steps = ("features", features)

    pipeline_name = f"{spatial_filter}_{n_components}_centered_ampl_components"

    # Preprocessing is fitted once per n_components, outside run_experiment,
    # and its output on the training set is what the feature pipeline receives.
    preprocessed_pipeline = Pipeline(
        [
            ("extract_epochs", EEGdata(dataset=dataset)),
            ("channels_filtering", ChannelExtraction(significant_channels)),
            ("average_epochs", AveragePerParticipant()),
            ("spatial_filter_preprocessing", SpatialFilterPreprocessing()),
            ("spatial_filter", PCA(n_components=n_components, random_state=random_state)),
            (
                "spatial_filter_postprocessing",
                SpatialFilterPostprocessing(timepoints_count=timepoints_count),
            ),
            ("lowpass_filter", LowpassFilter()),
        ]
    ).fit(X_train)

    preprocessed_X = preprocessed_pipeline.transform(X_train)

    # The steps are built inline here instead of calling
    # spatial_filter_centered_components(...).
    this_steps = steps

    # Rate the tested models; the results table is threaded through each call.
    results_static_ICA_bin_union_100_600_centered_components_df = run_experiment(
        tested_regressors,
        regressor_params,
        pipeline_name,
        preprocessed_X,
        X_test,
        y_train,
        y_test,
        dataset_name,
        this_steps,
        preprocessed_pipeline,
        X_test_df,
        y_rum,
        results_static_ICA_bin_union_100_600_centered_components_df,
    )

In [None]:
# Persist the accumulated results table for the centered-components + amplitude run.
results_static_ICA_bin_union_100_600_centered_components_df.to_pickle("../data/regression_union_100-600_centered_components_amplmax_0.3.pkl")

In [None]:
# Search space for a single feature pipeline mounted directly under
# "features": only the number of feature-selection components is searched.
# The union-specific keys (features__ern_pe_features__...) do not apply here.
regressor_params = {
    "features__feature_selection__n_components": np.arange(
        min_feature_selection, max_feature_selection, step_feature_selection
    ),
}

In [None]:
# Manually sweep the number of spatial-filter (PCA) components.
# ERN-only variant: the Pe branch and the feature union are not used, and the
# ERN branch has no amplitude step.
# NOTE(review): pipeline_name still ends with "centered_ampl_components" — confirm
# that the label is intended for this variant.

timepoints_count = 181

# Silence warnings unless the interpreter was started with explicit -W options.
if not sys.warnoptions:
    warnings.simplefilter("ignore")
    os.environ["PYTHONWARNINGS"] = "ignore"  # also affects subprocesses

# range() is end-exclusive: max_spatial_filter itself is not tested.
for n_components in range(min_spatial_filter, max_spatial_filter, step_spatial_filter):
    ern_features = Pipeline(
        steps=[
            ("ern_extraction", CenteredERN(step=12)),
            ("binning", BinTransformer(step=12)),
            ("data_channel_swap", ChannelDataSwap()),
            ("postprocessing", PostprocessingTransformer()),
            ("scaler", StandardScaler()),
            ("feature_selection", FastICA(random_state=random_state)),
        ]
    )

    # The ERN pipeline itself is the "features" step (no FeatureUnion wrapper).
    steps = ("features", ern_features)

    pipeline_name = f"{spatial_filter}_{n_components}_centered_ampl_components"

    # Preprocessing is fitted once per n_components, outside run_experiment.
    preprocessed_pipeline = Pipeline(
        [
            ("extract_epochs", EEGdata(dataset=dataset)),
            ("channels_filtering", ChannelExtraction(significant_channels)),
            ("average_epochs", AveragePerParticipant()),
            ("spatial_filter_preprocessing", SpatialFilterPreprocessing()),
            ("spatial_filter", PCA(n_components=n_components, random_state=random_state)),
            (
                "spatial_filter_postprocessing",
                SpatialFilterPostprocessing(timepoints_count=timepoints_count),
            ),
            ("lowpass_filter", LowpassFilter()),
        ]
    ).fit(X_train)

    preprocessed_X = preprocessed_pipeline.transform(X_train)

    this_steps = steps

    # Rate the tested models; the results table is threaded through each call.
    results_static_ICA_bin_union_100_600_centered_components_df = run_experiment(
        tested_regressors,
        regressor_params,
        pipeline_name,
        preprocessed_X,
        X_test,
        y_train,
        y_test,
        dataset_name,
        this_steps,
        preprocessed_pipeline,
        X_test_df,
        y_rum,
        results_static_ICA_bin_union_100_600_centered_components_df,
    )

In [None]:
# Persist the results table after the ERN-only run.
# NOTE(review): the DataFrame is not reset in the cells between the previous
# experiment and this one, so this file may also contain rows from earlier
# loops — confirm this is intended.
results_static_ICA_bin_union_100_600_centered_components_df.to_pickle("../data/regression_union_100-600_centered_ern_0.3.pkl")

In [None]:
# Search space for a single feature pipeline mounted directly under
# "features": only the number of feature-selection components is searched.
# The union-specific keys (features__ern_pe_features__...) do not apply here.
regressor_params = {
    "features__feature_selection__n_components": np.arange(
        min_feature_selection, max_feature_selection, step_feature_selection
    ),
}

In [None]:
# Manually sweep the number of spatial-filter (PCA) components.
# ERN-only variant with the amplitude step: the Pe branch and the feature
# union are not used.

timepoints_count = 181

# Silence warnings unless the interpreter was started with explicit -W options.
if not sys.warnoptions:
    warnings.simplefilter("ignore")
    os.environ["PYTHONWARNINGS"] = "ignore"  # also affects subprocesses

# range() is end-exclusive: max_spatial_filter itself is not tested.
for n_components in range(min_spatial_filter, max_spatial_filter, step_spatial_filter):
    ern_features = Pipeline(
        steps=[
            ("ern_extraction", CenteredERN(step=12)),
            ("binning", BinTransformer(step=12)),
            ("ern_amplitude", ErnAmplitude2()),
            ("data_channel_swap", ChannelDataSwap()),
            ("postprocessing", PostprocessingTransformer()),
            ("scaler", StandardScaler()),
            ("feature_selection", FastICA(random_state=random_state)),
        ]
    )

    # The ERN pipeline itself is the "features" step (no FeatureUnion wrapper).
    steps = ("features", ern_features)

    pipeline_name = f"{spatial_filter}_{n_components}_centered_ampl_components"

    # Preprocessing is fitted once per n_components, outside run_experiment.
    preprocessed_pipeline = Pipeline(
        [
            ("extract_epochs", EEGdata(dataset=dataset)),
            ("channels_filtering", ChannelExtraction(significant_channels)),
            ("average_epochs", AveragePerParticipant()),
            ("spatial_filter_preprocessing", SpatialFilterPreprocessing()),
            ("spatial_filter", PCA(n_components=n_components, random_state=random_state)),
            (
                "spatial_filter_postprocessing",
                SpatialFilterPostprocessing(timepoints_count=timepoints_count),
            ),
            ("lowpass_filter", LowpassFilter()),
        ]
    ).fit(X_train)

    preprocessed_X = preprocessed_pipeline.transform(X_train)

    this_steps = steps

    # Rate the tested models; the results table is threaded through each call.
    results_static_ICA_bin_union_100_600_centered_components_df = run_experiment(
        tested_regressors,
        regressor_params,
        pipeline_name,
        preprocessed_X,
        X_test,
        y_train,
        y_test,
        dataset_name,
        this_steps,
        preprocessed_pipeline,
        X_test_df,
        y_rum,
        results_static_ICA_bin_union_100_600_centered_components_df,
    )

In [None]:
# Persist the results table after the ERN-only + amplitude run.
# NOTE(review): the DataFrame is not reset in the cells between the previous
# experiments and this one, so this file may also contain rows from earlier
# loops — confirm this is intended.
results_static_ICA_bin_union_100_600_centered_components_df.to_pickle("../data/regression_union_100-600_centered_ern_amplmax_0.3.pkl")

In [None]:
# Search space for the ERN/Pe feature union: only the number of
# feature-selection components in each branch is searched. The `step` of the
# binning and extraction steps is currently not part of the search.
regressor_params = {
    "features__ern_pe_features__ern_features__feature_selection__n_components": np.arange(
        min_feature_selection, max_feature_selection, step_feature_selection
    ),
    "features__ern_pe_features__pe_features__feature_selection__n_components": np.arange(
        min_feature_selection, max_feature_selection, step_feature_selection
    ),
}

In [None]:
# Manually sweep the number of spatial-filter (PCA) components.
# Variant: centered ERN / Pe branches (max-min amplitude + FastICA) united with
# two extra plain amplitude branches that have no feature-selection step.

timepoints_count = 181

# Silence warnings unless the interpreter was started with explicit -W options.
if not sys.warnoptions:
    warnings.simplefilter("ignore")
    os.environ["PYTHONWARNINGS"] = "ignore"  # also affects subprocesses

# range() is end-exclusive: max_spatial_filter itself is not tested.
for n_components in range(min_spatial_filter, max_spatial_filter, step_spatial_filter):
    ern_features = Pipeline(
        steps=[
            ("ern_extraction", CenteredERN(step=12)),
            ("binning", BinTransformer(step=12)),
            ("ern_amplitude", ErnAmplitude2()),
            ("data_channel_swap", ChannelDataSwap()),
            ("postprocessing", PostprocessingTransformer()),
            ("scaler", StandardScaler()),
            ("feature_selection", FastICA(random_state=random_state)),
        ]
    )

    pe_features = Pipeline(
        steps=[
            ("pe_extraction", CenteredPe(step=12)),
            ("binning", BinTransformer(step=12)),
            ("pe_amplitude", PeAmplitude2()),
            ("data_channel_swap", ChannelDataSwap()),
            ("postprocessing", PostprocessingTransformer()),
            ("scaler", StandardScaler()),
            ("feature_selection", FastICA(random_state=random_state)),
        ]
    )

    ern_amplitude = Pipeline(
        steps=[
            ("ern_amplitude", ErnAmplitude()),
            ("data_channel_swap", ChannelDataSwap()),
            ("postprocessing", PostprocessingTransformer()),
            ("scaler", StandardScaler()),
        ]
    )

    pe_amplitude = Pipeline(
        steps=[
            ("pe_amplitude", PeAmplitude()),
            ("data_channel_swap", ChannelDataSwap()),
            ("postprocessing", PostprocessingTransformer()),
            ("scaler", StandardScaler()),
        ]
    )

    ern_pe_features = FeatureUnion(
        [
            ("ern_features", ern_features),
            ("pe_features", pe_features),
            ("e_ampl", ern_amplitude),
            ("p_ampl", pe_amplitude),
        ],
        n_jobs=10,
    )

    features = Pipeline([("ern_pe_features", ern_pe_features)])
    steps = ("features", features)

    pipeline_name = f"{spatial_filter}_{n_components}_centered_ampl_components"

    # Preprocessing is fitted once per n_components, outside run_experiment.
    preprocessed_pipeline = Pipeline(
        [
            ("extract_epochs", EEGdata(dataset=dataset)),
            ("channels_filtering", ChannelExtraction(significant_channels)),
            ("average_epochs", AveragePerParticipant()),
            ("spatial_filter_preprocessing", SpatialFilterPreprocessing()),
            ("spatial_filter", PCA(n_components=n_components, random_state=random_state)),
            (
                "spatial_filter_postprocessing",
                SpatialFilterPostprocessing(timepoints_count=timepoints_count),
            ),
            ("lowpass_filter", LowpassFilter()),
        ]
    ).fit(X_train)

    preprocessed_X = preprocessed_pipeline.transform(X_train)

    this_steps = steps

    # Rate the tested models; the results table is threaded through each call.
    results_static_ICA_bin_union_100_600_centered_components_df = run_experiment(
        tested_regressors,
        regressor_params,
        pipeline_name,
        preprocessed_X,
        X_test,
        y_train,
        y_test,
        dataset_name,
        this_steps,
        preprocessed_pipeline,
        X_test_df,
        y_rum,
        results_static_ICA_bin_union_100_600_centered_components_df,
    )

In [None]:
# Persist the results table after the four-branch (components + amplitudes) run.
# NOTE(review): the DataFrame is not reset in the cells between the previous
# experiments and this one, so this file may also contain rows from earlier
# loops — confirm this is intended.
results_static_ICA_bin_union_100_600_centered_components_df.to_pickle("../data/regression_union_100-600_centered_components_amplmax_ampl_0.3.pkl")

# CENTERED SIGNAL

In [None]:
# Search space for the ERN/Pe feature union: the number of feature-selection
# components is searched independently in each branch.
regressor_params = {
    "features__ern_pe_features__ern_features__feature_selection__n_components": np.arange(
        min_feature_selection, max_feature_selection, step_feature_selection
    ),
    "features__ern_pe_features__pe_features__feature_selection__n_components": np.arange(
        min_feature_selection, max_feature_selection, step_feature_selection
    ),
}

In [None]:
# Reset the results table so the centered-signal experiment starts empty.
results_static_ICA_bin_union_100_600_centered_components_df = pd.DataFrame()

In [None]:
# Manually sweep the number of spatial-filter (PCA) components.
# Centered-signal variant: binning, baselining and centering happen in the
# preprocessing pipeline; the ERN branch has no amplitude step, the Pe branch
# uses PeAmplitude2.

timepoints_count = 181

# Silence warnings unless the interpreter was started with explicit -W options.
if not sys.warnoptions:
    warnings.simplefilter("ignore")
    os.environ["PYTHONWARNINGS"] = "ignore"  # also affects subprocesses

# range() is end-exclusive: max_spatial_filter itself is not tested.
for n_components in range(min_spatial_filter, max_spatial_filter, step_spatial_filter):
    ern_features = Pipeline(
        steps=[
            ("ern_data_extraction", ErnTransformer()),
            ("data_channel_swap", ChannelDataSwap()),
            ("postprocessing", PostprocessingTransformer()),
            ("scaler", StandardScaler()),
            ("feature_selection", FastICA(random_state=random_state)),
        ]
    )

    pe_features = Pipeline(
        steps=[
            ("pe_data_extraction", PeTransformer()),
            ("pe_amplitude", PeAmplitude2()),
            ("data_channel_swap", ChannelDataSwap()),
            ("postprocessing", PostprocessingTransformer()),
            ("scaler", StandardScaler()),
            ("feature_selection", FastICA(random_state=random_state)),
        ]
    )

    ern_pe_features = FeatureUnion(
        [("ern_features", ern_features), ("pe_features", pe_features)],
        n_jobs=10,
    )

    features = Pipeline([("ern_pe_features", ern_pe_features)])
    steps = ("features", features)

    pipeline_name = f"{spatial_filter}_{n_components}_centered_ampl_components"

    # Preprocessing is fitted once per n_components, outside run_experiment.
    preprocessed_pipeline = Pipeline(
        [
            ("extract_epochs", EEGdata(dataset=dataset)),
            ("channels_filtering", ChannelExtraction(significant_channels)),
            ("average_epochs", AveragePerParticipant()),
            ("spatial_filter_preprocessing", SpatialFilterPreprocessing()),
            ("spatial_filter", PCA(n_components=n_components, random_state=random_state)),
            (
                "spatial_filter_postprocessing",
                SpatialFilterPostprocessing(timepoints_count=timepoints_count),
            ),
            ("lowpass_filter", LowpassFilter()),
            ("binning", BinTransformer(step=12)),
            ("baseline", ErnBaselined()),
            ("centering", CenteredSignalAfterBaseline()),
        ]
    ).fit(X_train)

    preprocessed_X = preprocessed_pipeline.transform(X_train)

    this_steps = steps

    # Rate the tested models; the results table is threaded through each call.
    results_static_ICA_bin_union_100_600_centered_components_df = run_experiment(
        tested_regressors,
        regressor_params,
        pipeline_name,
        preprocessed_X,
        X_test,
        y_train,
        y_test,
        dataset_name,
        this_steps,
        preprocessed_pipeline,
        X_test_df,
        y_rum,
        results_static_ICA_bin_union_100_600_centered_components_df,
    )

In [None]:
# Persist the results table of the baselined + centered signal run.
results_static_ICA_bin_union_100_600_centered_components_df.to_pickle("../data/regression_union_100-600_baselined_centered_ampl-2-pe_0.4.pkl")

In [None]:
# Empty results table for the baselined peak-to-peak experiments.
results_static_ICA_bin_union_100_600_baselined_peak_to_peak_components_df = pd.DataFrame()

In [None]:
# Look up the current red_box channels in channels_dict, then run only the
# first three preprocessing stages on the training set to inspect their output.
significant_channels = [channels_dict[name] for name in red_box]

x = Pipeline(
    [
        ("extract_epochs", EEGdata(dataset=dataset)),
        ("channels_filtering", ChannelExtraction(significant_channels)),
        ("average_epochs", AveragePerParticipant()),
    ]
).fit_transform(X_train)

In [None]:
# Display the shape of the partially preprocessed training data.
x.shape

In [None]:
# Candidate channel selections ("boxes") to be compared in the experiment.
#
# Earlier grids, kept for reference only:
#   3 columns: F1 Fz F2 / FC1 FCz FC2 / C1 Cz C2 / CP1 CPz CP2 / P1 Pz P2
#   5 columns: F3 F1 Fz F2 F4 / FC3 FC1 FCz FC2 FC4 / C3 C1 Cz C2 C4 /
#              CP3 CP1 CPz CP2 CP4 / P3 P1 Pz P2 P4

red_box = ["F3", "F2", "F4", "C1", "Cz", "FCz", "C3", "C2", "C4", "P1", "Fpz", "P2", "P3", "CPz", "P4"]

# Without Fpz - not significant.
red_box2 = ["F3", "F2", "F4", "C1", "Cz", "FCz", "C3", "C2", "C4", "P1", "P2", "P3", "CPz", "P4"]

# Without Fpz and with F1 added - not significant.
red_box3 = ["F3", "F1", "F2", "F4", "C1", "Cz", "FCz", "C3", "C2", "C4", "P1", "P2", "P3", "CPz", "P4"]

red_box4 = [
    "Fpz", "F3", "F1", "Fz", "F2", "F4", "FCz", "C3", "C1",
    "Cz", "C2", "C4", "CPz", "P3", "P1", "P2", "P4",
]

red_box4_prim = ["Fpz", "F1", "Fz", "F2", "FCz", "C1", "Cz", "C2", "CPz", "P1", "P2"]

red_box5 = [
    "AFz", "F3", "F1", "Fz", "F2", "F4", "C1", "Cz", "FCz",
    "C3", "C2", "C4", "P1", "P2", "P3", "CPz", "P4",
]

red_box6 = ["Fpz", "AFz", "Fz", "FCz", "Cz", "CPz", "Pz"]

# A line plus a cluster at the back.
red_box7 = ["Fpz", "AFz", "Fz", "FCz", "Cz", "CPz", "P3", "P1", "Pz", "P2", "P4"]

red_box7_prim = ["Fpz", "AFz", "Fz", "FCz", "Cz", "CPz", "P1", "Pz", "P2"]

# A line plus clusters at the back and in the middle.
red_box8 = [
    "Fpz", "AFz", "Fz", "FCz", "C3", "C1", "Cz", "C2", "C4",
    "CPz", "P3", "P1", "Pz", "P2", "P4",
]

red_box8_prim = ["Fpz", "AFz", "Fz", "FCz", "C1", "Cz", "C2", "CPz", "P1", "Pz", "P2"]

# A line plus clusters at the back, in the middle and at the front.
red_box9 = [
    "Fpz", "AFz", "F3", "F1", "Fz", "F2", "F4", "FCz", "C3", "C1",
    "Cz", "C2", "C4", "CPz", "P3", "P1", "Pz", "P2", "P4",
]

red_box9_prim = ["Fpz", "AFz", "F1", "Fz", "F2", "FCz", "C1", "Cz", "C2", "CPz", "P1", "Pz", "P2"]

red_box10 = [
    "Fpz", "AFz", "F3", "F1", "Fz", "F2", "F4", "FCz", "Cz",
    "CP3", "CP1", "CPz", "CP2", "CP4", "Pz",
]

red_box10_prim = ["Fpz", "AFz", "F1", "Fz", "F2", "FCz", "Cz", "CP1", "CPz", "CP2", "Pz"]

# Selections actually iterated over, in this order.
# red_box, red_box2, red_box3 and red_box5 are defined but not included.
box_list = [
    red_box6,
    red_box7,
    red_box7_prim,
    red_box8,
    red_box8_prim,
    red_box9,
    red_box9_prim,
    red_box4,
    red_box4_prim,
    red_box10,
    red_box10_prim,
]

# significant_channels is not recomputed here:
# significant_channels = [channels_dict[channel] for channel in red_box]

In [None]:
# Manually test different numbers of spatial filter components for every
# electrode selection ("box") in box_list.

# Imported locally so this cell does not depend on a later cell having run:
# NOTE(review): in the visible part of the notebook `warnings` is only imported
# further down - drop this line if an earlier cell already imports it.
import warnings

timepoints_count = 181

if not sys.warnoptions:
    warnings.simplefilter("ignore")
    os.environ["PYTHONWARNINGS"] = "ignore"  # Also affect subprocesses

for box in box_list:
    print(f"BOX: {box}")
    # Look every electrode name of the box up in channels_dict.
    significant_channels = [channels_dict[channel] for channel in box]

    for n_components in range(min_spatial_filter, max_spatial_filter, step_spatial_filter):
        # ERN feature branch.
        ern_features = Pipeline(
            steps=[
                ("ern_data_extraction", ErnTransformer()),
                ("ern_amplitude", ErnAmplitude2()),
                ("data_channel_swap", ChannelDataSwap()),
                ("postprocessing", PostprocessingTransformer()),
                ("scaler", StandardScaler()),
                ("feature_selection", FastICA(random_state=random_state)),
            ]
        )

        # Pe feature branch, built the same way as the ERN one.
        pe_features = Pipeline(
            steps=[
                ("pe_data_extraction", PeTransformer()),
                ("pe_amplitude", PeAmplitude2()),
                ("data_channel_swap", ChannelDataSwap()),
                ("postprocessing", PostprocessingTransformer()),
                ("scaler", StandardScaler()),
                ("feature_selection", FastICA(random_state=random_state)),
            ]
        )

        ern_pe_features = FeatureUnion(
            [("ern_features", ern_features), ("pe_features", pe_features)],
            n_jobs=10,
        )

        features = Pipeline([("ern_pe_features", ern_pe_features)])

        # NOTE(review): this is a single (name, estimator) tuple, not a list of
        # steps - confirm that is what run_experiment expects.
        steps = ("features", features)

        # NOTE(review): the name does not encode the box, so rows produced for
        # different boxes share the same pipeline_name.
        pipeline_name = f"{spatial_filter}_{n_components}_centered_ampl_components"

        # Preprocessing is fitted on the training data for this box and
        # spatial-filter size; the feature pipeline is handed to
        # run_experiment separately via this_steps.
        preprocessed_pipeline = Pipeline(
            [
                ("extract_epochs", EEGdata(dataset=dataset)),
                ("channels_filtering", ChannelExtraction(significant_channels)),
                ("average_epochs", AveragePerParticipant()),
                ("spatial_filter_preprocessing", SpatialFilterPreprocessing()),
                ("spatial_filter", PCA(n_components=n_components, random_state=random_state)),
                (
                    "spatial_filter_postprocessing",
                    SpatialFilterPostprocessing(timepoints_count=timepoints_count),
                ),
                ("lowpass_filter", LowpassFilter()),
                ("binning", BinTransformer(step=12)),
                ("baseline", ErnBaselined()),
                ("centering", CenteredSignalAfterBaseline()),
                # ('features', features)
            ]
        ).fit(X_train)

        preprocessed_X = preprocessed_pipeline.transform(X_train)

        # this_steps = spatial_filter_centered_components(spatial_filter_n_components=n_components, timepoints_count=timepoints_count)
        this_steps = steps

        # rate different models
        results_static_ICA_bin_union_100_600_baselined_peak_to_peak_components_df = run_experiment(
            tested_regressors,
            regressor_params,
            pipeline_name,
            preprocessed_X,
            X_test,
            y_train,
            y_test,
            dataset_name,
            this_steps,
            preprocessed_pipeline,
            X_test_df,
            y_rum,
            results_static_ICA_bin_union_100_600_baselined_peak_to_peak_components_df,
        )

In [None]:
max(0,2)

In [None]:
results_static_ICA_bin_union_100_600_baselined_peak_to_peak_components_df

In [None]:
results_static_ICA_bin_union_100_600_baselined_peak_to_peak_components_df.to_pickle("../data/regression_union_100-600_baselined_centered_diff_boxes.pkl")

---

In [None]:
    # Build the ERN / Pe feature branches and a fixed 3-component preprocessing
    # pipeline, then transform the train and test data for manual inspection.
    # NOTE(review): the first part of this cell is indented while the rest is at
    # column 0. As a plain Python script that is an IndentationError, so the
    # cell presumably relies on the notebook front end stripping the leading
    # indent - consider dedenting.
    ern_features = Pipeline(steps=[
                    ("ern_data_extraction", ExtractErnBins()),
                    ("ern_amplitude", ErnAmplitudeInBins()),
                    ("data_channel_swap", ChannelDataSwap()),
                    ("postprocessing", PostprocessingTransformer()),
                    ("scaler", StandardScaler()),
                    ("feature_selection", FastICA(random_state=random_state))
    ])
     

    pe_features = Pipeline(steps = [
                    ("pe_data_extraction", ExtractPeBins()),
                    # ("pe_min_max_features", PeMinMaxFeatures()),
                    # ("pe_amplitude", PeAmplitude2()),
                    # ("pe_centered", CenteredPeAfterBaseline()),
                    ("pe_amplitude", PeAmplitudeInBins()),
                    ("data_channel_swap", ChannelDataSwap()),
                    ("postprocessing", PostprocessingTransformer()),
                    ("scaler", StandardScaler()),
                    ("feature_selection", FastICA(random_state=random_state))
    ])
    
    # The union is built here but not used by the pipeline below (its
    # 'ern_pe_features' step is commented out there).
    ern_pe_features = FeatureUnion([("ern_features", ern_features), ("pe_features", pe_features)], n_jobs = 10)

#     features = Pipeline([
#         ('ern_pe_features', ern_pe_features)

#     ])

# steps = ('features', features)
# significant_channels is not assigned in this cell; the value left behind by
# an earlier cell is used.
preprocessed_pipeline = Pipeline([("extract_epochs", EEGdata(dataset=dataset)),
            # 257 - 76 = 181, the same number passed as timepoints_count below
            # (presumably the window kept for the spatial filter - TODO confirm).
            ("narrow_indices", NarrowIndices(start=76, stop=257)),
            (
                "channels_filtering",
                ChannelExtraction(significant_channels)
            ),
            (
                "average_epochs",
                AveragePerParticipant(),
            ),
            ("spatial_filter_preprocessing", SpatialFilterPreprocessing()),
            (
                "spatial_filter",
                PCA(n_components=3, random_state=random_state),
            ),
            (
                "spatial_filter_postprocessing",
                SpatialFilterPostprocessing(
                    timepoints_count=181,
                ),
            ),
            ("lowpass_filter", LowpassFilter()),
            # ("binning", BinTransformer(step=12)),
            # ("baseline", BaselineToFirstPositivityPeak()),
            # ("centering", CenterSignalToErn()),
            # ('ern_pe_features', ern_pe_features)
                                  ]).fit(X_train)

# The pipeline is fitted on X_train only and then applied to both sets.
preprocessed_X = preprocessed_pipeline.transform(X_train)
pre_processed_test_X = preprocessed_pipeline.transform(X_test_df)

In [None]:
from sklearn.utils import estimator_html_repr

# Dump the pipeline diagram to an HTML file. The encoding is pinned so the
# output does not depend on the platform's default locale encoding.
with open("my_estimator.html", "w", encoding="utf-8") as f:
    f.write(estimator_html_repr(preprocessed_pipeline))

In [None]:
# Reduced pipeline: only epoch extraction and channel filtering are active.
# Disabled for this run (kept here as a record of the full chain):
#   narrow_indices  NarrowIndices(start=76, stop=257)
#   average_epochs  AveragePerParticipant()
#   spatial_filter_preprocessing  SpatialFilterPreprocessing()
#   spatial_filter  PCA(n_components=3, random_state=random_state)
#   spatial_filter_postprocessing  SpatialFilterPostprocessing(timepoints_count=181)
#   lowpass_filter  LowpassFilter()
#   binning  BinTransformer(step=12)
#   baseline  ErnBaselined()
#   centering  CenteredSignalAfterBaseline()
preprocessed_pipeline = Pipeline(
    [
        ("extract_epochs", EEGdata(dataset=dataset)),
        ("channels_filtering", ChannelExtraction(significant_channels)),
    ]
).fit(X_train)

# Fitted on X_train, applied to both the training data and X_test_df.
preprocessed_X = preprocessed_pipeline.transform(X_train)
pre_processed_test_X = preprocessed_pipeline.transform(X_test_df)

In [None]:
preprocessed_X[0][0][1]

In [None]:
mean_X = np.mean(preprocessed_X, axis=0)
mean_X_test = np.mean(pre_processed_test_X, axis=0)

In [None]:
import matplotlib.pyplot as plt

# Overlay the first three rows of the test-set mean.
for i in range(3):
    plt.plot(mean_X_test[i])

# Optional guide lines every 12 timepoints (disabled):
# for x in range(12, 109, 12):
#     plt.axvline(x=x, color="grey", linewidth = 2, linestyle='--', alpha=0.5)

In [None]:
import matplotlib.pyplot as plt

# Overlay row 0 of three hand-picked entries of preprocessed_X.
for idx in (6, 14, 2):
    plt.plot(preprocessed_X[idx][0])

# Dashed guide lines at x = 1..9 (one loop instead of nine copied calls; this
# also keeps the cell from echoing the last Line2D object).
for x_pos in range(1, 10):
    plt.axvline(x=x_pos, color="grey", linewidth=2, linestyle="--", alpha=0.5)

In [None]:
import matplotlib.pyplot as plt

# Overlay row 1 of three hand-picked entries of preprocessed_X.
for idx in (6, 14, 2):
    plt.plot(preprocessed_X[idx][1])

# Dashed guide lines at x = 1..9 (one loop instead of nine copied calls; this
# also keeps the cell from echoing the last Line2D object).
for x_pos in range(1, 10):
    plt.axvline(x=x_pos, color="grey", linewidth=2, linestyle="--", alpha=0.5)

In [None]:
import matplotlib.pyplot as plt

# Overlay the sign-flipped row 0 of three hand-picked entries of preprocessed_X.
for idx in (6, 14, 2):
    plt.plot(-preprocessed_X[idx][0])

# Dashed guide lines at x = 5 and at every multiple of 12 up to 108 (one loop
# instead of ten copied calls).
for x_pos in (5, *range(12, 109, 12)):
    plt.axvline(x=x_pos, color="grey", linewidth=2, linestyle="--", alpha=0.5)

plt.savefig("train_component_0.png")

In [None]:
import matplotlib.pyplot as plt

# Overlay row 0 of three hand-picked entries of preprocessed_X.
for idx in (6, 14, 2):
    plt.plot(preprocessed_X[idx][0])

# Dashed guide lines at x = 1..9 (one loop instead of nine copied calls; this
# also keeps the cell from echoing the last Line2D object).
for x_pos in range(1, 10):
    plt.axvline(x=x_pos, color="grey", linewidth=2, linestyle="--", alpha=0.5)

In [None]:
# check participants

In [None]:
# Two-step ERN pipeline: CenteredERN followed by BinTransformer, both with
# step=12. The remaining steps of the usual feature branch (data_channel_swap,
# postprocessing, scaler, FastICA feature_selection) are left out here.
ern_features = Pipeline(
    steps=[
        ("ern_extraction", CenteredERN(step=12)),
        ("binning", BinTransformer(step=12)),
    ]
)

#         pe_features = Pipeline(steps = [
#                         ("pe_extraction", CenteredPe(step=12)),
#                         # ("binning", BinTransformer(step=12)),
#                         # ("data_channel_swap", ChannelDataSwap()),
#                         # ("postprocessing", PostprocessingTransformer()),
#                         # ("scaler", StandardScaler()),
#                         # ("feature_selection", FastICA(random_state=random_state))
#         ])
    
#         ern_pe_features = FeatureUnion([("ern_features", ern_features), ("pe_features", pe_features)], n_jobs = 10)

#         features = Pipeline([
#             ('ern_pe_features', ern_pe_features)

#         ])

#         steps = ('features', features)

ern_fitted = ern_features.fit_transform(preprocessed_X)
ern_test_fitted = ern_features.transform(pre_processed_test_X)

In [None]:
# A notebook cell only echoes its last expression, so the two shapes are shown
# together as a tuple; as two separate statements the first one was evaluated
# and silently discarded.
ern_fitted.shape, ern_test_fitted.shape

In [None]:
ern_fitted_mean = np.mean(ern_fitted, axis=0)

In [None]:
ern_fitted_mean.shape

In [None]:
import matplotlib.pyplot as plt

for i in range(0, 10):
    plt.plot(ern_fitted[i][0])

# plt.plot(ern_fitted_mean[0])


In [None]:
# results_static_ICA_bin_union_100_600_centered_components_df["ex_score"] = 0


# for i in range(0,3):
#     pre_processed_test_X = results_static_ICA_bin_union_100_600_centered_components_df.pre_processed_pipeline[i].transform(X_test_df)
#     # print(pre_processed_test_X.shape)
#     estimator = results_static_ICA_bin_union_100_600_centered_components_df.best_estimator[i]
#     score = estimator.score(pre_processed_test_X, y_rum)
#     # results_static_ICA_bin_union_100_600_better_ampl_bins30_df["ex_score"] = score
#     # print(score)
#     results_static_ICA_bin_union_100_600_centered_components_df["ex_score"][i] = score


In [None]:
results_static_ICA_bin_union_100_600_centered_components_df

In [None]:
results_static_ICA_bin_union_100_600_centered_components_df.to_pickle("../data/regression_union_100-600_centered_components_0.3.pkl")

----
# Better Amplitude

In [None]:
# Hyperparameters of the pipeline.

spatial_filter = "PCA"

# Bounds for the number of spatial-filter components (used as range() arguments).
min_spatial_filter, max_spatial_filter, step_spatial_filter = 3, 6, 1

# Bounds for the number of feature-selection components.
min_feature_selection, max_feature_selection, step_feature_selection = 2, 6, 1

# Search space for training: the number of feature-selection components and the
# amplitude step, for the ERN and the Pe branch independently. Every entry gets
# its own array.
regressor_params = {
    "features__ern_pe_features__ern_features__feature_selection__n_components": np.arange(
        min_feature_selection, max_feature_selection, step_feature_selection
    ),
    "features__ern_pe_features__pe_features__feature_selection__n_components": np.arange(
        min_feature_selection, max_feature_selection, step_feature_selection
    ),
    "features__ern_pe_features__ern_features__ern_amplitude__step": np.arange(5, 12, 2),
    "features__ern_pe_features__pe_features__pe_amplitude__step": np.arange(5, 12, 2),
}

In [None]:
results_static_ICA_bin_union_100_600_better_ampl_bins_train_df = pd.DataFrame()

In [None]:
# Manually test different numbers of spatial filter components.

# Imported locally so this cell does not depend on a later cell having run:
# NOTE(review): in the visible part of the notebook `warnings` is only imported
# further down - drop this line if an earlier cell already imports it.
import warnings

timepoints_count = 181

if not sys.warnoptions:
    warnings.simplefilter("ignore")
    os.environ["PYTHONWARNINGS"] = "ignore"  # Also affect subprocesses

for n_components in range(min_spatial_filter, max_spatial_filter, step_spatial_filter):
    pipeline_name = f"{spatial_filter}_{n_components}_bins_union_100_600_ampl"

    # Preprocessing is fitted on the training data once per spatial-filter size.
    preprocessed_pipeline = Pipeline(
        [
            ("extract_epochs", EEGdata(dataset=dataset)),
            # ("narrow_indices", NarrowIndices(start=76, stop=257)),
            ("channels_filtering", ChannelExtraction(significant_channels)),
            ("average_epochs", AveragePerParticipant()),
            ("spatial_filter_preprocessing", SpatialFilterPreprocessing()),
            ("spatial_filter", PCA(n_components=n_components, random_state=random_state)),
            (
                "spatial_filter_postprocessing",
                SpatialFilterPostprocessing(timepoints_count=timepoints_count),
            ),
            ("lowpass_filter", LowpassFilter()),
        ]
    ).fit(X_train)

    preprocessed_X = preprocessed_pipeline.transform(X_train)

    this_steps = spatial_filter_bins_union_amplitude(
        spatial_filter_n_components=n_components, timepoints_count=timepoints_count
    )

    # rate different models
    results_static_ICA_bin_union_100_600_better_ampl_bins_train_df = run_experiment(
        tested_regressors,
        regressor_params,
        pipeline_name,
        preprocessed_X,
        X_test,
        y_train,
        y_test,
        dataset_name,
        this_steps,
        preprocessed_pipeline,
        X_test_df,
        y_rum,
        results_static_ICA_bin_union_100_600_better_ampl_bins_train_df,
    )

In [None]:
# results_static_ICA_bin_union_100_600_better_ampl_bins_train_df["ex_score"] = 0


# for i in range(0,3):
#     pre_processed_test_X = results_static_ICA_bin_union_100_600_better_ampl_bins_train_df.pre_processed_pipeline[i].transform(X_test_df)
#     # print(pre_processed_test_X.shape)
#     estimator = results_static_ICA_bin_union_100_600_better_ampl_bins_train_df.best_estimator[i]
#     score = estimator.score(pre_processed_test_X, y_rum)
#     # results_static_ICA_bin_union_100_600_better_ampl_bins30_df["ex_score"] = score
#     # results_static_ICA_bin_union_100_600_better_ampl_bins_train_df["ex_score"][i] = score

#     print(score)


In [None]:
results_static_ICA_bin_union_100_600_better_ampl_bins_train_df

In [None]:
results_static_ICA_bin_union_100_600_better_ampl_bins_train_df.to_pickle("../data/regression_union_100-600_better_ampl_trained_first_component_0.3.pkl")

# 1

In [None]:
import warnings
warnings.filterwarnings('ignore')

In [None]:
# bins width

step_in_ms = 30  # bin width in milliseconds
# Convert the width to whole timepoints. int() truncates, so the effective bin
# is narrower than step_in_ms whenever the product is not a whole number
# (e.g. 256 * 30 / 1000 = 7.68 -> 7).
step_tp = int(signal_frequency * step_in_ms / 1000) # in timepoints

In [None]:
results_static_ICA_bin_union_100_600_ampl_bins30_df = pd.DataFrame()

In [None]:
# Manually test different numbers of spatial filter components.

timepoints_count = 181

if not sys.warnoptions:
    warnings.simplefilter("ignore")
    os.environ["PYTHONWARNINGS"] = "ignore"  # Also affect subprocesses

for n_components in range(min_spatial_filter, max_spatial_filter, step_spatial_filter):
    pipeline_name = f"{spatial_filter}_{n_components}_bins_union_100_600_ampl"

    # Preprocessing is fitted on the training data once per spatial-filter size.
    preprocessed_pipeline = Pipeline(
        [
            ("extract_epochs", EEGdata(dataset=dataset)),
            # ("narrow_indices", NarrowIndices(start=76, stop=257)),
            ("channels_filtering", ChannelExtraction(significant_channels)),
            ("average_epochs", AveragePerParticipant()),
            ("spatial_filter_preprocessing", SpatialFilterPreprocessing()),
            ("spatial_filter", PCA(n_components=n_components, random_state=random_state)),
            (
                "spatial_filter_postprocessing",
                SpatialFilterPostprocessing(timepoints_count=timepoints_count),
            ),
            ("lowpass_filter", LowpassFilter()),
            ("binning", BinTransformer(step=step_tp)),
        ]
    ).fit(X_train)

    preprocessed_X = preprocessed_pipeline.transform(X_train)

    this_steps = spatial_filter_bins_union_features(
        spatial_filter_n_components=n_components, timepoints_count=timepoints_count
    )

    # Rate different models. X_test_df and y_rum are passed before the results
    # frame so the argument list matches the other run_experiment calls in this
    # notebook (e.g. the "Better Amplitude" sweep); without them the results
    # frame would land in the X_test_df position.
    # NOTE(review): confirm against the current run_experiment signature.
    results_static_ICA_bin_union_100_600_ampl_bins30_df = run_experiment(
        tested_regressors,
        regressor_params,
        pipeline_name,
        preprocessed_X,
        X_test,
        y_train,
        y_test,
        dataset_name,
        this_steps,
        preprocessed_pipeline,
        X_test_df,
        y_rum,
        results_static_ICA_bin_union_100_600_ampl_bins30_df,
    )

In [None]:
# Score the first three result rows on X_test_df / y_rum and store the value in
# the "ex_score" column.
for i in range(3):
    pre_processed_test_X = results_static_ICA_bin_union_100_600_ampl_bins30_df.pre_processed_pipeline[i].transform(X_test_df)
    estimator = results_static_ICA_bin_union_100_600_ampl_bins30_df.best_estimator[i]
    score = estimator.score(pre_processed_test_X, y_rum)
    # .loc writes into the frame itself and creates the column on first use.
    # The chained form df["ex_score"][i] = score needs the column to exist
    # already and may end up assigning to a temporary copy.
    results_static_ICA_bin_union_100_600_ampl_bins30_df.loc[i, "ex_score"] = score


In [None]:
results_static_ICA_bin_union_100_600_ampl_bins30_df

In [None]:
results_static_ICA_bin_union_100_600_ampl_bins30_df.to_pickle("../data/regression_union_100-600_ampl_bins30_0.3.pkl")

---
# 2

In [None]:
# bins width

step_in_ms = 30  # bin width in milliseconds
# Convert the width to whole timepoints. int() truncates, so the effective bin
# is narrower than step_in_ms whenever the product is not a whole number
# (e.g. 256 * 30 / 1000 = 7.68 -> 7).
step_tp = int(signal_frequency * step_in_ms / 1000) # in timepoints

In [None]:
results_static_ICA_bin_union_100_600_max_bins30_df = pd.DataFrame()

In [None]:
# Manually test different numbers of spatial filter components.

timepoints_count = 181

if not sys.warnoptions:
    warnings.simplefilter("ignore")
    os.environ["PYTHONWARNINGS"] = "ignore"  # Also affect subprocesses

for n_components in range(min_spatial_filter, max_spatial_filter, step_spatial_filter):
    # NOTE(review): the name ends in "_ampl" although this sweep uses the
    # min/max feature steps - confirm the label is intended.
    pipeline_name = f"{spatial_filter}_{n_components}_bins_union_100_600_ampl"

    # Preprocessing is fitted on the training data once per spatial-filter size.
    preprocessed_pipeline = Pipeline(
        [
            ("extract_epochs", EEGdata(dataset=dataset)),
            # ("narrow_indices", NarrowIndices(start=76, stop=257)),
            ("channels_filtering", ChannelExtraction(significant_channels)),
            ("average_epochs", AveragePerParticipant()),
            ("spatial_filter_preprocessing", SpatialFilterPreprocessing()),
            ("spatial_filter", PCA(n_components=n_components, random_state=random_state)),
            (
                "spatial_filter_postprocessing",
                SpatialFilterPostprocessing(timepoints_count=timepoints_count),
            ),
            ("lowpass_filter", LowpassFilter()),
            ("binning", BinTransformer(step=step_tp)),
        ]
    ).fit(X_train)

    preprocessed_X = preprocessed_pipeline.transform(X_train)

    this_steps = spatial_filter_bins_union_min_max_features(
        spatial_filter_n_components=n_components, timepoints_count=timepoints_count
    )

    # Rate different models. X_test_df and y_rum are passed before the results
    # frame so the argument list matches the other run_experiment calls in this
    # notebook (e.g. the "Better Amplitude" sweep); without them the results
    # frame would land in the X_test_df position.
    # NOTE(review): confirm against the current run_experiment signature.
    results_static_ICA_bin_union_100_600_max_bins30_df = run_experiment(
        tested_regressors,
        regressor_params,
        pipeline_name,
        preprocessed_X,
        X_test,
        y_train,
        y_test,
        dataset_name,
        this_steps,
        preprocessed_pipeline,
        X_test_df,
        y_rum,
        results_static_ICA_bin_union_100_600_max_bins30_df,
    )

In [None]:
# Score the first three result rows on X_test_df / y_rum, store the value in
# the "ex_score" column and print it.
for i in range(3):
    pre_processed_test_X = results_static_ICA_bin_union_100_600_max_bins30_df.pre_processed_pipeline[i].transform(X_test_df)
    estimator = results_static_ICA_bin_union_100_600_max_bins30_df.best_estimator[i]
    score = estimator.score(pre_processed_test_X, y_rum)
    # .loc writes into the frame itself and creates the column on first use.
    # The chained form df["ex_score"][i] = score needs the column to exist
    # already and may end up assigning to a temporary copy.
    results_static_ICA_bin_union_100_600_max_bins30_df.loc[i, "ex_score"] = score
    print(score)

In [None]:
results_static_ICA_bin_union_100_600_max_bins30_df

In [None]:
results_static_ICA_bin_union_100_600_max_bins30_df.to_pickle("../data/regression_union_100-600_max_bins30_0.3.pkl")

---
# 3

In [None]:
# bins width

step_in_ms = 50  # bin width in milliseconds
# Convert the width to whole timepoints. int() truncates, so the effective bin
# is narrower than step_in_ms whenever the product is not a whole number
# (e.g. 256 * 50 / 1000 = 12.8 -> 12).
step_tp = int(signal_frequency * step_in_ms / 1000) # in timepoints

In [None]:
# Hyperparameters of the pipeline.

spatial_filter = "PCA"

# Bounds for the number of spatial-filter components (used as range() arguments).
min_spatial_filter, max_spatial_filter, step_spatial_filter = 3, 6, 1

# Bounds for the number of feature-selection components.
min_feature_selection, max_feature_selection, step_feature_selection = 2, 6, 1

# Search space for training, pinned to a single value per branch:
# 2 components for ERN and 4 for Pe. The amplitude-step entries
# (np.arange(5, 12, 2) for both branches) are switched off in this section.
regressor_params = {
    "features__ern_pe_features__ern_features__feature_selection__n_components": np.arange(
        2, 3, step_feature_selection
    ),
    "features__ern_pe_features__pe_features__feature_selection__n_components": np.arange(
        4, 5, step_feature_selection
    ),
}

In [None]:
results_static_ICA_bin_union_100_600_ampl_bins50_df = pd.DataFrame()

In [None]:
# Manually test different numbers of spatial filter components.

timepoints_count = 181

if not sys.warnoptions:
    warnings.simplefilter("ignore")
    os.environ["PYTHONWARNINGS"] = "ignore"  # Also affect subprocesses

for n_components in range(min_spatial_filter, max_spatial_filter, step_spatial_filter):
    pipeline_name = f"{spatial_filter}_{n_components}_bins_union_100_600_ampl"

    # Preprocessing is fitted on the training data once per spatial-filter size.
    preprocessed_pipeline = Pipeline(
        [
            ("extract_epochs", EEGdata(dataset=dataset)),
            # ("narrow_indices", NarrowIndices(start=76, stop=257)),
            ("channels_filtering", ChannelExtraction(significant_channels)),
            ("average_epochs", AveragePerParticipant()),
            ("spatial_filter_preprocessing", SpatialFilterPreprocessing()),
            ("spatial_filter", PCA(n_components=n_components, random_state=random_state)),
            (
                "spatial_filter_postprocessing",
                SpatialFilterPostprocessing(timepoints_count=timepoints_count),
            ),
            ("lowpass_filter", LowpassFilter()),
            ("binning", BinTransformer(step=step_tp)),
        ]
    ).fit(X_train)

    preprocessed_X = preprocessed_pipeline.transform(X_train)

    this_steps = spatial_filter_bins_union_features(
        spatial_filter_n_components=n_components, timepoints_count=timepoints_count
    )

    # Rate different models and collect the rows in the running results frame.
    results_static_ICA_bin_union_100_600_ampl_bins50_df = run_experiment(
        tested_regressors,
        regressor_params,
        pipeline_name,
        preprocessed_X,
        X_test,
        y_train,
        y_test,
        dataset_name,
        this_steps,
        preprocessed_pipeline,
        X_test_df,
        y_rum,
        results_static_ICA_bin_union_100_600_ampl_bins50_df,
    )

In [None]:
results_static_ICA_bin_union_100_600_ampl_bins50_df.to_pickle("../data/regression_union_100-600_ampl_bins50_0.4.pkl")

In [None]:
results_df = pd.read_pickle(
    "../data/split0.3/regression_union_100-600_ampl_bins50_0.3_significant.pkl"
)

In [None]:
results_df

In [None]:
# Print the score on X_test_df / y_rum for the first three rows of results_df.
# Nothing is written back to the frame here.
for i in range(3):
    pre_processed_test_X = results_df.pre_processed_pipeline[i].transform(X_test_df)
    estimator = results_df.best_estimator[i]
    score = estimator.score(pre_processed_test_X, y_rum)
    print(score)

In [None]:
results_static_ICA_bin_union_100_600_ampl_bins50_df.to_pickle("../data/regression_union_100-600_ampl_bins50_0.3.pkl")

In [None]:
results_static_ICA_bin_union_100_600_ampl_bins50_df

---
# 4

In [None]:
# bins width

step_in_ms = 50  # bin width in milliseconds
# Convert the width to whole timepoints. int() truncates, so the effective bin
# is narrower than step_in_ms whenever the product is not a whole number
# (e.g. 256 * 50 / 1000 = 12.8 -> 12).
step_tp = int(signal_frequency * step_in_ms / 1000) # in timepoints

In [None]:
results_static_ICA_bin_union_100_600_max_bins50_df = pd.DataFrame()

In [None]:
# Manually test different numbers of spatial filter components.

timepoints_count = 181

if not sys.warnoptions:
    warnings.simplefilter("ignore")
    os.environ["PYTHONWARNINGS"] = "ignore"  # Also affect subprocesses

for n_components in range(min_spatial_filter, max_spatial_filter, step_spatial_filter):
    # NOTE(review): the name ends in "_ampl" although this sweep uses the
    # min/max feature steps - confirm the label is intended.
    pipeline_name = f"{spatial_filter}_{n_components}_bins_union_100_600_ampl"

    # Preprocessing is fitted on the training data once per spatial-filter size.
    preprocessed_pipeline = Pipeline(
        [
            ("extract_epochs", EEGdata(dataset=dataset)),
            # ("narrow_indices", NarrowIndices(start=76, stop=257)),
            ("channels_filtering", ChannelExtraction(significant_channels)),
            ("average_epochs", AveragePerParticipant()),
            ("spatial_filter_preprocessing", SpatialFilterPreprocessing()),
            ("spatial_filter", PCA(n_components=n_components, random_state=random_state)),
            (
                "spatial_filter_postprocessing",
                SpatialFilterPostprocessing(timepoints_count=timepoints_count),
            ),
            ("lowpass_filter", LowpassFilter()),
            ("binning", BinTransformer(step=step_tp)),
        ]
    ).fit(X_train)

    preprocessed_X = preprocessed_pipeline.transform(X_train)

    this_steps = spatial_filter_bins_union_min_max_features(
        spatial_filter_n_components=n_components, timepoints_count=timepoints_count
    )

    # Rate different models. X_test_df and y_rum are passed before the results
    # frame so the argument list matches the other run_experiment calls in this
    # notebook (e.g. the "Better Amplitude" sweep); without them the results
    # frame would land in the X_test_df position.
    # NOTE(review): confirm against the current run_experiment signature.
    results_static_ICA_bin_union_100_600_max_bins50_df = run_experiment(
        tested_regressors,
        regressor_params,
        pipeline_name,
        preprocessed_X,
        X_test,
        y_train,
        y_test,
        dataset_name,
        this_steps,
        preprocessed_pipeline,
        X_test_df,
        y_rum,
        results_static_ICA_bin_union_100_600_max_bins50_df,
    )

In [None]:
# Score the first three result rows on X_test_df / y_rum and store the value in
# the "ex_score" column.
for i in range(3):
    pre_processed_test_X = results_static_ICA_bin_union_100_600_max_bins50_df.pre_processed_pipeline[i].transform(X_test_df)
    estimator = results_static_ICA_bin_union_100_600_max_bins50_df.best_estimator[i]
    score = estimator.score(pre_processed_test_X, y_rum)
    # .loc writes into the frame itself and creates the column on first use, so
    # no separate initialisation of "ex_score" is needed. The chained form
    # df["ex_score"][i] = score needs the column to exist already and may end
    # up assigning to a temporary copy.
    results_static_ICA_bin_union_100_600_max_bins50_df.loc[i, "ex_score"] = score

In [None]:
results_static_ICA_bin_union_100_600_max_bins50_df

In [None]:
results_static_ICA_bin_union_100_600_max_bins50_df.to_pickle("../data/regression_union_100-600_max_bins50_0.3.pkl")

---
# 5

In [None]:
# bins width

step_in_ms = 50  # bin width in milliseconds
# Convert the width to whole timepoints. int() truncates, so the effective bin
# is narrower than step_in_ms whenever the product is not a whole number
# (e.g. 256 * 50 / 1000 = 12.8 -> 12).
step_tp = int(signal_frequency * step_in_ms / 1000) # in timepoints

In [None]:
results_static_ICA_bin_union_100_600_plain_bins50_df = pd.DataFrame()

In [None]:
# Manually test different numbers of spatial filter components.

timepoints_count = 181

if not sys.warnoptions:
    warnings.simplefilter("ignore")
    os.environ["PYTHONWARNINGS"] = "ignore"  # Also affect subprocesses

for n_components in range(min_spatial_filter, max_spatial_filter, step_spatial_filter):
    # NOTE(review): the name ends in "_ampl" although this sweep uses the plain
    # feature steps - confirm the label is intended.
    pipeline_name = f"{spatial_filter}_{n_components}_bins_union_100_600_ampl"

    # Preprocessing is fitted on the training data once per spatial-filter size.
    preprocessed_pipeline = Pipeline(
        [
            ("extract_epochs", EEGdata(dataset=dataset)),
            # ("narrow_indices", NarrowIndices(start=76, stop=257)),
            ("channels_filtering", ChannelExtraction(significant_channels)),
            ("average_epochs", AveragePerParticipant()),
            ("spatial_filter_preprocessing", SpatialFilterPreprocessing()),
            ("spatial_filter", PCA(n_components=n_components, random_state=random_state)),
            (
                "spatial_filter_postprocessing",
                SpatialFilterPostprocessing(timepoints_count=timepoints_count),
            ),
            ("lowpass_filter", LowpassFilter()),
            ("binning", BinTransformer(step=step_tp)),
        ]
    ).fit(X_train)

    preprocessed_X = preprocessed_pipeline.transform(X_train)

    this_steps = spatial_filter_bins_union_plain_features(
        spatial_filter_n_components=n_components, timepoints_count=timepoints_count
    )

    # Rate different models. X_test_df and y_rum are passed before the results
    # frame so the argument list matches the other run_experiment calls in this
    # notebook (e.g. the "Better Amplitude" sweep); without them the results
    # frame would land in the X_test_df position.
    # NOTE(review): confirm against the current run_experiment signature.
    results_static_ICA_bin_union_100_600_plain_bins50_df = run_experiment(
        tested_regressors,
        regressor_params,
        pipeline_name,
        preprocessed_X,
        X_test,
        y_train,
        y_test,
        dataset_name,
        this_steps,
        preprocessed_pipeline,
        X_test_df,
        y_rum,
        results_static_ICA_bin_union_100_600_plain_bins50_df,
    )

In [None]:
# Score each stored best estimator on the external set (X_test_df / y_rum)
# and record the result in the "ex_score" column.
#
# The value is written with a single .loc[row, column] assignment rather than
# the chained form df["ex_score"][i] = ...: the chained form raises KeyError
# when the column does not exist yet and is not guaranteed to write into the
# original frame (it never does under pandas Copy-on-Write). .loc creates the
# column on first use, so no separate initialisation is needed.
#
# NOTE(review): only rows 0..2 are scored - confirm that three is the number
# of result rows that should be evaluated.
for i in range(3):
    # Apply the preprocessing pipeline that was fitted for this result row.
    pre_processed_test_X = results_static_ICA_bin_union_100_600_plain_bins50_df.pre_processed_pipeline[i].transform(X_test_df)
    estimator = results_static_ICA_bin_union_100_600_plain_bins50_df.best_estimator[i]
    score = estimator.score(pre_processed_test_X, y_rum)
    results_static_ICA_bin_union_100_600_plain_bins50_df.loc[i, "ex_score"] = score

In [None]:
# Render the results frame as this cell's output.
results_static_ICA_bin_union_100_600_plain_bins50_df

In [None]:
# Persist the results frame as a pickle; the path is relative to the
# notebook's working directory.
results_static_ICA_bin_union_100_600_plain_bins50_df.to_pickle("../data/regression_union_100-600_plain_bins50_0.3.pkl")

---

In [None]:
# Render a results frame produced by an earlier experiment (defined outside
# this section of the notebook).
results_static_ICA_bin_union_100_600_ampl2_df

In [None]:
# Render a results frame produced by an earlier experiment (defined outside
# this section of the notebook).
results_static_ICA_bin_union_100_600_max_min_without_sx_df

In [None]:
# NOTE(review): this cell repeats the previous one verbatim - possibly a
# different results frame was meant to be shown here.
results_static_ICA_bin_union_100_600_max_min_without_sx_df

## Visualization

In [None]:
# Choose the results frame to visualize. Alternative: load a saved run.
# data_df = pd.read_pickle(
#     "../data/split0.3/regression_union_100-600_baselined_centered_ampl-2-pe-ern_0.3-5_significant.pkl"
# )
# Plain assignment: data_df is a second name for the same DataFrame object,
# not a copy.
data_df = results_static_ICA_bin_union_100_600_baselined_peak_to_peak_components_df
# Ad-hoc label stored as an attribute on the frame; the plotting cell uses it
# to build the output file name.
# NOTE(review): such attributes are presumably not carried over to frames
# derived from this one - confirm before relying on it elsewhere.
data_df.name = "union_100_600_baselined_centered_no_scaler"

In [None]:
# Render the selected results frame as this cell's output.
data_df

#### Extract the coefficients of the ERN and PE feature extraction (ICA) and the coefficients of the estimator

In [None]:
# Take the fitted estimator from result row 0 and pull out
#   * the components_ of the "feature_selection" step of the first and second
#     branch of the "ern_pe_features" union (ERN and Pe by variable naming), and
#   * the coef_ vector of the final "en" step.
#
# Variant for runs with an additional metric as feature (row 1, union directly
# under "features"):
# ern_features = data_df.best_estimator[1]["features"].transformer_list[0][1]["feature_selection"].components_
# pe_features = data_df.best_estimator[1]["features"].transformer_list[1][1]["feature_selection"].components_
_best_model = data_df.best_estimator[0]
_ern_pe_union = _best_model["features"]["ern_pe_features"]

# transformer_list entries are indexed as [branch][1] to get the transformer.
ern_features = _ern_pe_union.transformer_list[0][1]["feature_selection"].components_
pe_features = _ern_pe_union.transformer_list[1][1]["feature_selection"].components_

coeffs = _best_model["en"].coef_

In [None]:
# Inspect the shape of the extracted ERN components.
ern_features.shape

In [None]:
# Inspect the shape of the extracted Pe components.
pe_features.shape

In [None]:
# Inspect the estimator coefficients.
coeffs

#### Weigh components with coeffs from estimator and sum

In [None]:
# Scale every extracted component (one row of *_features) by its estimator
# coefficient. The coefficient vector is laid out as the ERN coefficients
# first, followed by the Pe coefficients, so Pe rows are offset by the number
# of ERN rows.
_n_ern = ern_features.shape[0]

ern_components_weighed = np.array(
    [component * coeffs[idx] for idx, component in enumerate(ern_features)]
)
pe_components_weighed = np.array(
    [component * coeffs[_n_ern + idx] for idx, component in enumerate(pe_features)]
)

#### Sum all feature-extraction components to get the direct weight of a given bin at a given spatial filter component

In [None]:
# Collapse the weighed components along the first axis (one row per extracted
# component), leaving one summed weight per remaining position.
# np.sum(axis=0) makes the reduction axis explicit and runs in native code,
# unlike the Python-level builtin sum() over the rows of an ndarray.
components_weighed_ern_sum = np.sum(ern_components_weighed, axis=0)
components_weighed_pe_sum = np.sum(pe_components_weighed, axis=0)

In [None]:
# Inspect the summed ERN weights.
components_weighed_ern_sum

In [None]:
# Element-wise product of the summed ERN weights and the mean ERN amplitudes.
# NOTE: ern_ampl_mean is assigned in a later cell of this section, so that
# cell has to be executed before this one.
mean_rum_ern = components_weighed_ern_sum * ern_ampl_mean
mean_rum_ern

In [None]:
# Element-wise product of the summed Pe weights and the mean Pe amplitudes.
# NOTE: pe_ampl_mean is assigned in a later cell of this section, so that
# cell has to be executed before this one.
mean_rum_pe = components_weighed_pe_sum * pe_ampl_mean
mean_rum_pe

#### Extract components of spatial filter

In [None]:
# Rebuild (unfitted) ERN / Pe feature branches and fit the preprocessing
# pipeline with a fixed 3-component PCA spatial filter.
#
# NOTE(review): this rebinds `ern_features` and `pe_features`, which earlier
# cells of this section use for the extracted component arrays; re-running
# those cells after this one would operate on Pipeline objects instead.
# NOTE(review): the ERN branch applies StandardScaler while the Pe branch has
# it commented out - confirm the asymmetry is intended.
ern_features = Pipeline(steps=[
                ("ern_data_extraction", ErnTransformer()),
                ("ern_amplitude", ErnAmplitude2()),
                ("data_channel_swap", ChannelDataSwap()),
                ("postprocessing", PostprocessingTransformer()),
                ("scaler", StandardScaler()),
                # ("feature_selection", FastICA(random_state=random_state))
])


pe_features = Pipeline(steps = [
                ("pe_data_extraction", PeTransformer()),
                # ("pe_centered", CenteredPeAfterBaseline()),
                ("pe_amplitude", PeAmplitude2()),
                ("data_channel_swap", ChannelDataSwap()),
                ("postprocessing", PostprocessingTransformer()),
                # ("scaler", StandardScaler()),
                # ("feature_selection", FastICA(random_state=random_state))
])

# Concatenate the outputs of both branches; n_jobs is hard-coded to 10.
ern_pe_features = FeatureUnion([("ern_features", ern_features), ("pe_features", pe_features)], n_jobs = 10)

# Single-step wrapper around the union; it is not added to the pipeline
# below (the corresponding step is commented out).
features = Pipeline([
    ('ern_pe_features', ern_pe_features)

])

# steps = ('features', features)

# Preprocessing up to and including centering, fitted on the training data.
preprocessed_pipeline = Pipeline([("extract_epochs", EEGdata(dataset=dataset)),
    # ("narrow_indices", NarrowIndices(start=76, stop=257)),
    (
        "channels_filtering",
        ChannelExtraction(significant_channels)
    ),
    (
        "average_epochs",
        AveragePerParticipant(),
    ),
    ("spatial_filter_preprocessing", SpatialFilterPreprocessing()),
    (
        "spatial_filter",
        PCA(n_components=3, random_state=random_state),
    ),
    (
        "spatial_filter_postprocessing",
        SpatialFilterPostprocessing(
            timepoints_count=181,
        ),
    ),
    ("lowpass_filter", LowpassFilter()),
    # Bin width is hard-coded to 12 timepoints here.
    ("binning", BinTransformer(step=12)),
    ("baseline", ErnBaselined()),
    ("centering", CenteredSignalAfterBaseline()),
    # ('features', features)

                          ]).fit(X_train)
# Apply the fitted preprocessing to the external test data.
preprocessed_X_test = preprocessed_pipeline.transform(X_test_df)

In [None]:
# Transform the training data with the currently bound preprocessing pipeline.
preprocessed_X = preprocessed_pipeline.transform(X_train)

In [None]:
# Inspect the shape of the transformed training data.
preprocessed_X.shape

In [None]:
# Take columns 3..5 of the transformed training data and average them over
# the first axis.
# NOTE(review): presumably these are the Pe amplitudes of the three spatial
# filter components - verify against the column layout of preprocessed_X.
pe_ampl = preprocessed_X[:,3:6]
pe_ampl_mean = np.mean(pe_ampl, axis=0)
pe_ampl_mean

In [None]:
# Take columns 0..2 of the transformed training data and average them over
# the first axis.
# NOTE(review): presumably these are the ERN amplitudes of the three spatial
# filter components - verify against the column layout of preprocessed_X.
ern_ampl = preprocessed_X[:,0:3]
ern_ampl_mean = np.mean(ern_ampl, axis=0)

In [None]:
# Inspect the mean of the ERN amplitude columns.
ern_ampl_mean

In [None]:
# Same preprocessing chain as the pipeline built in the cell that defines
# ern_pe_features, with the ERN/Pe feature union appended as the final step.
# Rebinds preprocessed_pipeline, preprocessed_X and pre_processed_test_X.
# steps = ('features', features)
preprocessed_pipeline = Pipeline([("extract_epochs", EEGdata(dataset=dataset)),
            # ("narrow_indices", NarrowIndices(start=76, stop=257)),
            (
                "channels_filtering",
                ChannelExtraction(significant_channels)
            ),
            (
                "average_epochs",
                AveragePerParticipant(),
            ),
            ("spatial_filter_preprocessing", SpatialFilterPreprocessing()),
            (
                "spatial_filter",
                PCA(n_components=3, random_state=random_state),
            ),
            (
                "spatial_filter_postprocessing",
                SpatialFilterPostprocessing(
                    timepoints_count=181,
                ),
            ),
            ("lowpass_filter", LowpassFilter()),
            # Bin width is hard-coded to 12 timepoints here.
            ("binning", BinTransformer(step=12)),
            ("baseline", ErnBaselined()),
            ("centering", CenteredSignalAfterBaseline()),
            ('ern_pe_features', ern_pe_features)
                                  ]).fit(X_train)

# Transform both the training data and the external test data.
preprocessed_X = preprocessed_pipeline.transform(X_train)
pre_processed_test_X = preprocessed_pipeline.transform(X_test_df)

In [None]:
# ern_features = Pipeline(steps=[
#                     ("ern_extraction", CenteredERN(step=16)),
#                     ("binning", BinTransformer(step=16)),
# #                     ("data_channel_swap", ChannelDataSwap()),
# #                     ("postprocessing", PostprocessingTransformer()),
# #                     ("scaler", StandardScaler()),
# #                     ("feature_selection", FastICA(random_state=random_state))
# # 
# ])

# pe_features = Pipeline(steps = [
#                         ("pe_extraction", CenteredPe(step=16)),
#                         ("binning", BinTransformer(step=16)),
# #                         # ("data_channel_swap", ChannelDataSwap()),
# #                         # ("postprocessing", PostprocessingTransformer()),
# #                         # ("scaler", StandardScaler()),
# #                         # ("feature_selection", FastICA(random_state=random_state))
#         ])
    
# #         ern_pe_features = FeatureUnion([("ern_features", ern_features), ("pe_features", pe_features)], n_jobs = 10)

# #         features = Pipeline([
# #             ('ern_pe_features', ern_pe_features)

# #         ])

# #         steps = ('features', features)

# ern_fitted = ern_features.fit_transform(preprocessed_X)
# ern_test_fitted = ern_features.transform(pre_processed_test_X)

In [None]:
# Average the fitted ERN features over the first axis, for train and test.
# NOTE(review): the only assignments of ern_fitted / ern_test_fitted visible
# in this section are commented out in the preceding cell; unless they are
# defined elsewhere in the notebook this cell raises NameError.
ern_fitted_mean = np.mean(ern_fitted, axis=0)
ern_test_fitted_mean = np.mean(ern_test_fitted, axis=0)

In [None]:
# Inspect the shape of the averaged test-set ERN features.
ern_test_fitted_mean.shape

In [None]:
import matplotlib.pyplot as plt

# Quick look at the first entry of the averaged training ERN features.
plt.plot(ern_fitted_mean[0])

---

In [None]:
# Number of spatial filter components used for the visualization.
spatial_filter_n_components = 3

# Build the preprocessing steps with the helper and fit/transform the
# training data in one go.
# NOTE: the next cell rebinds pre_processed_X using preprocessed_pipeline.
this_steps = spatial_filter_bins_steps(spatial_filter_n_components=spatial_filter_n_components, timepoints_count=181)
pre_processed_X = Pipeline(steps=this_steps).fit_transform(X_train)

In [None]:
# Transform the external test data and the training data with the currently
# bound (already fitted) preprocessing pipeline.
pre_processed_test_X = preprocessed_pipeline.transform(X_test_df)
pre_processed_X = preprocessed_pipeline.transform(X_train)

In [None]:
# Signal averaged over the first axis of pre_processed_X (across all
# participants, per the original note).
mean_X_1 = np.mean(pre_processed_X, axis=0)

In [None]:
# Inspect the shape of the fitted ERN features.
# NOTE(review): ern_fitted is only assigned in commented-out code within this
# section - confirm it is defined elsewhere.
ern_fitted.shape

In [None]:
# Inspect the shape of the averaged training signal.
# NOTE: mean_X is assigned in a later cell of this section, so that cell has
# to be executed first.
mean_X.shape

In [None]:
# Inspect the shape of the transformed external test data.
pre_processed_test_X.shape

In [None]:
# Signals averaged over the first axis (across all participants, per the
# original note): mean_X for the training data, mean_2_X for the external
# test data.
mean_X = np.mean(preprocessed_X, axis=0)
mean_2_X = np.mean(pre_processed_test_X, axis=0)

-----
### Plot results

In [None]:
# Convert the ERN and Pe time windows (given in seconds) into bin indices.

tmin, tmax = -0.1, 0.6  # start and end of the segments
signal_frequency = 256

step_in_ms = 50  # bin width in milliseconds
step_tp = int(signal_frequency * step_in_ms / 1000)  # bin width in timepoints

# Window edges used to slice an epoch into its ERN and Pe parts (in seconds).
start_ern, stop_ern = 0, 0.15
start_pe, stop_pe = 0.15, 0.35


def _seconds_to_bin(t):
    """Return the bin index for time ``t``: truncated bin count since tmin, plus one."""
    return int(signal_frequency * (t - tmin) / step_tp) + 1


start_ern_bin, stop_ern_bin, start_pe_bin, stop_pe_bin = map(
    _seconds_to_bin, (start_ern, stop_ern, start_pe, stop_pe)
)

In [None]:
# Manual override of the computed bin indices: ERN window = bins [0, 3),
# Pe window = bins [3, 7).
start_ern_bin, stop_ern_bin = 0, 3
start_pe_bin, stop_pe_bin = 3, 7

In [None]:
# Number of spatial filter components; matches the PCA(n_components=3) used
# by the visualization pipelines in this section.
spatial_filter_n_components = 3

In [None]:
# Index of the spatial filter component to plot: 0, 1 or 2.
this_component = 2

# Disabled: per-component step sizes derived from the feature shapes.
# pe_step = int(pe_features.shape[1]/ spatial_filter_n_components)
# ern_step = int(ern_features.shape[1]/ spatial_filter_n_components)
# spatial_filter_step = int(pre_processed_X.shape[1]/3)

In [None]:
# NOTE(review): the only assignment of spatial_filter_step visible in this
# section is commented out in the previous cell; unless it is defined
# elsewhere in the notebook this cell raises NameError.
spatial_filter_step

In [None]:
# Inspect the shape of the weighed ERN components.
ern_components_weighed.shape

In [None]:
# Inspect the sign-flipped first entry of the averaged ERN features.
-ern_fitted_mean[0]

In [None]:
# Inspect the shape of the first three values of the first weighed ERN
# component.
ern_components_weighed[0][0:3].shape

In [None]:
# Scratch check: shape of a 4-element range, i.e. (4,).
np.arange(0,4).shape

In [None]:
# Plot, for the selected spatial filter component, the summed ERN / Pe
# weights (left axis, magenta points) on top of the sign-flipped averaged
# signal (right axis, black line), then save the figure as a PNG.
import seaborn as sns
import matplotlib.pyplot as plt  # pyplot instead of the discouraged pylab

sns.set(rc={"figure.figsize": (11.7, 8.27)})
sns.set_style("white")

fig, ax1 = plt.subplots()

# Left axis: weights. Limits span the weighed ERN minimum to the weighed Pe
# maximum, with a small margin on both sides.
ax1.set(
    ylim=(
        np.min(ern_components_weighed) - 0.1,
        np.max(pe_components_weighed) + 0.05,
    )
)
ax1.tick_params(axis="y", color="magenta", width=3, length=10)

# Reference lines: zero level and vertical markers at x=2 and x=6.
ax1.axhline(y=0, color="grey", linewidth=2, linestyle="--", alpha=0.5)
ax1.axvline(x=2, color="grey", linewidth=2, linestyle="--", alpha=0.5)
ax1.axvline(x=6, color="grey", linewidth=2, linestyle="--", alpha=0.5)

# Right axis: averaged signal, with fixed limits.
ax2 = ax1.twinx()
ax2.set(ylim=(-1e-5, 2.5e-5))
ax2.tick_params(axis="y", color="black")

# Summed weights of the chosen component: Pe drawn at x=4, ERN at x=1.
sns.scatterplot(x=[4], y=components_weighed_pe_sum[this_component], ax=ax1, color="magenta")
sns.scatterplot(x=[1], y=components_weighed_ern_sum[this_component], ax=ax1, color="magenta")

# x / y are passed by keyword, like the scatterplot calls above; seaborn
# 0.12+ no longer accepts them positionally.
# NOTE(review): the x range is hard-coded to 10 points - confirm it matches
# the length of mean_X[this_component].
sns_plot = sns.lineplot(
    x=np.arange(0, 10),
    y=-mean_X[this_component],
    ax=ax2,
    color="black",
    linewidth=3,
)

# The file name is built from the label stored on data_df and the component
# index.
sns_plot.figure.savefig(f"{data_df.name}_output_{this_component}.png")

In [None]:
# Mean of the training targets.
np.mean(y_train)

In [None]:
# Mean of the external-set targets (y_rum).
np.mean(y_rum)

# CURRENT BEST RESULTS

In [None]:
# Load a saved results frame and display it. Rebinds results_df.
# NOTE: unpickling can execute arbitrary code - only load files you trust.
results_df = pd.read_pickle(
    "../data/split0.3/regression_union_100-600_ampl_bins50_0.3_significant.pkl"
)
results_df

In [None]:
# Load a saved results frame and display it. Rebinds results_df.
# NOTE: unpickling can execute arbitrary code - only load files you trust.
results_df = pd.read_pickle(
    "../data/split0.3/regression_union_100-600_centered_signal_ampl_0.3-5_significant.pkl"
)
results_df

In [None]:
# Load a saved results frame and display it. Rebinds results_df.
# NOTE: unpickling can execute arbitrary code - only load files you trust.
results_df = pd.read_pickle(
    "../data/split0.3/regression_union_100-600_centered_signal_baselined-to-0-bin_signal_0.3-5.pkl"
)
results_df

In [None]:
# Load a saved results frame and display it. Rebinds results_df.
# NOTE: unpickling can execute arbitrary code - only load files you trust.
results_df = pd.read_pickle(
    "../data/split0.3/regression_union_100-600_baselined_centered_ampl-2-pe-ern_0.3-5_significant.pkl"
)
results_df