# Rumination prediction - averaged participants' epochs

### Imports

In [None]:
%load_ext lab_black
import os
import pickle
from time import time
import pywt
import mne
import scipy
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
import pandas as pd
import cesium.featurize
from plotly.subplots import make_subplots
from ipywidgets import Dropdown, FloatRangeSlider, IntSlider, FloatSlider, interact
from sklearn.decomposition import FastICA
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import StratifiedKFold
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.decomposition import PCA
from sklearn.base import TransformerMixin, BaseEstimator


import sys

sys.path.append("..")
from utils import *

### Loading data

Load the EEG data together with the data from the rumination questionnaire. By default, `create_df_data` loads all the information from the given file; to load only part of it, pass a list of the desired labels from the CSV file.

In [None]:
# Epoch window bounds (presumably seconds relative to the event — TODO confirm).
tmin = -0.1
tmax = 0.6
# Sampling rate in samples per second; used below to convert milliseconds to
# timepoints and as the sampling frequency of the low-pass filter.
signal_frequency = 256
# Values of the "marker" column that select error vs. correct epochs.
ERROR, CORRECT = 0, 1
# Seed passed as random_state to FastICA, PCA and ElasticNet.
random_state = 0

In [None]:
# Ordered list of the 64 channel labels; the position in this list is what
# channels_dict turns into a numeric channel index.
channels_order_list = (
    "Fp1 AF7 AF3 F1 F3 F5 F7 FT7 "
    "FC5 FC3 FC1 C1 C3 C5 T7 TP7 "
    "CP5 CP3 CP1 P1 P3 P5 P7 P9 "
    "PO7 PO3 O1 Iz Oz POz Pz CPz "
    "Fpz Fp2 AF8 AF4 AFz Fz F2 F4 "
    "F6 F8 FT8 FC6 FC4 FC2 FCz Cz "
    "C2 C4 C6 T8 TP8 CP6 CP4 CP2 "
    "P2 P4 P6 P8 P10 PO8 PO4 O2"
).split()

In [None]:
# Map every channel label to its 1-based position in channels_order_list.
# The range is derived from the list length so that no label is dropped: the
# previous fixed range np.arange(1, 64) produced only 63 indices for 64 labels,
# and zip() silently discarded the last one ("O2").
# NOTE(review): indices start at 1, yet ChannelExtraction uses them directly as
# array indices — confirm that the data layout really is 1-based.
channels_dict = dict(
    zip(channels_order_list, np.arange(1, len(channels_order_list) + 1, 1))
)

Define significant channels - the rest will be excluded

In [None]:
# The 15 channel labels kept for analysis: every region prefix combined with
# each of the three lateral positions, listed region by region.
red_box = [
    f"{region}{position}"
    for region in ("F", "FC", "C", "CP", "P")
    for position in ("1", "z", "2")
]
# Numeric channel indices looked up in channels_dict, in red_box order.
significant_channels = [channels_dict[label] for label in red_box]

Read the data

In [None]:
df_name = "go_nogo_df_mean"
pickled_data_filename = "../../data/" + df_name + ".pkl"
info_filename = "../../data/Demographic_Questionnaires_Behavioral_Results_N=163.csv"

# Use the pickled DataFrame as a cache: load it when present, otherwise build
# the DataFrame with create_df_data and write the cache for the next run.
# NOTE: unpickling executes arbitrary code — only load cache files you created.
if os.path.isfile(pickled_data_filename):
    print("Pickled file found. Loading pickled data...")
    epochs_df = pd.read_pickle(pickled_data_filename)
    print("Done")
else:
    print("Pickled file not found. Loading data...")
    epochs_df = create_df_data(
        test_participants=False, info="all", personal=False, info_filename=info_filename
    )
    epochs_df.name = df_name
    # Write to exactly the path that is checked above, so the existence check
    # and the cache file can never drift apart.
    epochs_df.to_pickle(pickled_data_filename)
    print("Done. Pickle file created")

#### Average participants' error and correct epochs

In [None]:
# Collapse epochs_df to one row per ("id", "marker") pair: within each group
# the "epoch" values and the "Rumination Full Scale" values are averaged with
# np.mean. sort=False keeps the groups in their order of first appearance, and
# reset_index() turns the group keys back into ordinary columns.
averaged_epochs_df = (
    epochs_df.groupby(
        ["id", "marker"],
        sort=False,
    )
    .apply(
        lambda group_df: pd.Series(
            {
                "epoch": np.mean(group_df["epoch"]),
                "Rumination Full Scale": np.mean(group_df["Rumination Full Scale"]),
            }
        )
    )
    .reset_index()
)

---------------------------------

## Training and predictions

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso
from sklearn.linear_model import ElasticNet
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.model_selection import StratifiedKFold
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import FunctionTransformer
from sklearn.dummy import DummyRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVR
from tempfile import mkdtemp
from sklearn.linear_model import Ridge
from sklearn.kernel_ridge import KernelRidge

from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
import warnings

warnings.filterwarnings("ignore")


import numpy as np
import scipy.stats

#### Standard features for EEG analysis provided by Guo et al. (2012)

In [None]:
def std_signal(t, m, e):
    """Return the standard deviation of the values ``m``.

    ``t`` and ``e`` are accepted only to match the three-argument signature
    shared by the feature functions in this notebook; they are not used.
    """
    spread = np.std(m)
    return spread


def abs_diffs_signal(t, m, e):
    """Return the sum of absolute differences between consecutive values of ``m``.

    ``t`` and ``e`` are accepted only to match the three-argument signature
    shared by the feature functions in this notebook; they are not used.
    """
    consecutive_steps = np.diff(m)
    return np.sum(np.abs(consecutive_steps))


def mean_energy_signal(t, m, e):
    """Return the mean of the squared values of ``m``.

    ``t`` and ``e`` are accepted only to match the three-argument signature
    shared by the feature functions in this notebook; they are not used.
    """
    squared = m ** 2
    return np.mean(squared)


def skew_signal(t, m, e):
    """Return the skewness of ``m`` as computed by ``scipy.stats.skew``.

    ``t`` and ``e`` are accepted only to match the three-argument signature
    shared by the feature functions in this notebook; they are not used.
    """
    skewness = scipy.stats.skew(m)
    return skewness


def mean_signal(t, m, e):
    """Return the arithmetic mean of the values ``m``.

    ``t`` and ``e`` are accepted only to match the three-argument signature
    shared by the feature functions in this notebook; they are not used.
    """
    average = np.mean(m)
    return average

In [None]:
# Feature name -> feature function, in the order the features are requested.
guo_features = dict(
    mean=mean_signal,
    std=std_signal,
    mean_energy=mean_energy_signal,
)

# Bin width in milliseconds and the same width converted to whole timepoints
# (truncated towards zero) at the sampling rate signal_frequency.
step_in_ms = 50
step_tp = int(signal_frequency * step_in_ms / 1000)

#### Calculate p-value with permutation test

In [None]:
from sklearn.model_selection import permutation_test_score


def calculate_p_permutations(estimator, X, y, cv=3, n_permutations=100, n_jobs=10):
    """Run scikit-learn's permutation test for ``estimator`` and report it.

    Forwards ``cv``, ``n_permutations`` and ``n_jobs`` unchanged to
    ``permutation_test_score``, prints the resulting p-value and score, and
    returns them as ``(score, p_value)``. The per-permutation scores are
    discarded.
    """
    observed_score, _, p_value = permutation_test_score(
        estimator, X, y, cv=cv, n_permutations=n_permutations, n_jobs=n_jobs
    )

    # summarize
    print(f"     The permutation P-value is = {p_value:.3f}")
    print(f"     The permutation score is = {observed_score:.3f}\n")

    return observed_score, p_value

In [None]:
# from mlxtend.evaluate import paired_ttest_5x2cv


# def calculate_p(estimator, X, y):
#     baseline_estimator = DummyRegressor(strategy="mean")

#     # check if difference between algorithms is real
#     t, p = paired_ttest_5x2cv(
#         estimator1=baseline_estimator,
#         estimator2=estimator,
#         X=X,
#         y=y,
#         scoring="r2",
#         random_seed=0,
#     )

#     # summarize
#     print(f"     The P-value is = {p:.3f}")
#     print(f"     The t-statistics is = {t:.3f}\n")

#     return t, p

#### Validation curves - for parameters' insight

In [None]:
import matplotlib.pyplot as plt


def pooled_var(stds, n=5):
    """Return the pooled standard deviation of equally sized groups.

    Implements the formula from
    https://en.wikipedia.org/wiki/Pooled_variance#Pooled_standard_deviation
    for ``k = len(stds)`` groups of ``n`` samples each:

        sqrt(sum((n - 1) * s_i ** 2) / (k * (n - 1)))

    The previous expression divided by ``k`` and then *multiplied* by
    ``(n - 1)`` because of operator precedence, which inflated the result by a
    factor of ``n - 1``.

    Parameters
    ----------
    stds : array-like
        Per-group standard deviations (a pandas Series works too).
    n : int, default 5
        Size of each group. With equal group sizes it cancels out, so the
        result equals ``sqrt(mean(stds ** 2))``; it must be greater than 1.

    Returns
    -------
    float
        The pooled standard deviation (despite the function's name, this is a
        standard deviation, not a variance).
    """
    stds = np.asarray(stds, dtype=float)
    pooled_variance = np.sum((n - 1) * stds ** 2) / (len(stds) * (n - 1))
    return np.sqrt(pooled_variance)


def show_validation_curves(cv_results, grid_params):
    """Plot train and cross-validation r2 against each searched parameter.

    One subplot is drawn per entry of ``grid_params``. For every parameter the
    rows of ``cv_results`` are grouped by that parameter's value; mean scores
    are averaged across the group and the fold standard deviations are
    combined with ``pooled_var`` to draw the shaded band.

    Parameters
    ----------
    cv_results : dict
        ``GridSearchCV.cv_results_`` of a search scored with "r2" and
        ``return_train_score=True`` (the ``*_train_r2`` / ``*_test_r2`` columns
        are read).
    grid_params : dict
        Parameter name -> sequence of searched values, used as x-coordinates.
        NOTE(review): the grouped scores are ordered by sorted parameter value,
        so each sequence is assumed to be given in ascending order.
    """
    df = pd.DataFrame(cv_results)
    results = [
        "mean_test_r2",
        "mean_train_r2",
        "std_test_r2",
        "std_train_r2",
    ]

    # squeeze=False makes plt.subplots always return a 2-D array of axes, so a
    # grid with a single parameter can be indexed like any other.
    fig, axes = plt.subplots(
        1,
        len(grid_params),
        figsize=(5 * len(grid_params), 7),
        sharey="row",
        squeeze=False,
    )
    axes = axes[0]
    axes[0].set_ylabel("Score", fontsize=25)

    lw = 2
    for idx, (param_name, param_range) in enumerate(grid_params.items()):
        grouped_df = df.groupby(f"param_{param_name}")[results].agg(
            {
                "mean_train_r2": "mean",
                "mean_test_r2": "mean",
                "std_train_r2": pooled_var,
                "std_test_r2": pooled_var,
            }
        )

        ax = axes[idx]
        ax.set_xlabel(param_name, fontsize=10)
        ax.set_ylim(0.0, 1.1)
        ax.set_xscale("log")

        # Training curve with its +/- one pooled standard deviation band.
        ax.plot(
            param_range,
            grouped_df["mean_train_r2"],
            label="Training score",
            color="darkorange",
            lw=lw,
        )
        ax.fill_between(
            param_range,
            grouped_df["mean_train_r2"] - grouped_df["std_train_r2"],
            grouped_df["mean_train_r2"] + grouped_df["std_train_r2"],
            alpha=0.2,
            color="darkorange",
            lw=lw,
        )
        # Cross-validation curve with its band.
        ax.plot(
            param_range,
            grouped_df["mean_test_r2"],
            label="Cross-validation score",
            color="navy",
            lw=lw,
        )
        ax.fill_between(
            param_range,
            grouped_df["mean_test_r2"] - grouped_df["std_test_r2"],
            grouped_df["mean_test_r2"] + grouped_df["std_test_r2"],
            alpha=0.2,
            color="navy",
            lw=lw,
        )

    # A single legend for the whole figure, taken from the first subplot.
    handles, labels = axes[0].get_legend_handles_labels()
    fig.suptitle("Validation curves", fontsize=40)
    fig.legend(handles, labels, loc=8, ncol=2, fontsize=20)

    fig.subplots_adjust(bottom=0.25, top=0.85)
    plt.show()

----------------------------------------

### Create X and y sets

In [None]:
# Marker value of the epochs to model, and the label used for it in the
# results table and in output file names.
dataset = ERROR
if dataset == CORRECT:
    dataset_name = "correct"
else:
    dataset_name = "error"

In [None]:
# Feature array: the averaged epochs of the rows whose marker equals `dataset`.
X_train = np.array(
    averaged_epochs_df.loc[averaged_epochs_df["marker"] == dataset, "epoch"].tolist()
)
# Target vector: the rumination score of the same rows, in the same order.
y_train = np.array(
    averaged_epochs_df.loc[
        averaged_epochs_df["marker"] == dataset, "Rumination Full Scale"
    ].tolist()
)

In [None]:
# No held-out set is used here; empty placeholders keep the experiment
# function signatures satisfied.
X_test, y_test = [], []

----------------------------

### Define searching experiment

In [None]:
def rate_regressor(
    X_train, y_train, X_test, y_test, regressor, regressor_params, base_steps, cv=3
):
    """Grid-search ``regressor`` appended to ``base_steps`` and return the search.

    Parameters
    ----------
    X_train, y_train
        Data the grid search is fitted on.
    X_test, y_test
        Accepted for signature compatibility; not used by this function.
    regressor : tuple
        ``(name, estimator)`` pipeline step appended after ``base_steps``.
    regressor_params : dict
        Parameter grid handed to ``GridSearchCV``.
    base_steps : list
        Pipeline steps that precede the regressor.
    cv : int or cross-validation splitter, default 3
        Number of ``KFold`` splits, or a ready-made splitter. Previously this
        argument was ignored and three folds were always used; the default is
        therefore 3 so that callers relying on it see no change.

    Returns
    -------
    GridSearchCV
        The fitted search, refit on the best "r2" configuration.
    """
    # define cross-validation method
    cv_kf = KFold(n_splits=cv) if isinstance(cv, (int, np.integer)) else cv

    pipeline = Pipeline(steps=base_steps + [regressor])
    grid_search = GridSearchCV(
        pipeline,
        regressor_params,
        cv=cv_kf,
        # A list rather than a set: list is the documented multi-metric form
        # and is accepted by scikit-learn's parameter validation.
        scoring=["r2", "neg_mean_absolute_error"],
        refit="r2",
        return_train_score=True,
        n_jobs=10,
        verbose=10,
    )
    grid_search.fit(X_train, y_train)

    return grid_search

In [None]:
def run_experiment(
    tested_regressors,
    regressor_params,
    pipeline_name,
    X_train,
    X_test,
    y_train,
    y_test,
    dataset_name,
    base_steps,
    results_df,
):
    """Grid-search every regressor and append one summary row per regressor.

    For each ``(regressor, params)`` pair the regressor-specific grid is merged
    over ``regressor_params`` (regressor-specific keys win), ``rate_regressor``
    runs the search with ``cv=3``, and the best configuration is evaluated with
    a permutation test via ``calculate_p_permutations``.

    Parameters
    ----------
    tested_regressors : iterable of ((name, estimator), dict)
        Regressor pipeline steps paired with their own parameter grids.
    regressor_params : dict
        Parameter grid shared by all regressors (pipeline-level parameters).
    pipeline_name, dataset_name : str
        Labels stored in the result rows.
    X_train, y_train
        Data used for the grid search and for the permutation test.
    X_test, y_test
        Forwarded to ``rate_regressor``; not otherwise used.
    base_steps : list
        Pipeline steps preceding the regressor.
    results_df : pandas.DataFrame
        Table the new rows are appended to; it is not modified in place.

    Returns
    -------
    pandas.DataFrame
        ``results_df`` extended by one row per tested regressor. Note that the
        "mean_cv_mae" column holds the *negated* mean absolute error as
        reported by the "neg_mean_absolute_error" scorer.
    """
    for regressor, params in tested_regressors:
        print(f"Rating {regressor} \n")
        tested_params = {**regressor_params, **params}

        grid_result = rate_regressor(
            X_train,
            y_train,
            X_test,
            y_test,
            regressor,
            tested_params,
            base_steps,
            cv=3,
        )

        # Pull the cross-validation statistics of the best configuration.
        cv_results = grid_result.cv_results_
        best_estimator_index = grid_result.best_index_
        mean_cv_r2 = cv_results["mean_test_r2"][best_estimator_index]
        std_cv_r2 = cv_results["std_test_r2"][best_estimator_index]
        mean_cv_neg_mean_absolute_error = cv_results[
            "mean_test_neg_mean_absolute_error"
        ][best_estimator_index]
        std_cv_neg_mean_absolute_error = cv_results[
            "std_test_neg_mean_absolute_error"
        ][best_estimator_index]
        mean_train_r2 = cv_results["mean_train_r2"][best_estimator_index]

        print(f"     Best parameters: {grid_result.best_params_}")
        print(f"     mean r2: {mean_cv_r2}           ± {round(std_cv_r2,3)}")
        print(f"     mean r2 train: {mean_train_r2}")

        scores_, pvalue_ = calculate_p_permutations(
            grid_result.best_estimator_, X_train, y_train
        )

        data = {
            "data_set": dataset_name,
            "pipeline_name": pipeline_name,
            "model": regressor[0],
            "parameters": grid_result.best_params_,
            "mean_cv_r2": mean_cv_r2,
            "std_cv_r2": std_cv_r2,
            "mean_cv_mae": mean_cv_neg_mean_absolute_error,
            "std_cv_mae": std_cv_neg_mean_absolute_error,
            "cv_results": cv_results,
            "mean_train_r2": mean_train_r2,
            "p-value": pvalue_,
            "best_estimator": grid_result.best_estimator_,
        }

        # DataFrame.append was removed in pandas 2.0; concatenating a one-row
        # frame is the supported replacement.
        results_df = pd.concat(
            [results_df, pd.DataFrame([data])], ignore_index=True
        )
    return results_df

---------------------

### Define data transformers - custom data transformation steps

In [None]:
from __future__ import division
from scipy.signal import butter, lfilter


class LowpassFilter(TransformerMixin, BaseEstimator):
    """Stateless transformer applying a 45 Hz low-pass filter to every epoch.

    The filter is a 6th-order digital Butterworth design, applied with
    ``scipy.signal.lfilter``. The sampling rate is read from the module-level
    ``signal_frequency`` at transform time.
    """

    def __init__(self):
        super().__init__()

    def fit(self, X, y=None):
        """Nothing to learn; return the transformer itself."""
        return self

    def transform(self, X):
        """Return ``X`` with every innermost signal low-pass filtered.

        ``X`` is walked as channel -> epoch (following the original naming);
        presumably it has shape (channels, epochs, timepoints) — TODO confirm.
        """
        nyquist = signal_frequency / 2
        cutoff_hz = 45
        # 6th order Butterworth low-pass, cutoff normalised to Nyquist
        numerator, denominator = butter(
            6, cutoff_hz / nyquist, btype="low", analog=False
        )

        return np.array(
            [
                np.array(
                    [lfilter(numerator, denominator, epoch, axis=0) for epoch in channel]
                )
                for channel in X
            ]
        )


class IcaPreprocessing(TransformerMixin, BaseEstimator):
    """Stateless transformer that lays the data out as a 2-D matrix for ICA.

    The items of ``X`` are joined along axis 1 and the result is transposed.
    Presumably ``X`` is (epochs, channels, timepoints), giving an output of
    (epochs * timepoints, channels) — TODO confirm.
    """

    def __init__(self):
        super().__init__()

    def fit(self, X, y=None):
        """Nothing to learn; return the transformer itself."""
        return self

    def transform(self, X):
        """Concatenate the items of ``X`` along axis 1 and transpose."""
        return np.concatenate(X, axis=1).T


class IcaPostprocessing(TransformerMixin, BaseEstimator):
    """Stateless transformer that splits a 2-D ICA output back into epochs.

    Parameters
    ----------
    timepoints_count : int
        Number of timepoints per epoch, used to cut the long time axis.
    """

    def __init__(self, timepoints_count):
        super().__init__()
        self.timepoints_count = timepoints_count

    def fit(self, X, y=None):
        """Nothing to learn; return the transformer itself."""
        return self

    def transform(self, X):
        """Reshape (rows, components) into (components, epochs, timepoints).

        The number of epochs is the row count divided by ``timepoints_count``.
        """
        n_components = X.shape[1]
        components_first = X.T
        n_epochs = int(components_first.shape[1] / self.timepoints_count)
        return components_first.reshape(
            n_components, n_epochs, self.timepoints_count
        )


class Cwt(TransformerMixin, BaseEstimator):
    """Stateless transformer that applies the project's ``cwt`` helper per epoch.

    Parameters
    ----------
    mwt : str, default "morl"
        Wavelet identifier forwarded to ``cwt``.
    cwt_density : int, default 2
        Forwarded to ``cwt`` as its third argument.
    cwt_octaves : int, default 6
        Forwarded to ``cwt`` as its fourth argument.
    """

    def __init__(self, mwt="morl", cwt_density=2, cwt_octaves=6):
        # for octaves=6, the highest frequency is 45.25 Hz
        super().__init__()
        self.mwt = mwt
        self.cwt_density = cwt_density
        self.cwt_octaves = cwt_octaves

    def fit(self, X, y=None):
        """Nothing to learn; return the transformer itself."""
        return self

    def transform(self, X):
        """Return an array holding ``cwt(...)`` of every epoch of every item of ``X``."""
        return np.array(
            [
                np.array(
                    [
                        cwt(epoch, self.mwt, self.cwt_density, self.cwt_octaves)
                        for epoch in data
                    ]
                )
                for data in X
            ]
        )


class CwtFeatureVectorizer(TransformerMixin, BaseEstimator):
    """Stateless transformer that featurizes each item of ``X`` with cesium.

    Parameters
    ----------
    feature_dict : dict
        Feature name -> function; the keys select the features to compute and
        the dict itself is passed to cesium as the custom functions.
    """

    def __init__(self, feature_dict):
        super().__init__()
        self.feature_dict = feature_dict

    def fit(self, X, y=None):
        """Nothing to learn; return the transformer itself."""
        return self

    def _featurize(self, data_cwt):
        # cesium functions: compute the selected custom features for one item
        feature_set = cesium.featurize.featurize_time_series(
            times=None,
            values=data_cwt,
            errors=None,
            features_to_use=list(self.feature_dict.keys()),
            custom_functions=self.feature_dict,
        )
        return feature_set.to_numpy()

    def transform(self, X):
        """Return an array with the cesium feature table of every item of ``X``."""
        return np.array([self._featurize(data_cwt) for data_cwt in X])


# Reshape data from (channels x epochs x features) to (epochs x channels x features)
# and then flatten it to (epochs x channels*features).
class PostprocessingTransformer(TransformerMixin, BaseEstimator):
    """Stateless transformer producing one flat feature row per epoch."""

    def __init__(self):
        super().__init__()

    def fit(self, X, y=None):
        """Nothing to learn; return the transformer itself."""
        return self

    def transform(self, X):
        """Stack the items of ``X`` along axis 1 and flatten all trailing axes."""
        epochs_first = np.stack(X, axis=1)
        n_epochs = epochs_first.shape[0]
        return epochs_first.reshape(n_epochs, -1)


class ChannelExtraction(TransformerMixin, BaseEstimator):
    """Stateless transformer keeping only the listed channels of 3-D data.

    Parameters
    ----------
    channel_list : sequence of int
        Indices along axis 1 of the input to keep, in output order.
    """

    def __init__(self, channel_list):
        super().__init__()
        self.channel_list = channel_list

    def fit(self, X, y=None):
        """Nothing to learn; return the transformer itself."""
        return self

    def transform(self, X):
        """Return ``X`` restricted to ``channel_list`` along axis 1.

        Axes 0 and 1 are swapped so the channels can be picked one by one,
        then swapped back.
        """
        channels_first = np.transpose(X, (1, 0, 2))
        picked = np.array([channels_first[channel] for channel in self.channel_list])
        return np.transpose(picked, (1, 0, 2))


# Swap the channel and epoch axes: from epoch_channel_timepoints to
# channel_epoch_timepoints and vice versa.
class ChannelDataSwap(TransformerMixin, BaseEstimator):
    """Stateless transformer exchanging the first two axes of 3-D data."""

    def __init__(self):
        super().__init__()

    def fit(self, X, y=None):
        """Nothing to learn; return the transformer itself."""
        return self

    def transform(self, X):
        """Return ``X`` with axes 0 and 1 exchanged; axis 2 is untouched."""
        return np.transpose(X, axes=(1, 0, 2))


class BinTransformer(TransformerMixin, BaseEstimator):
    """Stateless transformer averaging each signal over consecutive bins.

    Every innermost signal is cut into non-overlapping windows of ``step``
    samples and each complete window is replaced by its mean. A trailing
    window shorter than ``step`` is discarded.

    Parameters
    ----------
    step : int
        Bin width in samples; must be at least 1.
    """

    def __init__(self, step):
        super().__init__()
        self.step = step

    def bin_epoch(self, epoch):
        """Return, for each channel of ``epoch``, the list of its bin means.

        Raises
        ------
        ValueError
            If ``step`` is smaller than 1 (the previous loop never terminated
            in that case).
        """
        if self.step < 1:
            raise ValueError(f"step must be a positive integer, got {self.step!r}")

        new_channels = []
        for channel in epoch:
            # Count only complete bins. The previous strict `<` comparison
            # dropped the last complete bin whenever the signal length was an
            # exact multiple of `step`.
            complete_bins = len(channel) // self.step
            bins_channel = [
                np.mean(channel[start : start + self.step])
                for start in range(0, complete_bins * self.step, self.step)
            ]
            new_channels.append(bins_channel)
        return new_channels

    def fit(self, X, y=None):
        """Nothing to learn; return the transformer itself."""
        return self

    def transform(self, X):
        """Return an array with the binned version of every epoch in ``X``."""
        binned_data = np.array([self.bin_epoch(epoch) for epoch in X])
        return binned_data


# Transforms the energy of each sub-band into the relative energy of that sub-band.
def RelativeEnergyTransformer():
    """Build a ``FunctionTransformer`` that divides each epoch by its own sum."""

    def transform(X):
        def relative_energies(epoch):
            # Every entry of the epoch is expressed as a fraction of the
            # epoch's total.
            total_energy = np.sum(epoch)
            return np.array([sub_band / total_energy for sub_band in epoch])

        return np.array([relative_energies(epoch) for epoch in X])

    return FunctionTransformer(func=transform)

In [None]:
# ICA-bins + ICA-bins-cwt-features


def ica_bins_features_steps(feature_function_dict):
    """Return the pipeline steps for ICA components -> bins + CWT features.

    After channel selection, ICA and low-pass filtering, two feature branches
    are computed and joined by a ``FeatureUnion``: binned signals ("bins") and
    features of the wavelet transform ("functions", computed with
    ``feature_function_dict``). Scaling and PCA follow.

    Reads the module-level ``significant_channels``, ``random_state``,
    ``X_train`` (for the timepoint count) and ``step_tp`` when called.
    """
    bins_branch = Pipeline(
        [
            ("channel_data_swap", ChannelDataSwap()),
            ("binning", BinTransformer(step=step_tp)),
            ("data_channel_swap", ChannelDataSwap()),
            ("postprocessing_bins", PostprocessingTransformer()),
        ]
    )
    functions_branch = Pipeline(
        [
            ("cwt", Cwt()),
            ("cwt_feature", CwtFeatureVectorizer(feature_dict=feature_function_dict)),
            ("postprocessing_functions", PostprocessingTransformer()),
        ]
    )

    return [
        ("channels_filtering", ChannelExtraction(significant_channels)),
        ("ica_preprocessing", IcaPreprocessing()),
        ("ica", FastICA(random_state=random_state)),
        (
            "ica_postprocessing",
            IcaPostprocessing(timepoints_count=X_train.shape[-1]),
        ),
        ("lowpass_filter", LowpassFilter()),
        (
            "features",
            FeatureUnion([("bins", bins_branch), ("functions", functions_branch)]),
        ),
        ("scaler", StandardScaler()),
        ("feature_selection", PCA(random_state=random_state)),
    ]

In [None]:
# ERP-bins + ERP-bins-cwt-features


def erp_bins_features_steps(feature_function_dict):
    """Return the pipeline steps for ERP signals -> bins + CWT features.

    After channel selection and low-pass filtering (no ICA), two feature
    branches are joined by a ``FeatureUnion``: binned signals ("bins") and
    features of the wavelet transform computed with ``feature_function_dict``.
    Scaling and PCA follow.

    Reads the module-level ``significant_channels``, ``random_state`` and
    ``step_tp`` when called.
    """
    return [
        ("channels_filtering", ChannelExtraction(significant_channels)),
        ("data_channel_swap_filter", ChannelDataSwap()),
        ("lowpass_filter", LowpassFilter()),
        (
            "features",
            FeatureUnion(
                [
                    (
                        "bins",
                        Pipeline(
                            steps=[
                                ("data_channel_swap_after_filter", ChannelDataSwap()),
                                ("binning", BinTransformer(step=step_tp)),
                                ("data_channel_swap", ChannelDataSwap()),
                                ("postprocessing_bins", PostprocessingTransformer()),
                            ]
                        ),
                    ),
                    (
                        # NOTE(review): "functins" looks like a typo of
                        # "functions"; kept because it is the step name.
                        "functins",
                        Pipeline(
                            steps=[
                                ("cwt", Cwt()),
                                (
                                    "cwt_feature",
                                    CwtFeatureVectorizer(
                                        feature_dict=feature_function_dict
                                    ),
                                ),
                                ("postprocessing_func", PostprocessingTransformer()),
                            ]
                        ),
                    ),
                ]
            ),
        ),
        ("scaler", StandardScaler()),
        ("feature_selection", PCA(random_state=random_state)),
    ]

In [None]:
# ICA-bins


def ica_bins_steps():
    """Return the pipeline steps for ICA components -> binned signals.

    Stages: channel selection and ICA, low-pass filtering and binning, then
    scaling and PCA. Reads the module-level ``significant_channels``,
    ``random_state``, ``X_train`` (for the timepoint count) and ``step_tp``
    when called.
    """
    ica_stage = [
        ("channels_filtering", ChannelExtraction(significant_channels)),
        ("ica_preprocessing", IcaPreprocessing()),
        ("ica", FastICA(random_state=random_state)),
        (
            "ica_postprocessing",
            IcaPostprocessing(timepoints_count=X_train.shape[-1]),
        ),
    ]
    binning_stage = [
        ("lowpass_filter", LowpassFilter()),
        ("channel_data_swap", ChannelDataSwap()),
        ("binning", BinTransformer(step=step_tp)),
        ("data_channel_swap", ChannelDataSwap()),
        ("postprocessing", PostprocessingTransformer()),
    ]
    reduction_stage = [
        ("scaler", StandardScaler()),
        ("feature_selection", PCA(random_state=random_state)),
    ]

    return ica_stage + binning_stage + reduction_stage

In [None]:
# ERP-bins
def erp_bins_steps():
    """Return the pipeline steps for ERP signals -> binned signals.

    Stages: channel selection and low-pass filtering, binning, then scaling
    and PCA. Reads the module-level ``significant_channels``, ``random_state``
    and ``step_tp`` when called.
    """
    filtering_stage = [
        ("channels_filtering", ChannelExtraction(significant_channels)),
        ("data_channel_swap_filter", ChannelDataSwap()),
        ("lowpass_filter", LowpassFilter()),
    ]
    binning_stage = [
        ("data_channel_swap_after_filter", ChannelDataSwap()),
        ("binning", BinTransformer(step=step_tp)),
        ("data_channel_swap", ChannelDataSwap()),
        ("postprocessing", PostprocessingTransformer()),
    ]
    reduction_stage = [
        ("scaler", StandardScaler()),
        ("feature_selection", PCA(random_state=random_state)),
    ]

    return filtering_stage + binning_stage + reduction_stage

Generate estimator HTML representation

In [None]:
# from sklearn.utils import estimator_html_repr

# with open("my_estimator.html", "w") as f:
#     f.write(estimator_html_repr(Pipeline(this_steps)))

----------------------

# Experiments

In [None]:
# results_df = pd.DataFrame()

Experiment 0

In [None]:
# pipeline_name = "ICA-bins"
# this_steps = ica_bins_steps()

In [None]:
# pipeline_name = "ERP_bins"
# this_steps = erp_bins_steps()

In [None]:
# pipeline_name = "ICA_bins_functions"
# this_steps = ica_bins_features_steps(guo_features)

In [None]:
# pipeline_name = "ERP_bins_functions"
# this_steps = erp_bins_features_steps(guo_features)

In [None]:
# Pipeline-level grid shared by every regressor: number of ICA components and
# number of PCA components kept by the "feature_selection" step.
regressor_params = {
    "ica__n_components": np.arange(3, 16, 2),
    "feature_selection__n_components": np.arange(1, 9, 1),
}

In [None]:
# Empty table that the hyper-parameter sweeps assign their run_experiment
# results back into.
res = pd.DataFrame()

In [None]:
# ElasticNet sweep: one run_experiment call (and therefore one result row and
# one permutation test) per (alpha, l1_ratio) combination.
for alpha in [1]:
    for l1_ratio in [0.2, 0.3, 0.4, 0.5, 0.6, 0.7]:
        en = ("en", ElasticNet(random_state=random_state))
        en_params = {"en__alpha": [alpha], "en__l1_ratio": [l1_ratio]}
        tested_regressors = [(en, en_params)]

        # rate different models
        res = run_experiment(
            tested_regressors=tested_regressors,
            regressor_params=regressor_params,
            pipeline_name=pipeline_name,
            X_train=X_train,
            X_test=X_test,
            y_train=y_train,
            y_test=y_test,
            dataset_name=dataset_name,
            base_steps=this_steps,
            results_df=res,
        )

In [None]:
# RBF kernel ridge sweep: one run_experiment call per (alpha, gamma) pair.
for alpha in [1e-5, 1e-4, 1e-3, 1e-2, 0.1, 0.2, 0.3, 0.4]:
    for gamma in [1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 0.1, 1]:
        kr = ("kr", KernelRidge(kernel="rbf"))
        kr_params = {"kr__alpha": [alpha], "kr__gamma": [gamma]}
        tested_regressors = [(kr, kr_params)]

        # rate different models
        res = run_experiment(
            tested_regressors=tested_regressors,
            regressor_params=regressor_params,
            pipeline_name=pipeline_name,
            X_train=X_train,
            X_test=X_test,
            y_train=y_train,
            y_test=y_test,
            dataset_name=dataset_name,
            base_steps=this_steps,
            results_df=res,
        )

In [None]:
# Linear SVR sweep: one run_experiment call per (C, epsilon) pair. gamma is
# fixed to "scale"; a gamma sweep was tried earlier and is disabled.
for C in [1e-5, 1e-4, 1e-3, 1e-2, 0.1, 1, 10]:
    for epsilon in [1e-4, 1e-3, 1e-2, 0.1, 1]:
        svr = ("svr", SVR())
        svr_params = {
            "svr__kernel": ["linear"],
            "svr__C": [C],
            "svr__gamma": ["scale"],
            "svr__epsilon": [epsilon],
        }
        tested_regressors = [(svr, svr_params)]

        # rate different models
        res = run_experiment(
            tested_regressors=tested_regressors,
            regressor_params=regressor_params,
            pipeline_name=pipeline_name,
            X_train=X_train,
            X_test=X_test,
            y_train=y_train,
            y_test=y_test,
            dataset_name=dataset_name,
            base_steps=this_steps,
            results_df=res,
        )

In [None]:
# Start from an empty results table; run_experiment returns it extended.
results_df = pd.DataFrame()

In [None]:
# Single-point grids, one per model.
en = ("en", ElasticNet(random_state=random_state))
en_params = {"en__alpha": [1], "en__l1_ratio": [0.7]}

kr = ("kr", KernelRidge(kernel="rbf"))
kr_params = {"kr__alpha": [0.4], "kr__gamma": [0.001]}

svr = ("svr", SVR())
svr_params = {
    "svr__kernel": ["linear"],
    # previously searched C: 0.00001, 0.0001, 0.001, 0.01, 0.1, 1, 10
    "svr__C": [0.01],
    "svr__gamma": ["scale"],
    # previously searched epsilon: 0.00001, 0.0001, 0.001, 0.01, 0.1, 1
    "svr__epsilon": [1],
}

tested_regressors = [(svr, svr_params), (kr, kr_params), (en, en_params)]

In [None]:
# Evaluate every entry of tested_regressors on the currently selected pipeline
# (this_steps) and collect one row per regressor in results_df.
results_df = run_experiment(
    tested_regressors,
    regressor_params,
    pipeline_name,
    X_train,
    X_test,
    y_train,
    y_test,
    dataset_name,
    this_steps,
    results_df,
)

In [None]:
# Display the collected results as the cell output.
results_df

In [None]:
# Persist the results table; the file name carries the dataset label
# ("correct" or "error").
results_df.to_pickle(
    "../../data/regression_ICA_ERP_all_upto_8_features_" + dataset_name + ".pkl"
)

In [None]:
# with pd.option_context("display.max_rows", 2000):
#     display(results_df)

------------------------------------------------------------------------------------

In [None]:
# Label stored in the "pipeline_name" column of the result rows below.
pipeline_name = "ICA_bins_func"

In [None]:
# Feature name -> feature function; abs_diffs_signal is deliberately left out.
guo_features = dict(
    mean=mean_signal,
    std=std_signal,
    mean_energy=mean_energy_signal,
)

# Bin width in milliseconds and in whole timepoints at signal_frequency.
step_in_ms = 50
step_tp = int(signal_frequency * step_in_ms / 1000)

# Pipeline steps built by a helper that is not defined in this notebook
# (presumably provided by the `utils` star import — TODO confirm).
this_steps = create_feature_bins_nocwt_pipeline(guo_features)

In [None]:
# Pipeline-level grid shared by every regressor: number of ICA components and
# number of PCA components kept by the "feature_selection" step.
regressor_params = {
    "ica__n_components": np.arange(3, 16, 2),
    "feature_selection__n_components": np.arange(3, 9, 1),
}

In [None]:
# ElasticNet sweep: one run_experiment call (one result row and one
# permutation test) per (alpha, l1_ratio) combination.
for alpha in [0.01, 0.1, 1, 5, 7, 10]:
    for l1_ratio in [1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 0.1, 0.3, 0.5, 0.7, 1]:
        en = ("en", ElasticNet(random_state=random_state))
        en_params = {"en__alpha": [alpha], "en__l1_ratio": [l1_ratio]}
        tested_regressors = [(en, en_params)]

        # rate different models
        results_df = run_experiment(
            tested_regressors=tested_regressors,
            regressor_params=regressor_params,
            pipeline_name=pipeline_name,
            X_train=X_train,
            X_test=X_test,
            y_train=y_train,
            y_test=y_test,
            dataset_name=dataset_name,
            base_steps=this_steps,
            results_df=results_df,
        )

In [None]:
# RBF kernel ridge sweep: one run_experiment call per (alpha, gamma) pair.
for alpha in [1e-3, 1e-2, 0.1, 1, 5, 7]:
    for gamma in [1e-7, 1e-6, 1e-5, 1e-4, 1e-3]:
        kr = ("kr", KernelRidge(kernel="rbf"))
        kr_params = {"kr__alpha": [alpha], "kr__gamma": [gamma]}
        tested_regressors = [(kr, kr_params)]

        # rate different models
        results_df = run_experiment(
            tested_regressors=tested_regressors,
            regressor_params=regressor_params,
            pipeline_name=pipeline_name,
            X_train=X_train,
            X_test=X_test,
            y_train=y_train,
            y_test=y_test,
            dataset_name=dataset_name,
            base_steps=this_steps,
            results_df=results_df,
        )

In [None]:
# Linear SVR sweep: one run_experiment call per (C, epsilon) pair.
for C in [0.0001, 0.001, 0.01, 0.1, 1, 10]:
    for epsilon in [0.0001, 0.001, 0.01, 0.1, 1]:

        svr = ("svr", SVR())
        svr_params = dict(
            svr__kernel=["linear"],
            svr__C=[C],
            svr__gamma=["scale"],
            # Double underscore: "<step name>__<parameter>". The previous key
            # "svr_epsilon" did not address the "svr" step's epsilon at all.
            svr__epsilon=[epsilon],
        )

        tested_regressors = [
            (svr, svr_params),
        ]

        # rate different models
        results_df = run_experiment(
            tested_regressors,
            regressor_params,
            pipeline_name,
            X_train,
            X_test,
            y_train,
            y_test,
            dataset_name,
            this_steps,
            results_df,
        )

-----------------------------------

In [None]:
# Label stored in the "pipeline_name" column of the result rows below.
pipeline_name = "ERP_bins_func"

In [None]:
# Feature name -> feature function; abs_diffs_signal is deliberately left out.
guo_features = dict(
    mean=mean_signal,
    std=std_signal,
    mean_energy=mean_energy_signal,
)

# Bin width in milliseconds and in whole timepoints at signal_frequency.
step_in_ms = 50
step_tp = int(signal_frequency * step_in_ms / 1000)

# Pipeline steps built by a helper that is not defined in this notebook
# (presumably provided by the `utils` star import — TODO confirm).
this_steps = create_feature_erp_pipeline(guo_features)

In [None]:
# Pipeline-level grid shared by every regressor.
# NOTE(review): this grid addresses an "ica" step, while the ERP step builders
# defined in this notebook contain none — confirm that the steps returned by
# create_feature_erp_pipeline really include a step named "ica".
regressor_params = {
    "ica__n_components": np.arange(3, 16, 2),
    "feature_selection__n_components": np.arange(3, 9, 1),
}

In [None]:
# ElasticNet sweep: one run_experiment call (one result row and one
# permutation test) per (alpha, l1_ratio) combination.
for alpha in [0.01, 0.1, 1, 5, 7, 10]:
    for l1_ratio in [1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 0.1, 0.3, 0.5, 0.7, 1]:
        en = ("en", ElasticNet(random_state=random_state))
        en_params = {"en__alpha": [alpha], "en__l1_ratio": [l1_ratio]}
        tested_regressors = [(en, en_params)]

        # rate different models
        results_df = run_experiment(
            tested_regressors=tested_regressors,
            regressor_params=regressor_params,
            pipeline_name=pipeline_name,
            X_train=X_train,
            X_test=X_test,
            y_train=y_train,
            y_test=y_test,
            dataset_name=dataset_name,
            base_steps=this_steps,
            results_df=results_df,
        )

In [None]:
# RBF kernel ridge sweep: one run_experiment call per (alpha, gamma) pair.
for alpha in [1e-3, 1e-2, 0.1, 1, 5, 7]:
    for gamma in [1e-7, 1e-6, 1e-5, 1e-4, 1e-3]:
        kr = ("kr", KernelRidge(kernel="rbf"))
        kr_params = {"kr__alpha": [alpha], "kr__gamma": [gamma]}
        tested_regressors = [(kr, kr_params)]

        # rate different models
        results_df = run_experiment(
            tested_regressors=tested_regressors,
            regressor_params=regressor_params,
            pipeline_name=pipeline_name,
            X_train=X_train,
            X_test=X_test,
            y_train=y_train,
            y_test=y_test,
            dataset_name=dataset_name,
            base_steps=this_steps,
            results_df=results_df,
        )

In [None]:
# Linear SVR sweep: one run_experiment call per (C, epsilon) pair.
for C in [0.0001, 0.001, 0.01, 0.1, 1, 10]:
    for epsilon in [0.0001, 0.001, 0.01, 0.1, 1]:

        svr = ("svr", SVR())
        svr_params = dict(
            svr__kernel=["linear"],
            svr__C=[C],
            svr__gamma=["scale"],
            # Double underscore: "<step name>__<parameter>". The previous key
            # "svr_epsilon" did not address the "svr" step's epsilon at all.
            svr__epsilon=[epsilon],
        )

        tested_regressors = [
            (svr, svr_params),
        ]

        # rate different models
        results_df = run_experiment(
            tested_regressors,
            regressor_params,
            pipeline_name,
            X_train,
            X_test,
            y_train,
            y_test,
            dataset_name,
            this_steps,
            results_df,
        )

-----------------------

In [None]:
# Evaluate whatever `tested_regressors` is currently bound to; the returned
# frame replaces results_df.
# NOTE(review): `steps_` is not defined anywhere in this part of the notebook -
# confirm it is still created by an earlier cell before running this one.
results_df = run_experiment(
    tested_regressors,
    regressor_params,
    pipeline_name,
    X_train,
    X_test,
    y_train,
    y_test,
    dataset_name,
    steps_,
    results_df,
)

-------------------------------------------------------

### Experiment 1
- Models: KNN, GBR, Lasso, SVR
- vectorize with ICA-cwt-PCA

Tuned

In [None]:
# Pipeline label passed to run_experiment by the Experiment 1 cells.
pipeline_name = "ICA_cut_cwt"

In [None]:
# Candidate models as (step name, estimator) pairs, each with its own grid
# keyed as "<step name>__<parameter>".
knn = ("knn", KNeighborsRegressor())
knn_params = {"knn__n_neighbors": np.arange(15, 30, 3)}

# tuning note: 1+0.0001 rbf - greater overfit vs 0.0001 linear - minor overfit
svr = ("svr", SVR())
svr_params = {
    "svr__kernel": ["rbf", "linear"],
    "svr__C": [1, 0.1, 0.001, 0.0001],
    "svr__gamma": [0.0001],
}

gbr = ("gbr", GradientBoostingRegressor())
gbr_params = {"gbr__n_estimators": np.arange(1, 40, 5)}

lasso = ("lasso", Lasso())
lasso_params = {"lasso__alpha": [1.1]}

In [None]:
# Grid for the shared preprocessing steps, keyed as "<step name>__<parameter>".
regressor_params = {
    "ica__n_components": np.arange(3, 16, 2),
    "pca__n_components": np.arange(3, 9),
}

In [None]:
# (model, grid) pairs evaluated in this experiment; (gbr, gbr_params) is
# currently left out.
tested_regressors = list(
    zip(
        (lasso, knn, svr),
        (lasso_params, knn_params, svr_params),
    )
)

In [None]:
# Ordered (name, transformer) steps placed in front of every tested model.
# The step names "ica" and "pca" are the prefixes used by the
# `ica__n_components` / `pca__n_components` keys of regressor_params.
# NOTE(review): what each custom transformer does is defined outside this
# section - check its definition before reordering steps.
base_steps = [
    ("channels_filtering", ChannelExtractionTransformer(significant_channels)),
    ("ica_preprocessing", IcaPreprocessingTransformer()),
    ("ica", FastICA(random_state=random_state)),
    (
        "ica_postprocessing",
        IcaPostprocessingTransformer(timepoints_count=X_train.shape[-1]),
    ),
    #     ("channel_data_swap", ChannelDataSwap()),
    ("cwt", CwtVectorizer()),
    ("postprocessing", PostprocessingTransformer()),
    ("scaler", StandardScaler()),
    ("pca", PCA(random_state=random_state)),
]

In [None]:
# for alpha in [0.1, 0.2, 0.3, 0.5, 0.7, 0.9, 1.1, 1.3, 1.5, 2.0]:
#     #     for gamma in [0.00001, 0.0001, 0.001, 0.01, 0.1]:

#     lasso = ("lasso", Lasso())
#     lasso_params = dict(lasso__alpha=[alpha])

#     tested_regressors = [
#         (lasso, lasso_params),
#         #             (gbr, gbr_params),
#         #             (knn, knn_params),
#         #         (svr, svr_params),
#     ]

#     # define base steps
#     base_steps = [
#         ("channels_filtering", ChannelExtractionTransformer(significant_channels)),
#         ("ica_preprocessing", IcaPreprocessingTransformer()),
#         ("ica", FastICA(random_state=random_state)),
#         (
#             "ica_postprocessing",
#             IcaPostprocessingTransformer(timepoints_count=X_train.shape[-1]),
#         ),
#         #     ("channel_data_swap", ChannelDataSwap()),
#         ("cwt", CwtVectorizer()),
#         ("postprocessing", PostprocessingTransformer()),
#         ("scaler", StandardScaler()),
#         ("pca", PCA(random_state=random_state)),
#     ]

#     # rate different models
#     results_df = run_experiment(
#         tested_regressors,
#         regressor_params,
#         pipeline_name,
#         X_train,
#         X_test,
#         y_train,
#         y_test,
#         dataset_name,
#         base_steps,
#         results_df,
#     )

In [None]:
# KernelRidge (rbf) sweep: one run_experiment call per (alpha, gamma) pair.
# Each call receives the current results_df and its return value replaces it.
# NOTE: rebinds the notebook-level `tested_regressors` and `base_steps`, so any
# cell executed afterwards sees the last KernelRidge configuration.
for alpha in (0.001, 0.01, 0.1, 1):
    for gamma in (1e-7, 1e-6, 1e-5, 1e-4, 1e-3):
        # fresh preprocessing steps for this run
        base_steps = [
            ("channels_filtering", ChannelExtractionTransformer(significant_channels)),
            ("ica_preprocessing", IcaPreprocessingTransformer()),
            ("ica", FastICA(random_state=random_state)),
            (
                "ica_postprocessing",
                IcaPostprocessingTransformer(timepoints_count=X_train.shape[-1]),
            ),
            ("cwt", CwtVectorizer()),
            ("postprocessing", PostprocessingTransformer()),
            ("scaler", StandardScaler()),
            ("pca", PCA(random_state=random_state)),
        ]

        # single-point grid for the model under test
        kr = ("kr", KernelRidge(kernel="rbf"))
        kr_params = {"kr__alpha": [alpha], "kr__gamma": [gamma]}
        tested_regressors = [(kr, kr_params)]

        results_df = run_experiment(
            tested_regressors,
            regressor_params,
            pipeline_name,
            X_train,
            X_test,
            y_train,
            y_test,
            dataset_name,
            base_steps,
            results_df,
        )

Run experiment:

In [None]:
# Evaluate whatever `tested_regressors` and `base_steps` are currently bound
# to; the returned frame replaces results_df.
results_df = run_experiment(
    tested_regressors,
    regressor_params,
    pipeline_name,
    X_train,
    X_test,
    y_train,
    y_test,
    dataset_name,
    base_steps,
    results_df,
)

In [None]:
# res = pd.read_pickle("../../data/res_regr_with_p_cv2/regression_ICA_cut_error.pkl")

In [None]:
# with pd.option_context("display.max_colwidth", -1):
#     display(res.parameters)

### Experiment 2

- Models: KNN, GBR, Lasso, SVR
- vectorize with ICA-bins-PCA

Tuned

In [None]:
# Pipeline label passed to run_experiment by the Experiment 2 cells.
pipeline_name = "ICA_cut_bins"

In [None]:
# Candidate models as (step name, estimator) pairs, each with its own grid
# keyed as "<step name>__<parameter>".
knn = ("knn", KNeighborsRegressor())
knn_params = {"knn__n_neighbors": np.arange(12, 30, 2)}

# tuning note: 0.1
svr = ("svr", SVR())
svr_params = {
    "svr__kernel": ["rbf", "linear"],
    "svr__C": [0.01, 0.1],
    "svr__gamma": [0.01],
    "svr__epsilon": [0.001, 0.01, 0.1],
}

# GradientBoostingRegressor is not defined in this cell; the disabled setup
# used n_estimators=[30], min_samples_split=[50], min_samples_leaf=[20].

# tuning note: 0.3 (a lasso__max_iter=[20] entry is disabled)
lasso = ("lasso", Lasso())
lasso_params = {"lasso__alpha": [0.3]}

In [None]:
# 50 ms expressed as a whole number of samples; int() truncates.
step_in_ms = 50
step_tp = int(step_in_ms * signal_frequency / 1000)

In [None]:
# Grid for the shared preprocessing steps, keyed as "<step name>__<parameter>".
regressor_params = {
    "ica__n_components": np.arange(3, 16, 2),
    "pca__n_components": np.arange(3, 9),
}

In [None]:
# Only SVR is evaluated here; the lasso, gbr and knn pairs are left out.
tested_regressors = [(svr, svr_params)]

In [None]:
# Ordered (name, transformer) steps placed in front of every tested model.
# The step names "ica" and "pca" are the prefixes used by the
# `ica__n_components` / `pca__n_components` keys of regressor_params.
# NOTE(review): what each custom transformer does is defined outside this
# section - check its definition before reordering steps.
base_steps = [
    ("channels_filtering", ChannelExtractionTransformer(significant_channels)),
    ("ica_preprocessing", IcaPreprocessingTransformer()),
    ("ica", FastICA(random_state=random_state)),
    (
        "ica_postprocessing",
        IcaPostprocessingTransformer(timepoints_count=X_train.shape[-1]),
    ),
    # binning is wrapped between two ChannelDataSwap steps
    ("channel_data_swap", ChannelDataSwap()),
    ("binning", BinTransformer(step=step_tp)),
    ("data_channel_swap", ChannelDataSwap()),
    ("postprocessing", PostprocessingTransformer()),
    ("scaler", StandardScaler()),
    ("pca", PCA(random_state=random_state)),
]

In [None]:
# KernelRidge (rbf) sweep: one run_experiment call per (alpha, gamma) pair.
# Each call receives the current results_df and its return value replaces it.
# NOTE: rebinds the notebook-level `tested_regressors` and `base_steps`, so any
# cell executed afterwards sees the last KernelRidge configuration.
for alpha in (0.001, 0.01, 0.1):
    for gamma in (1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 0.01, 0.1):
        # fresh preprocessing steps for this run
        base_steps = [
            ("channels_filtering", ChannelExtractionTransformer(significant_channels)),
            ("ica_preprocessing", IcaPreprocessingTransformer()),
            ("ica", FastICA(random_state=random_state)),
            (
                "ica_postprocessing",
                IcaPostprocessingTransformer(timepoints_count=X_train.shape[-1]),
            ),
            ("channel_data_swap", ChannelDataSwap()),
            ("binning", BinTransformer(step=step_tp)),
            ("data_channel_swap", ChannelDataSwap()),
            ("postprocessing", PostprocessingTransformer()),
            ("scaler", StandardScaler()),
            ("pca", PCA(random_state=random_state)),
        ]

        # single-point grid for the model under test
        kr = ("kr", KernelRidge(kernel="rbf"))
        kr_params = {"kr__alpha": [alpha], "kr__gamma": [gamma]}
        tested_regressors = [(kr, kr_params)]

        results_df = run_experiment(
            tested_regressors,
            regressor_params,
            pipeline_name,
            X_train,
            X_test,
            y_train,
            y_test,
            dataset_name,
            base_steps,
            results_df,
        )

In [None]:
# for C in [0.01, 0.1, 1, 10, 100]:
#     for gamma in [0.0001, 0.001, 0.01, 0.1]:
#         for epsilon in [0.001, 0.01, 0.1, 1, 2, 5, 10]:

#             svr = ("svr", SVR())
#             svr_params = dict(
#                 svr__kernel=["rbf"],
#                 svr__C=[C],
#                 svr__gamma=[gamma],
#                 svr__epsilon=[epsilon],
#             )

#             tested_regressors = [
#                 #             (lasso, lasso_params),
#                 #             (gbr, gbr_params),
#                 #             (knn, knn_params),
#                 (svr, svr_params),
#             ]

#             # define base steps
#             base_steps = [
#                 (
#                     "channels_filtering",
#                     ChannelExtractionTransformer(significant_channels),
#                 ),
#                 ("ica_preprocessing", IcaPreprocessingTransformer()),
#                 ("ica", FastICA(random_state=random_state)),
#                 (
#                     "ica_postprocessing",
#                     IcaPostprocessingTransformer(timepoints_count=X_train.shape[-1]),
#                 ),
#                 ("channel_data_swap", ChannelDataSwap()),
#                 ("binning", BinTransformer(step=step_tp)),
#                 ("data_channel_swap", ChannelDataSwap()),
#                 ("postprocessing", PostprocessingTransformer()),
#                 ("scaler", StandardScaler()),
#                 ("pca", PCA(random_state=random_state)),
#             ]

#             # rate different models
#             results_df = run_experiment(
#                 tested_regressors,
#                 regressor_params,
#                 pipeline_name,
#                 X_train,
#                 X_test,
#                 y_train,
#                 y_test,
#                 dataset_name,
#                 base_steps,
#                 results_df,
#             )

Run experiment:

In [None]:
# Evaluate whatever `tested_regressors` and `base_steps` are currently bound
# to; the returned frame replaces results_df.
results_df = run_experiment(
    tested_regressors,
    regressor_params,
    pipeline_name,
    X_train,
    X_test,
    y_train,
    y_test,
    dataset_name,
    base_steps,
    results_df,
)

### Experiment 3
- Models: KNN, GBR, Lasso, SVR
- vectorize with ICA-PCA

In [None]:
# Pipeline label passed to run_experiment by the Experiment 3 cells.
pipeline_name = "ICA_cut"

Tuned

In [None]:
# Candidate models as (step name, estimator) pairs, each with its own grid
# keyed as "<step name>__<parameter>".
knn = ("knn", KNeighborsRegressor())
knn_params = {"knn__n_neighbors": np.arange(15, 40, 3)}

# tuning note: 10 v 0.001; 0.0001
svr = ("svr", SVR())
svr_params = {
    "svr__kernel": ["rbf", "linear"],
    "svr__C": [0.001],
    "svr__gamma": [0.0001],
}

gbr = ("gbr", GradientBoostingRegressor())
gbr_params = {"gbr__n_estimators": np.arange(1, 40, 5)}

lasso = ("lasso", Lasso())
lasso_params = {"lasso__alpha": [0.5]}

In [None]:
# Grid for the shared preprocessing steps, keyed as "<step name>__<parameter>".
regressor_params = {
    "ica__n_components": np.arange(3, 16, 2),
    "pca__n_components": np.arange(3, 9),
}

In [None]:
# (model, grid) pairs evaluated in this experiment; (gbr, gbr_params) is
# currently left out.
tested_regressors = list(
    zip(
        (lasso, knn, svr),
        (lasso_params, knn_params, svr_params),
    )
)

In [None]:
# for alpha in [0.1, 0.2, 0.3, 0.5, 0.7, 0.9, 1.1, 1.3]:
#     #     for gamma in [0.00001, 0.0001, 0.001, 0.01, 0.1]:

#     lasso = ("lasso", Lasso())
#     lasso_params = dict(lasso__alpha=[alpha])

#     tested_regressors = [
#         (lasso, lasso_params),
#         #             (gbr, gbr_params),
#         #             (knn, knn_params),
#         #         (svr, svr_params),
#     ]

#     # define base steps
#     base_steps = [
#         ("channels_filtering", ChannelExtractionTransformer(significant_channels)),
#         ("ica_preprocessing", IcaPreprocessingTransformer()),
#         ("ica", FastICA(random_state=random_state)),
#         (
#             "ica_postprocessing",
#             IcaPostprocessingTransformer(timepoints_count=X_train.shape[-1]),
#         ),
#         #     ("channel_data_swap", ChannelDataSwap()),
#         ("postprocessing", PostprocessingTransformer()),
#         ("scaler", StandardScaler()),
#         ("pca", PCA(random_state=random_state)),
#     ]

#     # rate different models
#     results_df = run_experiment(
#         tested_regressors,
#         regressor_params,
#         pipeline_name,
#         X_train,
#         X_test,
#         y_train,
#         y_test,
#         dataset_name,
#         base_steps,
#         results_df,
#     )

In [None]:
# KernelRidge (rbf) sweep: one run_experiment call per (alpha, gamma) pair.
# Each call receives the current results_df and its return value replaces it.
# NOTE: rebinds the notebook-level `tested_regressors` and `base_steps`, so any
# cell executed afterwards sees the last KernelRidge configuration.
for alpha in (0.001, 0.01, 0.1, 1):
    for gamma in (1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 0.01, 0.1):
        # fresh preprocessing steps for this run
        base_steps = [
            ("channels_filtering", ChannelExtractionTransformer(significant_channels)),
            ("ica_preprocessing", IcaPreprocessingTransformer()),
            ("ica", FastICA(random_state=random_state)),
            (
                "ica_postprocessing",
                IcaPostprocessingTransformer(timepoints_count=X_train.shape[-1]),
            ),
            ("postprocessing", PostprocessingTransformer()),
            ("scaler", StandardScaler()),
            ("pca", PCA(random_state=random_state)),
        ]

        # single-point grid for the model under test
        kr = ("kr", KernelRidge(kernel="rbf"))
        kr_params = {"kr__alpha": [alpha], "kr__gamma": [gamma]}
        tested_regressors = [(kr, kr_params)]

        results_df = run_experiment(
            tested_regressors,
            regressor_params,
            pipeline_name,
            X_train,
            X_test,
            y_train,
            y_test,
            dataset_name,
            base_steps,
            results_df,
        )

In [None]:
# Ordered (name, transformer) steps placed in front of every tested model.
# The step names "ica" and "pca" are the prefixes used by the
# `ica__n_components` / `pca__n_components` keys of regressor_params.
# NOTE(review): what each custom transformer does is defined outside this
# section - check its definition before reordering steps.
base_steps = [
    ("channels_filtering", ChannelExtractionTransformer(significant_channels)),
    ("ica_preprocessing", IcaPreprocessingTransformer()),
    ("ica", FastICA(random_state=random_state)),
    (
        "ica_postprocessing",
        IcaPostprocessingTransformer(timepoints_count=X_train.shape[-1]),
    ),
    #     ("channel_data_swap", ChannelDataSwap()),
    ("postprocessing", PostprocessingTransformer()),
    ("scaler", StandardScaler()),
    ("pca", PCA(random_state=random_state)),
]

In [None]:
# Evaluate whatever `tested_regressors` and `base_steps` are currently bound
# to; the returned frame replaces results_df.
results_df = run_experiment(
    tested_regressors,
    regressor_params,
    pipeline_name,
    X_train,
    X_test,
    y_train,
    y_test,
    dataset_name,
    base_steps,
    results_df,
)

### Experiment 4

- Models: KNN, GBR, Lasso, SVR
- vectorize with ICA-bins-cwt-PCA

Tuned

In [None]:
# Pipeline label passed to run_experiment by the Experiment 4 cells.
pipeline_name = "ICA_cut_bins_cwt"

In [None]:
# Candidate models as (step name, estimator) pairs, each with its own grid
# keyed as "<step name>__<parameter>".
knn = ("knn", KNeighborsRegressor())
knn_params = {"knn__n_neighbors": np.arange(17, 30, 3)}

# tuning note: 0.001 lin 0.1 rbf
svr = ("svr", SVR())
svr_params = {
    "svr__kernel": ["rbf", "linear"],
    "svr__C": [0.1],
    "svr__gamma": [0.001],
}

gbr = ("gbr", GradientBoostingRegressor())
gbr_params = {"gbr__n_estimators": np.arange(1, 40, 5)}

lasso = ("lasso", Lasso())
lasso_params = {"lasso__alpha": [1.0]}

In [None]:
# 50 ms expressed as a whole number of samples; int() truncates.
step_in_ms = 50
step_tp = int(step_in_ms * signal_frequency / 1000)

In [None]:
# Grid for the shared preprocessing steps, keyed as "<step name>__<parameter>".
regressor_params = {
    "ica__n_components": np.arange(3, 16, 2),
    "pca__n_components": np.arange(3, 9),
}

In [None]:
# (model, grid) pairs evaluated in this experiment; (gbr, gbr_params) is
# currently left out.
tested_regressors = list(
    zip(
        (lasso, knn, svr),
        (lasso_params, knn_params, svr_params),
    )
)

In [None]:
# Ordered (name, transformer) steps placed in front of every tested model.
# The step names "ica" and "pca" are the prefixes used by the
# `ica__n_components` / `pca__n_components` keys of regressor_params.
# NOTE(review): what each custom transformer does is defined outside this
# section - check its definition before reordering steps.
base_steps = [
    ("channels_filtering", ChannelExtractionTransformer(significant_channels)),
    ("ica_preprocessing", IcaPreprocessingTransformer()),
    ("ica", FastICA(random_state=random_state)),
    (
        "ica_postprocessing",
        IcaPostprocessingTransformer(timepoints_count=X_train.shape[-1]),
    ),
    # binning is wrapped between two ChannelDataSwap steps, then CWT follows
    ("channel_data_swap", ChannelDataSwap()),
    ("binning", BinTransformer(step=step_tp)),
    ("data_channel_swap", ChannelDataSwap()),
    ("cwt", CwtVectorizer()),
    ("postprocessing", PostprocessingTransformer()),
    ("scaler", StandardScaler()),
    ("pca", PCA(random_state=random_state)),
]

In [None]:
# for alpha in [0.1, 0.2, 0.3, 0.5, 0.7, 0.8, 0.9, 1.0, 1.2, 1.4]:
#     #     for gamma in [0.00001, 0.0001, 0.001, 0.01, 0.1]:

#     lasso = ("lasso", Lasso())
#     lasso_params = dict(lasso__alpha=[alpha])

#     tested_regressors = [
#         (lasso, lasso_params),
#         #             (gbr, gbr_params),
#         #             (knn, knn_params),
#         #         (svr, svr_params),
#     ]

#     # define base steps
#     base_steps = [
#         ("channels_filtering", ChannelExtractionTransformer(significant_channels)),
#         ("ica_preprocessing", IcaPreprocessingTransformer()),
#         ("ica", FastICA(random_state=random_state)),
#         (
#             "ica_postprocessing",
#             IcaPostprocessingTransformer(timepoints_count=X_train.shape[-1]),
#         ),
#         ("channel_data_swap", ChannelDataSwap()),
#         ("binning", BinTransformer(step=step_tp)),
#         ("data_channel_swap", ChannelDataSwap()),
#         ("cwt", CwtVectorizer()),
#         ("postprocessing", PostprocessingTransformer()),
#         ("scaler", StandardScaler()),
#         ("pca", PCA(random_state=random_state)),
#     ]

#     # rate different models
#     results_df = run_experiment(
#         tested_regressors,
#         regressor_params,
#         pipeline_name,
#         X_train,
#         X_test,
#         y_train,
#         y_test,
#         dataset_name,
#         base_steps,
#         results_df,
#     )

In [None]:
# KernelRidge (rbf) sweep: one run_experiment call per (alpha, gamma) pair.
# Each call receives the current results_df and its return value replaces it.
# NOTE: rebinds the notebook-level `tested_regressors` and `base_steps`, so any
# cell executed afterwards sees the last KernelRidge configuration.
for alpha in (0.001, 0.01, 0.1, 1):
    for gamma in (1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 0.01, 0.1):
        # fresh preprocessing steps for this run
        base_steps = [
            ("channels_filtering", ChannelExtractionTransformer(significant_channels)),
            ("ica_preprocessing", IcaPreprocessingTransformer()),
            ("ica", FastICA(random_state=random_state)),
            (
                "ica_postprocessing",
                IcaPostprocessingTransformer(timepoints_count=X_train.shape[-1]),
            ),
            ("channel_data_swap", ChannelDataSwap()),
            ("binning", BinTransformer(step=step_tp)),
            ("data_channel_swap", ChannelDataSwap()),
            ("cwt", CwtVectorizer()),
            ("postprocessing", PostprocessingTransformer()),
            ("scaler", StandardScaler()),
            ("pca", PCA(random_state=random_state)),
        ]

        # single-point grid for the model under test
        kr = ("kr", KernelRidge(kernel="rbf"))
        kr_params = {"kr__alpha": [alpha], "kr__gamma": [gamma]}
        tested_regressors = [(kr, kr_params)]

        results_df = run_experiment(
            tested_regressors,
            regressor_params,
            pipeline_name,
            X_train,
            X_test,
            y_train,
            y_test,
            dataset_name,
            base_steps,
            results_df,
        )

In [None]:
# Evaluate whatever `tested_regressors` and `base_steps` are currently bound
# to; the returned frame replaces results_df.
results_df = run_experiment(
    tested_regressors,
    regressor_params,
    pipeline_name,
    X_train,
    X_test,
    y_train,
    y_test,
    dataset_name,
    base_steps,
    results_df,
)

In [None]:
# Write results_df to a pickle under ../../data, suffixed with dataset_name.
# NOTE(review): the file name says "classification" although the models tested
# in this section are regressors - confirm the name is intended.
results_df.to_pickle("../../data/classification_ICA_cut_" + dataset_name + ".pkl")

In [None]:
# Write results_df to a CSV under ../../data, suffixed with dataset_name.
# NOTE(review): the file name says "classification" although the models tested
# in this section are regressors - confirm the name is intended.
results_df.to_csv("../../data/classification_ICA_cut_" + dataset_name + ".csv")

### Experiment 5

In [None]:
# Pipeline label passed to run_experiment by the Experiment 5 cells.
pipeline_name = "ICA_cut_function_std"

# Single-entry mapping (feature name -> callable) given to CwtFeatureVectorizer;
# its only key is also passed to run_experiment as `function_name`.
feature_function_dict = {"std": std_signal}

In [None]:
# Candidate models as (step name, estimator) pairs, each with its own grid
# keyed as "<step name>__<parameter>".
knn = ("knn", KNeighborsRegressor())
knn_params = {"knn__n_neighbors": np.arange(15, 30, 3)}

svr = ("svr", SVR())
svr_params = {
    "svr__kernel": ["rbf", "linear"],
    "svr__C": [100],
    "svr__gamma": [0.001],
}

gbr = ("gbr", GradientBoostingRegressor())
gbr_params = {"gbr__n_estimators": np.arange(1, 40, 5)}

lasso = ("lasso", Lasso())
lasso_params = {"lasso__alpha": [0.6]}

In [None]:
# Grid for the shared preprocessing steps, keyed as "<step name>__<parameter>".
regressor_params = {
    "ica__n_components": np.arange(3, 16, 2),
    "pca__n_components": np.arange(3, 9),
}

In [None]:
# (model, grid) pairs evaluated in this experiment; (gbr, gbr_params) is
# currently left out.
tested_regressors = list(
    zip(
        (lasso, knn, svr),
        (lasso_params, knn_params, svr_params),
    )
)

In [None]:
print(f"Featurize with {feature_function_dict.keys()} function")

# KernelRidge (rbf) sweep: one run_experiment call per (alpha, gamma) pair.
# Each call receives the current results_df and its return value replaces it.
# NOTE: rebinds the notebook-level `tested_regressors` and `this_base_steps`,
# so any cell executed afterwards sees the last KernelRidge configuration.
for alpha in (0.001, 0.01, 0.1, 1):
    for gamma in (1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 0.01, 0.1):
        # fresh preprocessing steps for this run
        this_base_steps = [
            ("channels_filtering", ChannelExtractionTransformer(significant_channels)),
            ("ica_preprocessing", IcaPreprocessingTransformer()),
            ("ica", FastICA(random_state=random_state)),
            (
                "ica_postprocessing",
                IcaPostprocessingTransformer(timepoints_count=X_train.shape[-1]),
            ),
            ("cwt", CwtVectorizer()),
            ("cwt_feature", CwtFeatureVectorizer(feature_dict=feature_function_dict)),
            ("postprocessing", PostprocessingTransformer()),
            ("scaler", StandardScaler()),
            ("pca", PCA(random_state=random_state)),
        ]

        # single-point grid for the model under test
        kr = ("kr", KernelRidge(kernel="rbf"))
        kr_params = {"kr__alpha": [alpha], "kr__gamma": [gamma]}
        tested_regressors = [(kr, kr_params)]

        results_df = run_experiment(
            tested_regressors,
            regressor_params,
            pipeline_name,
            X_train,
            X_test,
            y_train,
            y_test,
            dataset_name,
            this_base_steps,
            results_df,
            # first (only) key of the feature mapping
            function_name=list(feature_function_dict)[0],
        )

In [None]:
print(f"Featurize with {feature_function_dict.keys()} function")

# Preprocessing steps for the CWT + feature-function pipeline.
this_base_steps = [
    ("channels_filtering", ChannelExtractionTransformer(significant_channels)),
    ("ica_preprocessing", IcaPreprocessingTransformer()),
    ("ica", FastICA(random_state=random_state)),
    (
        "ica_postprocessing",
        IcaPostprocessingTransformer(timepoints_count=X_train.shape[-1]),
    ),
    ("cwt", CwtVectorizer()),
    ("cwt_feature", CwtFeatureVectorizer(feature_dict=feature_function_dict)),
    ("postprocessing", PostprocessingTransformer()),
    ("scaler", StandardScaler()),
    ("pca", PCA(random_state=random_state)),
]

# Evaluate whatever `tested_regressors` is currently bound to; the returned
# frame replaces results_df.
results_df = run_experiment(
    tested_regressors,
    regressor_params,
    pipeline_name,
    X_train,
    X_test,
    y_train,
    y_test,
    dataset_name,
    this_base_steps,
    results_df,
    # first (only) key of the feature mapping
    function_name=list(feature_function_dict)[0],
)

### Experiment 6

In [None]:
# Pipeline label passed to run_experiment by the Experiment 6 cells.
pipeline_name = "ICA_cut_function_abs_diff"

# Single-entry mapping (feature name -> callable) given to CwtFeatureVectorizer;
# its only key is also passed to run_experiment as `function_name`.
feature_function_dict = {"abs_diffs": abs_diffs_signal}

In [None]:
# Candidate models as (step name, estimator) pairs, each with its own grid
# keyed as "<step name>__<parameter>".
knn = ("knn", KNeighborsRegressor())
knn_params = {"knn__n_neighbors": np.arange(15, 30, 3)}

# tuning note: 10
svr = ("svr", SVR())
svr_params = {
    "svr__kernel": ["rbf", "linear"],
    "svr__C": [100],
    "svr__gamma": [0.0001],
}

gbr = ("gbr", GradientBoostingRegressor())
gbr_params = {"gbr__n_estimators": np.arange(1, 40, 5)}

lasso = ("lasso", Lasso())
lasso_params = {"lasso__alpha": [0.8]}

In [None]:
# Grid for the shared preprocessing steps, keyed as "<step name>__<parameter>".
regressor_params = {
    "ica__n_components": np.arange(3, 16, 2),
    "pca__n_components": np.arange(3, 9),
}

In [None]:
# (model, grid) pairs evaluated in this experiment; (gbr, gbr_params) is
# currently left out.
tested_regressors = list(
    zip(
        (lasso, knn, svr),
        (lasso_params, knn_params, svr_params),
    )
)

In [None]:
print(f"Featurize with {feature_function_dict.keys()} function")

# KernelRidge (rbf) sweep: one run_experiment call per (alpha, gamma) pair.
# Each call receives the current results_df and its return value replaces it.
# NOTE: rebinds the notebook-level `tested_regressors` and `this_base_steps`,
# so any cell executed afterwards sees the last KernelRidge configuration.
for alpha in (0.001, 0.01, 0.1, 1):
    for gamma in (1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 0.01, 0.1):
        # fresh preprocessing steps for this run
        this_base_steps = [
            ("channels_filtering", ChannelExtractionTransformer(significant_channels)),
            ("ica_preprocessing", IcaPreprocessingTransformer()),
            ("ica", FastICA(random_state=random_state)),
            (
                "ica_postprocessing",
                IcaPostprocessingTransformer(timepoints_count=X_train.shape[-1]),
            ),
            ("cwt", CwtVectorizer()),
            ("cwt_feature", CwtFeatureVectorizer(feature_dict=feature_function_dict)),
            ("postprocessing", PostprocessingTransformer()),
            ("scaler", StandardScaler()),
            ("pca", PCA(random_state=random_state)),
        ]

        # single-point grid for the model under test
        kr = ("kr", KernelRidge(kernel="rbf"))
        kr_params = {"kr__alpha": [alpha], "kr__gamma": [gamma]}
        tested_regressors = [(kr, kr_params)]

        results_df = run_experiment(
            tested_regressors,
            regressor_params,
            pipeline_name,
            X_train,
            X_test,
            y_train,
            y_test,
            dataset_name,
            this_base_steps,
            results_df,
            # first (only) key of the feature mapping
            function_name=list(feature_function_dict)[0],
        )

In [None]:
print(f"Featurize with {feature_function_dict.keys()} function")

# Preprocessing steps for the CWT + feature-function pipeline.
this_base_steps = [
    ("channels_filtering", ChannelExtractionTransformer(significant_channels)),
    ("ica_preprocessing", IcaPreprocessingTransformer()),
    ("ica", FastICA(random_state=random_state)),
    (
        "ica_postprocessing",
        IcaPostprocessingTransformer(timepoints_count=X_train.shape[-1]),
    ),
    ("cwt", CwtVectorizer()),
    ("cwt_feature", CwtFeatureVectorizer(feature_dict=feature_function_dict)),
    ("postprocessing", PostprocessingTransformer()),
    ("scaler", StandardScaler()),
    ("pca", PCA(random_state=random_state)),
]

# Evaluate whatever `tested_regressors` is currently bound to; the returned
# frame replaces results_df.
results_df = run_experiment(
    tested_regressors,
    regressor_params,
    pipeline_name,
    X_train,
    X_test,
    y_train,
    y_test,
    dataset_name,
    this_base_steps,
    results_df,
    # first (only) key of the feature mapping
    function_name=list(feature_function_dict)[0],
)

### Experiment 7

In [None]:
# Pipeline label passed to run_experiment by the Experiment 7 cells.
pipeline_name = "ICA_cut_function_energy"

# Single-entry mapping (feature name -> callable) given to CwtFeatureVectorizer;
# its only key is also passed to run_experiment as `function_name`.
feature_function_dict = {"energy": mean_energy_signal}

In [None]:
# Candidate models as (step name, estimator) pairs, each with its own grid
# keyed as "<step name>__<parameter>".
knn = ("knn", KNeighborsRegressor())
knn_params = {"knn__n_neighbors": np.arange(15, 30, 3)}

# tuning note: 10
svr = ("svr", SVR())
svr_params = {
    "svr__kernel": ["rbf"],
    "svr__C": [0.1],
    "svr__gamma": [0.001],
}

gbr = ("gbr", GradientBoostingRegressor())
gbr_params = {"gbr__n_estimators": np.arange(1, 40, 5)}

lasso = ("lasso", Lasso())
lasso_params = {"lasso__alpha": [0.7]}

In [None]:
# Grid for the shared preprocessing steps, keyed as "<step name>__<parameter>".
regressor_params = {
    "ica__n_components": np.arange(3, 16, 2),
    "pca__n_components": np.arange(3, 9),
}

In [None]:
# (model, grid) pairs evaluated in this experiment; (gbr, gbr_params) is
# currently left out.
tested_regressors = list(
    zip(
        (lasso, knn, svr),
        (lasso_params, knn_params, svr_params),
    )
)

In [None]:
# for alpha in [0.1, 0.2, 0.3, 0.5, 0.7, 0.8, 0.9, 1.0, 1.2, 1.4]:
#     #     for gamma in [0.00001, 0.0001, 0.001, 0.01, 0.1]:

#     lasso = ("lasso", Lasso())
#     lasso_params = dict(lasso__alpha=[alpha])

#     tested_regressors = [
#         (lasso, lasso_params),
#         #             (gbr, gbr_params),
#         #             (knn, knn_params),
#         #         (svr, svr_params),
#     ]

#     # define base steps
#     this_base_steps = [
#         ("channels_filtering", ChannelExtractionTransformer(significant_channels)),
#         ("ica_preprocessing", IcaPreprocessingTransformer()),
#         ("ica", FastICA(random_state=random_state)),
#         (
#             "ica_postprocessing",
#             IcaPostprocessingTransformer(timepoints_count=X_train.shape[-1]),
#         ),
#         ("cwt", CwtVectorizer()),
#         ("cwt_feature", CwtFeatureVectorizer(feature_dict=feature_function_dict)),
#         ("postprocessing", PostprocessingTransformer()),
#         ("scaler", StandardScaler()),
#         ("pca", PCA(random_state=random_state)),
#     ]

#     # rate different models
#     results_df = run_experiment(
#         tested_regressors,
#         regressor_params,
#         pipeline_name,
#         X_train,
#         X_test,
#         y_train,
#         y_test,
#         dataset_name,
#         this_base_steps,
#         results_df,
#         function_name=list(feature_function_dict.keys())[0],
#     )

In [None]:
print(f"Featurize with {feature_function_dict.keys()} function")

# KernelRidge (rbf) sweep: one run_experiment call per (alpha, gamma) pair.
# Each call receives the current results_df and its return value replaces it.
# NOTE: rebinds the notebook-level `tested_regressors` and `this_base_steps`,
# so any cell executed afterwards sees the last KernelRidge configuration.
for alpha in (0.001, 0.01, 0.1, 1):
    for gamma in (1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 0.01, 0.1):
        # fresh preprocessing steps for this run
        this_base_steps = [
            ("channels_filtering", ChannelExtractionTransformer(significant_channels)),
            ("ica_preprocessing", IcaPreprocessingTransformer()),
            ("ica", FastICA(random_state=random_state)),
            (
                "ica_postprocessing",
                IcaPostprocessingTransformer(timepoints_count=X_train.shape[-1]),
            ),
            ("cwt", CwtVectorizer()),
            ("cwt_feature", CwtFeatureVectorizer(feature_dict=feature_function_dict)),
            ("postprocessing", PostprocessingTransformer()),
            ("scaler", StandardScaler()),
            ("pca", PCA(random_state=random_state)),
        ]

        # single-point grid for the model under test
        kr = ("kr", KernelRidge(kernel="rbf"))
        kr_params = {"kr__alpha": [alpha], "kr__gamma": [gamma]}
        tested_regressors = [(kr, kr_params)]

        results_df = run_experiment(
            tested_regressors,
            regressor_params,
            pipeline_name,
            X_train,
            X_test,
            y_train,
            y_test,
            dataset_name,
            this_base_steps,
            results_df,
            # first (only) key of the feature mapping
            function_name=list(feature_function_dict)[0],
        )

In [None]:
print(f"Featurize with {feature_function_dict.keys()} function")

# Preprocessing steps for the CWT + feature-function pipeline.
this_base_steps = [
    ("channels_filtering", ChannelExtractionTransformer(significant_channels)),
    ("ica_preprocessing", IcaPreprocessingTransformer()),
    ("ica", FastICA(random_state=random_state)),
    (
        "ica_postprocessing",
        IcaPostprocessingTransformer(timepoints_count=X_train.shape[-1]),
    ),
    ("cwt", CwtVectorizer()),
    ("cwt_feature", CwtFeatureVectorizer(feature_dict=feature_function_dict)),
    ("postprocessing", PostprocessingTransformer()),
    ("scaler", StandardScaler()),
    ("pca", PCA(random_state=random_state)),
]

# Evaluate whatever `tested_regressors` is currently bound to; the returned
# frame replaces results_df.
results_df = run_experiment(
    tested_regressors,
    regressor_params,
    pipeline_name,
    X_train,
    X_test,
    y_train,
    y_test,
    dataset_name,
    this_base_steps,
    results_df,
    # first (only) key of the feature mapping
    function_name=list(feature_function_dict)[0],
)

### Experiment 8

In [None]:
# Experiment 8 label and the single feature function applied by the
# "cwt_feature" pipeline step.
pipeline_name = "ICA_cut_function_mean"
feature_function_dict = dict(mean=mean_signal)

In [None]:
# Candidate regressors as (step name, estimator) pairs, each with a parameter
# grid whose keys use the "<step name>__<parameter>" form.
knn = ("knn", KNeighborsRegressor())
knn_params = {"knn__n_neighbors": np.arange(15, 30, 3)}

# (the original cell carried a bare "10" note here; its meaning is not evident)
svr = ("svr", SVR())
svr_params = {
    "svr__kernel": ["rbf", "linear"],
    "svr__C": [100],
    "svr__gamma": [0.0001],
}

gbr = ("gbr", GradientBoostingRegressor())
gbr_params = {"gbr__n_estimators": np.arange(1, 40, 5)}

lasso = ("lasso", Lasso())
lasso_params = {"lasso__alpha": [0.2]}

In [None]:
# Grid over the "ica" and "pca" pipeline steps: ICA components 3, 5, ..., 15
# and PCA components 3..8.
regressor_params = {
    "ica__n_components": np.arange(3, 16, 2),
    "pca__n_components": np.arange(3, 9),
}

In [None]:
# (regressor, grid) pairs evaluated in this experiment; the gbr pair is
# currently disabled.
tested_regressors = [(lasso, lasso_params), (knn, knn_params), (svr, svr_params)]

In [None]:
# for alpha in [0.1, 0.2, 0.3, 0.5, 0.7, 0.8, 0.9, 1.0, 1.2, 1.4]:
#     #     for gamma in [0.00001, 0.0001, 0.001, 0.01, 0.1]:

#     lasso = ("lasso", Lasso())
#     lasso_params = dict(lasso__alpha=[alpha])

#     tested_regressors = [
#         (lasso, lasso_params),
#         #             (gbr, gbr_params),
#         #             (knn, knn_params),
#         #         (svr, svr_params),
#     ]

#     # define base steps
#     this_base_steps = [
#         ("channels_filtering", ChannelExtractionTransformer(significant_channels)),
#         ("ica_preprocessing", IcaPreprocessingTransformer()),
#         ("ica", FastICA(random_state=random_state)),
#         (
#             "ica_postprocessing",
#             IcaPostprocessingTransformer(timepoints_count=X_train.shape[-1]),
#         ),
#         ("cwt", CwtVectorizer()),
#         ("cwt_feature", CwtFeatureVectorizer(feature_dict=feature_function_dict)),
#         ("postprocessing", PostprocessingTransformer()),
#         ("scaler", StandardScaler()),
#         ("pca", PCA(random_state=random_state)),
#     ]

#     # rate different models
#     results_df = run_experiment(
#         tested_regressors,
#         regressor_params,
#         pipeline_name,
#         X_train,
#         X_test,
#         y_train,
#         y_test,
#         dataset_name,
#         this_base_steps,
#         results_df,
#         function_name=list(feature_function_dict.keys())[0],
#     )

In [None]:
print(f"Featurize with {feature_function_dict.keys()} function")

# Kernel ridge (RBF kernel) sweep: one run_experiment call per (alpha, gamma)
# pair, each appended to results_df through the returned frame.
for alpha in [0.001, 0.01, 0.1, 1]:
    for gamma in [0.0000001, 0.000001, 0.00001, 0.0001, 0.001, 0.01, 0.1]:
        kr = ("kr", KernelRidge(kernel="rbf"))
        kr_params = dict(kr__alpha=[alpha], kr__gamma=[gamma])

        # Keep the sweep in its own list. Rebinding `tested_regressors` here
        # would leave the following cell evaluating only the last kernel
        # ridge configuration instead of the regressors selected for this
        # experiment.
        kr_regressors = [(kr, kr_params)]

        # Transformer instances are rebuilt for every run.
        this_base_steps = [
            ("channels_filtering", ChannelExtractionTransformer(significant_channels)),
            ("ica_preprocessing", IcaPreprocessingTransformer()),
            ("ica", FastICA(random_state=random_state)),
            (
                "ica_postprocessing",
                IcaPostprocessingTransformer(timepoints_count=X_train.shape[-1]),
            ),
            ("cwt", CwtVectorizer()),
            ("cwt_feature", CwtFeatureVectorizer(feature_dict=feature_function_dict)),
            ("postprocessing", PostprocessingTransformer()),
            ("scaler", StandardScaler()),
            ("pca", PCA(random_state=random_state)),
        ]

        # rate this kernel ridge configuration
        results_df = run_experiment(
            kr_regressors,
            regressor_params,
            pipeline_name,
            X_train,
            X_test,
            y_train,
            y_test,
            dataset_name,
            this_base_steps,
            results_df,
            function_name=list(feature_function_dict.keys())[0],
        )

In [None]:
print(f"Featurize with {feature_function_dict.keys()} function")

# ICA stage: channel selection followed by FastICA with its pre/post steps.
this_base_steps = [
    ("channels_filtering", ChannelExtractionTransformer(significant_channels)),
    ("ica_preprocessing", IcaPreprocessingTransformer()),
    ("ica", FastICA(random_state=random_state)),
    (
        "ica_postprocessing",
        IcaPostprocessingTransformer(timepoints_count=X_train.shape[-1]),
    ),
]
# CWT stage: vectorise, then apply the functions from feature_function_dict.
this_base_steps += [
    ("cwt", CwtVectorizer()),
    ("cwt_feature", CwtFeatureVectorizer(feature_dict=feature_function_dict)),
    ("postprocessing", PostprocessingTransformer()),
]
# Final stage: standardisation and PCA ahead of the regressor.
this_base_steps += [
    ("scaler", StandardScaler()),
    ("pca", PCA(random_state=random_state)),
]

# Evaluate the configured regressors; the returned frame replaces results_df.
results_df = run_experiment(
    tested_regressors,
    regressor_params,
    pipeline_name,
    X_train,
    X_test,
    y_train,
    y_test,
    dataset_name,
    this_base_steps,
    results_df,
    function_name=list(feature_function_dict)[0],
)

### Experiment 9

In [None]:
# Experiment 9 label and the single feature function applied by the
# "cwt_feature" pipeline step.
pipeline_name = "ICA_cut_bins_function_std"
feature_function_dict = dict(std=std_signal)

In [None]:
# Candidate regressors as (step name, estimator) pairs, each with a parameter
# grid whose keys use the "<step name>__<parameter>" form.
knn = ("knn", KNeighborsRegressor())
knn_params = {"knn__n_neighbors": np.arange(15, 30, 3)}

# (the original cell carried a bare "10" note here; its meaning is not evident)
svr = ("svr", SVR())
svr_params = {
    "svr__kernel": ["rbf", "linear"],
    "svr__C": [0.1],
    "svr__gamma": [0.01],
}

gbr = ("gbr", GradientBoostingRegressor())
gbr_params = {"gbr__n_estimators": np.arange(1, 40, 5)}

lasso = ("lasso", Lasso())
lasso_params = {"lasso__alpha": [0.6]}

In [None]:
# Grid over the "ica" and "pca" pipeline steps: ICA components 3, 5, ..., 15
# and PCA components 3..8.
regressor_params = {
    "ica__n_components": np.arange(3, 16, 2),
    "pca__n_components": np.arange(3, 9),
}

In [None]:
# (regressor, grid) pairs evaluated in this experiment; the gbr pair is
# currently disabled.
tested_regressors = [(lasso, lasso_params), (knn, knn_params), (svr, svr_params)]

In [None]:
# for alpha in [0.1, 0.2, 0.3, 0.5, 0.7, 0.8, 0.9, 1.0, 1.2, 1.4]:
#     #     for gamma in [0.00001, 0.0001, 0.001, 0.01, 0.1]:

#     lasso = ("lasso", Lasso())
#     lasso_params = dict(lasso__alpha=[alpha])

#     tested_regressors = [
#         (lasso, lasso_params),
#         #             (gbr, gbr_params),
#         #             (knn, knn_params),
#         #         (svr, svr_params),
#     ]

#     # define base steps
#     this_base_steps = [
#         ("channels_filtering", ChannelExtractionTransformer(significant_channels)),
#         ("ica_preprocessing", IcaPreprocessingTransformer()),
#         ("ica", FastICA(random_state=random_state)),
#         (
#             "ica_postprocessing",
#             IcaPostprocessingTransformer(timepoints_count=X_train.shape[-1]),
#         ),
#         ("channel_data_swap", ChannelDataSwap()),
#         ("binning", BinTransformer(step=step_tp)),
#         ("data_channel_swap", ChannelDataSwap()),
#         ("cwt", CwtVectorizer()),
#         ("cwt_feature", CwtFeatureVectorizer(feature_dict=feature_function_dict)),
#         ("postprocessing", PostprocessingTransformer()),
#         ("scaler", StandardScaler()),
#         ("pca", PCA(random_state=random_state)),
#     ]

#     # rate different models
#     results_df = run_experiment(
#         tested_regressors,
#         regressor_params,
#         pipeline_name,
#         X_train,
#         X_test,
#         y_train,
#         y_test,
#         dataset_name,
#         this_base_steps,
#         results_df,
#         function_name=list(feature_function_dict.keys())[0],
#     )

In [None]:
print(f"Featurize with {feature_function_dict.keys()} function")

# Kernel ridge (RBF kernel) sweep: one run_experiment call per (alpha, gamma)
# pair, each appended to results_df through the returned frame.
for alpha in [0.001, 0.01, 0.1, 1]:
    for gamma in [0.0000001, 0.000001, 0.00001, 0.0001, 0.001, 0.01, 0.1]:
        kr = ("kr", KernelRidge(kernel="rbf"))
        kr_params = dict(kr__alpha=[alpha], kr__gamma=[gamma])

        # Keep the sweep in its own list. Rebinding `tested_regressors` here
        # would leave the following cell evaluating only the last kernel
        # ridge configuration instead of the regressors selected for this
        # experiment.
        kr_regressors = [(kr, kr_params)]

        # Transformer instances are rebuilt for every run.
        this_base_steps = [
            (
                "channels_filtering",
                ChannelExtractionTransformer(significant_channels),
            ),
            ("ica_preprocessing", IcaPreprocessingTransformer()),
            ("ica", FastICA(random_state=random_state)),
            (
                "ica_postprocessing",
                IcaPostprocessingTransformer(timepoints_count=X_train.shape[-1]),
            ),
            # binning is wrapped between two ChannelDataSwap steps
            ("channel_data_swap", ChannelDataSwap()),
            ("binning", BinTransformer(step=step_tp)),
            ("data_channel_swap", ChannelDataSwap()),
            ("cwt", CwtVectorizer()),
            (
                "cwt_feature",
                CwtFeatureVectorizer(feature_dict=feature_function_dict),
            ),
            ("postprocessing", PostprocessingTransformer()),
            ("scaler", StandardScaler()),
            ("pca", PCA(random_state=random_state)),
        ]

        # rate this kernel ridge configuration
        results_df = run_experiment(
            kr_regressors,
            regressor_params,
            pipeline_name,
            X_train,
            X_test,
            y_train,
            y_test,
            dataset_name,
            this_base_steps,
            results_df,
            function_name=list(feature_function_dict.keys())[0],
        )

In [None]:
print(f"Featurize with {feature_function_dict.keys()} function")

# ICA stage: channel selection followed by FastICA with its pre/post steps.
this_base_steps = [
    ("channels_filtering", ChannelExtractionTransformer(significant_channels)),
    ("ica_preprocessing", IcaPreprocessingTransformer()),
    ("ica", FastICA(random_state=random_state)),
    (
        "ica_postprocessing",
        IcaPostprocessingTransformer(timepoints_count=X_train.shape[-1]),
    ),
]
# Binning stage: BinTransformer(step=step_tp) between two ChannelDataSwap steps.
this_base_steps += [
    ("channel_data_swap", ChannelDataSwap()),
    ("binning", BinTransformer(step=step_tp)),
    ("data_channel_swap", ChannelDataSwap()),
]
# CWT stage: vectorise, then apply the functions from feature_function_dict.
this_base_steps += [
    ("cwt", CwtVectorizer()),
    ("cwt_feature", CwtFeatureVectorizer(feature_dict=feature_function_dict)),
    ("postprocessing", PostprocessingTransformer()),
]
# Final stage: standardisation and PCA ahead of the regressor.
this_base_steps += [
    ("scaler", StandardScaler()),
    ("pca", PCA(random_state=random_state)),
]

# Evaluate the configured regressors; the returned frame replaces results_df.
results_df = run_experiment(
    tested_regressors,
    regressor_params,
    pipeline_name,
    X_train,
    X_test,
    y_train,
    y_test,
    dataset_name,
    this_base_steps,
    results_df,
    function_name=list(feature_function_dict)[0],
)

### Experiment 10

In [None]:
# Experiment 10 label and the single feature function applied by the
# "cwt_feature" pipeline step.
pipeline_name = "ICA_cut_bins_function_abs_diff"
feature_function_dict = dict(abs_diffs=abs_diffs_signal)

In [None]:
# Candidate regressors as (step name, estimator) pairs, each with a parameter
# grid whose keys use the "<step name>__<parameter>" form.
knn = ("knn", KNeighborsRegressor())
knn_params = {"knn__n_neighbors": np.arange(15, 30, 3)}

# (the original cell carried a bare "10" note here; its meaning is not evident)
svr = ("svr", SVR())
svr_params = {
    "svr__kernel": ["rbf", "linear"],
    "svr__C": [100],
    "svr__gamma": [0.0001],
}

gbr = ("gbr", GradientBoostingRegressor())
gbr_params = {"gbr__n_estimators": np.arange(1, 40, 5)}

lasso = ("lasso", Lasso())
lasso_params = {"lasso__alpha": [0.5]}

In [None]:
# Grid over the "ica" and "pca" pipeline steps: ICA components 3, 5, ..., 15
# and PCA components 3..8.
regressor_params = {
    "ica__n_components": np.arange(3, 16, 2),
    "pca__n_components": np.arange(3, 9),
}

In [None]:
# (regressor, grid) pairs evaluated in this experiment; the gbr pair is
# currently disabled.
tested_regressors = [(lasso, lasso_params), (knn, knn_params), (svr, svr_params)]

In [None]:
# print(f"Featurize with {feature_function_dict.keys()} function")

# for alpha in [0.1, 0.2, 0.3, 0.5, 0.7, 0.8, 0.9, 1.0, 1.2, 1.4]:
#     #     for gamma in [0.00001, 0.0001, 0.001, 0.01, 0.1]:

#     lasso = ("lasso", Lasso())
#     lasso_params = dict(lasso__alpha=[alpha])

#     tested_regressors = [
#         (lasso, lasso_params),
#         #             (gbr, gbr_params),
#         #             (knn, knn_params),
#         #         (svr, svr_params),
#     ]

#     # define base steps
#     this_base_steps = [
#         ("channels_filtering", ChannelExtractionTransformer(significant_channels)),
#         ("ica_preprocessing", IcaPreprocessingTransformer()),
#         ("ica", FastICA(random_state=random_state)),
#         (
#             "ica_postprocessing",
#             IcaPostprocessingTransformer(timepoints_count=X_train.shape[-1]),
#         ),
#         ("channel_data_swap", ChannelDataSwap()),
#         ("binning", BinTransformer(step=step_tp)),
#         ("data_channel_swap", ChannelDataSwap()),
#         ("cwt", CwtVectorizer()),
#         ("cwt_feature", CwtFeatureVectorizer(feature_dict=feature_function_dict)),
#         ("postprocessing", PostprocessingTransformer()),
#         ("scaler", StandardScaler()),
#         ("pca", PCA(random_state=random_state)),
#     ]

#     # rate different models
#     results_df = run_experiment(
#         tested_regressors,
#         regressor_params,
#         pipeline_name,
#         X_train,
#         X_test,
#         y_train,
#         y_test,
#         dataset_name,
#         this_base_steps,
#         results_df,
#         function_name=list(feature_function_dict.keys())[0],
#     )

In [None]:
print(f"Featurize with {feature_function_dict.keys()} function")

# Kernel ridge (RBF kernel) sweep: one run_experiment call per (alpha, gamma)
# pair, each appended to results_df through the returned frame.
for alpha in [0.001, 0.01, 0.1, 1]:
    for gamma in [0.0000001, 0.000001, 0.00001, 0.0001, 0.001, 0.01, 0.1]:
        kr = ("kr", KernelRidge(kernel="rbf"))
        kr_params = dict(kr__alpha=[alpha], kr__gamma=[gamma])

        # Keep the sweep in its own list. Rebinding `tested_regressors` here
        # would leave the following cell evaluating only the last kernel
        # ridge configuration instead of the regressors selected for this
        # experiment.
        kr_regressors = [(kr, kr_params)]

        # Transformer instances are rebuilt for every run.
        this_base_steps = [
            ("channels_filtering", ChannelExtractionTransformer(significant_channels)),
            ("ica_preprocessing", IcaPreprocessingTransformer()),
            ("ica", FastICA(random_state=random_state)),
            (
                "ica_postprocessing",
                IcaPostprocessingTransformer(timepoints_count=X_train.shape[-1]),
            ),
            # binning is wrapped between two ChannelDataSwap steps
            ("channel_data_swap", ChannelDataSwap()),
            ("binning", BinTransformer(step=step_tp)),
            ("data_channel_swap", ChannelDataSwap()),
            ("cwt", CwtVectorizer()),
            ("cwt_feature", CwtFeatureVectorizer(feature_dict=feature_function_dict)),
            ("postprocessing", PostprocessingTransformer()),
            ("scaler", StandardScaler()),
            ("pca", PCA(random_state=random_state)),
        ]

        # rate this kernel ridge configuration
        results_df = run_experiment(
            kr_regressors,
            regressor_params,
            pipeline_name,
            X_train,
            X_test,
            y_train,
            y_test,
            dataset_name,
            this_base_steps,
            results_df,
            function_name=list(feature_function_dict.keys())[0],
        )

In [None]:
print(f"Featurize with {feature_function_dict.keys()} function")

# ICA stage: channel selection followed by FastICA with its pre/post steps.
this_base_steps = [
    ("channels_filtering", ChannelExtractionTransformer(significant_channels)),
    ("ica_preprocessing", IcaPreprocessingTransformer()),
    ("ica", FastICA(random_state=random_state)),
    (
        "ica_postprocessing",
        IcaPostprocessingTransformer(timepoints_count=X_train.shape[-1]),
    ),
]
# Binning stage: BinTransformer(step=step_tp) between two ChannelDataSwap steps.
this_base_steps += [
    ("channel_data_swap", ChannelDataSwap()),
    ("binning", BinTransformer(step=step_tp)),
    ("data_channel_swap", ChannelDataSwap()),
]
# CWT stage: vectorise, then apply the functions from feature_function_dict.
this_base_steps += [
    ("cwt", CwtVectorizer()),
    ("cwt_feature", CwtFeatureVectorizer(feature_dict=feature_function_dict)),
    ("postprocessing", PostprocessingTransformer()),
]
# Final stage: standardisation and PCA ahead of the regressor.
this_base_steps += [
    ("scaler", StandardScaler()),
    ("pca", PCA(random_state=random_state)),
]

# Evaluate the configured regressors; the returned frame replaces results_df.
results_df = run_experiment(
    tested_regressors,
    regressor_params,
    pipeline_name,
    X_train,
    X_test,
    y_train,
    y_test,
    dataset_name,
    this_base_steps,
    results_df,
    function_name=list(feature_function_dict)[0],
)

### Experiment 11

In [None]:
# Experiment 11 label and the single feature function applied by the
# "cwt_feature" pipeline step.
pipeline_name = "ICA_cut_bins_function_energy"
feature_function_dict = dict(energy=mean_energy_signal)

In [None]:
# Candidate regressors as (step name, estimator) pairs, each with a parameter
# grid whose keys use the "<step name>__<parameter>" form.
knn = ("knn", KNeighborsRegressor())
knn_params = {"knn__n_neighbors": np.arange(15, 30, 3)}

# (the original cell carried a bare "10" note here; its meaning is not evident)
svr = ("svr", SVR())
svr_params = {
    "svr__kernel": ["rbf"],
    "svr__C": [10],
    "svr__gamma": [0.001],
}

gbr = ("gbr", GradientBoostingRegressor())
gbr_params = {"gbr__n_estimators": np.arange(1, 40, 5)}

lasso = ("lasso", Lasso())
lasso_params = {"lasso__alpha": [0.1]}

In [None]:
# Grid over the "ica" and "pca" pipeline steps: ICA components 3, 5, ..., 15
# and PCA components 3..8.
regressor_params = {
    "ica__n_components": np.arange(3, 16, 2),
    "pca__n_components": np.arange(3, 9),
}

In [None]:
# (regressor, grid) pairs evaluated in this experiment; the gbr pair is
# currently disabled.
tested_regressors = [(lasso, lasso_params), (knn, knn_params), (svr, svr_params)]

In [None]:
# print(f"Featurize with {feature_function_dict.keys()} function")

# for alpha in [0.1, 0.2, 0.3, 0.5, 0.7, 0.8, 0.9, 1.0, 1.2, 1.4]:
#     #     for gamma in [0.00001, 0.0001, 0.001, 0.01, 0.1]:

#     lasso = ("lasso", Lasso())
#     lasso_params = dict(lasso__alpha=[alpha])

#     tested_regressors = [
#         (lasso, lasso_params),
#         #             (gbr, gbr_params),
#         #             (knn, knn_params),
#         #         (svr, svr_params),
#     ]

#     # define base steps
#     this_base_steps = [
#         ("channels_filtering", ChannelExtractionTransformer(significant_channels)),
#         ("ica_preprocessing", IcaPreprocessingTransformer()),
#         ("ica", FastICA(random_state=random_state)),
#         (
#             "ica_postprocessing",
#             IcaPostprocessingTransformer(timepoints_count=X_train.shape[-1]),
#         ),
#         ("channel_data_swap", ChannelDataSwap()),
#         ("binning", BinTransformer(step=step_tp)),
#         ("data_channel_swap", ChannelDataSwap()),
#         ("cwt", CwtVectorizer()),
#         ("cwt_feature", CwtFeatureVectorizer(feature_dict=feature_function_dict)),
#         ("postprocessing", PostprocessingTransformer()),
#         ("scaler", StandardScaler()),
#         ("pca", PCA(random_state=random_state)),
#     ]

#     # rate different models
#     results_df = run_experiment(
#         tested_regressors,
#         regressor_params,
#         pipeline_name,
#         X_train,
#         X_test,
#         y_train,
#         y_test,
#         dataset_name,
#         this_base_steps,
#         results_df,
#         function_name=list(feature_function_dict.keys())[0],
#     )

In [None]:
print(f"Featurize with {feature_function_dict.keys()} function")

# Kernel ridge (RBF kernel) sweep: one run_experiment call per (alpha, gamma)
# pair, each appended to results_df through the returned frame.
for alpha in [0.001, 0.01, 0.1, 1]:
    for gamma in [0.0000001, 0.000001, 0.00001, 0.0001, 0.001, 0.01, 0.1]:
        kr = ("kr", KernelRidge(kernel="rbf"))
        kr_params = dict(kr__alpha=[alpha], kr__gamma=[gamma])

        # Keep the sweep in its own list. Rebinding `tested_regressors` here
        # would leave the following cell evaluating only the last kernel
        # ridge configuration instead of the regressors selected for this
        # experiment.
        kr_regressors = [(kr, kr_params)]

        # Transformer instances are rebuilt for every run.
        this_base_steps = [
            (
                "channels_filtering",
                ChannelExtractionTransformer(significant_channels),
            ),
            ("ica_preprocessing", IcaPreprocessingTransformer()),
            ("ica", FastICA(random_state=random_state)),
            (
                "ica_postprocessing",
                IcaPostprocessingTransformer(timepoints_count=X_train.shape[-1]),
            ),
            # binning is wrapped between two ChannelDataSwap steps
            ("channel_data_swap", ChannelDataSwap()),
            ("binning", BinTransformer(step=step_tp)),
            ("data_channel_swap", ChannelDataSwap()),
            ("cwt", CwtVectorizer()),
            (
                "cwt_feature",
                CwtFeatureVectorizer(feature_dict=feature_function_dict),
            ),
            ("postprocessing", PostprocessingTransformer()),
            ("scaler", StandardScaler()),
            ("pca", PCA(random_state=random_state)),
        ]

        # rate this kernel ridge configuration
        results_df = run_experiment(
            kr_regressors,
            regressor_params,
            pipeline_name,
            X_train,
            X_test,
            y_train,
            y_test,
            dataset_name,
            this_base_steps,
            results_df,
            function_name=list(feature_function_dict.keys())[0],
        )

In [None]:
print(f"Featurize with {feature_function_dict.keys()} function")

# ICA stage: channel selection followed by FastICA with its pre/post steps.
this_base_steps = [
    ("channels_filtering", ChannelExtractionTransformer(significant_channels)),
    ("ica_preprocessing", IcaPreprocessingTransformer()),
    ("ica", FastICA(random_state=random_state)),
    (
        "ica_postprocessing",
        IcaPostprocessingTransformer(timepoints_count=X_train.shape[-1]),
    ),
]
# Binning stage: BinTransformer(step=step_tp) between two ChannelDataSwap steps.
this_base_steps += [
    ("channel_data_swap", ChannelDataSwap()),
    ("binning", BinTransformer(step=step_tp)),
    ("data_channel_swap", ChannelDataSwap()),
]
# CWT stage: vectorise, then apply the functions from feature_function_dict.
this_base_steps += [
    ("cwt", CwtVectorizer()),
    ("cwt_feature", CwtFeatureVectorizer(feature_dict=feature_function_dict)),
    ("postprocessing", PostprocessingTransformer()),
]
# Final stage: standardisation and PCA ahead of the regressor.
this_base_steps += [
    ("scaler", StandardScaler()),
    ("pca", PCA(random_state=random_state)),
]

# Evaluate the configured regressors; the returned frame replaces results_df.
results_df = run_experiment(
    tested_regressors,
    regressor_params,
    pipeline_name,
    X_train,
    X_test,
    y_train,
    y_test,
    dataset_name,
    this_base_steps,
    results_df,
    function_name=list(feature_function_dict)[0],
)

### Experiment 12

In [None]:
# Experiment 12 label and the single feature function applied by the
# "cwt_feature" pipeline step.
pipeline_name = "ICA_cut_bins_function_mean"
feature_function_dict = dict(mean=mean_signal)

In [None]:
# Candidate regressors as (step name, estimator) pairs, each with a parameter
# grid whose keys use the "<step name>__<parameter>" form.
knn = ("knn", KNeighborsRegressor())
knn_params = {"knn__n_neighbors": np.arange(15, 30, 3)}

# (the original cell carried a bare "10" note here; its meaning is not evident)
svr = ("svr", SVR())
svr_params = {
    "svr__kernel": ["rbf", "linear"],
    "svr__C": [0.1],
    "svr__gamma": [0.01],
}

gbr = ("gbr", GradientBoostingRegressor())
gbr_params = {"gbr__n_estimators": np.arange(1, 40, 5)}

lasso = ("lasso", Lasso())
lasso_params = {"lasso__alpha": [0.1]}

In [None]:
# Grid over the "ica" and "pca" pipeline steps: ICA components 3, 5, ..., 15
# and PCA components 3..8.
regressor_params = {
    "ica__n_components": np.arange(3, 16, 2),
    "pca__n_components": np.arange(3, 9),
}

In [None]:
# (regressor, grid) pairs evaluated in this experiment; the gbr pair is
# currently disabled.
tested_regressors = [(lasso, lasso_params), (knn, knn_params), (svr, svr_params)]

In [None]:
# for alpha in [0.1, 0.2, 0.3, 0.5, 0.7, 0.8, 0.9, 1.0, 1.2, 1.4]:
#     #     for gamma in [0.00001, 0.0001, 0.001, 0.01, 0.1]:

#     lasso = ("lasso", Lasso())
#     lasso_params = dict(lasso__alpha=[alpha])

#     tested_regressors = [
#         (lasso, lasso_params),
#         #             (gbr, gbr_params),
#         #             (knn, knn_params),
#         #         (svr, svr_params),
#     ]

#     # define base steps
#     this_base_steps = [
#         ("channels_filtering", ChannelExtractionTransformer(significant_channels)),
#         ("ica_preprocessing", IcaPreprocessingTransformer()),
#         ("ica", FastICA(random_state=random_state)),
#         (
#             "ica_postprocessing",
#             IcaPostprocessingTransformer(timepoints_count=X_train.shape[-1]),
#         ),
#         ("channel_data_swap", ChannelDataSwap()),
#         ("binning", BinTransformer(step=step_tp)),
#         ("data_channel_swap", ChannelDataSwap()),
#         ("cwt", CwtVectorizer()),
#         ("cwt_feature", CwtFeatureVectorizer(feature_dict=feature_function_dict)),
#         ("postprocessing", PostprocessingTransformer()),
#         ("scaler", StandardScaler()),
#         ("pca", PCA(random_state=random_state)),
#     ]

#     # rate different models
#     results_df = run_experiment(
#         tested_regressors,
#         regressor_params,
#         pipeline_name,
#         X_train,
#         X_test,
#         y_train,
#         y_test,
#         dataset_name,
#         this_base_steps,
#         results_df,
#         function_name=list(feature_function_dict.keys())[0],
#     )

In [None]:
print(f"Featurize with {feature_function_dict.keys()} function")

# Kernel ridge (RBF kernel) sweep: one run_experiment call per (alpha, gamma)
# pair, each appended to results_df through the returned frame.
for alpha in [0.001, 0.01, 0.1, 1]:
    for gamma in [0.0000001, 0.000001, 0.00001, 0.0001, 0.001, 0.01, 0.1]:
        kr = ("kr", KernelRidge(kernel="rbf"))
        kr_params = dict(kr__alpha=[alpha], kr__gamma=[gamma])

        # Keep the sweep in its own list. Rebinding `tested_regressors` here
        # would leave the following cell evaluating only the last kernel
        # ridge configuration instead of the regressors selected for this
        # experiment.
        kr_regressors = [(kr, kr_params)]

        # Transformer instances are rebuilt for every run.
        this_base_steps = [
            ("channels_filtering", ChannelExtractionTransformer(significant_channels)),
            ("ica_preprocessing", IcaPreprocessingTransformer()),
            ("ica", FastICA(random_state=random_state)),
            (
                "ica_postprocessing",
                IcaPostprocessingTransformer(timepoints_count=X_train.shape[-1]),
            ),
            # binning is wrapped between two ChannelDataSwap steps
            ("channel_data_swap", ChannelDataSwap()),
            ("binning", BinTransformer(step=step_tp)),
            ("data_channel_swap", ChannelDataSwap()),
            ("cwt", CwtVectorizer()),
            ("cwt_feature", CwtFeatureVectorizer(feature_dict=feature_function_dict)),
            ("postprocessing", PostprocessingTransformer()),
            ("scaler", StandardScaler()),
            ("pca", PCA(random_state=random_state)),
        ]

        # rate this kernel ridge configuration
        results_df = run_experiment(
            kr_regressors,
            regressor_params,
            pipeline_name,
            X_train,
            X_test,
            y_train,
            y_test,
            dataset_name,
            this_base_steps,
            results_df,
            function_name=list(feature_function_dict.keys())[0],
        )

In [None]:
print(f"Featurize with {feature_function_dict.keys()} function")

# ICA stage: channel selection followed by FastICA with its pre/post steps.
this_base_steps = [
    ("channels_filtering", ChannelExtractionTransformer(significant_channels)),
    ("ica_preprocessing", IcaPreprocessingTransformer()),
    ("ica", FastICA(random_state=random_state)),
    (
        "ica_postprocessing",
        IcaPostprocessingTransformer(timepoints_count=X_train.shape[-1]),
    ),
]
# Binning stage: BinTransformer(step=step_tp) between two ChannelDataSwap steps.
this_base_steps += [
    ("channel_data_swap", ChannelDataSwap()),
    ("binning", BinTransformer(step=step_tp)),
    ("data_channel_swap", ChannelDataSwap()),
]
# CWT stage: vectorise, then apply the functions from feature_function_dict.
this_base_steps += [
    ("cwt", CwtVectorizer()),
    ("cwt_feature", CwtFeatureVectorizer(feature_dict=feature_function_dict)),
    ("postprocessing", PostprocessingTransformer()),
]
# Final stage: standardisation and PCA ahead of the regressor.
this_base_steps += [
    ("scaler", StandardScaler()),
    ("pca", PCA(random_state=random_state)),
]

# Evaluate the configured regressors; the returned frame replaces results_df.
results_df = run_experiment(
    tested_regressors,
    regressor_params,
    pipeline_name,
    X_train,
    X_test,
    y_train,
    y_test,
    dataset_name,
    this_base_steps,
    results_df,
    function_name=list(feature_function_dict)[0],
)

In [None]:
# Show the results table returned by the run_experiment calls above.
results_df

In [None]:
# Second display of the same results table (repeats the previous cell).
results_df

In [None]:
# Persist the results table as a pickle, with the dataset name in the file name.
results_df.to_pickle(f"../../data/regression_ICA_cut_cv3_oct6_{dataset_name}.pkl")

In [None]:
# Persist the results table as CSV, with the dataset name in the file name.
results_df.to_csv(f"../../data/regression_ICA_cut_cv3_oct6_{dataset_name}.csv")