In [22]:
from pathlib import Path

import pandas as pd
import numpy as np

import biopsykit as bp
from sklearn.preprocessing import MinMaxScaler, StandardScaler, FunctionTransformer


#Feature Selection
from sklearn.feature_selection import SelectKBest, RFE, f_regression, mutual_info_regression

#Classification
from sklearn.svm import SVR

# Regression
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import ElasticNet
from sklearn.linear_model import Ridge
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import RandomForestRegressor


# Cross-Validation
from sklearn.model_selection import GroupKFold

from biopsykit.classification.model_selection import SklearnPipelinePermuter


%matplotlib widget
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [3]:
save_results = True

In [4]:
data_path = Path("../../results/data")
data_path

WindowsPath('../../results/data')

In [5]:
models_path = Path("../../results/models")

In [6]:
input_data = pd.read_csv(data_path.joinpath("train_data_q_wave.csv"), index_col=[0,1,2,3,4])
input_data

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,rr_interval_ms_estimated,forounzafar2018,martinez2004,vanlien2013-32-ms,vanlien2013-34-ms,vanlien2013-36-ms,vanlien2013-38-ms,vanlien2013-40-ms,vanlien2013-42-ms
Unnamed: 0_level_1,participant,phase,heartbeat_id_reference,q_wave_onset_sample_reference,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
0,GDN0005,HoldingBreath,0,260.0,850.0,276.0,262.0,266.0,264.0,262.0,260.0,258.0,256.0
1,GDN0005,HoldingBreath,1,260.0,778.0,278.0,262.0,266.0,264.0,262.0,260.0,258.0,256.0
2,GDN0005,HoldingBreath,3,222.0,746.0,236.0,224.0,230.0,228.0,226.0,224.0,222.0,220.0
3,GDN0005,HoldingBreath,4,220.0,766.0,236.0,222.0,230.0,228.0,226.0,224.0,222.0,220.0
4,GDN0005,HoldingBreath,5,228.0,790.0,242.0,230.0,236.0,234.0,232.0,230.0,228.0,226.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
11498,VP_032,Talk,38,153.0,596.0,188.0,154.0,179.0,177.0,175.0,173.0,171.0,169.0
11499,VP_032,Talk,39,172.0,619.0,183.0,173.0,177.0,175.0,173.0,171.0,169.0,167.0
11500,VP_032,Talk,40,181.0,680.0,193.0,182.0,185.0,183.0,181.0,179.0,177.0,175.0
11501,VP_032,Talk,41,200.0,719.0,210.0,201.0,206.0,204.0,202.0,200.0,198.0,196.0


In [7]:
print(f"Min data value: {input_data.values.min()}\nMax data value: {input_data.values.max()}")

Min data value: 23.0
Max data value: 1534.0


In [8]:
columns=input_data.columns
columns

Index(['rr_interval_ms_estimated', 'forounzafar2018', 'martinez2004',
       'vanlien2013-32-ms', 'vanlien2013-34-ms', 'vanlien2013-36-ms',
       'vanlien2013-38-ms', 'vanlien2013-40-ms', 'vanlien2013-42-ms'],
      dtype='object')

In [9]:
X, y, groups, group_keys = bp.classification.utils.prepare_df_sklearn(data=input_data, label_col="q_wave_onset_sample_reference", subject_col="participant", print_summary=True)

Shape of X: (11349, 9); shape of y: (11349,); number of groups: 39, class prevalence: [  1   2   1   6   2   8   4  15  21  14  26  30  35  50  48  55  57  52
  65  61  79  73  59  50  51  49  43  44  42  25  40  30  38  30  38  49
  33  32  31  31  22  38  29  43  47  34  21  47  48  41  32  31  36  36
  39  47  52  29  29  40  51  21  50  31  43  28  32  27  45  23  39  33
  47  22  45  25  35  29  34  37  53  23  68  24  66  38  59  36  62  31
  72  29  83  30  69  26  64  20  95  27  78  22  67  20  64  28  69  21
  92  18  81  19  91  25  91  19  81  27  69  25  81  15  90  14 101  16
  94  14 116  18 103  25  96  21  78  17  95  12  89  25  91  24  76  13
  82  22  95  11  74  20  82  18  82  11  77  14  83  19  96  12 100  14
 100  11  89  14  87  10  97   9  99   8  98   7 107   7 108   5 106   5
 110   3  89   4  93   4  86  10  82   7  73   2  68   1  75   2  83   3
  84   4  70   4  82  70   5  56   3  75   1  75   3  55   4  59   3  70
   2  69  68   1  66   3  56  59  72  

In [10]:
group_keys

Index(['GDN0005', 'GDN0006', 'GDN0007', 'GDN0008', 'GDN0009', 'GDN0010',
       'GDN0011', 'GDN0012', 'GDN0013', 'GDN0014', 'GDN0016', 'GDN0017',
       'GDN0018', 'GDN0019', 'GDN0020', 'GDN0021', 'GDN0022', 'GDN0023',
       'GDN0024', 'GDN0025', 'GDN0027', 'GDN0028', 'GDN0029', 'GDN0030',
       'VP_001', 'VP_002', 'VP_003', 'VP_004', 'VP_005', 'VP_020', 'VP_022',
       'VP_023', 'VP_026', 'VP_027', 'VP_028', 'VP_029', 'VP_030', 'VP_031',
       'VP_032'],
      dtype='object')

In [23]:
model_dict = {
    "scaler": {"StandardScaler": StandardScaler(), "MinMaxScaler": MinMaxScaler()},
    "reduce_dim": {"SelectKBest": SelectKBest(), "RFE": RFE(SVR(kernel="linear")), "DummyFeatureSelector": FunctionTransformer()},
    #"reduce_dim": {"SelectKBest": SelectKBest(), "DummyFeatureSelector": FunctionTransformer()},
    #"reduce_dim": {"RFE": RFE(SVR(kernel="linear"))},
    "clf": {
        "LinearRegressor": LinearRegression(),
        "ElasticNetRegressor": ElasticNet(),
        "RidgeRegressor": Ridge(),
        #"KNeighborsRegressor": KNeighborsRegressor(),
        #"RandomForestRegressor": RandomForestRegressor(n_jobs=10),
    },
}

In [24]:
params_dict = {
    "StandardScaler": None,
    "MinMaxScaler": None,
    "SelectKBest": {
        "score_func": [f_regression, mutual_info_regression],
        "k": [2, 4, 6, 8, 10],
        },
    "SVR": {"C": [0.1, 1, 10]},
    "RFE": {
        "n_features_to_select": [0.4, 0.5, 0.6],
        "step": [1,2,3],
    },
    "DummyFeatureSelector": None,
    "LinearRegressor": {

    },
    "ElasticNetRegressor": {
        "alpha": [0.01, 0.1, 1, 10, 100],
        "l1_ratio": [0.1, 0.3, 0.5, 0.7, 0.9],        
    },
    "RidgeRegressor": {
        "alpha": [0.01, 0.1, 1, 10, 100],
    }
    #"KNeighborsRegressor": {
    #    "n_neighbors": [8,9,10,11,12,13,14],
    #    "weights": ["uniform", "distance"],
    #    "p": [1,2],
    #    },
    #"RandomForestRegressor": {
    #    "n_estimators": [200, 300],
    #    "min_samples_split": [2, 5, 10, 20, 30, 40],
    #    "min_samples_leaf": [1, 2, 4, 10, 20, 30],
    #    "max_depth": [None, 40, 60, 80, 100, 120],
    #    #"max_features": ["sqrt", "log2", None],
    #},
}

In [25]:
pipeline_permuter = SklearnPipelinePermuter(
    model_dict=model_dict, param_dict=params_dict
)

In [26]:
outer_cv = GroupKFold(n_splits=5)
inner_cv = GroupKFold(n_splits=5)

pipeline_permuter.fit(X=X, y=y, outer_cv=outer_cv, inner_cv=inner_cv, scoring="neg_mean_absolute_error", groups=groups)

Pipeline Combinations:   0%|          | 0/18 [00:00<?, ?it/s]

### Running hyperparameter search for pipeline: (('scaler', 'StandardScaler'), ('reduce_dim', 'SelectKBest'), ('clf', 'LinearRegressor')) with 1 parameter grid(s):
Parameter grid #0 ({'search_method': 'grid'}): {'reduce_dim__score_func': [<function f_regression at 0x000001B6630B5760>, <function mutual_info_regression at 0x000001B6630A3600>], 'reduce_dim__k': [2, 4, 6, 8, 10]}


Outer CV:   0%|          | 0/5 [00:00<?, ?it/s]

Fitting 5 folds for each of 10 candidates, totalling 50 fits




Fitting 5 folds for each of 10 candidates, totalling 50 fits
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Fitting 5 folds for each of 10 candidates, totalling 50 fits
Fitting 5 folds for each of 10 candidates, totalling 50 fits


### Running hyperparameter search for pipeline: (('scaler', 'StandardScaler'), ('reduce_dim', 'SelectKBest'), ('clf', 'ElasticNetRegressor')) with 1 parameter grid(s):
Parameter grid #0 ({'search_method': 'grid'}): {'clf__alpha': [0.01, 0.1, 1, 10, 100], 'clf__l1_ratio': [0.1, 0.3, 0.5, 0.7, 0.9], 'reduce_dim__score_func': [<function f_regression at 0x000001B6630B5760>, <function mutual_info_regression at 0x000001B6630A3600>], 'reduce_dim__k': [2, 4, 6, 8, 10]}




Outer CV:   0%|          | 0/5 [00:00<?, ?it/s]

Fitting 5 folds for each of 250 candidates, totalling 1250 fits




Fitting 5 folds for each of 250 candidates, totalling 1250 fits




Fitting 5 folds for each of 250 candidates, totalling 1250 fits




Fitting 5 folds for each of 250 candidates, totalling 1250 fits




Fitting 5 folds for each of 250 candidates, totalling 1250 fits


### Running hyperparameter search for pipeline: (('scaler', 'StandardScaler'), ('reduce_dim', 'SelectKBest'), ('clf', 'RidgeRegressor')) with 1 parameter grid(s):
Parameter grid #0 ({'search_method': 'grid'}): {'clf__alpha': [0.01, 0.1, 1, 10, 100], 'reduce_dim__score_func': [<function f_regression at 0x000001B6630B5760>, <function mutual_info_regression at 0x000001B6630A3600>], 'reduce_dim__k': [2, 4, 6, 8, 10]}




Outer CV:   0%|          | 0/5 [00:00<?, ?it/s]

Fitting 5 folds for each of 50 candidates, totalling 250 fits




Fitting 5 folds for each of 50 candidates, totalling 250 fits




Fitting 5 folds for each of 50 candidates, totalling 250 fits




Fitting 5 folds for each of 50 candidates, totalling 250 fits




Fitting 5 folds for each of 50 candidates, totalling 250 fits


### Running hyperparameter search for pipeline: (('scaler', 'StandardScaler'), ('reduce_dim', 'RFE'), ('clf', 'LinearRegressor')) with 1 parameter grid(s):
Parameter grid #0 ({'search_method': 'grid'}): {'reduce_dim__n_features_to_select': [0.4, 0.5, 0.6], 'reduce_dim__step': [1, 2, 3]}




Outer CV:   0%|          | 0/5 [00:00<?, ?it/s]



Fitting 5 folds for each of 9 candidates, totalling 45 fits
Fitting 5 folds for each of 9 candidates, totalling 45 fits




Fitting 5 folds for each of 9 candidates, totalling 45 fits
Fitting 5 folds for each of 9 candidates, totalling 45 fits
Fitting 5 folds for each of 9 candidates, totalling 45 fits


### Running hyperparameter search for pipeline: (('scaler', 'StandardScaler'), ('reduce_dim', 'RFE'), ('clf', 'ElasticNetRegressor')) with 1 parameter grid(s):
Parameter grid #0 ({'search_method': 'grid'}): {'clf__alpha': [0.01, 0.1, 1, 10, 100], 'clf__l1_ratio': [0.1, 0.3, 0.5, 0.7, 0.9], 'reduce_dim__n_features_to_select': [0.4, 0.5, 0.6], 'reduce_dim__step': [1, 2, 3]}




Outer CV:   0%|          | 0/5 [00:00<?, ?it/s]

Fitting 5 folds for each of 225 candidates, totalling 1125 fits




Fitting 5 folds for each of 225 candidates, totalling 1125 fits




Fitting 5 folds for each of 225 candidates, totalling 1125 fits




Fitting 5 folds for each of 225 candidates, totalling 1125 fits


  _data = np.array(data, dtype=dtype, copy=copy,


Fitting 5 folds for each of 225 candidates, totalling 1125 fits


### Running hyperparameter search for pipeline: (('scaler', 'StandardScaler'), ('reduce_dim', 'RFE'), ('clf', 'RidgeRegressor')) with 1 parameter grid(s):
Parameter grid #0 ({'search_method': 'grid'}): {'clf__alpha': [0.01, 0.1, 1, 10, 100], 'reduce_dim__n_features_to_select': [0.4, 0.5, 0.6], 'reduce_dim__step': [1, 2, 3]}




Outer CV:   0%|          | 0/5 [00:00<?, ?it/s]

Fitting 5 folds for each of 45 candidates, totalling 225 fits




Fitting 5 folds for each of 45 candidates, totalling 225 fits




Fitting 5 folds for each of 45 candidates, totalling 225 fits




Fitting 5 folds for each of 45 candidates, totalling 225 fits




Fitting 5 folds for each of 45 candidates, totalling 225 fits


### Running hyperparameter search for pipeline: (('scaler', 'StandardScaler'), ('reduce_dim', 'DummyFeatureSelector'), ('clf', 'LinearRegressor')) with 1 parameter grid(s):
Parameter grid #0 ({'search_method': 'grid'}): {}




Outer CV:   0%|          | 0/5 [00:00<?, ?it/s]

Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits






### Running hyperparameter search for pipeline: (('scaler', 'StandardScaler'), ('reduce_dim', 'DummyFeatureSelector'), ('clf', 'ElasticNetRegressor')) with 1 parameter grid(s):
Parameter grid #0 ({'search_method': 'grid'}): {'clf__alpha': [0.01, 0.1, 1, 10, 100], 'clf__l1_ratio': [0.1, 0.3, 0.5, 0.7, 0.9]}




Outer CV:   0%|          | 0/5 [00:00<?, ?it/s]

Fitting 5 folds for each of 25 candidates, totalling 125 fits




Fitting 5 folds for each of 25 candidates, totalling 125 fits




Fitting 5 folds for each of 25 candidates, totalling 125 fits




Fitting 5 folds for each of 25 candidates, totalling 125 fits


  model = cd_fast.enet_coordinate_descent(


Fitting 5 folds for each of 25 candidates, totalling 125 fits


### Running hyperparameter search for pipeline: (('scaler', 'StandardScaler'), ('reduce_dim', 'DummyFeatureSelector'), ('clf', 'RidgeRegressor')) with 1 parameter grid(s):
Parameter grid #0 ({'search_method': 'grid'}): {'clf__alpha': [0.01, 0.1, 1, 10, 100]}




Outer CV:   0%|          | 0/5 [00:00<?, ?it/s]



Fitting 5 folds for each of 5 candidates, totalling 25 fits
Fitting 5 folds for each of 5 candidates, totalling 25 fits
Fitting 5 folds for each of 5 candidates, totalling 25 fits




Fitting 5 folds for each of 5 candidates, totalling 25 fits
Fitting 5 folds for each of 5 candidates, totalling 25 fits


### Running hyperparameter search for pipeline: (('scaler', 'MinMaxScaler'), ('reduce_dim', 'SelectKBest'), ('clf', 'LinearRegressor')) with 1 parameter grid(s):
Parameter grid #0 ({'search_method': 'grid'}): {'reduce_dim__score_func': [<function f_regression at 0x000001B6630B5760>, <function mutual_info_regression at 0x000001B6630A3600>], 'reduce_dim__k': [2, 4, 6, 8, 10]}




Outer CV:   0%|          | 0/5 [00:00<?, ?it/s]

Fitting 5 folds for each of 10 candidates, totalling 50 fits




Fitting 5 folds for each of 10 candidates, totalling 50 fits




Fitting 5 folds for each of 10 candidates, totalling 50 fits




Fitting 5 folds for each of 10 candidates, totalling 50 fits




Fitting 5 folds for each of 10 candidates, totalling 50 fits


### Running hyperparameter search for pipeline: (('scaler', 'MinMaxScaler'), ('reduce_dim', 'SelectKBest'), ('clf', 'ElasticNetRegressor')) with 1 parameter grid(s):
Parameter grid #0 ({'search_method': 'grid'}): {'clf__alpha': [0.01, 0.1, 1, 10, 100], 'clf__l1_ratio': [0.1, 0.3, 0.5, 0.7, 0.9], 'reduce_dim__score_func': [<function f_regression at 0x000001B6630B5760>, <function mutual_info_regression at 0x000001B6630A3600>], 'reduce_dim__k': [2, 4, 6, 8, 10]}




Outer CV:   0%|          | 0/5 [00:00<?, ?it/s]

Fitting 5 folds for each of 250 candidates, totalling 1250 fits


  _data = np.array(data, dtype=dtype, copy=copy,


Fitting 5 folds for each of 250 candidates, totalling 1250 fits


  _data = np.array(data, dtype=dtype, copy=copy,


Fitting 5 folds for each of 250 candidates, totalling 1250 fits




Fitting 5 folds for each of 250 candidates, totalling 1250 fits


  _data = np.array(data, dtype=dtype, copy=copy,


Fitting 5 folds for each of 250 candidates, totalling 1250 fits


### Running hyperparameter search for pipeline: (('scaler', 'MinMaxScaler'), ('reduce_dim', 'SelectKBest'), ('clf', 'RidgeRegressor')) with 1 parameter grid(s):
Parameter grid #0 ({'search_method': 'grid'}): {'clf__alpha': [0.01, 0.1, 1, 10, 100], 'reduce_dim__score_func': [<function f_regression at 0x000001B6630B5760>, <function mutual_info_regression at 0x000001B6630A3600>], 'reduce_dim__k': [2, 4, 6, 8, 10]}




Outer CV:   0%|          | 0/5 [00:00<?, ?it/s]

Fitting 5 folds for each of 50 candidates, totalling 250 fits




Fitting 5 folds for each of 50 candidates, totalling 250 fits




Fitting 5 folds for each of 50 candidates, totalling 250 fits




Fitting 5 folds for each of 50 candidates, totalling 250 fits




Fitting 5 folds for each of 50 candidates, totalling 250 fits


### Running hyperparameter search for pipeline: (('scaler', 'MinMaxScaler'), ('reduce_dim', 'RFE'), ('clf', 'LinearRegressor')) with 1 parameter grid(s):
Parameter grid #0 ({'search_method': 'grid'}): {'reduce_dim__n_features_to_select': [0.4, 0.5, 0.6], 'reduce_dim__step': [1, 2, 3]}




Outer CV:   0%|          | 0/5 [00:00<?, ?it/s]

Fitting 5 folds for each of 9 candidates, totalling 45 fits




Fitting 5 folds for each of 9 candidates, totalling 45 fits




Fitting 5 folds for each of 9 candidates, totalling 45 fits




Fitting 5 folds for each of 9 candidates, totalling 45 fits




Fitting 5 folds for each of 9 candidates, totalling 45 fits


### Running hyperparameter search for pipeline: (('scaler', 'MinMaxScaler'), ('reduce_dim', 'RFE'), ('clf', 'ElasticNetRegressor')) with 1 parameter grid(s):
Parameter grid #0 ({'search_method': 'grid'}): {'clf__alpha': [0.01, 0.1, 1, 10, 100], 'clf__l1_ratio': [0.1, 0.3, 0.5, 0.7, 0.9], 'reduce_dim__n_features_to_select': [0.4, 0.5, 0.6], 'reduce_dim__step': [1, 2, 3]}




Outer CV:   0%|          | 0/5 [00:00<?, ?it/s]

Fitting 5 folds for each of 225 candidates, totalling 1125 fits




Fitting 5 folds for each of 225 candidates, totalling 1125 fits




Fitting 5 folds for each of 225 candidates, totalling 1125 fits


  _data = np.array(data, dtype=dtype, copy=copy,


Fitting 5 folds for each of 225 candidates, totalling 1125 fits




Fitting 5 folds for each of 225 candidates, totalling 1125 fits


  _data = np.array(data, dtype=dtype, copy=copy,




### Running hyperparameter search for pipeline: (('scaler', 'MinMaxScaler'), ('reduce_dim', 'RFE'), ('clf', 'RidgeRegressor')) with 1 parameter grid(s):
Parameter grid #0 ({'search_method': 'grid'}): {'clf__alpha': [0.01, 0.1, 1, 10, 100], 'reduce_dim__n_features_to_select': [0.4, 0.5, 0.6], 'reduce_dim__step': [1, 2, 3]}




Outer CV:   0%|          | 0/5 [00:00<?, ?it/s]

Fitting 5 folds for each of 45 candidates, totalling 225 fits




Fitting 5 folds for each of 45 candidates, totalling 225 fits




Fitting 5 folds for each of 45 candidates, totalling 225 fits




Fitting 5 folds for each of 45 candidates, totalling 225 fits




Fitting 5 folds for each of 45 candidates, totalling 225 fits


### Running hyperparameter search for pipeline: (('scaler', 'MinMaxScaler'), ('reduce_dim', 'DummyFeatureSelector'), ('clf', 'LinearRegressor')) with 1 parameter grid(s):
Parameter grid #0 ({'search_method': 'grid'}): {}




Outer CV:   0%|          | 0/5 [00:00<?, ?it/s]



Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits


### Running hyperparameter search for pipeline: (('scaler', 'MinMaxScaler'), ('reduce_dim', 'DummyFeatureSelector'), ('clf', 'ElasticNetRegressor')) with 1 parameter grid(s):
Parameter grid #0 ({'search_method': 'grid'}): {'clf__alpha': [0.01, 0.1, 1, 10, 100], 'clf__l1_ratio': [0.1, 0.3, 0.5, 0.7, 0.9]}




Outer CV:   0%|          | 0/5 [00:00<?, ?it/s]

Fitting 5 folds for each of 25 candidates, totalling 125 fits




Fitting 5 folds for each of 25 candidates, totalling 125 fits




Fitting 5 folds for each of 25 candidates, totalling 125 fits




Fitting 5 folds for each of 25 candidates, totalling 125 fits




Fitting 5 folds for each of 25 candidates, totalling 125 fits


### Running hyperparameter search for pipeline: (('scaler', 'MinMaxScaler'), ('reduce_dim', 'DummyFeatureSelector'), ('clf', 'RidgeRegressor')) with 1 parameter grid(s):
Parameter grid #0 ({'search_method': 'grid'}): {'clf__alpha': [0.01, 0.1, 1, 10, 100]}




Outer CV:   0%|          | 0/5 [00:00<?, ?it/s]



Fitting 5 folds for each of 5 candidates, totalling 25 fits
Fitting 5 folds for each of 5 candidates, totalling 25 fits
Fitting 5 folds for each of 5 candidates, totalling 25 fits




Fitting 5 folds for each of 5 candidates, totalling 25 fits
Fitting 5 folds for each of 5 candidates, totalling 25 fits




In [27]:
pipeline_permuter.metric_summary()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,true_labels,true_labels_folds,predicted_labels,predicted_labels_folds,train_indices,train_indices_folds,test_indices,test_indices_folds,mean_test_neg_mean_absolute_error,std_test_neg_mean_absolute_error,test_neg_mean_absolute_error_fold_0,test_neg_mean_absolute_error_fold_1,test_neg_mean_absolute_error_fold_2,test_neg_mean_absolute_error_fold_3,test_neg_mean_absolute_error_fold_4
pipeline_scaler,pipeline_reduce_dim,pipeline_clf,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
StandardScaler,SelectKBest,LinearRegressor,"[260.0, 260.0, 222.0, 220.0, 228.0, 236.0, 226...","[[260.0, 260.0, 222.0, 220.0, 228.0, 236.0, 22...","[266.2595336822087, 268.1621393151938, 226.218...","[[266.2595336822087, 268.1621393151938, 226.21...","[316, 317, 318, 319, 320, 321, 322, 323, 324, ...","[[316, 317, 318, 319, 320, 321, 322, 323, 324,...","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...","[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13...",3.330206,0.757054,3.917414,4.511707,2.53226,2.936977,2.752669
StandardScaler,SelectKBest,ElasticNetRegressor,"[260.0, 260.0, 222.0, 220.0, 228.0, 236.0, 226...","[[260.0, 260.0, 222.0, 220.0, 228.0, 236.0, 22...","[266.18413886531175, 267.91841254057977, 226.3...","[[266.18413886531175, 267.91841254057977, 226....","[316, 317, 318, 319, 320, 321, 322, 323, 324, ...","[[316, 317, 318, 319, 320, 321, 322, 323, 324,...","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...","[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13...",3.312061,0.783536,3.82333,4.594794,2.469095,2.83362,2.839466
StandardScaler,SelectKBest,RidgeRegressor,"[260.0, 260.0, 222.0, 220.0, 228.0, 236.0, 226...","[[260.0, 260.0, 222.0, 220.0, 228.0, 236.0, 22...","[266.2296027760496, 268.05962917099976, 226.28...","[[266.2296027760496, 268.05962917099976, 226.2...","[316, 317, 318, 319, 320, 321, 322, 323, 324, ...","[[316, 317, 318, 319, 320, 321, 322, 323, 324,...","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...","[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13...",3.309016,0.762966,3.870164,4.511821,2.460958,2.922914,2.779224
StandardScaler,RFE,LinearRegressor,"[260.0, 260.0, 222.0, 220.0, 228.0, 236.0, 226...","[[260.0, 260.0, 222.0, 220.0, 228.0, 236.0, 22...","[266.2871479318242, 267.6609138836928, 227.575...","[[266.2871479318242, 267.6609138836928, 227.57...","[316, 317, 318, 319, 320, 321, 322, 323, 324, ...","[[316, 317, 318, 319, 320, 321, 322, 323, 324,...","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...","[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13...",3.757041,0.93285,4.119654,5.102293,2.574206,4.155178,2.833872
StandardScaler,RFE,ElasticNetRegressor,"[260.0, 260.0, 222.0, 220.0, 228.0, 236.0, 226...","[[260.0, 260.0, 222.0, 220.0, 228.0, 236.0, 22...","[265.98085324242305, 266.7594852595584, 228.61...","[[265.98085324242305, 266.7594852595584, 228.6...","[316, 317, 318, 319, 320, 321, 322, 323, 324, ...","[[316, 317, 318, 319, 320, 321, 322, 323, 324,...","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...","[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13...",3.837134,0.951108,4.288469,5.242182,2.595018,4.083922,2.976078
StandardScaler,RFE,RidgeRegressor,"[260.0, 260.0, 222.0, 220.0, 228.0, 236.0, 226...","[[260.0, 260.0, 222.0, 220.0, 228.0, 236.0, 22...","[266.2871346382257, 267.66066717410666, 227.57...","[[266.2871346382257, 267.66066717410666, 227.5...","[316, 317, 318, 319, 320, 321, 322, 323, 324, ...","[[316, 317, 318, 319, 320, 321, 322, 323, 324,...","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...","[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13...",3.775262,0.902513,4.119739,5.102561,2.574249,4.105132,2.974627
StandardScaler,DummyFeatureSelector,LinearRegressor,"[260.0, 260.0, 222.0, 220.0, 228.0, 236.0, 226...","[[260.0, 260.0, 222.0, 220.0, 228.0, 236.0, 22...","[266.2891198718378, 267.59949380049784, 227.56...","[[266.2891198718378, 267.59949380049784, 227.5...","[316, 317, 318, 319, 320, 321, 322, 323, 324, ...","[[316, 317, 318, 319, 320, 321, 322, 323, 324,...","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...","[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13...",3.760605,0.9315,4.116792,5.101517,2.58346,4.16872,2.832534
StandardScaler,DummyFeatureSelector,ElasticNetRegressor,"[260.0, 260.0, 222.0, 220.0, 228.0, 236.0, 226...","[[260.0, 260.0, 222.0, 220.0, 228.0, 236.0, 22...","[266.2800687441125, 267.36973381650154, 228.15...","[[266.2800687441125, 267.36973381650154, 228.1...","[316, 317, 318, 319, 320, 321, 322, 323, 324, ...","[[316, 317, 318, 319, 320, 321, 322, 323, 324,...","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...","[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13...",3.943395,0.996654,4.249597,5.396331,2.729212,4.425598,2.916235
StandardScaler,DummyFeatureSelector,RidgeRegressor,"[260.0, 260.0, 222.0, 220.0, 228.0, 236.0, 226...","[[260.0, 260.0, 222.0, 220.0, 228.0, 236.0, 22...","[266.2891106572623, 267.59918237140107, 227.56...","[[266.2891106572623, 267.59918237140107, 227.5...","[316, 317, 318, 319, 320, 321, 322, 323, 324, ...","[[316, 317, 318, 319, 320, 321, 322, 323, 324,...","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...","[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13...",3.76082,0.931615,4.116949,5.101942,2.583556,4.169017,2.832637
MinMaxScaler,SelectKBest,LinearRegressor,"[260.0, 260.0, 222.0, 220.0, 228.0, 236.0, 226...","[[260.0, 260.0, 222.0, 220.0, 228.0, 236.0, 22...","[266.25953368220866, 268.1621393151937, 226.21...","[[266.25953368220866, 268.1621393151937, 226.2...","[316, 317, 318, 319, 320, 321, 322, 323, 324, ...","[[316, 317, 318, 319, 320, 321, 322, 323, 324,...","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...","[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13...",3.330206,0.757054,3.917414,4.511707,2.53226,2.936977,2.752669


In [28]:
pipeline_permuter.best_hyperparameter_pipeline()

  .agg(["mean", "std"])


Unnamed: 0_level_0,mean_test_neg_mean_absolute_error,param_clf__alpha,param_clf__l1_ratio,param_reduce_dim__k,param_reduce_dim__score_func,params,rank_test_neg_mean_absolute_error,split0_test_neg_mean_absolute_error,split1_test_neg_mean_absolute_error,split2_test_neg_mean_absolute_error,split3_test_neg_mean_absolute_error,split4_test_neg_mean_absolute_error,std_test_neg_mean_absolute_error
outer_fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
0,-3.265984,0.01,0.9,2.0,<function mutual_info_regression at 0x000001B6...,"{'clf__alpha': 0.01, 'clf__l1_ratio': 0.9, 're...",4,-2.853479,-2.277923,-3.844315,-3.642922,-3.71128,0.603533
1,-2.976049,0.01,0.9,2.0,<function mutual_info_regression at 0x000001B6...,"{'clf__alpha': 0.01, 'clf__l1_ratio': 0.9, 're...",1,-2.919214,-1.338394,-3.896358,-1.73175,-4.994527,1.353118
2,-3.578102,0.01,0.9,2.0,<function mutual_info_regression at 0x000001B6...,"{'clf__alpha': 0.01, 'clf__l1_ratio': 0.9, 're...",1,-4.291267,-2.084471,-2.755215,-3.277813,-5.481745,1.194212
3,-3.351185,0.01,0.9,2.0,<function mutual_info_regression at 0x000001B6...,"{'clf__alpha': 0.01, 'clf__l1_ratio': 0.9, 're...",1,-2.236238,-2.936794,-5.173064,-2.959586,-3.450244,0.98967
4,-3.502172,0.01,0.9,2.0,<function mutual_info_regression at 0x000001B6...,"{'clf__alpha': 0.01, 'clf__l1_ratio': 0.9, 're...",3,-1.450456,-4.064515,-3.561047,-2.998853,-5.435988,1.305543


In [29]:
print(f"Save results: {save_results}")

Save results: True


In [30]:
if save_results:
    pipeline_permuter.to_pickle(models_path.joinpath("linear_models_q_peak_rr.pkl"))