# Traditional machine learning models for age prediction on EEG data

This notebook uses traditional ML methods to predict the age of infants using EEG data. The EEG data is preprocessed.

In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from epodium.config import Config
from epodium.loaders import RegressionsLoader
from epodium.ml import Regressions
from epodium.nn import NnOptimizer

config = Config()

2022-08-14 05:14:56.715977: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-08-14 05:14:56.716007: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


## Load preprocessed data

Steps:

1. Get all the files in the output folder
2. Get the full paths of the files without the .h5 or .csv extensions
3. Load the features from the .h5 files
4. Assign the proper labels to the files based on the metadata
5. Assign the subject's code to the files based on the metadata
6. Split the data into a training, validation and test set (NOTE: make sure all data points from the same subject end up in the same set, so that no subject is shared between sets)

Here we seem to have a problem: the features are multi-dimensional, and from what I see in the code they were not reduced.

In [21]:
# Read the extracted-feature table of a single file so its layout can be inspected.
# NOTE(review): the path is hard-coded relative to the notebook's location;
# presumably it should be derived from `config` like the loader further down — confirm.
ef_ex =pd.read_hdf('../../volume-ceph/preprocessed-ml/extracted_features_001_29_jc_mmn.h5')
ef_ex

Unnamed: 0,hjorth_complexity_O2,hjorth_complexity_O1,hjorth_complexity_OZ,hjorth_complexity_PZ,hjorth_complexity_P4,hjorth_complexity_CP4,hjorth_complexity_P8,hjorth_complexity_C4,hjorth_complexity_TP8,hjorth_complexity_T8,...,kurtosis_FZ,kurtosis_F4,kurtosis_F8,kurtosis_T7,kurtosis_FT7,kurtosis_FC3,kurtosis_F3,kurtosis_FP2,kurtosis_F7,kurtosis_FP1
0,3.844394,5.285615,4.714020,4.544710,1.658952,4.459770,3.651870,2.953397,3.291691,2.914745,...,2.055386,2.303099,2.542408,3.068982,3.418724,2.657893,2.372468,3.185538,2.092215,3.008556
1,3.417027,2.626444,4.403926,3.043164,2.034024,2.411991,3.348194,3.159142,2.899417,4.565265,...,2.670512,3.148539,2.688797,2.922697,3.541677,3.702186,3.218444,4.924635,3.751193,3.726647
2,5.507933,5.908041,6.899218,6.282867,2.969392,6.678296,5.882777,6.328652,3.454919,6.847669,...,2.580961,2.691047,2.993296,3.024504,3.359949,2.963878,3.422704,2.830353,3.541482,2.820758
3,4.378624,3.176170,2.945529,4.003992,1.537418,3.072991,3.133565,4.155132,2.842107,2.998760,...,2.427195,3.072564,2.810491,3.044418,2.455291,2.491790,3.226719,2.430481,2.523738,2.288575
4,2.595716,2.823651,3.383794,2.899973,1.924664,3.105019,4.375562,3.215412,2.416628,3.819207,...,2.689158,2.981971,2.585572,2.913774,2.413742,2.018164,2.809000,2.670755,2.437741,2.699543
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
369,3.788516,5.015761,3.622249,5.324916,1.578261,5.441124,5.961255,3.377872,2.403643,8.646042,...,2.555178,2.582785,2.496452,2.627511,3.361375,2.446408,2.929544,3.162804,2.626943,2.396528
370,5.694019,4.978446,5.758974,4.023781,1.946865,5.119463,5.793373,3.489983,3.885625,3.133482,...,2.716574,2.734104,2.202228,2.096609,2.094368,2.364857,1.836433,1.703992,1.977626,1.606735
371,4.520942,2.506913,4.147480,4.322664,1.730429,3.097054,3.243782,4.310385,2.388814,5.321362,...,5.439471,2.684431,2.691150,5.740503,3.752384,2.974340,3.547407,4.102953,3.549382,2.297193
372,4.483920,2.912305,5.364835,10.737846,2.950583,3.600636,2.438044,4.722172,1.872033,4.795071,...,3.781560,2.498872,3.458072,2.476443,2.374693,2.490436,3.049023,2.752978,2.950096,2.023796


In [23]:
ef_ex.shape

(374, 450)

In [25]:
# Read the extracted-feature table of a second file.
# NOTE(review): hard-coded relative path; presumably this should be derived
# from `config` rather than spelled out here — confirm.
ef_ex2 =pd.read_hdf('../../volume-ceph/preprocessed-ml/extracted_features_742-421-41m-mr-mmn39.h5')
ef_ex2

Unnamed: 0,hjorth_complexity_O2,hjorth_complexity_O1,hjorth_complexity_OZ,hjorth_complexity_PZ,hjorth_complexity_P4,hjorth_complexity_CP4,hjorth_complexity_P8,hjorth_complexity_C4,hjorth_complexity_TP8,hjorth_complexity_T8,...,kurtosis_FZ,kurtosis_F4,kurtosis_F8,kurtosis_T7,kurtosis_FT7,kurtosis_FC3,kurtosis_F3,kurtosis_FP2,kurtosis_F7,kurtosis_FP1
0,7.120505,3.730505,3.205375,3.529707,3.445155,3.532043,4.100093,3.047061,3.213816,3.851944,...,2.399696,6.176483,3.124231,2.249034,2.059224,3.278414,2.542662,3.239702,3.177960,3.115319
1,5.566804,4.194747,5.452748,4.920088,2.759034,3.282675,7.387417,2.953358,3.860556,4.568681,...,2.620529,2.444163,3.247432,2.426343,2.239892,2.861497,2.322857,2.623000,3.227037,3.819862
2,3.006311,7.328448,4.785770,4.068938,2.300908,6.936230,5.945792,2.879884,2.776556,8.107701,...,3.169681,2.256984,2.858533,2.133400,2.137958,2.634927,2.332557,2.798153,2.382952,2.626738
3,2.939739,2.679574,3.140329,3.602519,2.480139,3.737771,3.495600,4.560246,4.933089,9.273948,...,2.971585,3.991573,2.609726,2.063039,2.559048,2.452298,2.310617,2.410712,1.924318,2.442920
4,3.401915,2.684335,4.923735,5.330649,4.008512,4.505154,3.344369,4.876172,4.154387,5.325330,...,3.636426,2.612162,2.347731,3.301192,2.597156,2.944806,2.062676,3.018968,2.293102,2.368643
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1473,2.890692,3.448004,3.570491,3.772927,2.480316,3.177406,2.318948,2.746175,2.339679,4.105380,...,2.359276,6.614177,3.014793,3.135483,2.086609,2.772631,2.740312,5.043128,5.457685,9.711739
1474,2.921146,3.324517,3.498953,2.927086,2.792203,3.008127,4.481938,2.724597,5.270908,3.604442,...,3.870975,2.558482,2.913526,2.537148,2.128364,2.370526,2.014445,2.726510,2.313950,7.310882
1475,3.851551,3.329561,4.851749,5.490593,2.328013,3.324403,5.392880,3.696231,3.889106,3.913235,...,3.428506,3.612941,2.799006,2.608251,2.601824,2.435638,2.432502,2.339043,2.849219,2.841555
1476,4.444722,5.786780,5.630836,4.795795,2.304020,7.416496,5.115721,4.855955,2.663749,3.914910,...,2.277445,2.843975,2.191248,2.360067,2.467876,2.512492,2.288658,2.936312,2.484780,2.551581


In [27]:
ef_ex2.shape

(1478, 450)

In [29]:
ef_ex2.columns

Index(['hjorth_complexity_O2', 'hjorth_complexity_O1', 'hjorth_complexity_OZ',
       'hjorth_complexity_PZ', 'hjorth_complexity_P4', 'hjorth_complexity_CP4',
       'hjorth_complexity_P8', 'hjorth_complexity_C4', 'hjorth_complexity_TP8',
       'hjorth_complexity_T8',
       ...
       'kurtosis_FZ', 'kurtosis_F4', 'kurtosis_F8', 'kurtosis_T7',
       'kurtosis_FT7', 'kurtosis_FC3', 'kurtosis_F3', 'kurtosis_FP2',
       'kurtosis_F7', 'kurtosis_FP1'],
      dtype='object', length=450)

In [37]:
# Append a 'Mean' row holding the column-wise mean of ef_ex2's own data rows.
# The source frame must be ef_ex2 itself: averaging a different recording's
# frame here would store values that do not describe the rows above them.
# Any existing 'Mean' row is dropped first so re-running the cell is idempotent.
ef_ex2.loc['Mean'] = ef_ex2.drop(index='Mean', errors='ignore').mean()

In [38]:
ef_ex2

Unnamed: 0,hjorth_complexity_O2,hjorth_complexity_O1,hjorth_complexity_OZ,hjorth_complexity_PZ,hjorth_complexity_P4,hjorth_complexity_CP4,hjorth_complexity_P8,hjorth_complexity_C4,hjorth_complexity_TP8,hjorth_complexity_T8,...,kurtosis_FZ,kurtosis_F4,kurtosis_F8,kurtosis_T7,kurtosis_FT7,kurtosis_FC3,kurtosis_F3,kurtosis_FP2,kurtosis_F7,kurtosis_FP1
0,7.120505,3.730505,3.205375,3.529707,3.445155,3.532043,4.100093,3.047061,3.213816,3.851944,...,2.399696,6.176483,3.124231,2.249034,2.059224,3.278414,2.542662,3.239702,3.177960,3.115319
1,5.566804,4.194747,5.452748,4.920088,2.759034,3.282675,7.387417,2.953358,3.860556,4.568681,...,2.620529,2.444163,3.247432,2.426343,2.239892,2.861497,2.322857,2.623000,3.227037,3.819862
2,3.006311,7.328448,4.785770,4.068938,2.300908,6.936230,5.945792,2.879884,2.776556,8.107701,...,3.169681,2.256984,2.858533,2.133400,2.137958,2.634927,2.332557,2.798153,2.382952,2.626738
3,2.939739,2.679574,3.140329,3.602519,2.480139,3.737771,3.495600,4.560246,4.933089,9.273948,...,2.971585,3.991573,2.609726,2.063039,2.559048,2.452298,2.310617,2.410712,1.924318,2.442920
4,3.401915,2.684335,4.923735,5.330649,4.008512,4.505154,3.344369,4.876172,4.154387,5.325330,...,3.636426,2.612162,2.347731,3.301192,2.597156,2.944806,2.062676,3.018968,2.293102,2.368643
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1475,3.851551,3.329561,4.851749,5.490593,2.328013,3.324403,5.392880,3.696231,3.889106,3.913235,...,3.428506,3.612941,2.799006,2.608251,2.601824,2.435638,2.432502,2.339043,2.849219,2.841555
1476,4.444722,5.786780,5.630836,4.795795,2.304020,7.416496,5.115721,4.855955,2.663749,3.914910,...,2.277445,2.843975,2.191248,2.360067,2.467876,2.512492,2.288658,2.936312,2.484780,2.551581
1477,3.080230,2.429913,2.088846,1.806167,1.543953,4.643841,2.757513,2.162098,4.287201,2.988736,...,2.902446,4.149579,3.310786,3.160118,3.116579,2.717108,2.550682,2.978461,2.873703,3.078165
Total,1552.133942,1544.735496,1331.190401,1751.694362,822.893334,1445.111655,1473.329026,1405.212654,1004.485512,1757.797518,...,1483.461514,1138.566501,1077.565461,1073.113956,1145.476094,1031.328934,1030.295444,1080.668118,1119.322856,1057.238064


In [2]:
def plot_result(df, prop, x, y):
    """Scatter-plot two columns of `df`, colouring the points by a third column.

    Parameters
    ----------
    df : pandas.DataFrame
        Table holding the columns named by `prop`, `x` and `y`.
    prop : str
        Column used for the point colour and for the colorbar range.
    x, y : str
        Columns plotted on the horizontal and vertical axes.

    Returns
    -------
    None
        The figure is shown with `plt.show()`.
    """
    sns.set()
    ax = sns.scatterplot(x=x, y=y, hue=prop, palette='RdBu', data=df)

    # Stand-alone mappable spanning the range of `prop`, so that a continuous
    # colorbar can take the place of the legend seaborn draws for `hue`.
    norm = plt.Normalize(df[prop].min(), df[prop].max())
    sm = plt.cm.ScalarMappable(cmap="RdBu", norm=norm)
    sm.set_array([])

    # Remove the legend (when one was drawn) and add a colorbar
    legend = ax.get_legend()
    if legend is not None:
        legend.remove()
    # `sm` is not attached to any Axes, so name the Axes to take the colorbar
    # space from explicitly; newer Matplotlib versions refuse to guess it.
    ax.figure.colorbar(sm, ax=ax)

    plt.show()

In [6]:
%%time
# Build the data loader from the configured 'preprocessed' and 'models'
# directories, load and split the data, then create the `Regressions` object
# whose `algorithms` entries are used in the rest of the notebook.
# NOTE(review): `samples=100` presumably caps how much data is loaded — confirm
# its exact meaning against RegressionsLoader.
rloader = RegressionsLoader(config.get_directory('preprocessed'), config.get_directory('models'), samples=100)
rloader.load()
rloader.split()
regressions = Regressions(rloader)

CPU times: user 4.46 s, sys: 1.12 s, total: 5.58 s
Wall time: 6.81 s


## Dummy regressor

Firstly, we make predictions with dummy regressors as a simple baseline to see whether other models learn "something". From the sklearn docs: 

> `DummyRegressor` is a regressor that makes predictions using simple rules. This regressor is useful as a simple baseline to compare with other (real) regressors.

In [7]:
%%time
regressions.algorithms['dummy'].fit()

CPU times: user 0 ns, sys: 1.86 ms, total: 1.86 ms
Wall time: 27.8 ms


In [8]:
score, rmse, ma, y_test, y_pred =  regressions.algorithms['dummy'].predict()

In [9]:
print(score, rmse, ma )

-0.47890061657830585 13.658630142623013 11.007143892410616


In [10]:
y_pred

array([17., 17., 17., ..., 17., 17., 17.])

In [11]:
len(y_pred)

43697

In [12]:
array_test = np.array(y_test)
len(array_test)

43697

In [15]:
y_test.describe()

count    43697.000000
mean        24.772493
std         11.231626
min         10.833333
25%         11.100000
50%         23.033333
75%         35.066667
max         41.266667
Name: label, dtype: float64

In [None]:
plt.plot(y_pred)
plt.title('pred of dummy')

So the dummy regressor simply predicts 17 months for everyone.

## Model 1: Random Forest regressor

In [None]:
%%time
regressions.algorithms['rf'].grid_search()

In [None]:
score, rmsee, ma, y_test, y_pred =  regressions.algorithms['rf'].predict()

In [None]:
# The predict cell above stores the RMSE in `rmsee`; printing `rmse` would show
# the stale value left over from the dummy regressor.
print(score, rmsee, ma)

### Train with best parameters

In [None]:
%%time
regressions.algorithms['rf'].fit()

In [None]:
score, rmsee, mae, y_test, y_pred =  regressions.algorithms['rf'].predict()

In [None]:
print(score, rmsee, mae)

In [None]:
plt.plot(y_test)
plt.title('actual')

In [None]:
plt.plot(y_pred)
plt.title('random forest')

## Model 2: Linear Support Vector Regressor

There are a lot of training examples in the training set. According to the sklearn docs: "The fit time complexity is more than quadratic with the number of samples which makes it hard to scale to datasets with more than a couple of 10000 samples." 

They recommend using a linear SVR for large data sets. Therefore, let's try this first.

### Randomized search

In [None]:
%%time
lsv_result = regressions.algorithms['lsv'].fit()

In [None]:
# Keep the 50 best-ranked candidates of the search (cv_df is reused for the
# scatter plot in the next cell) and display the top 30 of them.
cv_df = pd.DataFrame(lsv_result.cv_results_).sort_values('rank_test_score').head(50)
cv_df.head(30)

In [None]:
plot_result(cv_df, 'mean_test_score', "param_linearsvr__C", "param_linearsvr__epsilon")

### Grid search

In [None]:
%%time
lsv_gs_result = regressions.algorithms['lsv'].grid_search()

In [None]:
lsv_gs_result.best_params_

In [None]:
cv_df_gs = pd.DataFrame(lsv_gs_result.cv_results_).sort_values('rank_test_score').head(50)
cv_df_gs.head(30)

In [None]:
#plot_result(cv_df_gs, 'mean_test_score', "param_linearsvr__C", "param_linearsvr__epsilon")

### Train on all data with best parameters

In [None]:
%%time
regressions.algorithms['lsv'].best_fit()

In [None]:
score, rmsee, mae, y_test, y_pred_lsv =  regressions.algorithms['lsv'].predict()

In [None]:
print(score, rmsee, mae)

In [None]:
len(y_pred_lsv)

In [None]:
len(y_test)

In [None]:
y_test.unique()

In [None]:
# Put actual and predicted ages side by side, one row per test sample.
# Passing a list of the two sequences would build a frame with one row per
# sequence instead, so row slices such as `graph_res[:40]` would not select
# the first 40 samples. `np.asarray` drops the pandas index of `y_test`, so
# both columns are aligned by position.
graph_res = pd.DataFrame({
    'actual': np.asarray(y_test),
    'predicted': np.asarray(y_pred_lsv),
})
graph_res.head()

In [None]:
plt.plot(graph_res[:40]);

In [None]:
plt.plot(y_test)
plt.plot(y_pred_lsv)

In [None]:
plt.plot(y_pred_lsv)
plt.title('lsv predictions')

## Model 3: (Non-linear) Support Vector Regressor

Let's try fitting a SVR on a (small) chunk of the training data. The parameter search below is quite small, but a broader search has been done before. However, a more fine-grained search is still necessary. The downside of SVR with a non-linear kernel is that it's very slow to fit and predict.

### Randomized search

In [None]:
%%time
nl_srv_result = regressions.algorithms['svr'].fit()

In [None]:
nl_srv_result.best_params_

In [None]:
# Rank all candidates of the randomized search, best first.
df_rs_svr = pd.DataFrame(nl_srv_result.cv_results_).sort_values('rank_test_score')
# Keep only candidates with gamma below 0.0025, then the 20 best-ranked of those.
# NOTE(review): the 0.0025 cut-off is not explained in the notebook — document its origin.
df_rs_svr = df_rs_svr.loc[df_rs_svr['param_svr__gamma'] < 0.0025].head(20)
# At most 20 rows remain at this point, so head(30) displays all of them.
df_rs_svr.head(30)

In [None]:
plot_result(df_rs_svr, 'mean_test_score', 'param_svr__C', 'param_svr__epsilon')

In [None]:
sns.scatterplot(x="param_svr__gamma", y="mean_test_score", data=df_rs_svr)

In [None]:
score, rmsee, mae, y_test, y_pred =  regressions.algorithms['svr'].predict()

In [None]:
print(score, rmsee, mae)

### Grid search

In [None]:
%%time
svr_gs_result = regressions.algorithms['svr'].grid_search()

In [None]:
svr_gs_result.best_params_

In [None]:
df_gs_svr = pd.DataFrame(svr_gs_result.cv_results_).sort_values('rank_test_score')
df_gs_svr.head(30)

In [None]:
plot_result(df_gs_svr, 'mean_test_score', 'param_svr__C', 'param_svr__epsilon')

### Train with best parameters

In [None]:
%%time
svr_result = regressions.algorithms['svr'].best_fit()

## Model 4: SGD Regressor

Time estimate on my computer:

- X min for one SGD fit (1 configuration)
- RandomizedSearch: 250 iterations, 5 folds per iteration = 1250 fits
- 1250 SGD fits * X min = X hours (estimate with my 1 core)

Memory usage:
- X GB per core?

Fitting a SVR is computationally expensive. Therefore, we try prediction with an SGD Regressor. According to the sklearn documentation, it's best to start with a RandomizedSearchCV to find reasonable hyperparameters. Therefore, we start with this.

### Randomized search

In [None]:
%%time
sgd_result = regressions.algorithms['sgd'].fit()

In [None]:
sgd_result.best_params_

In [None]:
# Rank all candidates of the randomized search, best first.
df_rs_sgd = pd.DataFrame(sgd_result.cv_results_).sort_values('rank_test_score')
# Restrict to candidates that used the 'huber' loss, sort by rank again and
# keep at most 5000 rows.
df_rs_sgd = df_rs_sgd.loc[
    df_rs_sgd['param_sgdregressor__loss'] == 'huber'
].sort_values('rank_test_score').head(5000)
df_rs_sgd.head(60)

In [None]:
sns.scatterplot(x="param_sgdregressor__alpha", y="mean_test_score", data=df_rs_sgd)

### Grid search

In [None]:
%%time
sgd_gs_result = regressions.algorithms['sgd'].grid_search()

In [None]:
%%time
sgd_gs_predict = regressions.algorithms['sgd'].predict()

In [None]:
sgd_gs_result.best_params_

In [None]:
df_gs_sgd = pd.DataFrame(sgd_gs_result.cv_results_).sort_values('rank_test_score')
df_gs_sgd.head()

In [None]:
plot_result(df_gs_sgd, 'mean_test_score', 'param_sgdregressor__alpha', 'param_sgdregressor__epsilon')

### Train with best parameters

In [None]:
%%time
regressions.algorithms['sgd'].best_fit()

## Model 5: Relevance Vector Regression

An alternative to the SVR is the Relevance Vector Machine, also used by Vandenbosch (2018). It isn't included in sklearn, but two packages that follow the sklearn API, 'scikit-rvm' and 'sklearn-rvm', implement it.

### Randomized search

Time estimate on my computer:

- 4 min for one RVR fit (1 configuration)
- RandomizedSearch: 250 iterations, 5 folds per iteration = 1250 fits
- 1250 RVR fits * 4 min = 84 hours (estimate with my 2 cores)

Memory usage:
- 4 GB per core?

In [None]:
%%time
emrvr_result = regressions.algorithms['emrvr'].fit()

In [None]:
emrvr_result.best_params_

### Train on best SVR parameters

In [None]:
%%time
regressions.algorithms['emrvr'].best_fit()

### Grid search

Time estimate on my computer:

- 4 min for one RVR fit (1 configuration)
- GridSearch: 50 configurations, 5 folds per configuration = 250 fits
- 250 RVR fits * 4 min = 17 hours (estimate with my 2 cores)

<div class="alert alert-block alert-warning">
    TODO(wvxvw): The code below seems bogus.
    The pipeline uses unexpected kernel.
    I don't know why it does this. Need more info
</div>

In [None]:
# NOTE(review): EMRVR is imported here but not used anywhere in this cell.
from sklearn_rvm import EMRVR

from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# Grid over the `epsilon` and `gamma` settings of the pipeline step named 'svr'.
parameters = {'svr__epsilon': [4, 4.5, 5, 5.5, 6, 6.5, 7, 7.5, 8],
              'svr__gamma': ['scale', 'auto', 0.0015]
}

# Standardise the features, then fit an RBF-kernel SVR.
# NOTE(review): `SVR` is not imported in the visible notebook, and despite the
# "RVR" naming this pipeline fits a support vector regressor — confirm intent.
pipe  = make_pipeline(StandardScaler(),
                      SVR(verbose=True, kernel='rbf'))

# 5-fold grid search over `parameters`, using all available cores.
RVR_gridsearch = GridSearchCV(pipe, parameters, cv=5, n_jobs=-1, verbose=10)

# NOTE(review): `chunked_X_train` / `chunked_y_train` are not defined in the
# visible notebook; only their first element is used for fitting.
RVR_gridsearch.fit(chunked_X_train[0], chunked_y_train[0])

# Save the fitted search object to disk.
# NOTE(review): `PATH_MODELS` and `dump` are not defined/imported in the visible
# notebook (presumably joblib's `dump`, given the file extension — confirm).
output_file = os.path.join(PATH_MODELS, 'RVR_gridsearch.joblib')
dump(RVR_gridsearch, output_file)

In [None]:
RVR_gridsearch.best_params_

In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Reuse the grid search from this session when it exists; otherwise fall back
# to the copy saved on disk. Only NameError is caught so that genuine failures
# (or a keyboard interrupt) are not silently swallowed.
# NOTE(review): `load` and `PATH_MODELS` are not defined/imported in the visible
# notebook (presumably joblib's `load` — confirm).
try:
    RVR_gridsearch
except NameError:
    RVR_gridsearch = load(os.path.join(PATH_MODELS, 'RVR_gridsearch.joblib'))

# Update verbosity
RVR_gridsearch.verbose = 0

# R2
# NOTE(review): `X_test` is not defined in the visible notebook — confirm where it comes from.
score = RVR_gridsearch.score(X_test, y_test)

# RMSE and MAE
predictions = RVR_gridsearch.predict(X_test)
# Take the square root of the MSE explicitly; the `squared=False` flag is
# deprecated in recent scikit-learn releases.
rmse = np.sqrt(mean_squared_error(y_test, predictions))
mae = mean_absolute_error(y_test, predictions)

print(f"Performance of Relevance Vector Regressor: R-squared = {score}, RMSE = {rmse} and MAE = {mae}.")

# NOTE(review): `rvr_reg` is not assigned anywhere in the visible notebook, so
# this raises NameError on a fresh kernel — confirm which object should be freed.
del rvr_reg

In [None]:
regressions.algorithms['emrvr'].predict()

In [None]:
regressions.algorithms['sgd'].predict()

## Model 6: Neural network

In [None]:
optimizer = NnOptimizer(rloader, epochs=20)

### Plot NN training history

In [None]:
def plot_loss(history):
    """ Plots the MSE, RMSE, and MAE loss for the training and validation data over time """
    
    %matplotlib inline
    
    fig, (ax1, ax2, ax3) = plt.subplots(3, 1, sharex=True, figsize=(12, 12), dpi=200)

    ax1.plot(history.history['loss'], label='training data')  
    min_loss = min(history.history['val_loss'])
    val_plot1 = ax1.plot(history.history['val_loss'], label='validation data')
    ax1.axhline(y = min_loss, color = val_plot1[0].get_color(), linestyle='--') 
    x0,x1 = ax1.get_xlim()
    ax1.text(x1, min_loss, "{:.2f}".format(min_loss), ha='left', va='center')
    ax1.set_title('MSE loss')
    ax1.set_ylabel('MSE')
    ax1.set_xlabel('epochs')
    ax1.legend()

    ax2.plot(history.history['root_mean_squared_error'], label='training data')
    min_loss = min(history.history['val_root_mean_squared_error'])
    val_plot2 = ax2.plot(history.history['val_root_mean_squared_error'], label='validation data')
    ax2.axhline(y = min_loss, color=val_plot2[0].get_color(), linestyle='--') 
    x0,x1 = ax2.get_xlim()
    ax2.text(x1, min_loss, '{:.2f}'.format(min_loss), ha='left', va='center')
    ax2.set_title('RMSE loss')
    ax2.set_ylabel('RMSE')
    ax2.set_xlabel('epochs')
    ax2.legend()
    
    ax3.plot(history.history['mean_absolute_error'], label='training data')    
    min_loss = min(history.history['val_mean_absolute_error'])
    val_plot3 = ax3.plot(history.history['val_mean_absolute_error'], label='validation data')
    ax3.axhline(y=min_loss, color=val_plot3[0].get_color(), linestyle='--') 
    x0,x1 = ax3.get_xlim()
    ax3.text(x1, min_loss, "{:.2f}".format(min_loss), ha='left', va='center')
    ax3.set_title('MAE loss')
    ax3.set_ylabel('MAE')
    ax3.set_xlabel('epochs')
    ax3.legend()

In [None]:
# This has to be repeated multiple times because the output from optimizer prevents proper display of the plot
history = optimizer.fit_model(0)

In [None]:
plot_loss(history)

In [None]:
history = optimizer.fit_model(1)

In [None]:
plot_loss(history)

In [None]:
history = optimizer.fit_model(2)

In [None]:
plot_loss(history)

In [None]:
history = optimizer.fit_model(3)

In [None]:
plot_loss(history)

In [None]:
history = optimizer.fit_model(4)

In [None]:
plot_loss(history)

In [None]:
# Print RMSE and MAE for every entry of `optimizer.optimization_params`,
# one three-line report per entry.
for i, p in enumerate(optimizer.optimization_params):
    prediction, rmse, mae = optimizer.predict(i)
    print(
        f'Performance of simple FC neural network regressor #{i} ({p}):',
        f'  RMSE: {rmse}',
        f'  MAE: {mae}.',
        sep='\n',
    )