It is highly recommended to use a powerful **GPU**, you can use it for free uploading this notebook to [Google Colab](https://colab.research.google.com/notebooks/intro.ipynb).
<table align="center">
 <td align="center"><a target="_blank" href="https://colab.research.google.com/github/ezponda/intro_deep_learning_solutions/blob/main/class/Fundamentals/Regression_tuner.ipynb">
        <img src="https://i.ibb.co/2P3SLwK/colab.png"  style="padding-bottom:5px;" />Run in Google Colab</a></td>
  <td align="center"><a target="_blank" href="https://github.com/ezponda/intro_deep_learning_solutions/blob/main/class/Fundamentals/Regression_tuner.ipynb">
        <img src="https://i.ibb.co/xfJbPmL/github.png"  height="70px" style="padding-bottom:5px;"  />View Source on GitHub</a></td>
</table>

In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import pandas as pd

# Abalone Dataset

Abalones are marine snails that can be found along coasts of almost every continent.

<img src="https://upload.wikimedia.org/wikipedia/commons/thumb/0/0b/AbaloneInside.jpg/440px-AbaloneInside.jpg" alt="abalone" border="0" width="400" height="500">



In this notebook we are going to Predict the age of abalone from physical measurements. [Link to documentation](https://archive.ics.uci.edu/ml/datasets/abalone).

In [2]:
df_train = pd.read_csv(
    "https://storage.googleapis.com/download.tensorflow.org/data/abalone_train.csv",
    names=["Length", "Diameter", "Height", "Whole weight", "Shucked weight",
           "Viscera weight", "Shell weight", "Age"])
df_train.head()

Unnamed: 0,Length,Diameter,Height,Whole weight,Shucked weight,Viscera weight,Shell weight,Age
0,0.435,0.335,0.11,0.334,0.1355,0.0775,0.0965,7
1,0.585,0.45,0.125,0.874,0.3545,0.2075,0.225,6
2,0.655,0.51,0.16,1.092,0.396,0.2825,0.37,14
3,0.545,0.425,0.125,0.768,0.294,0.1495,0.26,16
4,0.545,0.42,0.13,0.879,0.374,0.1695,0.23,13


In [3]:
df_train.describe()

Unnamed: 0,Length,Diameter,Height,Whole weight,Shucked weight,Viscera weight,Shell weight,Age
count,3320.0,3320.0,3320.0,3320.0,3320.0,3320.0,3320.0,3320.0
mean,0.522693,0.406575,0.139271,0.824734,0.357705,0.180162,0.237921,9.896988
std,0.121164,0.10012,0.042708,0.491182,0.222223,0.110182,0.140261,3.205654
min,0.075,0.055,0.0,0.002,0.001,0.0005,0.0015,1.0
25%,0.45,0.345,0.115,0.436375,0.1815,0.092,0.127375,8.0
50%,0.54,0.425,0.14,0.79525,0.3355,0.17075,0.23,9.0
75%,0.615,0.48,0.165,1.15,0.5045,0.253125,0.325,11.0
max,0.815,0.65,1.13,2.8255,1.488,0.76,1.005,27.0


In [4]:
y_train = df_train.pop('Age')
X_train = df_train.copy()

In [5]:
df_test = pd.read_csv(
    "https://storage.googleapis.com/download.tensorflow.org/data/abalone_test.csv",
    names=["Length", "Diameter", "Height", "Whole weight", "Shucked weight",
           "Viscera weight", "Shell weight", "Age"])
y_test = df_test.pop('Age')
X_test = df_test.copy()

In [6]:
print(f'X_train shape: {X_train.shape}, X_test shape: {X_test.shape}')

X_train shape: (3320, 7), X_test shape: (850, 7)


## Regression Losses

- **Mean Squared Error (MSE)**:

```python
tf.keras.losses.MSE
```
```python
model.compile(loss='mse') or model.compile(loss=tf.keras.losses.MSE)
```

$$ \mathrm{MSE} = \frac{\sum_{i=1}^n\left( y_i - \hat{y_i}\right)^2}{n}$$


- **Mean Absolute Error (MAE)**:

```python
tf.keras.losses.MAE
```
```python
model.compile(loss='mae') or model.compile(loss=tf.keras.losses.MAE)
```

$$ \mathrm{MAE} = \frac{\sum_{i=1}^n\left| y_i - \hat{y_i}\right|}{n}$$


## Question 1: Create a sequential net with at least 1 hidden layer

In [7]:
model = keras.Sequential()

model.add(layers.Dense(64, input_shape=(7,), activation='relu'))

model.add(layers.Dense(1, activation='linear'))

## model summary
model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [8]:
model.compile(
    optimizer='adam',
    loss=tf.keras.losses.MSE,
    metrics=['mae']
)
model.fit(X_train, y_train, epochs=50, validation_split=0.2, batch_size=32)

Epoch 1/50
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 91.1614 - mae: 9.0083 - val_loss: 55.0973 - val_mae: 6.8828
Epoch 2/50
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 42.1739 - mae: 5.8374 - val_loss: 14.7943 - val_mae: 2.9238
Epoch 3/50
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 11.0993 - mae: 2.4292 - val_loss: 8.9494 - val_mae: 2.2152
Epoch 4/50
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 9.4366 - mae: 2.2027 - val_loss: 8.5575 - val_mae: 2.1534
Epoch 5/50
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 8.0143 - mae: 2.0578 - val_loss: 8.1959 - val_mae: 2.0981
Epoch 6/50
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 8.5010 - mae: 2.0925 - val_loss: 7.8825 - val_mae: 2.0499
Epoch 7/50
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 8.1

<keras.src.callbacks.history.History at 0x7b59e229de50>

In [9]:
results = model.evaluate(X_test, y_test, verbose=1)
print('Test Loss: {}'.format(results[0]))

[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 5.9223 - mae: 1.6658 
Test Loss: 5.430359840393066


## Question 2: Normalize the inputs and train the same model

In [10]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_train_norm = scaler.fit_transform(X_train)
X_test_norm = scaler.transform(X_test)
print('X_train mu, sigma', X_train_norm.mean(0), X_train_norm.std(0))
print('X_test mu, sigma', X_test_norm.mean(0), X_test_norm.std(0))

X_train mu, sigma [ 2.77689518e-16  4.65491099e-17  2.50402109e-16 -2.81434849e-16
  2.18299274e-16  1.77100637e-16 -1.05404306e-16] [1. 1. 1. 1. 1. 1. 1.]
X_test mu, sigma [0.05808422 0.06917445 0.03098307 0.04461505 0.04160742 0.02421514
 0.03516632] [0.95187926 0.95135017 0.89294094 0.99223632 0.99454932 0.97495047
 0.96304109]


In [11]:
model = keras.Sequential()

model.add(layers.Dense(64, input_shape=(7,), activation='relu'))

model.add(layers.Dense(1, activation='linear'))

## model summary
model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [12]:
model.compile(
    optimizer='adam',
    loss=tf.keras.losses.MSE,
    metrics=['mae'] #监控 平均绝对误差（更好理解：与真实值的平均绝对偏差）。
)
model.fit(X_train_norm, y_train, epochs=50, validation_split=0.2, batch_size=32)

Epoch 1/50
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 97.4909 - mae: 9.3404 - val_loss: 62.3356 - val_mae: 7.1148
Epoch 2/50
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 55.2228 - mae: 6.4807 - val_loss: 35.9791 - val_mae: 5.0052
Epoch 3/50
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 36.7359 - mae: 4.9821 - val_loss: 29.0058 - val_mae: 4.4539
Epoch 4/50
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 29.9022 - mae: 4.4284 - val_loss: 22.4664 - val_mae: 3.8593
Epoch 5/50
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 20.6480 - mae: 3.5730 - val_loss: 15.2603 - val_mae: 3.0627
Epoch 6/50
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 14.2699 - mae: 2.8571 - val_loss: 9.8054 - val_mae: 2.3002
Epoch 7/50
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - los

<keras.src.callbacks.history.History at 0x7b59e055f140>

In [13]:
results = model.evaluate(X_test_norm, y_test, verbose=1)
print('Test Loss: {}'.format(results[0]))

[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 5.6399 - mae: 1.6264 
Test Loss: 5.176337242126465


## Optimizers:

- [SGD](https://www.tensorflow.org/api_docs/python/tf/keras/optimizers/SGD): Gradient descent with momentum
```python
tf.keras.optimizers.SGD(
    learning_rate=0.01, momentum=0.0, nesterov=False, name='SGD', **kwargs
)
```
If momentum is 0:
```python
w = w - learning_rate * gradient
```
If we have momentum:

 ```python
velocity = momentum * velocity - learning_rate * g
w = w + velocity
```


- [RMSprop](https://www.tensorflow.org/api_docs/python/tf/keras/optimizers/RMSprop): Root Mean Square Propagation
```python
tf.keras.optimizers.RMSprop(
    learning_rate=0.001, rho=0.9, momentum=0.0, epsilon=1e-07, centered=False,
    name='RMSprop', **kwargs
)
```
- [Adam](https://www.tensorflow.org/api_docs/python/tf/keras/optimizers/Adam): Adaptive Moment Estimation,  is an update to the RMSProp algorithm
```python
tf.keras.optimizers.Adam(
    learning_rate=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-07, amsgrad=False,
    name='Adam', **kwargs
)
```

```python
model.compile(loss='mse', optimizer='adam')
model.compile(loss='mse', optimizer=tf.keras.optimizers.Adam(learning_rate=0.001))
```

## Question 3: Train the same model with different optimizers

In [14]:
model = keras.Sequential()

model.add(layers.Dense(64, input_shape=(7,), activation='relu'))

model.add(layers.Dense(1, activation='linear'))

## model summary
model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [15]:
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss=tf.keras.losses.MSE,
    metrics=['mae']
)
model.fit(X_train_norm, y_train, epochs=50, validation_split=0.2, batch_size=32)

Epoch 1/50
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 98.6732 - mae: 9.3574 - val_loss: 62.7140 - val_mae: 7.0580
Epoch 2/50
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 54.3153 - mae: 6.4174 - val_loss: 36.0793 - val_mae: 5.0176
Epoch 3/50
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 33.6876 - mae: 4.7424 - val_loss: 27.2441 - val_mae: 4.2865
Epoch 4/50
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 28.7769 - mae: 4.2760 - val_loss: 20.4072 - val_mae: 3.6105
Epoch 5/50
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 19.2301 - mae: 3.4201 - val_loss: 13.2718 - val_mae: 2.7691
Epoch 6/50
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 13.3390 - mae: 2.6004 - val_loss: 8.7755 - val_mae: 2.1138
Epoch 7/50
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - los

<keras.src.callbacks.history.History at 0x7b59d8d7b230>

In [16]:
results = model.evaluate(X_test_norm, y_test, verbose=1)
print('Test Loss: {}'.format(results[0]))

[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 5.6391 - mae: 1.6253
Test Loss: 5.185255527496338


# Keras Tuner

The [Keras Tuner](https://www.tensorflow.org/tutorials/keras/keras_tuner) is a library for hyper-parameter tuning.


In [17]:
 !pip install -U keras-tuner

Collecting keras-tuner
  Downloading keras_tuner-1.4.7-py3-none-any.whl.metadata (5.4 kB)
Collecting kt-legacy (from keras-tuner)
  Downloading kt_legacy-1.0.5-py3-none-any.whl.metadata (221 bytes)
Downloading keras_tuner-1.4.7-py3-none-any.whl (129 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m129.1/129.1 kB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading kt_legacy-1.0.5-py3-none-any.whl (9.6 kB)
Installing collected packages: kt-legacy, keras-tuner
Successfully installed keras-tuner-1.4.7 kt-legacy-1.0.5


In [18]:
import kerastuner as kt

  import kerastuner as kt


Hyperparameters are of two types:
1. **Model hyperparameters** like number of units, type of activation or number hidden layers.
2. **Algorithm hyperparameters** like the learning rate in adam.

The model-building function takes an argument `hp` from which you can sample hyper-parameters.

```python
def build_model(hp):
    ...
    return model

```

- `hp.Int` to sample an integer from a certain range:
```python
hp.Int('units', min_value=32, max_value=256, step=32, default=64)
```
- `hp.Float` to sample a float number from a certain range:
```python
hp.Float('dropout', min_value=0.0, max_value=0.1, default=0.005, step=0.05)
```
- `hp.Choice` to select values in a list:
```python
hp.Choice('learning_rate', [1e-2, 1e-3, 1e-4])
```
- [list of hyperparameter methods](https://keras-team.github.io/keras-tuner/documentation/hyperparameters/)

In [19]:
def build_model(hp):
    model = keras.Sequential()
    # Sample different number of layers with hp.Int
    for i in range(hp.Int('num_layers', 1, 3)):
        # Sample different number of layers with hp.Int
        model.add(layers.Dense(units=hp.Int('units_' + str(i),
                                            min_value=32,
                                            max_value=128,
                                            step=32),
                               activation='relu'))
    # Sample different activation functions with hp.Choice
    model.add(layers.Dense(1, activation=hp.Choice('output_activation', ['relu', 'linear'])))

    # Sample different activation functions with hp.Choice
    model.compile(
        optimizer=keras.optimizers.Adam(
            hp.Choice('learning_rate', [1e-2, 1e-3, 1e-4])),
        loss='mse',
        metrics=['mae'])
    return model

The Keras Tuner has four [tuners](https://keras-team.github.io/keras-tuner/documentation/tuners/) available  `RandomSearch`, `Hyperband`, `BayesianOptimization`, and `Sklearn`


In [22]:
# tuner = kt.Hyperband(build_model,
#                      objective='val_loss',
#                      max_epochs=35,
#                      factor=2,
#                      hyperband_iterations=1,
#                      directory='my_dir',
#                      project_name='intro_to_kt')

tuner = kt.RandomSearch(build_model,
                     objective='val_loss',
                     max_trials=100,
                     directory='my_dir',
                     project_name='intro_to_kt')

stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)

Reloading Tuner from my_dir/intro_to_kt/tuner0.json


In [23]:
tuner.search(X_train_norm, y_train, epochs=30, validation_split=0.15, batch_size=32, callbacks=[stop_early])

# Get the optimal hyperparameters
best_hps=tuner.get_best_hyperparameters(num_trials=1)[0]
print(best_hps.get_config())

Trial 144 Complete [00h 00m 14s]
val_loss: 4.595340728759766

Best val_loss So Far: 4.4566192626953125
Total elapsed time: 00h 09m 55s
{'space': [{'class_name': 'Int', 'config': {'name': 'num_layers', 'default': None, 'conditions': [], 'min_value': 1, 'max_value': 3, 'step': 1, 'sampling': 'linear'}}, {'class_name': 'Int', 'config': {'name': 'units_0', 'default': None, 'conditions': [], 'min_value': 32, 'max_value': 128, 'step': 32, 'sampling': 'linear'}}, {'class_name': 'Choice', 'config': {'name': 'output_activation', 'default': 'relu', 'conditions': [], 'values': ['relu', 'linear'], 'ordered': False}}, {'class_name': 'Choice', 'config': {'name': 'learning_rate', 'default': 0.01, 'conditions': [], 'values': [0.01, 0.001, 0.0001], 'ordered': True}}, {'class_name': 'Int', 'config': {'name': 'units_1', 'default': None, 'conditions': [], 'min_value': 32, 'max_value': 128, 'step': 32, 'sampling': 'linear'}}, {'class_name': 'Int', 'config': {'name': 'units_2', 'default': None, 'conditions'

In [24]:
print(f"Best learning rate: {best_hps.get('learning_rate')}")
print(f"Best output activation function: {best_hps.get('output_activation')}")
print(f"Best number of hidden layers: {best_hps.get('num_layers')}")
for i in range(best_hps.get('num_layers')):
    print(f"Number of units of hidden layer {i+1}: {best_hps.get('units_' + str(i))}")

Best learning rate: 0.001
Best output activation function: linear
Best number of hidden layers: 3
Number of units of hidden layer 1: 96
Number of units of hidden layer 2: 128
Number of units of hidden layer 3: 96


## Train the model

In [25]:
model = tuner.hypermodel.build(best_hps)
history = model.fit(X_train_norm, y_train, epochs=50, validation_split=0.15, callbacks=[stop_early])

Epoch 1/50
[1m89/89[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - loss: 62.4814 - mae: 6.7253 - val_loss: 10.9510 - val_mae: 2.3261
Epoch 2/50
[1m89/89[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 8.1656 - mae: 1.9533 - val_loss: 6.3191 - val_mae: 1.8032
Epoch 3/50
[1m89/89[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 6.2229 - mae: 1.7443 - val_loss: 5.4561 - val_mae: 1.7046
Epoch 4/50
[1m89/89[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 6.4633 - mae: 1.6424 - val_loss: 5.3398 - val_mae: 1.6288
Epoch 5/50
[1m89/89[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 4.7794 - mae: 1.5657 - val_loss: 4.9003 - val_mae: 1.5552
Epoch 6/50
[1m89/89[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 4.7062 - mae: 1.5591 - val_loss: 4.8087 - val_mae: 1.5457
Epoch 7/50
[1m89/89[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 4.2893

In [26]:
results = model.evaluate(X_test_norm, y_test, verbose=1)
print('Test Loss: {}'.format(results[0]))

[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 6.3230 - mae: 1.8301 
Test Loss: 5.60780668258667


## Question 4: Try to search with dropout

In [28]:
def build_model(hp):
    model = keras.Sequential()
    # Sample different number of layers with hp.Int
    for i in range(hp.Int('num_layers', 1, 3)):
        # Sample different number of layers with hp.Int
        model.add(layers.Dense(units=hp.Int('units_' + str(i),
                                            min_value=32,
                                            max_value=128,
                                            step=32),
                               activation='relu'))
        model.add(layers.Dropout(hp.Float('dp_'+ str(i), min_value=0.0,
                  max_value=0.35, default=0.0, step=0.05)))
    # Sample different activation functions with hp.Choice
    model.add(layers.Dense(1, activation=hp.Choice(
        'output_activation', ['relu', 'linear'])))

    # Sample different activation functions with hp.Choice
    model.compile(
        optimizer=keras.optimizers.Adam(
            hp.Choice('learning_rate', [1e-2, 1e-3, 1e-4])),
        loss='mse',
        metrics=['mae'])
    return model


# tuner = kt.Hyperband(build_model,
#                      objective='val_loss',
#                      max_epochs=40,
#                      factor=2,
#                      hyperband_iterations=2,
#                      directory='my_dir_2',
#                      project_name='intro_to_kt')


tuner = kt.RandomSearch(build_model,
                     objective='val_loss',
                     max_trials=100,
                     directory='my_dir_2',
                     project_name='intro_to_kt')



stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)
tuner.search(X_train_norm, y_train, epochs=30, validation_split=0.15,
             batch_size=32, callbacks=[stop_early])

# Get the optimal hyperparameters
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
print(best_hps.get_config())

Trial 100 Complete [00h 00m 10s]
val_loss: 4.75252103805542

Best val_loss So Far: 4.54598331451416
Total elapsed time: 00h 13m 58s
{'space': [{'class_name': 'Int', 'config': {'name': 'num_layers', 'default': None, 'conditions': [], 'min_value': 1, 'max_value': 3, 'step': 1, 'sampling': 'linear'}}, {'class_name': 'Int', 'config': {'name': 'units_0', 'default': None, 'conditions': [], 'min_value': 32, 'max_value': 128, 'step': 32, 'sampling': 'linear'}}, {'class_name': 'Float', 'config': {'name': 'dp_0', 'default': 0.0, 'conditions': [], 'min_value': 0.0, 'max_value': 0.35, 'step': 0.05, 'sampling': 'linear'}}, {'class_name': 'Choice', 'config': {'name': 'output_activation', 'default': 'relu', 'conditions': [], 'values': ['relu', 'linear'], 'ordered': False}}, {'class_name': 'Choice', 'config': {'name': 'learning_rate', 'default': 0.01, 'conditions': [], 'values': [0.01, 0.001, 0.0001], 'ordered': True}}, {'class_name': 'Int', 'config': {'name': 'units_1', 'default': None, 'conditions':

In [29]:
print(f"Best learning rate: {best_hps.get('learning_rate')}")
print(f"Best output activation function: {best_hps.get('output_activation')}")
print(f"Best number of hidden layers: {best_hps.get('num_layers')}")
for i in range(best_hps.get('num_layers')):
    print(f"Number of units of hidden layer {i+1}: {best_hps.get('units_' + str(i))}")
    print(f"Dropout rate of hidden layer {i+1}: {best_hps.get('dp_' + str(i))}")

Best learning rate: 0.001
Best output activation function: linear
Best number of hidden layers: 2
Number of units of hidden layer 1: 64
Dropout rate of hidden layer 1: 0.05
Number of units of hidden layer 2: 96
Dropout rate of hidden layer 2: 0.05


In [30]:
model = tuner.hypermodel.build(best_hps)
history = model.fit(X_train_norm, y_train, epochs=50, validation_split=0.15, callbacks=[stop_early])

Epoch 1/50
[1m89/89[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - loss: 73.1062 - mae: 7.6496 - val_loss: 23.2300 - val_mae: 3.8581
Epoch 2/50
[1m89/89[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 17.4145 - mae: 3.1286 - val_loss: 8.5496 - val_mae: 2.0222
Epoch 3/50
[1m89/89[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 7.7402 - mae: 1.9620 - val_loss: 7.0837 - val_mae: 1.8788
Epoch 4/50
[1m89/89[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 7.8944 - mae: 1.8701 - val_loss: 6.2290 - val_mae: 1.8100
Epoch 5/50
[1m89/89[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 6.0703 - mae: 1.8050 - val_loss: 5.8444 - val_mae: 1.7221
Epoch 6/50
[1m89/89[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 6.3037 - mae: 1.7394 - val_loss: 5.5352 - val_mae: 1.6559
Epoch 7/50
[1m89/89[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 5.054

In [31]:
results = model.evaluate(X_test_norm, y_test, verbose=1)
print('Test Loss: {}'.format(results[0]))

[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 5.7032 - mae: 1.6194 
Test Loss: 5.153729438781738
