<a href="https://colab.research.google.com/github/jeosol/tfx-tutorials/blob/main/keras_tuner_study.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Keras Tuner (from keras.io/keras_tuner) 

KerasTuner is an easy-to-use, scalable hyperparameter optimization framework that solves the pain points of hyperparameter search. Easily configure you search space with define-by-run syntax, then leverage one of the available search algorithms to find the best hyperparameter values for your models. KerasTuner comes with Bayesian Optimization, Hyperband, and Random Search algorithms built-in, and is also designed to be easy for researchers to extend in order to experiment with new search algorithms.

In [1]:
!pip install keras-tuner -q

[?25l[K     |██▍                             | 10 kB 30.4 MB/s eta 0:00:01[K     |████▉                           | 20 kB 37.3 MB/s eta 0:00:01[K     |███████▎                        | 30 kB 43.5 MB/s eta 0:00:01[K     |█████████▋                      | 40 kB 44.7 MB/s eta 0:00:01[K     |████████████                    | 51 kB 47.4 MB/s eta 0:00:01[K     |██████████████▌                 | 61 kB 52.0 MB/s eta 0:00:01[K     |█████████████████               | 71 kB 32.8 MB/s eta 0:00:01[K     |███████████████████▎            | 81 kB 32.7 MB/s eta 0:00:01[K     |█████████████████████▊          | 92 kB 35.0 MB/s eta 0:00:01[K     |████████████████████████▏       | 102 kB 36.5 MB/s eta 0:00:01[K     |██████████████████████████▋     | 112 kB 36.5 MB/s eta 0:00:01[K     |█████████████████████████████   | 122 kB 36.5 MB/s eta 0:00:01[K     |███████████████████████████████▍| 133 kB 36.5 MB/s eta 0:00:01[K     |████████████████████████████████| 135 kB 36.5 MB/s 
[?25

In [2]:
from tensorflow import keras
from tensorflow.keras import layers

In [3]:
import kerastuner as kt

  """Entry point for launching an IPython kernel.


In [24]:
# Several options for constructing the mode and using kerastuner
# sources: keras.io getting_started guide 

# if we use a simple build_model function
def build_model(hp):
  units = hp.Int(name="units", min_value=16, max_value=64, step=16)
  model = keras.Sequential([
    layers.Dense(units, activation="relu"),
    layers.Dense(10, activation="softmax")
  ])
  optimizer = hp.Choice(name="optimizer", values=["rmsprop", "adam"])
  model.compile(
    optimizer=optimizer,
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"])
  return model

# build_model(keras_tuner.Hyperparameters())
# 


class SimpleMLP(kt.HyperModel):
  def __init__(self, num_classes):
    self.num_classes = num_classes

  # Build function takes a hp object
  def build(self, hp):
    units = hp.Int(name="units", min_value=16, max_value=64, step=16)
    model = keras.Sequential([
        layers.Dense(units, activation="relu"),
        layers.Dense(self.num_classes, activation="softmax")
    ])
    # we can try two choices for the optimizer
    optimizer = hp.Choice(name="optimizer", values=["rmsprop", "adam"])
    model.compile(
        optimizer=optimizer,
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"])
    return model 

# Option2: You can define the hyperparameters in advance and have the code
# constructing the model as separate

# this function takes the possible hyperparameterrs
def call_existing_code(units, activation, dropout, lr):
  model = keras.Sequential()
  model.add(layers.Flatten())
  model.add(layers.Dense(units=units, activation=activation))
  if dropout:
    model.add(layers.Dropout(rate=0.25))
  model.add(layers.Dense(10, activation="softmax"))
  model.compile(
      optimizer=keras.optimizers.Adam(learning_rate=lr),
      loss="categorical_crossentropy",
      metrics=["accuracy"]
  )
  return model

def build_modelv2(hp): 
  units = hp.Int("units", min_values=32, max_value=512, step=32)
  activation=hp.Choice("activation", ["relu", "tanh"])
  dropout = hp.Boolean("dropout")
  lr = hp.Float("lr", min_value=1e-4, max_value=1e-2, sampling="log")
  # call existing model-building code with the hyperparameter values
  model = call_existing_code(
      units=units, activation=activation, dropout=dropout, lr=lr
  )
  return model 

class SimpleMLPv2(kt.HyperModel):
  def __init__(self, num_classes):
    self.num_classes = num_classes

  # Build function takes a hp object
  def build(self, hp):    
    model = keras.Sequential()
    model.add(
        layers.Dense(
            # Tune the number of units
            units = hp.Int(name="units", min_value=16, max_value=64, step=16),
            activation=hp.Choice("activation", ["relu", "tanh"])
        )
    )
    # Tune whether to use dropout
    if hp.Boolean("dropout"):
      model.add(layers.Dropout(rate=0.25))

    # add the classification layer                   
    model.add(layers.Dense(self.num_classes, activation="softmax"))
    
    # define the optimizer learning rate as a hyperparameter
    learning_rate = hp.Float("lr", min_value=1e-4, max_value=1e-2, sampling="log")
    
    # we can try two choices for the optimizer
    #optimizer = hp.Choice(name="optimizer", values=["rmsprop", "adam"])
    optimizer = keras.optimizers.Adam(learning_rate=learning_rate)
    model.compile(
        optimizer=optimizer,
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"])
    return model 

# Example which tunes the number of layers
class SimpleMLPv3(kt.HyperModel):
  def __init__(self, num_classes):
    self.num_classes = num_classes

  # Build function takes a hp object
  def build(self, hp):    
    model = keras.Sequential()
    model.add(layers.Flatten())
    # Tune the number of layrers
    for i in range(hp.Int("num_layers", 1, 3)):      
      model.add(
        layers.Dense(
            # Tune the number of units
            units = hp.Int(name=f"units_{i}", min_value=16, max_value=64, step=16),
            activation=hp.Choice("activation", ["relu", "tanh"])
        )
      )
    # Tune whether to use dropout
    if hp.Boolean("dropout"):
      model.add(layers.Dropout(rate=0.25))

    # add the classification layer                   
    model.add(layers.Dense(self.num_classes, activation="softmax"))
    
    # define the optimizer learning rate as a hyperparameter
    learning_rate = hp.Float("lr", min_value=1e-4, max_value=1e-2, sampling="log")
    
    # we can try two choices for the optimizer
    #optimizer = hp.Choice(name="optimizer", values=["rmsprop", "adam"])
    optimizer = keras.optimizers.Adam(learning_rate=learning_rate)
    model.compile(
        optimizer=optimizer,
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"])
    return model 
    

In [8]:
hypermodel = SimpleMLP(num_classes=10)

In [19]:
# Next we pick a tuner. 
tuner = kt.BayesianOptimization(
   hypermodel,
   objective="val_accuracy", # the parameter that tuner will seek to optimize
   max_trials=10, # maximum number of different model configuration trials ,
   executions_per_trial=2,
   directory="mnist_kt_test",
   project_name="mnist_hp",
   overwrite=True,
)

In [20]:
# display an overview of the search space via search_space_summary()
tuner.search_space_summary()

Search space summary
Default search space size: 2
units (Int)
{'default': None, 'conditions': [], 'min_value': 16, 'max_value': 64, 'step': 16, 'sampling': None}
optimizer (Choice)
{'default': 'rmsprop', 'conditions': [], 'values': ['rmsprop', 'adam'], 'ordered': False}


In [21]:
# Get the data
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()
x_train = x_train.reshape((-1, 28 * 28)).astype("float32") / 255
x_test  = x_test.reshape((-1, 28 * 28)).astype("float32") / 255

# reserve the full datasets for later
x_train_full = x_train[:]
y_train_full = y_train[:]

In [22]:
num_val_samples = 10000
x_train, x_val = x_train[:-num_val_samples], x_train[-num_val_samples:]
y_train, y_val = y_train[:-num_val_samples], y_train[-num_val_samples:]

# add some callbacks
# patience is the number of epochs with no improvement after which training
# will be stopped.
callbacks = [
    keras.callbacks.EarlyStopping(monitor="val_loss", patience=5),
]

# Use a large number of epochs (you don't know in advance how many epocs 
# each model will need), and use an EarlyStopping callback to stop training
# when you start overfitting
tuner.search(
    x_train, y_train,
    batch_size=128,
    epochs=100,
    validation_data=(x_val, y_val),
    callbacks=callbacks,
    verbose=2,
)

Trial 10 Complete [00h 00m 39s]
val_accuracy: 0.9733499884605408

Best val_accuracy So Far: 0.9754500091075897
Total elapsed time: 00h 10m 26s


In [25]:
top_n = 4
best_hps = tuner.get_best_hyperparameters(top_n)

In [39]:
# in callback Earlystopping
# mode='min', training will stop once the quantity monitored has stopped decreasing
# mode='max', training will stop once the quantity monitored has stopped increasing
# mode='auto', the direction is automatically inferred from the name of the monitored quantity

# use the validation set to find the best epochs
def get_best_epoch(hp):
  model = hypermodel.build(hp)
  callbacks=[
      keras.callbacks.EarlyStopping(
          monitor="val_loss", mode="min", patience=10),
  ]
  history = model.fit(
      x_train, y_train,
      validation_data=(x_val, y_val),
      epochs=100,
      batch_size=128,
      callbacks=callbacks
  )
  val_loss_per_epoch = history.history["val_loss"]
  # add 1 because index is zero-based.
  best_epoch = val_loss_per_epoch.index(min(val_loss_per_epoch)) + 1
  print("Best epoch: {}".format(best_epoch))
  return best_epoch, model


In [40]:
# Finally, train on the full dataset for just a bit longer than this epoch count,
# since you are training on more data; 20% more in this case

def get_best_trained_model(hp):
  best_epoch, model = get_best_epoch(hp) 
  model.fit(
      x_train_full, y_train_full, 
      batch_size=128, epochs=int(best_epoch * 1.2))
  return model 

best_models = []

for hp in best_hps: 
  # get the hyperparameter and train the model on the full dataset
  model = get_best_trained_model(hp) 
  # evaluate the model on the held-out test dataset
  model.evaluate(x_test, y_test)
  # save the model in the list of best models
  best_models.append(model)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Best epoch: 13
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Best epoch: 16
Epoch 1/19
Epoch 2/19
Epoch 3/19
Epoch 4/19
Epoch 5/19
Epoch 6/19
Epoch 7/19
Epoch 8/19
Epoch 9/19
Epoch 10/19
Epoch 11/19
Epoch 12/19
Epoch 13/19
Epoch 14/19
Epoch 15/19
Epoch 16/

In [41]:
best_models

[<keras.engine.sequential.Sequential at 0x7fafa1d0ee90>,
 <keras.engine.sequential.Sequential at 0x7fafa1cc64d0>,
 <keras.engine.sequential.Sequential at 0x7faf8060a910>,
 <keras.engine.sequential.Sequential at 0x7faf80490090>]

In [42]:
# If you are not worrying about underperforming, there's a shortcut you can take
# just use the tuner to reload the top-performing models with the best weights 
# saved during the hyperparameter search, without retraining new models from
# scratch

best_modelsv2 = tuner.get_best_models(top_n)


In [None]:
# Compare the performance of both models on the accuracy of the test data
# case 1: best_models - retrain on full data
# case 2: best_models picked from top_n models after hyperparameter tuning with KT

In [43]:
for model in best_models:
  model.evaluate(x_test, y_test)



In [44]:
# smaller loss, these models are not trained on the full data
for model in best_modelsv2:
  model.evaluate(x_test, y_test)



In [47]:
# view the summary of one of the bestmodels
best_model = best_models[2]

best_model.build(input_shape=(None,28,28))
best_model.summary()

Model: "sequential_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_12 (Dense)            (None, 64)                50240     
                                                                 
 dense_13 (Dense)            (None, 10)                650       
                                                                 
Total params: 50,890
Trainable params: 50,890
Non-trainable params: 0
_________________________________________________________________


In [48]:
# print a summary of the search results
tuner.results_summary()

Results summary
Results in mnist_kt_test/untitled_project
Showing 10 best trials
<keras_tuner.engine.objective.Objective object at 0x7fafa01d8110>
Trial summary
Hyperparameters:
units: 64
optimizer: rmsprop
Score: 0.9754500091075897
Trial summary
Hyperparameters:
units: 64
optimizer: adam
Score: 0.9751999974250793
Trial summary
Hyperparameters:
units: 64
optimizer: rmsprop
Score: 0.974700003862381
Trial summary
Hyperparameters:
units: 64
optimizer: rmsprop
Score: 0.9745000004768372
Trial summary
Hyperparameters:
units: 64
optimizer: rmsprop
Score: 0.9745000004768372
Trial summary
Hyperparameters:
units: 64
optimizer: rmsprop
Score: 0.9743499755859375
Trial summary
Hyperparameters:
units: 64
optimizer: rmsprop
Score: 0.9739000201225281
Trial summary
Hyperparameters:
units: 64
optimizer: rmsprop
Score: 0.9733499884605408
Trial summary
Hyperparameters:
units: 64
optimizer: adam
Score: 0.9731000065803528
Trial summary
Hyperparameters:
units: 32
optimizer: adam
Score: 0.9704000055789948


In [53]:
y_train[:2]
y_conv = keras.utils.to_categorical(y_train, 10)
y_conv

array([[0., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 1., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 1., 0.]], dtype=float32)

In [None]:
# With the above representation if we convert y values to categorical
# in the compile method, then loss = 'categorical_crossentropy'
# but if we use the sparse representions, 1, 2, 3,4, 
# then loss= 'sparse_categorical_crossentropy'
