Afinación de parámetros con Keras Tuner
===

* 30:00 min | Última modificación: Mayo 10, 2021 | [YouTube]

Adaptado de:

* https://www.tensorflow.org/tutorials/keras/keras_tuner

In [1]:
import kerastuner as kt
import tensorflow as tf
from tensorflow import keras

print(tf.__version__)
print(kt.__version__)

2.4.1
1.0.2


In [2]:
#
#  Descarga de los datos
#
(
    (train_images, train_labels),
    (test_images, test_labels),
) = keras.datasets.fashion_mnist.load_data()

## Especificación del modelo

In [3]:
def model_builder(hp):
    
    #
    # Evalua distintas cantidades de neuronas en la capa
    # oculta
    #
    hp_units = hp.Int(
        "units",
        min_value=32,
        max_value=512,
        step=32,
    )
        
    model = keras.Sequential(
        [
            #
            # Esta capa reduce una dimension de la matriz de entrada.
            # El conjunto de entrada es una lista de matrices de 28x28.
            # Flatten genera una lista de vectores de dimensión 28x28
            #            
            keras.layers.Flatten(input_shape=(28, 28)),
            
            #
            # Preprocesamiento de la entrada [0, 255] --> [0, 1]
            #
            keras.layers.experimental.preprocessing.Rescaling(scale=1.0 / 255),
            
            #
            # Capa de procesamiento. Aquí se modifica dinamicamente la 
            # cantidad de neuronas en la capa
            #
            keras.layers.Dense(
                units=hp_units,
                activation="relu",
            ),
            
            #
            # Capa de salida
            #
            keras.layers.Dense(10)
        ]
    )

    #
    # Escoge el mejor valor para la tasa de aprendizaje
    #
    hp_learning_rate = hp.Choice(
        "learning_rate",
        values=[0.01, 0.001, 0.0001],
    )

    model.compile(
        #
        # Optimizador con distintos valores 
        # para la tasa de aprendizaje
        #
        optimizer=keras.optimizers.Adam(
            learning_rate=hp_learning_rate,
        ),
        #
        # Función de pérdida
        #
        loss=keras.losses.SparseCategoricalCrossentropy(
            from_logits=True,
        ),
        #
        # Métrica a monitoreas
        #
        metrics=["accuracy"],
    )

    return model

## Monitoreo del modelo con Early Stopping

In [4]:
callbacks = [
    tf.keras.callbacks.EarlyStopping(
        #
        # Metrica a monitorear
        #
        monitor="val_loss",
        
        #
        # Número de iteraciones sin mejora antes
        # de finalizar el entrenamiento
        #
        patience=5,
    )
]

## Estrategias de búsqueda

In [5]:
!rm -rf /tmp/hyperband_kt

hyperband_tuner = kt.Hyperband(
    hypermodel=model_builder,      # construye el modelo
    objective="val_accuracy",      # criterio para seleccionar los parametros
    max_epochs=10,                 # Número máximo de iteraciones
    factor=3,                      # factor para reducir la cantidad de modelos
    directory="/tmp/hyperband_kt", # directorio de trabajo
    project_name="hyperband_kt",   # nombre del proyecto
    overwrite=True,                # sobre-escribe la carpeta si existe
)

#
# Resumen de los parametros de la búsqueda
#
hyperband_tuner.search_space_summary()

Search space summary
Default search space size: 2
units (Int)
{'default': None, 'conditions': [], 'min_value': 32, 'max_value': 512, 'step': 32, 'sampling': None}
learning_rate (Choice)
{'default': 0.01, 'conditions': [], 'values': [0.01, 0.001, 0.0001], 'ordered': True}


In [6]:
!rm -rf /tmp/randomsearch_kt

randomsearch_tuner = kt.RandomSearch(
    hypermodel=model_builder,         # construye el modelo
    objective="val_accuracy",         # criterio para seleccionar los parametros
    max_trials=4,                     # número máximo de ensayos
    directory="/tmp/randomsearch_kt", # directorio de trabajo
    project_name="randomsearch_kt",   # Nombre del proyecto
    overwrite=True,                   # sobre-escribe la carpeta si existe
)

#
# Resumen de los parametros de la búsqueda
#
randomsearch_tuner.search_space_summary()

Search space summary
Default search space size: 2
units (Int)
{'default': None, 'conditions': [], 'min_value': 32, 'max_value': 512, 'step': 32, 'sampling': None}
learning_rate (Choice)
{'default': 0.01, 'conditions': [], 'values': [0.01, 0.001, 0.0001], 'ordered': True}


In [7]:
!rm -rf /tmp/bayesianopt_kt

bayesianoptimization_tuner = kt.BayesianOptimization(
    hypermodel=model_builder,         # construye el modelo
    objective="val_accuracy",         # criterio para seleccionar los parametros
    max_trials=4,                     # número máximo de ensayos
    seed=123456,                      # semilla del generador de aleatorios
    directory="/tmp/bayesianopt_kt",  # directorio de trabajo
    project_name="bayesianopt_kt",    # Nombre del proyecto
    overwrite=True,                   # sobre-escribe la carpeta si existe
)

#
# Resumen de los parametros de la búsqueda
#
bayesianoptimization_tuner.search_space_summary()

Search space summary
Default search space size: 2
units (Int)
{'default': None, 'conditions': [], 'min_value': 32, 'max_value': 512, 'step': 32, 'sampling': None}
learning_rate (Choice)
{'default': 0.01, 'conditions': [], 'values': [0.01, 0.001, 0.0001], 'ordered': True}


## Búsqueda de los parámetros óptimos

In [8]:
hyperband_tuner.search(
    train_images,
    train_labels,
    epochs=50,
    validation_split=0.2,
    callbacks=callbacks,
    verbose=1,
)

hyperband_tuner.results_summary()

Trial 30 Complete [00h 00m 36s]
val_accuracy: 0.8668333292007446

Best val_accuracy So Far: 0.8857499957084656
Total elapsed time: 00h 07m 22s
INFO:tensorflow:Oracle triggered exit
Results summary
Results in /tmp/hyperband_kt/hyperband_kt
Showing 10 best trials
Objective(name='val_accuracy', direction='max')
Trial summary
Hyperparameters:
units: 448
learning_rate: 0.001
tuner/epochs: 10
tuner/initial_epoch: 0
tuner/bracket: 0
tuner/round: 0
Score: 0.8857499957084656
Trial summary
Hyperparameters:
units: 384
learning_rate: 0.001
tuner/epochs: 10
tuner/initial_epoch: 4
tuner/bracket: 1
tuner/round: 1
tuner/trial_id: 065db2af472ef92978aaa72b7758c580
Score: 0.8850833177566528
Trial summary
Hyperparameters:
units: 384
learning_rate: 0.001
tuner/epochs: 4
tuner/initial_epoch: 0
tuner/bracket: 1
tuner/round: 0
Score: 0.8818333148956299
Trial summary
Hyperparameters:
units: 224
learning_rate: 0.001
tuner/epochs: 10
tuner/initial_epoch: 4
tuner/bracket: 2
tuner/round: 2
tuner/trial_id: 192702e7

In [9]:
randomsearch_tuner.search(
    train_images,
    train_labels,
    epochs=50,
    validation_split=0.2,
    callbacks=callbacks,
    verbose=1,
)

randomsearch_tuner.results_summary()

Trial 4 Complete [00h 02m 28s]
val_accuracy: 0.8964999914169312

Best val_accuracy So Far: 0.8972499966621399
Total elapsed time: 00h 09m 44s
INFO:tensorflow:Oracle triggered exit
Results summary
Results in /tmp/randomsearch_kt/randomsearch_kt
Showing 10 best trials
Objective(name='val_accuracy', direction='max')
Trial summary
Hyperparameters:
units: 480
learning_rate: 0.0001
Score: 0.8972499966621399
Trial summary
Hyperparameters:
units: 416
learning_rate: 0.0001
Score: 0.8964999914169312
Trial summary
Hyperparameters:
units: 256
learning_rate: 0.0001
Score: 0.8943333625793457
Trial summary
Hyperparameters:
units: 384
learning_rate: 0.0001
Score: 0.8939999938011169


In [10]:
bayesianoptimization_tuner.search(
    train_images,
    train_labels,
    epochs=50,
    validation_split=0.2,
    callbacks=callbacks,
    verbose=1,
)

bayesianoptimization_tuner.results_summary()

Trial 4 Complete [00h 02m 06s]
val_accuracy: 0.8740000128746033

Best val_accuracy So Far: 0.8923333287239075
Total elapsed time: 00h 07m 01s
INFO:tensorflow:Oracle triggered exit
Results summary
Results in /tmp/bayesianopt_kt/bayesianopt_kt
Showing 10 best trials
Objective(name='val_accuracy', direction='max')
Trial summary
Hyperparameters:
units: 416
learning_rate: 0.001
Score: 0.8923333287239075
Trial summary
Hyperparameters:
units: 128
learning_rate: 0.0001
Score: 0.89041668176651
Trial summary
Hyperparameters:
units: 32
learning_rate: 0.0001
Score: 0.8740000128746033
Trial summary
Hyperparameters:
units: 480
learning_rate: 0.01
Score: 0.8669999837875366


## Obtención del mejor modelo

In [11]:
best_hyperband_hps = hyperband_tuner.get_best_hyperparameters(num_trials=1)[0]

#
# Cantidad optima de neuronas en la capa oculta 
# y tasa de aprendizaje
#
best_hyperband_hps.get("units"), best_hyperband_hps.get('learning_rate')

(448, 0.001)

In [12]:
best_randomsearch_hps = randomsearch_tuner.get_best_hyperparameters(num_trials=1)[0]

#
# Cantidad optima de neuronas en la capa oculta 
# y tasa de aprendizaje
#
best_randomsearch_hps.get("units"), best_randomsearch_hps.get('learning_rate')

(480, 0.0001)

In [13]:
best_bayesianopt_hps = bayesianoptimization_tuner.get_best_hyperparameters(num_trials=1)[0]

#
# Cantidad optima de neuronas en la capa oculta 
# y tasa de aprendizaje
#
best_bayesianopt_hps.get("units"), best_bayesianopt_hps.get('learning_rate')

(416, 0.001)

## Obtención del mejor modelo

In [14]:
#
# Construcción del modelo con los hiperparametros óptimos
# y entrenamiento para 50 epochs.
#
# Se busca obtener el número óptimo de epochs para
# entrenar el modelo
#
model = hyperband_tuner.hypermodel.build(best_hyperband_hps)

history = model.fit(
    train_images,
    train_labels,
    epochs=50,
    validation_split=0.2,
    verbose=2,
)

val_acc_per_epoch = history.history["val_accuracy"]
best_epoch = val_acc_per_epoch.index(max(val_acc_per_epoch)) + 1

#
# Número máximo de epochs usadas para entrenar el modelo
#
best_epoch

Epoch 1/50
1500/1500 - 4s - loss: 0.4944 - accuracy: 0.8254 - val_loss: 0.4105 - val_accuracy: 0.8524
Epoch 2/50
1500/1500 - 3s - loss: 0.3681 - accuracy: 0.8662 - val_loss: 0.3524 - val_accuracy: 0.8718
Epoch 3/50
1500/1500 - 4s - loss: 0.3290 - accuracy: 0.8785 - val_loss: 0.3371 - val_accuracy: 0.8798
Epoch 4/50
1500/1500 - 4s - loss: 0.3072 - accuracy: 0.8863 - val_loss: 0.3196 - val_accuracy: 0.8858
Epoch 5/50
1500/1500 - 4s - loss: 0.2863 - accuracy: 0.8944 - val_loss: 0.3588 - val_accuracy: 0.8755
Epoch 6/50
1500/1500 - 4s - loss: 0.2696 - accuracy: 0.9011 - val_loss: 0.3265 - val_accuracy: 0.8837
Epoch 7/50
1500/1500 - 4s - loss: 0.2556 - accuracy: 0.9032 - val_loss: 0.3185 - val_accuracy: 0.8862
Epoch 8/50
1500/1500 - 4s - loss: 0.2463 - accuracy: 0.9090 - val_loss: 0.3161 - val_accuracy: 0.8855
Epoch 9/50
1500/1500 - 4s - loss: 0.2330 - accuracy: 0.9128 - val_loss: 0.3371 - val_accuracy: 0.8847
Epoch 10/50
1500/1500 - 4s - loss: 0.2258 - accuracy: 0.9151 - val_loss: 0.3473 - 

19

In [15]:
#
# Reentrena el modelo usando el número óptimo de epochs
#
model = hyperband_tuner.hypermodel.build(best_hyperband_hps)

history = model.fit(
    train_images,
    train_labels,
    epochs=50,
    validation_split=0.2,
    verbose=2,
)

Epoch 1/50
1500/1500 - 4s - loss: 0.4904 - accuracy: 0.8273 - val_loss: 0.4339 - val_accuracy: 0.8448
Epoch 2/50
1500/1500 - 3s - loss: 0.3716 - accuracy: 0.8644 - val_loss: 0.4524 - val_accuracy: 0.8330
Epoch 3/50
1500/1500 - 4s - loss: 0.3330 - accuracy: 0.8777 - val_loss: 0.3273 - val_accuracy: 0.8827
Epoch 4/50
1500/1500 - 4s - loss: 0.3061 - accuracy: 0.8877 - val_loss: 0.3678 - val_accuracy: 0.8719
Epoch 5/50
1500/1500 - 4s - loss: 0.2861 - accuracy: 0.8932 - val_loss: 0.3184 - val_accuracy: 0.8880
Epoch 6/50
1500/1500 - 4s - loss: 0.2718 - accuracy: 0.8997 - val_loss: 0.3281 - val_accuracy: 0.8828
Epoch 7/50
1500/1500 - 4s - loss: 0.2574 - accuracy: 0.9033 - val_loss: 0.3343 - val_accuracy: 0.8837
Epoch 8/50
1500/1500 - 4s - loss: 0.2451 - accuracy: 0.9083 - val_loss: 0.3362 - val_accuracy: 0.8819
Epoch 9/50
1500/1500 - 4s - loss: 0.2359 - accuracy: 0.9127 - val_loss: 0.3171 - val_accuracy: 0.8897
Epoch 10/50
1500/1500 - 4s - loss: 0.2249 - accuracy: 0.9159 - val_loss: 0.3242 - 