# TL07 Regularización

Propósito: evitar modelos sobreajustados modificando el comportamiento del descenso por gradiente, la función objetivo y los datos

## Inicialización: librerías, semilla, lectura de MNIST sin normalización y partición train-val-test

In [2]:
# Install KerasTuner, which the import cell below brings in as `keras_tuner`.
pip install keras_tuner

Collecting keras_tuner
  Downloading keras_tuner-1.4.7-py3-none-any.whl.metadata (5.4 kB)
Collecting kt-legacy (from keras_tuner)
  Downloading kt_legacy-1.0.5-py3-none-any.whl.metadata (221 bytes)
Downloading keras_tuner-1.4.7-py3-none-any.whl (129 kB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 129.1/129.1 kB 2.4 MB/s eta 0:00:00
Downloading kt_legacy-1.0.5-py3-none-any.whl (9.6 kB)
Installing collected packages: kt-legacy, keras_tuner
Successfully installed keras_tuner-1.4.7 kt-legacy-1.0.5


In [3]:
import numpy as np; import matplotlib.pyplot as plt
import os; os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import keras; import keras_tuner
# Seed the random generators so that runs are repeatable.
keras.utils.set_random_seed(23)
input_dim = (28, 28, 1)
num_classes = 10

# Load MNIST. Pixels are cast to float32 but left unscaled here; the model
# itself starts with a Rescaling(1/255) layer.
(x_train_val, y_train_val), (x_test, y_test) = keras.datasets.mnist.load_data()

# Cast and append a trailing channel axis: (N, 28, 28) -> (N, 28, 28, 1).
x_train_val = np.expand_dims(x_train_val.astype("float32"), -1)
x_test = np.expand_dims(x_test.astype("float32"), -1)
print(x_train_val.shape, y_train_val.shape, x_test.shape, y_test.shape)

# One-hot encode the integer labels.
y_train_val = keras.utils.to_categorical(y_train_val, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

# Hold out the last 10000 samples of the training data for validation.
x_train, x_val = x_train_val[:-10000], x_train_val[-10000:]
y_train, y_val = y_train_val[:-10000], y_train_val[-10000:]

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
11490434/11490434 ━━━━━━━━━━━━━━━━━━━━ 0s 0us/step
(60000, 28, 28, 1) (60000,) (10000, 28, 28, 1) (10000,)


MyHyperModel: exploramos aumento de datos (rotación, translación y zoom) y dropout 0.5

In [4]:
class MyHyperModel(keras_tuner.HyperModel):
    """CNN hypermodel for 28x28x1 images with tunable data augmentation.

    A single hyperparameter, ``factor``, is shared by the random rotation,
    translation and zoom layers. Dropout is fixed at 0.5.
    """

    def build(self, hp):
        """Build and compile the network for one trial.

        Args:
            hp: KerasTuner hyperparameters object; ``factor`` is registered on it.

        Returns:
            The compiled ``keras.Sequential`` model.
        """
        # Log-scale search: 0.01, 0.02, 0.04, ... up to at most 0.3.
        factor = hp.Float("factor", min_value=0.01, max_value=0.3, step=2, sampling="log")
        filters = 64
        # To tune dropout as well, replace the constant with:
        # hp.Float("dropout", min_value=0.0, max_value=0.5, step=0.1)
        dropout = 0.5

        M = keras.Sequential([
            keras.Input(shape=(28, 28, 1)),
            # Data augmentation layers come first and operate on raw pixel values.
            keras.layers.RandomRotation(factor, fill_mode="nearest"),
            keras.layers.RandomTranslation(factor, factor, fill_mode="nearest"),
            keras.layers.RandomZoom(factor, fill_mode="nearest"),
            # Scale pixels from [0, 255] to [0, 1] inside the model.
            keras.layers.Rescaling(1./255),
            keras.layers.Conv2D(filters, kernel_size=(3, 3), activation="relu"),
            keras.layers.MaxPooling2D(pool_size=(2, 2)),
            keras.layers.Conv2D(2*filters, kernel_size=(3, 3), activation="relu"),
            keras.layers.MaxPooling2D(pool_size=(2, 2)),
            keras.layers.Flatten(),
            keras.layers.Dense(units=800, activation='relu'),
            keras.layers.Dropout(dropout),
            keras.layers.Dense(10, activation='softmax'),
        ])
        opt = keras.optimizers.Adam(learning_rate=0.00168)
        M.compile(loss="categorical_crossentropy", optimizer=opt, metrics=["accuracy"])
        return M

    def fit(self, hp, M, x, y, xy_val, **kwargs):
        """Train ``M`` with learning-rate reduction and early stopping.

        Args:
            hp: KerasTuner hyperparameters object (not used here).
            M: Compiled model returned by ``build``.
            x: Training inputs.
            y: Training targets.
            xy_val: ``(x_val, y_val)`` tuple passed as ``validation_data``.
            **kwargs: Extra arguments forwarded to ``M.fit``. ``callbacks`` is
                optional; ``batch_size`` and ``epochs`` default to 256 and 100
                when not supplied.

        Returns:
            Whatever ``M.fit`` returns.
        """
        lr_factor = 0.3787
        patience = 5
        reduce_cb = keras.callbacks.ReduceLROnPlateau(
            monitor='val_accuracy', mode='max', factor=lr_factor, patience=patience,
            min_delta=1e-4, min_lr=1e-5)
        # Early stopping waits twice as long as the LR scheduler, so the
        # learning rate is reduced before training is abandoned.
        early_cb = keras.callbacks.EarlyStopping(
            monitor='val_accuracy', mode='max', patience=2*patience, min_delta=1e-5)
        # Build a fresh list: tolerates a missing/None 'callbacks' entry and
        # does not mutate the list object owned by the caller.
        callbacks = list(kwargs.pop('callbacks', None) or [])
        callbacks += [reduce_cb, early_cb]
        # Defaults only: a caller (or a tuner that sets epochs itself) may
        # override them without a duplicate-keyword TypeError.
        kwargs.setdefault('batch_size', 256)
        kwargs.setdefault('epochs', 100)
        return M.fit(x, y, validation_data=xy_val, callbacks=callbacks, **kwargs)

Experimento: exploración y resumen de resultados

In [5]:
# Bayesian-optimization tuner over MyHyperModel, targeting val_accuracy,
# with at most 10 trials of one execution each; results go under /tmp/MNIST.
tuner = keras_tuner.BayesianOptimization(
    hypermodel=MyHyperModel(),
    objective="val_accuracy",
    max_trials=10,
    executions_per_trial=1,
    overwrite=True,
    directory="/tmp",
    project_name="MNIST",
)


In [None]:
# Run the hyperparameter search. The three positional arguments line up with
# the x, y, xy_val parameters of MyHyperModel.fit (presumably forwarded by the
# tuner -- confirm against the KerasTuner documentation).
tuner.search(x_train, y_train, (x_val, y_val))


Search: Running Trial #1

Value             |Best Value So Far |Hyperparameter
0.04              |0.04              |factor

Epoch 1/100
 66/196 ━━━━━━━━━━━━━━━━━━━━ 1:40 769ms/step - accuracy: 0.6845 - loss: 0.9475

In [None]:
# Print the search summary, limited to 3 trials (presumably the best ones by
# the tuner objective -- confirm against the KerasTuner documentation).
tuner.results_summary(num_trials=3)

Experimento (cont.): evaluación en test de los mejores modelos en validación

In [None]:
# Evaluate on the test set the models that ranked best on validation.
num_models = 10
best_hyperparameters = tuner.get_best_hyperparameters(num_trials=num_models)
best_models = tuner.get_best_models(num_models=num_models)
# Iterate over what the tuner actually returned: if fewer than num_models
# trials completed (interrupted search, failed trials), indexing by
# range(num_models) would raise IndexError.
for m, (hyperparameters, model) in enumerate(zip(best_hyperparameters, best_models)):
    values = hyperparameters.values
    # score is [loss, accuracy], matching the metrics compiled into the model.
    score = model.evaluate(x_test, y_test, verbose=0)
    print(f'Model {m}: Hyperparameters: {values!s} Loss: {score[0]:.4} Precisión: {score[1]:.2%}')

## Ejercicio: realiza un experimento similar al de MNIST con Fashion-MNIST

Inicialización: librerías, semilla, lectura de Fashion-MNIST sin normalización y partición train-val-test


In [None]:
import numpy as np; import matplotlib.pyplot as plt
import os; os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import keras; import keras_tuner
# Seed the random generators so that runs are repeatable.
keras.utils.set_random_seed(23)
input_dim = (28, 28, 1)
num_classes = 10

# Load Fashion-MNIST. Pixels are cast to float32 but left unscaled here; the
# model itself starts with a Rescaling(1/255) layer.
(x_train_val, y_train_val), (x_test, y_test) = keras.datasets.fashion_mnist.load_data()

# Cast and append a trailing channel axis to the image arrays.
x_train_val = np.expand_dims(x_train_val.astype("float32"), -1)
x_test = np.expand_dims(x_test.astype("float32"), -1)
print(x_train_val.shape, y_train_val.shape, x_test.shape, y_test.shape)

# One-hot encode the integer labels.
y_train_val = keras.utils.to_categorical(y_train_val, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

# Hold out the last 10000 samples of the training data for validation.
x_train, x_val = x_train_val[:-10000], x_train_val[-10000:]
y_train, y_val = y_train_val[:-10000], y_train_val[-10000:]


MyHyperModel: exploramos aumento de datos (rotación, translación y zoom) y dropout 0.5

In [None]:
class MyHyperModel(keras_tuner.HyperModel):
    """CNN hypermodel for 28x28x1 images with tunable data augmentation.

    A single hyperparameter, ``factor``, is shared by the random rotation,
    translation and zoom layers. Dropout is fixed at 0.5.
    """

    def build(self, hp):
        """Build and compile the network for one trial.

        Args:
            hp: KerasTuner hyperparameters object; ``factor`` is registered on it.

        Returns:
            The compiled ``keras.Sequential`` model.
        """
        # Log-scale search: 0.01, 0.02, 0.04, ... up to at most 0.3.
        factor = hp.Float("factor", min_value=0.01, max_value=0.3, step=2, sampling="log")
        filters = 64
        # To tune dropout as well, replace the constant with:
        # hp.Float("dropout", min_value=0.0, max_value=0.5, step=0.1)
        dropout = 0.5

        M = keras.Sequential([
            keras.Input(shape=(28, 28, 1)),
            # Data augmentation layers come first and operate on raw pixel values.
            keras.layers.RandomRotation(factor, fill_mode="nearest"),
            keras.layers.RandomTranslation(factor, factor, fill_mode="nearest"),
            keras.layers.RandomZoom(factor, fill_mode="nearest"),
            # Scale pixels from [0, 255] to [0, 1] inside the model.
            keras.layers.Rescaling(1./255),
            keras.layers.Conv2D(filters, kernel_size=(3, 3), activation="relu"),
            keras.layers.MaxPooling2D(pool_size=(2, 2)),
            keras.layers.Conv2D(2*filters, kernel_size=(3, 3), activation="relu"),
            keras.layers.MaxPooling2D(pool_size=(2, 2)),
            keras.layers.Flatten(),
            keras.layers.Dense(units=800, activation='relu'),
            keras.layers.Dropout(dropout),
            keras.layers.Dense(10, activation='softmax'),
        ])
        opt = keras.optimizers.Adam(learning_rate=0.00015)
        M.compile(loss="categorical_crossentropy", optimizer=opt, metrics=["accuracy"])
        return M

    def fit(self, hp, M, x, y, xy_val, **kwargs):
        """Train ``M`` with learning-rate reduction and early stopping.

        Args:
            hp: KerasTuner hyperparameters object (not used here).
            M: Compiled model returned by ``build``.
            x: Training inputs.
            y: Training targets.
            xy_val: ``(x_val, y_val)`` tuple passed as ``validation_data``.
            **kwargs: Extra arguments forwarded to ``M.fit``. ``callbacks`` is
                optional; ``batch_size`` and ``epochs`` default to 256 and 100
                when not supplied.

        Returns:
            Whatever ``M.fit`` returns.
        """
        lr_factor = 0.32
        patience = 5
        reduce_cb = keras.callbacks.ReduceLROnPlateau(
            monitor='val_accuracy', mode='max', factor=lr_factor, patience=patience,
            min_delta=1e-4, min_lr=1e-5)
        # Early stopping waits twice as long as the LR scheduler, so the
        # learning rate is reduced before training is abandoned.
        early_cb = keras.callbacks.EarlyStopping(
            monitor='val_accuracy', mode='max', patience=2*patience, min_delta=1e-5)
        # Build a fresh list: tolerates a missing/None 'callbacks' entry and
        # does not mutate the list object owned by the caller.
        callbacks = list(kwargs.pop('callbacks', None) or [])
        callbacks += [reduce_cb, early_cb]
        # Defaults only: a caller (or a tuner that sets epochs itself) may
        # override them without a duplicate-keyword TypeError.
        kwargs.setdefault('batch_size', 256)
        kwargs.setdefault('epochs', 100)
        return M.fit(x, y, validation_data=xy_val, callbacks=callbacks, **kwargs)


Experimento: exploración y resumen de resultados

In [None]:
# Bayesian-optimization tuner over MyHyperModel, targeting val_accuracy,
# with at most 10 trials of one execution each; results go under
# /tmp/Fashion-MNIST.
tuner = keras_tuner.BayesianOptimization(
    hypermodel=MyHyperModel(),
    objective="val_accuracy",
    max_trials=10,
    executions_per_trial=1,
    overwrite=True,
    directory="/tmp",
    project_name="Fashion-MNIST",
)

In [None]:
# Run the hyperparameter search. The three positional arguments line up with
# the x, y, xy_val parameters of MyHyperModel.fit (presumably forwarded by the
# tuner -- confirm against the KerasTuner documentation).
tuner.search(x_train, y_train, (x_val, y_val))

In [None]:
# Print the search summary, limited to 3 trials (presumably the best ones by
# the tuner objective -- confirm against the KerasTuner documentation).
tuner.results_summary(num_trials=3)

In [None]:
# Evaluate on the test set the models that ranked best on validation.
num_models = 10
best_hyperparameters = tuner.get_best_hyperparameters(num_trials=num_models)
best_models = tuner.get_best_models(num_models=num_models)
# Iterate over what the tuner actually returned: if fewer than num_models
# trials completed (interrupted search, failed trials), indexing by
# range(num_models) would raise IndexError.
for m, (hyperparameters, model) in enumerate(zip(best_hyperparameters, best_models)):
    values = hyperparameters.values
    # score is [loss, accuracy], matching the metrics compiled into the model.
    score = model.evaluate(x_test, y_test, verbose=0)
    print(f'Model {m}: Hyperparameters: {values!s} Loss: {score[0]:.4} Precisión: {score[1]:.2%}')
