# Trying to get over 98% accuracy on MNIST

In [None]:
%pip install keras-tuner
%pip install tensorboard-plugin-profile

Collecting keras-tuner
  Downloading keras_tuner-1.4.7-py3-none-any.whl.metadata (5.4 kB)
Collecting kt-legacy (from keras-tuner)
  Downloading kt_legacy-1.0.5-py3-none-any.whl.metadata (221 bytes)
Downloading keras_tuner-1.4.7-py3-none-any.whl (129 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m129.1/129.1 kB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading kt_legacy-1.0.5-py3-none-any.whl (9.6 kB)
Installing collected packages: kt-legacy, keras-tuner
Successfully installed keras-tuner-1.4.7 kt-legacy-1.0.5
Collecting tensorboard-plugin-profile
  Downloading tensorboard_plugin_profile-2.19.0-cp311-none-manylinux2014_x86_64.whl.metadata (5.0 kB)
Collecting gviz-api>=1.9.0 (from tensorboard-plugin-profile)
  Downloading gviz_api-1.10.0-py2.py3-none-any.whl.metadata (2.6 kB)
Downloading tensorboard_plugin_profile-2.19.0-cp311-none-manylinux2014_x86_64.whl (25.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m25.8/25.8 MB[0m [31m17.2 MB/s

In [None]:
import tensorflow as tf
import keras_tuner as kt
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

from pathlib import Path

# Load MNIST dataset

In [None]:
# Fetch the dataset once and unpack the (images, labels) pairs for each split.
mnist = tf.keras.datasets.mnist.load_data()
(x_train, y_train), (x_test, y_test) = mnist

# Divide the pixel intensities by 255.0, one split at a time.
x_train = x_train / 255.0
x_test = x_test / 255.0

# Quick sanity check on the training array's dimensions.
print(x_train.shape)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[1m11490434/11490434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
(60000, 28, 28)


# Create Model

In [None]:
def build_mnist_model(hp):
    """Build and compile a dense MNIST classifier from tuner hyperparameters.

    Hyperparameters registered on ``hp``:
      * ``n_hidden``      - number of hidden Dense+Dropout pairs (0-4, default 2)
      * ``n_neurons``     - units in every hidden Dense layer (16-512)
      * ``learning_rate`` - optimizer learning rate (1e-4 to 5e-1, log sampling)
      * ``optimizer``     - one of "nadam", "adam", "rmsprop"
      * ``dropout``       - dropout rate after each hidden layer (0.0-0.5)

    Args:
        hp: a KerasTuner ``HyperParameters`` object.

    Returns:
        A compiled ``tf.keras.Sequential`` model that takes 28x28 inputs and
        outputs 10 softmax probabilities.
    """
    n_hidden = hp.Int("n_hidden", min_value=0, max_value=4, default=2)
    n_neurons = hp.Int("n_neurons", min_value=16, max_value=512)
    learning_rate = hp.Float("learning_rate", min_value=1e-4, max_value=5e-1, sampling="log")
    optimizer_name = hp.Choice("optimizer", values=["nadam", "adam", "rmsprop"])

    dropout = hp.Float("dropout", min_value=0.0, max_value=0.5)

    # Map every value the tuner can pick to its optimizer class. The previous
    # code compared against "sgd", which is not among the choices, so all
    # trials fell through to Adam and the hyperparameter had no effect.
    optimizer_classes = {
        "nadam": tf.keras.optimizers.Nadam,
        "adam": tf.keras.optimizers.Adam,
        "rmsprop": tf.keras.optimizers.RMSprop,
    }
    optimizer = optimizer_classes[optimizer_name](learning_rate=learning_rate)

    model = tf.keras.Sequential()

    # Declare the input shape with an explicit Input object rather than
    # passing `input_shape` to the first layer, which triggers a Keras warning.
    model.add(tf.keras.Input(shape=(28, 28)))
    model.add(tf.keras.layers.Flatten())

    for _ in range(n_hidden):
        model.add(tf.keras.layers.Dense(n_neurons, activation="relu"))
        model.add(tf.keras.layers.Dropout(dropout))

    model.add(tf.keras.layers.Dense(10, activation="softmax"))

    model.compile(loss="sparse_categorical_crossentropy", optimizer=optimizer, metrics=["accuracy"])

    return model


class MnistHyperModel(kt.HyperModel):
  """Hypermodel that tunes the network and whether inputs go through a Normalization layer."""

  def build(self, hp):
    """Return the model produced by build_mnist_model for these hyperparameters."""
    return build_mnist_model(hp)

  def fit(self, hp, model, X, y, **kwargs):
    """Fit `model` on (X, y), forwarding any extra keyword arguments to `model.fit`.

    When the boolean hyperparameter "normalize" is true, X is first passed
    through a freshly created Normalization layer.
    """
    # NOTE(review): the Normalization layer is used without a visible adapt()
    # call or explicit mean/variance - confirm it actually rescales X. It is
    # also applied only here, so code that calls model.fit() directly skips it.
    inputs = tf.keras.layers.Normalization()(X) if hp.Boolean("normalize") else X
    return model.fit(inputs, y, **kwargs)


# Train Model

## Do Hyperparameter Tuning

In [None]:
# Hyperband search over MnistHyperModel, ranked by validation accuracy.
# Results are written under mnist_hyperband/hyperband_tuning.
mnist_hyperband_tuner = kt.Hyperband(
    hypermodel=MnistHyperModel(),
    objective="val_accuracy",
    max_epochs=10,
    factor=3,
    directory="mnist_hyperband",
    project_name="hyperband_tuning",
)

  super().__init__(**kwargs)


In [None]:
# TensorBoard logs go in a "tensorboard" folder under the tuner's project directory.
mnist_root_logdir = Path(mnist_hyperband_tuner.project_dir).joinpath("tensorboard")
mnist_tensorboard_cb = tf.keras.callbacks.TensorBoard(log_dir=mnist_root_logdir)

# Stop training after 2 epochs without improvement in the monitored metric,
# then roll the model back to its best weights.
mnist_early_stopping_cb = tf.keras.callbacks.EarlyStopping(
    patience=2,
    restore_best_weights=True,
)

# Hold out 20% of the training data for validation during each trial.
mnist_hyperband_tuner.search(
    x_train,
    y_train,
    epochs=10,
    validation_split=0.2,
    callbacks=[mnist_early_stopping_cb, mnist_tensorboard_cb],
)

Trial 30 Complete [00h 00m 34s]
val_accuracy: 0.10616666823625565

Best val_accuracy So Far: 0.9804999828338623
Total elapsed time: 00h 13m 36s


## Get best hyperparameters

In [None]:
# Ask the tuner for a single set of hyperparameters and take that one entry.
top_hyperparameter_sets = mnist_hyperband_tuner.get_best_hyperparameters(num_trials=1)
best_hps = top_hyperparameter_sets[0]

### Find optimal number of epochs to train

In [None]:
# Rebuild the network from the best hyperparameters and train it for 50 epochs
# to see at which epoch validation accuracy peaks.
finding_optimal_epochs_model = mnist_hyperband_tuner.hypermodel.build(best_hps)
history = finding_optimal_epochs_model.fit(
    x=x_train,
    y=y_train,
    epochs=50,
    validation_split=0.2,
)

val_acc_per_epoch = history.history['val_accuracy']
# 1-based number of the first epoch that reached the highest validation accuracy.
best_epoch = max(enumerate(val_acc_per_epoch, start=1), key=lambda pair: pair[1])[0]
print('Best epoch: %d' % best_epoch)

  super().__init__(**kwargs)


Epoch 1/50
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 3ms/step - accuracy: 0.7734 - loss: 0.7357 - val_accuracy: 0.9504 - val_loss: 0.1671
Epoch 2/50
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.9385 - loss: 0.2029 - val_accuracy: 0.9638 - val_loss: 0.1208
Epoch 3/50
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 4ms/step - accuracy: 0.9588 - loss: 0.1382 - val_accuracy: 0.9716 - val_loss: 0.0947
Epoch 4/50
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.9659 - loss: 0.1088 - val_accuracy: 0.9732 - val_loss: 0.0884
Epoch 5/50
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.9741 - loss: 0.0886 - val_accuracy: 0.9751 - val_loss: 0.0840
Epoch 6/50
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.9764 - loss: 0.0760 - val_accuracy: 0.9763 - val_loss: 0.0771
Epoch 7/50
[1m1

### Train a new model with the best hyperparameters for the optimal number of epochs

In [None]:
# Start from a freshly built model so no weights carry over from the epoch-search run.
hypermodel = mnist_hyperband_tuner.hypermodel.build(best_hps)

# Train for exactly the number of epochs found above, keeping the 20% validation split.
hypermodel.fit(
    x=x_train,
    y=y_train,
    epochs=best_epoch,
    validation_split=0.2,
)

  super().__init__(**kwargs)


Epoch 1/44
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 3ms/step - accuracy: 0.7767 - loss: 0.7222 - val_accuracy: 0.9534 - val_loss: 0.1595
Epoch 2/44
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9423 - loss: 0.1964 - val_accuracy: 0.9653 - val_loss: 0.1156
Epoch 3/44
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9561 - loss: 0.1452 - val_accuracy: 0.9716 - val_loss: 0.0951
Epoch 4/44
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9669 - loss: 0.1107 - val_accuracy: 0.9723 - val_loss: 0.0919
Epoch 5/44
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9721 - loss: 0.0885 - val_accuracy: 0.9766 - val_loss: 0.0783
Epoch 6/44
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9759 - loss: 0.0752 - val_accuracy: 0.9768 - val_loss: 0.0783
Epoch 7/44
[1m1

<keras.src.callbacks.history.History at 0x7df710729590>

# Evaluate model

In [None]:
# Score the retrained model on the test split; the cell displays the returned values.
hypermodel.evaluate(x=x_test, y=y_test)

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.9809 - loss: 0.1023


[0.08391046524047852, 0.9843999743461609]

# Save Model

In [None]:
# Write the trained model to a .keras file in the current working directory.
hypermodel.save(filepath='mnist_high_accuracy_model.keras')