### Setup

In [1]:
import tensorflow as tf
from tensorflow import keras

In [2]:
! pip install -q -U keras-tuner

In [3]:
import keras_tuner as kt

### Download and prepare the dataset

In [17]:
(img_train, label_train), (img_test, label_test) = tf.keras.datasets.mnist.load_data()

In [5]:
# Normalise the pixel values between 0 and 1
img_train = img_train.astype('float32') / 255.0
img_test = img_train.astype('float32') / 255.0

### Define the model

In [6]:
def model_builder(hp):
    model = keras.Sequential()
    model.add(keras.layers.Flatten(input_shape = (28, 28)))
    
    # Tune the number of units in the first Dense layer
    # Choose an optimal value between 32-512
    
    hp_units = hp.Int('units', min_value = 32, max_value = 512, step = 32)
    model.add(keras.layers.Dense(units = hp_units, activation = 'relu'))
    model.add(keras.layers.Dense(10))
    
    # Tune the learning rate for the optimizer
    # Choose an optimal value from 0.01, 0.001, or 0.0001
    hp_learning_rate = hp.Choice('learning_rate', values = [1e-2, 1e-3, 1e-4])
    
    model.compile(optimizer = keras.optimizers.Adam(learning_rate = hp_learning_rate),
                  loss = keras.losses.SparseCategoricalCrossentropy(from_logits = True),
                  metrics = ['accuracy'])
    
    return model 

### Instantiate the tuner and perform hypertuning

In [7]:
tuner = kt.Hyperband(model_builder,
                     objective = 'val_accuracy',
                     max_epochs = 10,
                     factor = 3, 
                     directory = 'Developer/GitHub_Codes/tensor-flow-basics/',
                     project_name = 'kt_logs')

Reloading Tuner from Developer/GitHub_Codes/tensor-flow-basics/kt_logs/tuner0.json


2025-06-28 16:13:39.038353: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M2 Pro
2025-06-28 16:13:39.038381: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 16.00 GB
2025-06-28 16:13:39.038389: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 5.33 GB
2025-06-28 16:13:39.038414: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2025-06-28 16:13:39.038437: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [8]:
stop_early = tf.keras.callbacks.EarlyStopping(monitor = 'val_loss', patience = 5) # patience means wait for 5 consecutive epochs with no improvement in val_loss before stopping the training.

In [9]:
tuner.search(img_train, label_train, epochs = 50, validation_split = 0.2, callbacks = [stop_early])

# Get the optimal hyperparameters
best_hps = tuner.get_best_hyperparameters(num_trials = 1)[0]

print(f"""
The hyperparameter search is complete. The optimal number of units in the first densely-connected
layer is {best_hps.get('units')} and the optimal learning rate for the optimizer
is {best_hps.get('learning_rate')}.
""")


The hyperparameter search is complete. The optimal number of units in the first densely-connected
layer is 224 and the optimal learning rate for the optimizer
is 0.001.



In [10]:
print(f"""
The hyperparameter search is complete. The optimal number of units in the first densely-connected
layer is {best_hps.get('units')} and the optimal learning rate for the optimizer
is {best_hps.get('learning_rate')}.
""")


The hyperparameter search is complete. The optimal number of units in the first densely-connected
layer is 224 and the optimal learning rate for the optimizer
is 0.001.



### Train the model

In [11]:
# Build the model with the optimal hyperparameters and train it on the data for 50 epochs
model = tuner.hypermodel.build(best_hps)
history = model.fit(img_train, label_train, epochs = 50, validation_split = 0.2)

  super().__init__(**kwargs)


Epoch 1/50


2025-06-28 16:13:39.407052: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 7ms/step - accuracy: 0.8537 - loss: 0.5038 - val_accuracy: 0.9080 - val_loss: 0.3305
Epoch 2/50
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 6ms/step - accuracy: 0.9087 - loss: 0.3248 - val_accuracy: 0.9095 - val_loss: 0.3385
Epoch 3/50
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 6ms/step - accuracy: 0.9075 - loss: 0.3355 - val_accuracy: 0.9120 - val_loss: 0.3335
Epoch 4/50
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 6ms/step - accuracy: 0.9098 - loss: 0.3347 - val_accuracy: 0.8946 - val_loss: 0.4209
Epoch 5/50
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 6ms/step - accuracy: 0.9060 - loss: 0.3594 - val_accuracy: 0.9114 - val_loss: 0.3630
Epoch 6/50
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 7ms/step - accuracy: 0.9053 - loss: 0.3696 - val_accuracy: 0.8999 - val_loss: 0.4421
Epoch 7/50
[1m1500/150

In [12]:
val_acc_per_epoch = history.history['val_accuracy']
best_epoch = val_acc_per_epoch.index(max(val_acc_per_epoch)) + 1
print("Best epoch: %d at %d " % (best_epoch, max(val_acc_per_epoch)))

Best epoch: 3 at 0 


In [13]:
hypermodel = tuner.hypermodel.build(best_hps)

# Retrain the model
hypermodel.fit(img_train, label_train, epochs=best_epoch, validation_split=0.2)

Epoch 1/3
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 7ms/step - accuracy: 0.8488 - loss: 0.5069 - val_accuracy: 0.9113 - val_loss: 0.3180
Epoch 2/3
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 7ms/step - accuracy: 0.9087 - loss: 0.3211 - val_accuracy: 0.9170 - val_loss: 0.3076
Epoch 3/3
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 7ms/step - accuracy: 0.9081 - loss: 0.3311 - val_accuracy: 0.9084 - val_loss: 0.3378


<keras.src.callbacks.history.History at 0x17f707970>

In [18]:
eval_result = hypermodel.evaluate(img_test, label_test)
print("[test loss, test accuracy]: ", eval_result)

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.8696 - loss: 93.2792
[test loss, test accuracy]:  [86.07719421386719, 0.8834999799728394]


In [19]:
print("img_test shape:", img_test.shape)
print("label_test shape:", label_test.shape)

img_test shape: (10000, 28, 28)
label_test shape: (10000,)
