# Improved Fashion MNIST accuracy using CNN

In [1]:
%pip install keras-tuner

Collecting keras-tuner
  Downloading keras_tuner-1.4.7-py3-none-any.whl.metadata (5.4 kB)
Collecting kt-legacy (from keras-tuner)
  Downloading kt_legacy-1.0.5-py3-none-any.whl.metadata (221 bytes)
Downloading keras_tuner-1.4.7-py3-none-any.whl (129 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m129.1/129.1 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading kt_legacy-1.0.5-py3-none-any.whl (9.6 kB)
Installing collected packages: kt-legacy, keras-tuner
Successfully installed keras-tuner-1.4.7 kt-legacy-1.0.5


In [2]:
import tensorflow as tf
import keras_tuner as kt
import numpy as np
import matplotlib.pyplot as plt

from functools import partial

## Load Fashion MNIST dataset

In [6]:
fashion_mnist = tf.keras.datasets.fashion_mnist.load_data()
(train_images, train_labels), (test_images, test_labels) = fashion_mnist

# Scale pixel intensities (immediate 10x improvement to accuracy)

train_images = train_images / 255.0
test_images = test_images / 255.0

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
[1m29515/29515[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
[1m26421880/26421880[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
[1m5148/5148[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz
[1m4422102/4422102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


## Create Model

In [3]:
def build_model(hp):
    DefaultConv2D = partial(tf.keras.layers.Conv2D,
                           padding="same",
                           activation='relu',
                           kernel_initializer='he_normal')


    learning_rate = hp.Float('learning_rate', min_value=1e-4, max_value=1e-2, sampling='log')
    optimizer = hp.Choice("optimizer", values=["sgd", "adam", "nadam"])

    if optimizer == 'adam':
        optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    elif optimizer == 'nadam':
        optimizer = tf.keras.optimizers.Nadam(learning_rate=learning_rate)
    else:
        optimizer = tf.keras.optimizers.RMSprop(learning_rate=learning_rate)

    model = tf.keras.Sequential()

    # Initial convolutional layer
    model.add(DefaultConv2D(
        filters=hp.Int('conv_1_filter', min_value=32, max_value=128, step=32),
        kernel_size=hp.Choice('conv_1_kernel', values=[3, 5, 7]),
        input_shape=[28, 28, 1]))

    model.add(tf.keras.layers.MaxPooling2D(pool_size=2))

    # Second convolutional block
    model.add(DefaultConv2D(
        filters=hp.Int('conv_2_filter', min_value=64, max_value=256, step=64),
        kernel_size=hp.Choice('conv_2_kernel', values=[3, 5])))

    # Optional second layer in the block
    if hp.Boolean('conv_2_second_layer'):
        model.add(DefaultConv2D(
            filters=hp.Int('conv_2_second_filter', min_value=64, max_value=256, step=64),
            kernel_size=3))

    model.add(tf.keras.layers.MaxPooling2D(pool_size=2))

    # Third convolutional block (optional)
    if hp.Boolean('include_third_conv_block'):
        model.add(DefaultConv2D(
            filters=hp.Int('conv_3_filter', min_value=128, max_value=512, step=128),
            kernel_size=3))

        if hp.Boolean('conv_3_second_layer'):
            model.add(DefaultConv2D(
                filters=hp.Int('conv_3_second_filter', min_value=128, max_value=512, step=128),
                kernel_size=3))

        model.add(tf.keras.layers.MaxPooling2D(pool_size=2))

    # Flatten and fully connected layers
    model.add(tf.keras.layers.Flatten())

    # First dense layer
    model.add(tf.keras.layers.Dense(
        units=hp.Int('dense_1_units', min_value=64, max_value=256, step=64),
        activation='relu',
        kernel_initializer='he_normal'))

    # Dropout rate
    model.add(tf.keras.layers.Dropout(
        rate=hp.Float('dropout_1', min_value=0.2, max_value=0.7, step=0.1)))

    # Optional second dense layer
    if hp.Boolean('include_second_dense'):
        model.add(tf.keras.layers.Dense(
            units=hp.Int('dense_2_units', min_value=32, max_value=128, step=32),
            activation='relu',
            kernel_initializer='he_normal'))
        model.add(tf.keras.layers.Dropout(
            rate=hp.Float('dropout_2', min_value=0.2, max_value=0.5, step=0.1)))

    # Output layer
    model.add(tf.keras.layers.Dense(10, activation='softmax'))

    model.compile(
        optimizer=optimizer,
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )

    return model

class HyperModel(kt.HyperModel):
  def build(self, hp):
    return build_model(hp)

  def fit(self, hp, model, X, y, **kwargs):
    if hp.Boolean("normalize"):
      norm_layer = tf.keras.layers.Normalization()
      X = norm_layer(X)
    return model.fit(X, y, **kwargs)

## Train Model

### Do Hyperparameter Tuning

In [4]:
hyperband_tuner = kt.Hyperband(
    HyperModel(),
    objective="val_accuracy",
    max_epochs=10,
    factor=3,
    directory="fashion_mnist_cnn_hyperband",
    project_name="hyperband_tuning")

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [7]:
early_stopping_cb = tf.keras.callbacks.EarlyStopping(patience=2, restore_best_weights=True) # Interrupts training when no progress on validation set for 2 trials
model_checkpoint_cb = tf.keras.callbacks.ModelCheckpoint("fashion_mnist_cnn.keras",
                                                         save_best_only=True) # Auto save best model

callbacks = [early_stopping_cb, model_checkpoint_cb]


hyperband_tuner.search(train_images, train_labels, epochs=15, validation_split=0.2,
                             callbacks=callbacks)

Trial 30 Complete [00h 01m 16s]
val_accuracy: 0.9087499976158142

Best val_accuracy So Far: 0.9201666712760925
Total elapsed time: 00h 25m 18s


### Get Best Hyperparameters

In [12]:
best_hps=hyperband_tuner.get_best_hyperparameters(num_trials=1)[0]

### Find optimal number of epochs to train

In [None]:
# Build the model with the optimal hyperparameters and train it on the data for 50 epochs
finding_optimal_epochs_model = hyperband_tuner.hypermodel.build(best_hps)
history = finding_optimal_epochs_model.fit(train_images, train_labels, epochs=50, validation_split=0.2)

val_acc_per_epoch = history.history['val_accuracy']
best_epoch = val_acc_per_epoch.index(max(val_acc_per_epoch)) + 1
print('Best epoch: %d' % (best_epoch,))

Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 10ms/step - accuracy: 0.5538 - loss: 1.3703 - val_accuracy: 0.8165 - val_loss: 0.5044
Epoch 2/50
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.7953 - loss: 0.5748 - val_accuracy: 0.8485 - val_loss: 0.4113
Epoch 3/50
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.8384 - loss: 0.4711 - val_accuracy: 0.8630 - val_loss: 0.3832
Epoch 4/50
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.8699 - loss: 0.3763 - val_accuracy: 0.8640 - val_loss: 0.3706
Epoch 5/50
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.8799 - loss: 0.3458 - val_accuracy: 0.8760 - val_loss: 0.3592
Epoch 6/50
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.8966 - loss: 0.2903 - val_accuracy: 0.8785 - val_loss: 0.3469
Epoch 7/50
[1m250/250[0m [32m━━━━━━

### Train new model with hyper-parameters on optimal number of epochs

In [14]:
hypermodel = hyperband_tuner.hypermodel.build(best_hps)

# Retrain the model
hypermodel.fit(train_images, train_labels, epochs=best_epoch, validation_split=0.2)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/19
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 5ms/step - accuracy: 0.7166 - loss: 0.7912 - val_accuracy: 0.8812 - val_loss: 0.3237
Epoch 2/19
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - accuracy: 0.8810 - loss: 0.3462 - val_accuracy: 0.8964 - val_loss: 0.2860
Epoch 3/19
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - accuracy: 0.9057 - loss: 0.2756 - val_accuracy: 0.9081 - val_loss: 0.2475
Epoch 4/19
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 4ms/step - accuracy: 0.9183 - loss: 0.2348 - val_accuracy: 0.9093 - val_loss: 0.2573
Epoch 5/19
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 4ms/step - accuracy: 0.9277 - loss: 0.2043 - val_accuracy: 0.9172 - val_loss: 0.2391
Epoch 6/19
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 4ms/step - accuracy: 0.9372 - loss: 0.1764 - val_accuracy: 0.9155 - val_loss: 0.2440
Epoch 7/19
[

<keras.src.callbacks.history.History at 0x7b5c4e545610>

## Evaluate Model

In [15]:
hypermodel.evaluate(test_images, test_labels)

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.9149 - loss: 0.4263


[0.3972543179988861, 0.9178000092506409]

## Save Model

In [16]:
hypermodel.save('fashion_mnist_cnn.keras')