In [1]:
import tensorflow as tf
from keras import layers, models, callbacks
import numpy as np
import matplotlib.pyplot as plt

In [2]:
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
x_train = x_train.reshape(-1,28,28,1).astype('float32') / 255.0
x_test  = x_test.reshape(-1,28,28,1).astype('float32') / 255.0

y_train_oh = tf.keras.utils.to_categorical(y_train, num_classes=10)
y_test_oh  = tf.keras.utils.to_categorical(y_test,  num_classes=10)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[1m11490434/11490434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [3]:
def build_cnn():
    model = tf.keras.Sequential([
        layers.Conv2D(32, 3, padding='same', activation='relu', input_shape=(28, 28, 1)),
        layers.BatchNormalization(),
        layers.MaxPooling2D(),

        layers.Conv2D(64, 3, padding='same', activation='relu'),
        layers.BatchNormalization(),
        layers.MaxPooling2D(),

        layers.Conv2D(128, 3, padding='same', activation='relu'),
        layers.BatchNormalization(),
        layers.MaxPooling2D(),

        layers.Flatten(),
        layers.Dense(128, activation='relu'),
        layers.BatchNormalization(),
        layers.Dropout(0.5),

        layers.Dense(10, activation='softmax')
    ])
    return model

In [4]:
optimizer = tf.keras.optimizers.SGD(learning_rate=0.01, momentum=0.95, nesterov=True)
lr_schedule1 = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2, verbose=1)
lr_schedule2 = tf.keras.callbacks.LearningRateScheduler(lambda epoch: 0.01 * 0.95**epoch, verbose=1)
early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
loss_fn = tf.keras.losses.CategoricalCrossentropy(label_smoothing=0.15)


model = build_cnn()
model.compile(optimizer=optimizer, loss=loss_fn , metrics=['accuracy'])

model.fit(x_train, y_train_oh, epochs=50, batch_size=256, validation_split=0.1, callbacks=[lr_schedule2, early_stop])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)



Epoch 1: LearningRateScheduler setting learning rate to 0.01.
Epoch 1/50
[1m211/211[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m109s[0m 507ms/step - accuracy: 0.8449 - loss: 1.1922 - val_accuracy: 0.1372 - val_loss: 2.2608 - learning_rate: 0.0100

Epoch 2: LearningRateScheduler setting learning rate to 0.0095.
Epoch 2/50
[1m211/211[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m140s[0m 499ms/step - accuracy: 0.9800 - loss: 0.8189 - val_accuracy: 0.9403 - val_loss: 1.0470 - learning_rate: 0.0095

Epoch 3: LearningRateScheduler setting learning rate to 0.009025.
Epoch 3/50
[1m211/211[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m108s[0m 511ms/step - accuracy: 0.9855 - loss: 0.7952 - val_accuracy: 0.9892 - val_loss: 0.7596 - learning_rate: 0.0090

Epoch 4: LearningRateScheduler setting learning rate to 0.00857375.
Epoch 4/50
[1m211/211[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m140s[0m 501ms/step - accuracy: 0.9884 - loss: 0.7847 - val_accuracy: 0.9915 - val_loss: 0.7423 

<keras.src.callbacks.history.History at 0x79e2ee527790>

In [5]:
test_loss, test_acc = model.evaluate(x_test, y_test_oh)
print(f"Test accuracy: {test_acc*100:.2f}")

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 19ms/step - accuracy: 0.9915 - loss: 0.7293
Test accuracy: 99.33
