# TensorFlow: Classification using MNIST dataset

Demonstrates creating DNN to accomplish image classification with Conv2D and MaxPool2D layers.
Model is trained on fashion MNIST dataset.

In [1]:
import os
import numpy as np
import matplotlib.pyplot as plt

In [2]:
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "1"

import tensorflow as tf
import tensorflow_datasets as tfds

print("TF Version: ", tf.__version__)
print("TF Eager mode: ", tf.executing_eagerly())
print("TF GPU is", "available" if tf.config.list_physical_devices("GPU") else "not available")

TF Version: 2.20.0
TF Devices: ['CPU', 'GPU']


## Prepare Datasets

In [None]:
(train_ds, val_ds, test_ds), ds_info = tfds.load("fashion_mnist",
    split=["train[:80%]", "train[80%:]", "test"],
    with_info=True,
    as_supervised=True)

In [None]:
_ = tfds.show_examples(train_ds.take(6), ds_info)

In [None]:
train_ds = (train_ds
            .shuffle(buffer_size=1000)
            .batch(32)
            .prefetch(tf.data.experimental.AUTOTUNE))

test_ds = (test_ds
           .batch(32)
           .prefetch(tf.data.experimental.AUTOTUNE))

val_ds = (val_ds
          .batch(32)
          .prefetch(tf.data.experimental.AUTOTUNE))

In [12]:
ds, info = tfds.load("mnist", split="train", with_info=True)
tfds.as_dataframe(ds.take(3), info)

2025-12-06 00:14:26.826783: W tensorflow/core/kernels/data/cache_dataset_ops.cc:917] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.


Unnamed: 0,image,label
0,,4
1,,1
2,,0


## Build Model

In [None]:
# Define model
model = tf.keras.models.Sequential([
    tf.keras.Input(shape=(28, 28, 1)),
    tf.keras.layers.Rescaling(1.0 / 255.0),
    tf.keras.layers.Conv2D(32, kernel_size=(3, 3), activation=tf.nn.relu),
    tf.keras.layers.MaxPool2D(),
    tf.keras.layers.Conv2D(16, kernel_size=(3, 3), activation=tf.nn.relu),
    tf.keras.layers.MaxPool2D(),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(256, activation=tf.nn.relu),
    tf.keras.layers.Dense(10)
    # tf.keras.layers.Dense(10, activation=tf.nn.softmax)
])

In [None]:
# Plot model architecture
tf.keras.utils.plot_model(model, show_shapes=True)

In [None]:
# Compile model
model.compile(
    optimizer="adam",
    #loss="sparse_categorical_crossentropy",
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=["accuracy"])

In [None]:
callbacks = [
    tf.keras.callbacks.EarlyStopping(
        patience=5,
        min_delta=1e-2,
        restore_best_weights=True,
        verbose=1
    ),
    tf.keras.callbacks.TensorBoard(
        log_dir="logs",
        histogram_freq=0,
        embeddings_freq=0,
        update_freq="epoch"
    )
]

%%time
history = model.fit(
    train_ds,
    epochs=20,
    callbacks=callbacks,
    validation_data=val_ds,
    verbose=2)

## Evaluate Model

In [None]:
model.evaluate(test_ds)

In [None]:
loss = history.history["loss"]
val_loss = history.history["val_loss"]
accuracy = history.history["accuracy"]
val_accuracy = history.history["val_accuracy"]

In [None]:
xs = range(1, len(loss) + 1)
plt.figure(figsize=(9,3))

plt.subplot(121)
plt.plot(xs, loss, "b", label="Training Loss")
plt.plot(xs, val_loss, "r", label="Validation Loss")
plt.title("Training and Validation Loss")
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.legend()

plt.subplot(122)
plt.plot(xs, accuracy, "b", label="Training Accuracy")
plt.plot(xs, val_accuracy, "r", label="Validation Accuracy")
plt.title("Training and Validation Accuracy")
plt.xlabel("Epochs")
plt.ylabel("Accuracy")
plt.legend()

plt.show()

## Predict

In [None]:
# Create probability model from linear output of base model
probability_model = tf.keras.Sequential([
    model,
    tf.keras.layers.Softmax()
])

In [None]:
# Get experimental batch from test dataset
images, true_labels = next(iter(test_ds.take(1)))

In [None]:
# Get predictions on the batch
predictions = probability_model.predict(images)

In [None]:
# Collect predicted labels
predicted_labels = np.array([np.argmax(prediction) for prediction in predictions])

In [None]:
# Plot true labels and predicted
plt.figure(figsize=(15, 15))
plt.grid(False)

for i in range(32):
    true_label = true_labels[i].numpy()
    predicted_label = predicted_labels[i]
    # Plot an image with label
    plt.subplot(8, 8, (i*2) + 1)
    plt.xticks([])
    plt.yticks([])
    plt.imshow(images[i], cmap="binary")
    plt.xlabel(f"{true_label} vs {predicted_label}")
    # Plot a bar with predictions
    plt.subplot(8, 8, (i*2) + 2)
    plt.xticks(range(10))
    plt.yticks([])
    plt.ylim([0, 1])
    thisplot = plt.bar(range(10), predictions[i], color="r")
    thisplot[predicted_labels[i]].set_color("r")
    thisplot[true_labels[i]].set_color("b")

plt.show()