#01. Load and Preprocess the MNIST Dataset

In [15]:
# Import necessary libraries
#tensorflow is the main library used for building and training the neural network
import tensorflow as tf
#mnist is a submodule of Keras (within TensorFlow) that contains the MNIST dataset.
from tensorflow.keras.datasets import mnist
#Sequential is a Keras model type that allows you to build a neural network layer by layer.
from tensorflow.keras.models import Sequential
#Dense and Flatten are layers used in the neural network.
from tensorflow.keras.layers import Dense, Flatten
#to_categorical is a utility function that converts class vectors (integers) to binary class matrices (one-hot encoding).
from tensorflow.keras.utils import to_categorical
#matplotlib.pyplot is a plotting library used for visualizing the data and results.
import matplotlib.pyplot as plt


In [16]:
# Load the dataset
# mnist.load_data() is unpacked into two (images, labels) pairs:
#   - train_images / train_labels: the training images and their labels.
#   - test_images / test_labels: the test images and their labels.
(
    (train_images, train_labels),
    (test_images, test_labels),
) = mnist.load_data()

In [17]:
# Normalize the images
# The pixel values in the images range from 0 to 255. Dividing by 255.0
# rescales them to the range 0 to 1, which helps the network train faster
# and perform better. Both splits are scaled by the same constant so the
# model sees identically-scaled inputs at training and at test time.
train_images, test_images = train_images / 255.0, test_images / 255.0

In [18]:
# One-hot encode the labels
# The labels are originally integers (0-9). One-hot encoding converts each
# integer into a binary vector with a single 1 at the label's index.
# to_categorical is the Keras utility function that performs this conversion.
# num_classes is pinned to 10 (one column per digit) rather than inferred
# from the largest label present in each array: this guarantees that the
# training and test label matrices have the same width and that the width
# matches the 10-unit output layer of the model.
train_labels = to_categorical(train_labels, num_classes=10)
test_labels = to_categorical(test_labels, num_classes=10)

#02. Create a Neural Network Model

In [19]:
# Create the model
# Sequential creates a linear stack of layers.
model = Sequential([
    # The input shape is declared with an explicit Input object instead of
    # passing input_shape= to the first layer; current Keras versions warn
    # about the latter form when it is used inside a Sequential model.
    tf.keras.Input(shape=(28, 28)),
    # Flatten converts each 28x28 2D image into a 1D array of 784 elements.
    Flatten(),
    # Fully connected (dense) layer with 128 units. The ReLU (Rectified
    # Linear Unit) activation introduces non-linearity to the model.
    Dense(128, activation='relu'),
    # Fully connected output layer with 10 units (one for each digit). The
    # softmax activation outputs a probability distribution over the
    # 10 classes.
    Dense(10, activation='softmax'),
])

In [20]:
# Compile the model
#   optimizer='adam'                  -> use the Adam optimizer, a popular
#                                        optimization algorithm for training
#                                        neural networks.
#   loss='categorical_crossentropy'   -> loss function for multi-class
#                                        classification with one-hot labels.
#   metrics=['accuracy']              -> track accuracy during training and
#                                        evaluation.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

#03. Train the Model

In [None]:
# Train the model
# model.fit trains the model on the training data and returns a history
# object, kept here in `history`.
#   epochs=10            -> run 10 passes over the data used for training.
#   validation_split=0.2 -> hold out 20% of the training data for validation
#                           (so each epoch trains on the remaining 80%),
#                           letting us monitor performance on data the model
#                           is not trained on.
history = model.fit(
    train_images,
    train_labels,
    epochs=10,
    validation_split=0.2,
)

Epoch 1/10
Epoch 2/10

#04. Evaluate the Model

In [None]:
# Evaluate the model
# model.evaluate computes the loss and the accuracy of the model on the
# test data; the two values are unpacked as:
#   test_loss -> the loss on the test data.
#   test_acc  -> the accuracy on the test data.
# The test accuracy is then printed.
test_loss, test_acc = model.evaluate(x=test_images, y=test_labels)
print(f'Test accuracy: {test_acc}')

#05. Visualize the Results

In [None]:
# Make predictions
# model.predict runs the trained model on the test images.
# `predictions` holds one row per test image; each row is the softmax output
# of the final layer, i.e. the predicted probability for each of the
# 10 digits.
predictions = model.predict(x=test_images)

In [None]:
# Plot the first 5 test images, their predicted labels, and the true labels.
# The "Predicted" label is colored green when the prediction matches the
# true label and red when it does not.
for i in range(5):
    # Both the one-hot true label and the predicted probability vector are
    # reduced to a digit with argmax (index of the largest entry).
    true_label = test_labels[i].argmax()
    predicted_label = predictions[i].argmax()
    label_color = 'green' if predicted_label == true_label else 'red'

    # One new figure per test image, with two side-by-side subplots.
    plt.figure(figsize=(6, 3))

    # Left subplot: the test image, drawn with the binary colormap and
    # labeled with the true digit.
    plt.subplot(1, 2, 1)
    plt.imshow(test_images[i], cmap=plt.cm.binary)
    plt.xlabel(f"True: {true_label}")

    # Right subplot: bar plot of the predicted probability for each digit,
    # labeled with the predicted digit in the correctness color.
    plt.subplot(1, 2, 2)
    plt.bar(range(10), predictions[i])
    plt.xlabel(f"Predicted: {predicted_label}", color=label_color)

    # Display the finished figure.
    plt.show()