In [1]:
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical

In [2]:
# 1. Data Loading and Preprocessing
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()
# Normalize pixel values to be between 0 and 1
train_images = train_images.astype('float32') / 255
test_images = test_images.astype('float32') / 255
# Reshape images to (num_samples, 28, 28, 1) for CNN input
train_images = train_images.reshape((60000, 28, 28, 1))
test_images = test_images.reshape((10000, 28, 28, 1))
# One-hot encode labels
train_labels = to_categorical(train_labels)
test_labels = to_categorical(test_labels)

In [3]:
# 2. Neural Network Architecture (CNN)
model = models.Sequential()
model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1))) # Convolutional layer
model.add(layers.MaxPooling2D((2, 2))) # Max pooling layer
model.add(layers.Conv2D(64, (3, 3), activation='relu')) # Convolutional layer
model.add(layers.MaxPooling2D((2, 2))) # Max pooling layer
model.add(layers.Conv2D(64, (3, 3), activation='relu')) # Convolutional layer
model.add(layers.Flatten()) # Flatten the 2D feature maps to 1D
model.add(layers.Dense(64, activation='relu')) # Fully connected layer
model.add(layers.Dense(10, activation='softmax')) # Output layer (10 classes)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [4]:
# 3. Compilation (Loss Function and Optimizer)
model.compile(optimizer='rmsprop',
 loss='categorical_crossentropy', # Loss function for multi-class classification
 metrics=['accuracy'])

In [5]:
# 4. Training
model.fit(train_images, train_labels, epochs=5, batch_size=64)

Epoch 1/5
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 51ms/step - accuracy: 0.8698 - loss: 0.4085
Epoch 2/5
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m86s[0m 55ms/step - accuracy: 0.9833 - loss: 0.0544
Epoch 3/5
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m47s[0m 50ms/step - accuracy: 0.9893 - loss: 0.0339
Epoch 4/5
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 48ms/step - accuracy: 0.9926 - loss: 0.0237
Epoch 5/5
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m89s[0m 56ms/step - accuracy: 0.9941 - loss: 0.0205


<keras.src.callbacks.history.History at 0x1c7e954bb30>

In [6]:
# 5. Evaluation
test_loss, test_acc = model.evaluate(test_images, test_labels)
print(f'Test accuracy: {test_acc}')

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 13ms/step - accuracy: 0.9874 - loss: 0.0376
Test accuracy: 0.9907000064849854


In [8]:
# Example prediction
import numpy as np
predictions = model.predict(test_images)
example_prediction = predictions[0] #first test image prediction.
predicted_digit = np.argmax(example_prediction) #get the highest probability index, which is the predicted digit.
actual_digit = np.argmax(test_labels[0]) #get the actual label of the first test image.
print(f"Predicted Digit: {predicted_digit}, Actual Digit: {actual_digit}")

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 12ms/step
Predicted Digit: 7, Actual Digit: 7


Notes:

1. preprocessing
   
mnist.load_data() -

Normalization: Pixel values are scaled to the range [0, 1] to improve training stability.

Reshaping: Images are reshaped to have a channel dimension (1 for grayscale).

One-hot encoding: Labels are converted to a binary matrix representation (e.g., digit 3 becomes [0, 0, 0,

1, 0, 0, 0, 0, 0, 0]).

Neural Network Architecture:

models.Sequential(): Creates a sequential model (layers are added in order)

layers.Conv2D(): Convolutional layers extract features from the images.

layers.MaxPooling2D(): Max pooling layers reduce the spatial dimensions of the feature maps.

layers.Flatten(): Flattens the 2D feature maps into a 1D vector.

layers.Dense(): Fully connected layers perform classification.

softmax activation on the final layer for multi-class probability output.

Compilation

optimizer='rmsprop': Specifies the optimization algorithm (adjusts weights during training).

loss='categorical_crossentropy': Specifies the loss function for multi-class classification.

metrics=['accuracy']: Specifies the metric to monitor during training and evaluation.

Training:

model.fit(): Trains the model on the training data.

epochs: Number of times the entire training dataset is passed through the network.

batch_size: Number of samples processed before updating weights.

Evaluation:

model.evaluate(): Evaluates the model on the test data.

test_loss and test_acc: Returns the loss and accuracy on the test set.

Prediction:

model.predict(): Generates predictions for the test dataset.

np.argmax(): used to convert the one hot encoded vectors back into the digit that was predicted.