In [3]:
pip install tensorflow numpy mnist


Collecting mnist
  Downloading mnist-0.2.2-py2.py3-none-any.whl (3.5 kB)
Installing collected packages: mnist
Successfully installed mnist-0.2.2


Note: We don’t need to install the keras package because it now comes bundled with TensorFlow as its official high-level API! Using TensorFlow’s Keras is now recommended over the standalone keras package.

In [6]:
import numpy as np
import mnist
from tensorflow import keras
import matplotlib.pyplot as plt


In [8]:
# Load and preprocess the MNIST dataset using keras.datasets
(train_images, train_labels), (test_images, test_labels) = keras.datasets.mnist.load_data()


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [10]:
print(train_images.shape) # (60000, 28, 28)
print(train_labels.shape) # (60000,)

(60000, 28, 28)
(60000,)


2. Preparing the Data
Before we begin, we’ll normalize the image pixel values from [0, 255] to [-0.5, 0.5] to make our network easier to train (using smaller, centered values usually leads to better results). We’ll also reshape each image from (28, 28) to (28, 28, 1) because Keras requires the third dimension.

In [11]:
import numpy as np
import mnist


In [13]:
# Normalize the images to be in the range [-0.5, 0.5]
train_images = (train_images / 255.0) - 0.5
test_images = (test_images / 255.0) - 0.5

In [14]:
# Reshape the images to include the channel dimension (28, 28, 1)
train_images = np.expand_dims(train_images, axis=3)
test_images = np.expand_dims(test_images, axis=3)

In [15]:
print(train_images.shape) # (60000, 28, 28, 1)
print(test_images.shape)  # (10000, 28, 28, 1)


(60000, 28, 28, 1)
(10000, 28, 28, 1)


We’re ready to start building our CNN!



3. Building the Model
Every Keras model is either built using the Sequential class, which represents a linear stack of layers, or the functional Model class, which is more customizeable. We’ll be using the simpler Sequential model, since our CNN will be a linear stack of layers.

We start by instantiating a Sequential model:

In [16]:
from tensorflow.keras.models import Sequential

# WIP
model = Sequential([
  # layers...
])

The Sequential constructor takes an array of Keras Layers. We’ll use 3 types of layers for our CNN: Convolutional, Max Pooling, and Softmax.

In [17]:
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Flatten

num_filters = 8
filter_size = 3
pool_size = 2

model = Sequential([
  Conv2D(num_filters, filter_size, input_shape=(28, 28, 1)),
  MaxPooling2D(pool_size=pool_size),
  Flatten(),
  Dense(10, activation='softmax'),
])
model.compile(
  'adam',
  loss='categorical_crossentropy',
  metrics=['accuracy'],
)
import mnist
# Print the first label
print(train_labels[0])
# This should print the label of the first training image
from tensorflow.keras.utils import to_categorical
model.fit(
  train_images,
  to_categorical(train_labels),
  epochs=3,
  validation_data=(test_images, to_categorical(test_labels)),
)
# Compile the model.
model.compile(
  'adam',
  loss='categorical_crossentropy',
  metrics=['accuracy'],
)
# Compile the model.
model.compile(
  'adam',
  loss='categorical_crossentropy',
  metrics=['accuracy'],
)
import numpy as np
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Flatten
from tensorflow.keras.utils import to_categorical

# Load MNIST dataset
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

# Normalize and preprocess the images
train_images = (train_images / 255.0) - 0.5
test_images = (test_images / 255.0) - 0.5

# Reshape the images to include a single channel (for grayscale)
train_images = np.expand_dims(train_images, axis=-1)
test_images = np.expand_dims(test_images, axis=-1)

# Convert labels to categorical (one-hot encoding)
train_labels = to_categorical(train_labels, num_classes=10)
test_labels = to_categorical(test_labels, num_classes=10)

# Define the model architecture
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(10, activation='softmax')
])

# Compile the model
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Train the model
model.fit(train_images, train_labels, epochs=3, validation_data=(test_images, test_labels))
model.save_weights('cnn.h5')
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Flatten

# Define your model architecture for loading weights
num_filters = 8
filter_size = 3
pool_size = 2

model = Sequential([
    Conv2D(num_filters, filter_size, input_shape=(28, 28, 1)),
    MaxPooling2D(pool_size=pool_size),
    Flatten(),
    Dense(10, activation='softmax'),
])

# Load weights into the model
model.load_weights('cnn.h5')

# Now you can use the model for predictions or further training
# Predict on the first 5 test images.
predictions = model.predict(test_images[:5])

# Print our model's predictions.
print(np.argmax(predictions, axis=1)) # [7, 2, 1, 0, 4]

# Check our predictions against the ground truths.
print(test_labels[:5]) # [7, 2, 1, 0, 4]
model = Sequential([
  Conv2D(num_filters, filter_size, input_shape=(28, 28, 1)),
  Conv2D(num_filters, filter_size),
  MaxPooling2D(pool_size=pool_size),
  Flatten(),
  Dense(10, activation='softmax'),
])
from tensorflow.keras.layers import Dropout

model = Sequential([
  Conv2D(num_filters, filter_size, input_shape=(28, 28, 1)),
  MaxPooling2D(pool_size=pool_size),
  Dropout(0.5),
  Flatten(),
  Dense(10, activation='softmax'),
])
from tensorflow.keras.layers import Dense

model = Sequential([
  Conv2D(num_filters, filter_size, input_shape=(28, 28, 1)),
  MaxPooling2D(pool_size=pool_size),
  Flatten(),
  Dense(64, activation='relu'),
  Dense(10, activation='softmax'),
])
# These can be changed, too!
num_filters = 8
filter_size = 3

model = Sequential([
  # See https://keras.io/layers/convolutional/#conv2d for more info.
  Conv2D(
    num_filters,
    filter_size,
    input_shape=(28, 28, 1),
    strides=2,
    padding='same',
    activation='relu',
  ),
  MaxPooling2D(pool_size=pool_size),
  Flatten(),
  Dense(10, activation='softmax'),
])

num_filters, filter_size, and pool_size are self-explanatory variables that set the hyperparameters for our CNN.
The first layer in any Sequential model must specify the input_shape, so we do so on Conv2D. Once this input shape is specified, Keras will automatically infer the shapes of inputs for later layers.
The output Softmax layer has 10 nodes, one for each class.

4. Compiling the Model
Before we can begin training, we need to configure the training process. We decide 3 key factors during the compilation step:

The optimizer. We’ll stick with a pretty good default: the Adam gradient-based optimizer. Keras has many other optimizers you can look into as well.
The loss function. Since we’re using a Softmax output layer, we’ll use the Cross-Entropy loss. Keras distinguishes between binary_crossentropy (2 classes) and categorical_crossentropy (>2 classes), so we’ll use the latter. See all Keras losses.
A list of metrics. Since this is a classification problem, we’ll just have Keras report on the accuracy metric.

In [18]:
model.compile(
  'adam',
  loss='categorical_crossentropy',
  metrics=['accuracy'],
)

5. Training the Model
Training a model in Keras literally consists only of calling fit() and specifying some parameters.

There’s one thing we have to be careful about: Keras expects the training targets to be 10-dimensional vectors, since there are 10 nodes in our Softmax output layer. Right now, our train_labels and test_labels arrays contain single integers representing the class for each image:

In [20]:
import mnist


In [22]:
# Print the first label
print(train_labels[0])
# This should print the label of the first training image


5


Conveniently, Keras has a utility method that fixes this exact issue: to_categorical. It turns our array of class integers into an array of one-hot vectors instead.
For example, 2 would become [0, 0, 1, 0, 0, 0, 0, 0, 0, 0] (it’s zero-indexed).

In [24]:
from tensorflow.keras.utils import to_categorical


In [25]:
model.fit(
  train_images,
  to_categorical(train_labels),
  epochs=3,
  validation_data=(test_images, to_categorical(test_labels)),
)


Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.src.callbacks.History at 0x78a7fbb4d4e0>

In [40]:
# Compile the model.
model.compile(
  'adam',
  loss='categorical_crossentropy',
  metrics=['accuracy'],
)


In [43]:
import numpy as np
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Flatten
from tensorflow.keras.utils import to_categorical

# Load MNIST dataset
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

# Normalize and preprocess the images
train_images = (train_images / 255.0) - 0.5
test_images = (test_images / 255.0) - 0.5

# Reshape the images to include a single channel (for grayscale)
train_images = np.expand_dims(train_images, axis=-1)
test_images = np.expand_dims(test_images, axis=-1)

# Convert labels to categorical (one-hot encoding)
train_labels = to_categorical(train_labels, num_classes=10)
test_labels = to_categorical(test_labels, num_classes=10)

# Define the model architecture
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(10, activation='softmax')
])

# Compile the model
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Train the model
model.fit(train_images, train_labels, epochs=3, validation_data=(test_images, test_labels))


Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.src.callbacks.History at 0x78a7d1ee39a0>

6. Using the Model
Now that we have a working, trained model, let’s put it to use.

In [50]:
model.save_weights('cnn.h5')


In [51]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Flatten

# Define your model architecture for loading weights
num_filters = 8
filter_size = 3
pool_size = 2

model = Sequential([
    Conv2D(num_filters, filter_size, input_shape=(28, 28, 1)),
    MaxPooling2D(pool_size=pool_size),
    Flatten(),
    Dense(10, activation='softmax'),
])

# Load weights into the model
model.load_weights('cnn.h5')

# Now you can use the model for predictions or further training


In [52]:
# Predict on the first 5 test images.
predictions = model.predict(test_images[:5])

# Print our model's predictions.
print(np.argmax(predictions, axis=1)) # [7, 2, 1, 0, 4]

# Check our predictions against the ground truths.
print(test_labels[:5]) # [7, 2, 1, 0, 4]

[8 8 8 8 8]
[[0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]]


8. Extensions
There’s much more we can do to experiment with and improve our network - in this official Keras MNIST CNN example, they achieve 99 test accuracy after 15 epochs.

In [53]:
model = Sequential([
  Conv2D(num_filters, filter_size, input_shape=(28, 28, 1)),
  Conv2D(num_filters, filter_size),
  MaxPooling2D(pool_size=pool_size),
  Flatten(),
  Dense(10, activation='softmax'),
])


In [54]:
from tensorflow.keras.layers import Dropout

model = Sequential([
  Conv2D(num_filters, filter_size, input_shape=(28, 28, 1)),
  MaxPooling2D(pool_size=pool_size),
  Dropout(0.5),
  Flatten(),
  Dense(10, activation='softmax'),
])

In [55]:
from tensorflow.keras.layers import Dense

model = Sequential([
  Conv2D(num_filters, filter_size, input_shape=(28, 28, 1)),
  MaxPooling2D(pool_size=pool_size),
  Flatten(),
  Dense(64, activation='relu'),
  Dense(10, activation='softmax'),
])


In [56]:
# These can be changed, too!
num_filters = 8
filter_size = 3

model = Sequential([
  # See https://keras.io/layers/convolutional/#conv2d for more info.
  Conv2D(
    num_filters,
    filter_size,
    input_shape=(28, 28, 1),
    strides=2,
    padding='same',
    activation='relu',
  ),
  MaxPooling2D(pool_size=pool_size),
  Flatten(),
  Dense(10, activation='softmax'),
])