Loading the MNIST dataset in Keras

In [25]:
from tensorflow.keras.datasets import mnist
# mnist.load_data() yields two (images, labels) pairs: the training split
# first, then the test split. Unpack them in two steps for readability.
train_split, test_split = mnist.load_data()
train_images, train_labels = train_split
test_images, test_labels = test_split

In [26]:
# Dimensions of the training image array: one entry per image along the
# first axis, then the two 28-pixel image axes.
train_images.shape

(60000, 28, 28)

In [27]:
# Number of training labels; should equal the first dimension of train_images.
len(train_labels)

60000

In [28]:
# Peek at the raw training labels (NumPy elides the middle of long arrays).
train_labels

array([5, 0, 4, ..., 5, 6, 8], dtype=uint8)

In [29]:
# Dimensions of the held-out test image array (same per-image layout as the
# training images).
test_images.shape

(10000, 28, 28)

In [30]:
# Number of test labels; should equal the first dimension of test_images.
len(test_labels)

10000

In [31]:
# Peek at the raw test labels (NumPy elides the middle of long arrays).
test_labels

array([7, 2, 1, ..., 4, 5, 6], dtype=uint8)

The network architecture

In [32]:
from tensorflow import keras
from tensorflow.keras import layers
# Fix the global random seed before any layer is created so that weight
# initialisation and the batch shuffling done later by fit() are repeatable
# across "Restart Kernel -> Run All". Results may still differ slightly
# between hardware backends.
keras.utils.set_random_seed(42)

# Two fully connected layers: a 512-unit ReLU hidden layer followed by a
# 10-unit softmax layer that emits one probability per class.
# No input shape is declared here, so the weights are only created the first
# time the model is called on data.
model = keras.Sequential([
    layers.Dense(512, activation="relu"),
    layers.Dense(10, activation="softmax")
])

The compilation step

In [33]:
# Configure training: RMSprop updates the weights, the sparse categorical
# cross-entropy loss accepts integer class labels directly (no one-hot
# encoding needed), and accuracy is tracked as the reported metric.
model.compile(
    optimizer="rmsprop",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)

Preparing the image data

In [34]:
# Flatten every 28x28 image into a 784-element vector and rescale the pixel
# values to float32 by dividing by 255 (assumes the raw pixels are 0-255
# intensities - confirm against the dataset documentation).
#
# The sample count is read from each array instead of being hard-coded as
# 60000 / 10000, so the cell also works on a subset or a different split.
# The explicit 28 * 28 still makes reshape fail loudly if the images are not
# 28x28.
#
# NOTE: this cell is not idempotent - it rebinds train_images/test_images, so
# running it a second time divides by 255 again. Re-run the loading cell first.
train_images = train_images.reshape((train_images.shape[0], 28 * 28))
train_images = train_images.astype("float32") / 255
test_images = test_images.reshape((test_images.shape[0], 28 * 28))
test_images = test_images.astype("float32") / 255

"Fitting" the model

In [35]:
# Train for 5 passes over the training set in mini-batches of 128 samples.
# Left as a bare expression so the returned History object is displayed.
model.fit(x=train_images, y=train_labels, batch_size=128, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.src.callbacks.History at 0x16acfc190>

Using the model to make predictions

In [36]:
# Run the trained model on the first ten test images.
test_digits = test_images[:10]
predictions = model.predict(test_digits)

# Show the 10 class scores produced for the first of those images.
predictions[0]



array([3.2509075e-09, 4.4044674e-11, 1.2272695e-06, 2.1889403e-04,
       1.1186543e-11, 8.2000319e-08, 8.7055963e-15, 9.9977773e-01,
       5.0178496e-08, 1.9367353e-06], dtype=float32)

In [37]:
# Index of the largest score for the first test image, i.e. the predicted class.
predictions[0].argmax()

7

In [38]:
# Score the model assigned to class 7, the index returned by argmax() in the
# previous cell for this image.
predictions[0][7]

0.99977773

In [39]:
# Ground-truth label of the first test image, for comparison with the
# predicted class.
test_labels[0]

7

Evaluating the model on new data

In [40]:
# Score the model on the full test set, which was never used for training.
# evaluate() returns the loss followed by the metrics configured in compile(),
# so the second value is the accuracy.
test_loss, test_acc = model.evaluate(x=test_images, y=test_labels)
print(f"test_acc: {test_acc}")

test_acc: 0.98089998960495
