Most of the code is adapted from a sample in the book "Deep Learning with Python, Second Edition":
https://livebook.manning.com/book/deep-learning-with-python-second-edition


In [2]:
from tensorflow.keras.datasets import mnist
from tensorflow import keras
from tensorflow.keras import layers
import matplotlib.pyplot as plt

In [3]:
# Load MNIST as a (train, test) pair, each split being an (images, labels) tuple.
train_split, test_split = mnist.load_data()
train_images, train_labels = train_split
test_images, test_labels = test_split

In [4]:
# Inspect the dimensions of the training images: (samples, height, width).
train_images.shape

(60000, 28, 28)

If you check the shape, you see that the training set contains 60000 samples, each of which is a 28 by 28 matrix of data points (pixels). Every pixel has a value from 0 to 255. The train labels contain the digits that were written.

In [5]:
# Display the training labels: one integer class label per training image.
train_labels

array([5, 0, 4, ..., 5, 6, 8], dtype=uint8)

In [6]:
# Flatten every 28 x 28 image into a single vector of 784 values, then rescale
# the pixel values from the 0-255 range to the 0-1 range as float32.
# The sample axis is given as -1 so NumPy infers it from the array itself;
# the cell therefore keeps working when a subset of the data is used instead
# of the full 60000 / 10000 samples.
# NOTE: this cell overwrites its inputs; re-running it without reloading the
# data would divide by 255 a second time.
train_images = train_images.reshape((-1, 28 * 28))
train_images = train_images.astype("float32") / 255
test_images = test_images.reshape((-1, 28 * 28))
test_images = test_images.astype("float32") / 255

In [7]:
# What is the architecture of the network? How many layers? Why have 10 nodes in the second layer?
# First layer: 512 densely connected units with a ReLU activation.
hidden_layer = layers.Dense(512, activation="relu")
# Second layer: 10 densely connected units with a softmax activation.
output_layer = layers.Dense(10, activation="softmax")

# Stack the two layers, in order, into a sequential model.
model = keras.Sequential([hidden_layer, output_layer])

In [8]:
# Configure training: RMSprop as optimizer and accuracy as the reported metric.
# The labels are plain integers (not one-hot vectors), hence the "sparse"
# variant of the categorical cross-entropy loss.
model.compile(
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
    optimizer="rmsprop",
)

In [9]:
# Train for 5 passes over the training data in mini-batches of 128 samples.
# The return value is kept so the per-epoch metrics can be read afterwards.
history = model.fit(
    x=train_images,
    y=train_labels,
    batch_size=128,
    epochs=5,
)

Train on 60000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [None]:
# Reconstruct one of the test images and use it to show that the model works.
test_image_index = 17

digit = test_images[test_image_index]

# The image was flattened to 784 values earlier, so reshape it back into a
# 28 x 28 matrix before drawing it.
# (The original `fig = plt.figure` lacked the call parentheses: it only bound
# the function object and never created a figure.)
fig, ax = plt.subplots()
ax.imshow(digit.reshape((28, 28)), cmap='gray')
plt.show()

# Slice instead of indexing so the input keeps its leading batch dimension:
# the model receives a batch containing exactly one image.
predictions = model.predict(test_images[test_image_index:test_image_index + 1])

print("Softmax predictions for the digit above")
for idx, pred in enumerate(predictions[0]):
    print(f'{idx} - {pred:.5f}')
print(f'The most likely digit based on the max value is {predictions[0].argmax()}')
print(f'The label tells us it is a {test_labels[test_image_index]}')

In [18]:
# Score the trained model on the test set; with the single "accuracy" metric
# configured, the result unpacks into the loss followed by the accuracy.
evaluation = model.evaluate(x=test_images, y=test_labels)
test_loss, test_acc = evaluation



In [19]:
# Compare test accuracy with the training accuracy of the final epoch.
# Index -1 always selects the last epoch, so the line stays correct when the
# number of training epochs is changed (a hard-coded 4 only fits epochs=5).
print(f"test accuracy: {test_acc}, train accuracy: {history.history['accuracy'][-1]}")

test accuracy: 0.9801999926567078, train accuracy: 0.9894000291824341
