In [1]:
# Load the MNIST dataset that ships with Keras.
# load_data() hands back two (images, labels) pairs, unpacked here into the
# training split and the test split. The recorded output of this cell shows
# the mnist.npz archive being downloaded.

from tensorflow.keras.datasets import mnist

(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[1m11490434/11490434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 0us/step


In [2]:
# look at the data: the training images are stored as a single 3-D array
# (60000 images, each 28 x 28 — see the output of this cell)

train_images.shape

(60000, 28, 28)

In [3]:
# one integer label per training image (a 1-D uint8 array, see the output)
train_labels

array([5, 0, 4, ..., 5, 6, 8], shape=(60000,), dtype=uint8)

### Preprocessing

Before training, we’ll preprocess the data by reshaping it into the shape the model expects and scaling it so that all values are in the [0, 1] interval. Previously, our training images were stored in an array of shape (60000, 28, 28) of type uint8 with values in the [0, 255] interval. We’ll transform it into a float32 array of shape (60000, 28 * 28) with values between 0 and 1.

Chollet, François. *Deep Learning with Python*, Second Edition (p. 29). Manning. Kindle Edition.

In [6]:
def flatten_and_scale(images):
    """Flatten every image to a 1-D vector and rescale its pixels to float32.

    Args:
        images: array whose first axis indexes the samples; pixel values are
            expected to lie in [0, 255] (uint8 for the raw MNIST arrays).

    Returns:
        A float32 array of shape (n_samples, n_pixels) holding the input
        values divided by 255.
    """
    # Derive the sample count from the array itself instead of hard-coding
    # 60000 / 10000; -1 lets NumPy infer the flattened pixel count (28 * 28 here).
    flat = images.reshape((len(images), -1))
    return flat.astype("float32") / 255


# NOTE: these assignments overwrite the raw arrays, so running this cell a
# second time would divide by 255 again. Re-run the loading cell first if the
# preprocessing has to be repeated.
train_images = flatten_and_scale(train_images)
test_images = flatten_and_scale(test_images)


In [4]:
# build the network architecture

from tensorflow import keras
from tensorflow.keras import layers

# Weight initialisation and batch shuffling are random; seeding Python, NumPy
# and the backend in one call makes a Restart & Run All repeatable
# (some hardware kernels may still introduce small run-to-run differences).
SEED = 42
keras.utils.set_random_seed(SEED)

# Two fully connected layers: a 512-unit ReLU hidden layer followed by a
# 10-way softmax layer that outputs one probability per class.
model = keras.Sequential([
    layers.Dense(512, activation="relu"),
    layers.Dense(10, activation="softmax"),
])




### For any DL model we need 3 things:

1. An optimizer
2. A loss function
3. A metric to monitor during training and evaluation


In [5]:
# compilation step: choose how the weights get updated (optimizer), which
# quantity is minimised (loss) and which metric is reported alongside it
model.compile(
    optimizer="rmsprop",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)

In [7]:
# fitting step: 5 passes over the training data in mini-batches of 128 samples
EPOCHS = 5
BATCH_SIZE = 128

model.fit(train_images, train_labels, batch_size=BATCH_SIZE, epochs=EPOCHS)

Epoch 1/5
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 10ms/step - accuracy: 0.9251 - loss: 0.2602
Epoch 2/5
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 9ms/step - accuracy: 0.9688 - loss: 0.1061
Epoch 3/5
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 10ms/step - accuracy: 0.9785 - loss: 0.0706
Epoch 4/5
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 9ms/step - accuracy: 0.9849 - loss: 0.0510
Epoch 5/5
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 8ms/step - accuracy: 0.9884 - loss: 0.0385


<keras.src.callbacks.history.History at 0x1aa4e654290>

In [8]:
# use the model to make predictions for the first ten test images
test_digits = test_images[:10]
predictions = model.predict(test_digits)

# show the model's output vector for the 1st image (one score per class)
predictions[0]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 155ms/step


array([1.12246195e-08, 1.82140980e-09, 1.77234051e-06, 5.43606111e-05,
       1.39854777e-11, 1.58673661e-08, 5.50668343e-13, 9.99941111e-01,
       1.03678616e-07, 2.64417167e-06], dtype=float32)

In [11]:
# Compare the true label of the first test image with the model's prediction.
# argmax() returns the index of the largest score in the output vector, and
# that index is the predicted label (7 in the recorded run).
predicted_label = predictions[0].argmax()

print(f"The actual label on test data is: {test_labels[0]} and predicted label is {predicted_label}")

The actual label on test data is: 7 and predicted label is 7


In [12]:
# accuracy over the entire test set; evaluate() returns the loss followed by
# the metric requested at compile time
test_loss, test_acc = model.evaluate(x=test_images, y=test_labels)

print(f"test accuracy is {test_acc}")

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.9777 - loss: 0.0699
test accuracy is 0.9776999950408936
