# MNIST digit classification using TensorFlow 2.0

In [1]:
import tensorflow as tf
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense, Flatten

In [2]:
# Short alias for the Keras MNIST dataset module; its load_data() is called in the next cell.
mnist = keras.datasets.mnist

In [3]:
# Fetch the MNIST train/test split as (images, labels) pairs.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Divide the pixel values by 255.0 and store the images as float32 tensors.
x_train = tf.cast(x_train / 255.0, tf.float32)
x_test = tf.cast(x_test / 255.0, tf.float32)

# Store the labels as int64 tensors (integer class ids, not one-hot vectors).
y_train = tf.cast(y_train, tf.int64)
y_test = tf.cast(y_test, tf.int64)

In [4]:
# Define the model as an (initially empty) Sequential() model.
# In a Sequential model the layers are stacked one above another.
# NOTE(review): no layers are added to this instance anywhere in this notebook;
# a later cell rebinds `model` to a functional-API tf.keras.Model, so this
# object ends up unused.
model = Sequential()

In [13]:
# Build the classifier with the Keras functional API. Each layer object is
# called on the tensor produced by the previous step -- that trailing
# "(previous_tensor)" call is what wires a layer to its input.
#
# The symbolic input is named `inputs` (not `input`) so that Python's built-in
# input() is not shadowed for the rest of the kernel session.
# Its shape is (28, 28): the images returned by mnist.load_data() carry no
# channel axis, so declaring (28, 28, 1) would only work through Keras'
# implicit rank adjustment.
inputs = tf.keras.Input(shape=(28, 28))

# Flatten each 28x28 image into a 784-element vector for the dense layers.
flattened = Flatten()(inputs)

# Two fully connected hidden layers with ReLU activations.
layer1 = Dense(256, activation='relu')(flattened)
layer2 = Dense(128, activation='relu')(layer1)

# Ten softmax units: one probability per digit class.
output = Dense(10, activation='softmax')(layer2)

# Tie the graph together into a trainable model.
model = tf.keras.Model(inputs=inputs, outputs=output)

In [14]:
"""
Now that we have defined the model, the next step is to compile it. In this phase, we set up how the model should learn. We define three parameters when compiling the model:

The optimizer parameter: This defines the optimization algorithm we want to use; for example, the gradient descent, in this case.
The loss parameter: This is the objective function that we are trying to minimize; for example, the mean squared error or cross-entropy loss.
The metrics parameter: This is the metric through which we want to assess the model's performance; for example, accuracy. We can also specify more than one metric.
"""
model.compile(loss='sparse_categorical_crossentropy', optimizer='sgd', metrics=['accuracy'])


# If labels are integers → Use sparse_categorical_crossentropy (No need to convert labels).
# If labels are one-hot encoded → Use categorical_crossentropy. [1, 0, 0], [0, 1, 0], [0, 0, 1]

In [15]:
# Train with fit(): pass the features (x), the labels (y), how many passes over
# the training set to make (epochs) and how many samples go into each gradient
# step (batch_size). The call is the cell's last expression, so the returned
# History object is displayed as the cell output.
model.fit(
    x=x_train,
    y=y_train,
    epochs=10,
    batch_size=32,
)

Epoch 1/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.7231 - loss: 1.0164
Epoch 2/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 4ms/step - accuracy: 0.9177 - loss: 0.2942
Epoch 3/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 3ms/step - accuracy: 0.9306 - loss: 0.2400
Epoch 4/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 3ms/step - accuracy: 0.9420 - loss: 0.2026
Epoch 5/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 3ms/step - accuracy: 0.9504 - loss: 0.1790
Epoch 6/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 4ms/step - accuracy: 0.9559 - loss: 0.1584
Epoch 7/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 4ms/step - accuracy: 0.9597 - loss: 0.1442
Epoch 8/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.9628 - loss: 0.1279
Epoch 9/10
[1m1875/1

<keras.src.callbacks.history.History at 0x7fbe4de6b6d0>

In [16]:
# Score the trained model on the held-out test split. The result, displayed as
# the cell output, is the loss followed by the metric given to compile()
# (accuracy).
model.evaluate(x=x_test, y=y_test)

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.9611 - loss: 0.1318


[0.1137709692120552, 0.9664999842643738]