# Second exercise: A more sophisticated 'Hello World'

In [1]:
# Import TensorFlow and print its version so the environment used for this run is recorded
import tensorflow as tf
print(f"TensorFlow version: {tf.__version__}")

TensorFlow version: 2.7.0


In [2]:
# Load the MNIST dataset: load_data() yields a (train, test) pair of (images, labels) tuples
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Divide the images by 255.0 so the pixel values (presumably 8-bit, 0-255) lie in [0, 1]
x_train = x_train / 255.0
x_test = x_test / 255.0

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [3]:
# Build the neural network layer by layer. The last Dense layer has no
# activation, so the model outputs one raw logit per output unit.
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Flatten(input_shape=(28, 28)))  # 28x28 input -> flat vector
model.add(tf.keras.layers.Dense(128, activation='relu'))  # hidden layer, 128 units
model.add(tf.keras.layers.Dropout(0.2))                   # dropout with rate 0.2
model.add(tf.keras.layers.Dense(10))                      # 10 output units, no activation

In [9]:
# Get the logits for the first sample
# (the slice [:1] keeps the leading batch axis, so the model receives a batch of one;
# .numpy() converts the result to a NumPy array for display)
predictions = model(x_train[:1]).numpy()
predictions

array([[ 0.23083556,  0.17747326, -0.22908765, -1.0573186 ,  0.11959449,
        -0.6913041 ,  0.04142074,  0.01451893, -0.08985385, -0.19998842]],
      dtype=float32)

In [13]:
# The model outputs logits; apply a softmax to convert them to probabilities,
# then check that they sum to 1 (up to floating-point rounding)
probabilities = tf.nn.softmax(predictions).numpy()
print(f"Probabilities: {probabilities}")
print(f"Sum of probabilities: {probabilities.sum()}")

Probabilities: [[0.13974124 0.1324798  0.08822313 0.03853775 0.12502971 0.05557052
  0.11562794 0.1125588  0.10140304 0.09082808]]
Sum of probabilities: 1.0000001192092896


In [14]:
# Define a loss function.
# from_logits=True tells the loss that it receives raw logits (the model's last
# Dense layer has no activation), so the softmax is handled inside the loss.
# NOTE(review): the "sparse" variant expects integer class labels — presumably
# what y_train holds; confirm.
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

In [15]:
# The model is untrained, so the probability for each class should be
# close to uniform (1/10). For an exactly uniform prediction the loss
# would be -log(1/10) ~= 2.3 ("exact" loss below). The loss actually
# computed on the first sample differs from it because the initial
# logits are not exactly uniform.
exact_loss = -tf.math.log(1/10)
real_loss = loss_fn(y_train[:1], predictions).numpy()

print(f"Exact loss: {exact_loss}")
print(f"Computed loss: {real_loss}")

Exact loss: 2.3025851249694824
Computed loss: 2.8901023864746094


In [16]:
# Compile the model: Adam as the optimizer, loss_fn as the training loss,
# and accuracy as the metric reported during training and evaluation
model.compile(
    optimizer="adam",
    loss=loss_fn,
    metrics=["accuracy"],
)

In [17]:
# Train the model for 5 epochs on the training images and labels
model.fit(x=x_train, y=y_train, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7fecf3db8b90>

In [18]:
# Evaluate the trained model on the held-out test set; the returned list
# holds the loss followed by the compiled metric (accuracy)
model.evaluate(x=x_test, y=y_test, verbose=2)

313/313 - 1s - loss: 0.0808 - accuracy: 0.9759 - 550ms/epoch - 2ms/step


[0.08082335442304611, 0.9758999943733215]

In [19]:
# To make the model return probabilities instead of logits,
# wrap it in a new Sequential model with a softmax layer appended
probability_model = tf.keras.Sequential()
probability_model.add(model)
probability_model.add(tf.keras.layers.Softmax())

In [34]:
# Let's get the probability on the first test sample
# (the slice [:1] keeps the batch axis, so probs has one row of class probabilities)
probs = probability_model(x_test[:1])

# Convert the tensor to NumPy once and take row 0, the only sample in the batch.
# argmax on that row is the predicted class index; calling argmax() on the whole
# 2-D array would index the flattened batch, which is only a class index when
# the batch has exactly one row.
first_probs = probs.numpy()[0]
max_index = first_probs.argmax()
max_prob = first_probs[max_index]

print(f"Prediction {max_index} with probability {max_prob}")
print(f"Ground truth: {y_test[0]}")

Prediction 7 with probability 0.9999837875366211
Ground truth: 7
