In [None]:
# Source: https://www.tensorflow.org/tutorials/quickstart/beginner

In [2]:
import tensorflow as tf 

In [None]:
# Load the MNIST dataset

In [4]:
# Fetch the MNIST train/test arrays through the Keras dataset helper
# (the file is downloaded on first use).
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Divide by 255.0 so the 8-bit pixel values (0-255) become floats in [0, 1].
x_train = x_train / 255.0
x_test = x_test / 255.0

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [5]:
# Build the tf.keras.Sequential model by stacking layers. The optimizer and loss
# function are chosen further down, when the model is compiled.

# Seed TensorFlow's global random generator before any layer is created, so that
# a fresh "Restart & Run All" starts from the same random state.
SEED = 42
tf.random.set_seed(SEED)

model = tf.keras.models.Sequential([
  tf.keras.layers.Flatten(input_shape=(28, 28)),  # 28x28 image -> flat vector
  tf.keras.layers.Dense(128, activation='relu'),
  tf.keras.layers.Dropout(0.2),                   # dropout rate of 0.2
  tf.keras.layers.Dense(10)                       # 10 raw scores (logits); no activation
])

In [6]:
# Feed the first training image (the [:1] slice keeps it as a batch of one)
# through the untrained model. For each example the model returns a vector of
# "logits" or "log-odds" scores, one for each class.
logits = model(x_train[:1])
predictions = logits.numpy()
predictions

array([[ 0.2750336 ,  0.67444354,  0.12404131, -0.2585907 , -0.15187818,
         0.5294264 ,  0.01462451, -0.08443351,  0.19712715, -0.20341349]],
      dtype=float32)

In [7]:
# The tf.nn.softmax function converts these logits to "probabilities" for each class.
# print() is needed here: a bare expression that is not the last line of a cell
# is evaluated and then discarded without being shown.
print(tf.nn.softmax(predictions).numpy())

# The losses.SparseCategoricalCrossentropy loss takes a vector of logits and a true
# class index and returns a scalar loss for each example. from_logits=True tells the
# loss that it receives raw scores rather than probabilities.
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

In [None]:
# This loss is equal to the negative log probability of the true class: It is zero if the model is sure of the correct class.
# This untrained model gives probabilities close to random (1/10 for each class), so the initial loss should be close to -tf.math.log(1/10) ~= 2.3.


In [9]:
# Show the loss of the untrained model on the first training example.
# print() is needed here: a bare expression followed by more statements in the
# same cell would be evaluated and silently dropped.
print(loss_fn(y_train[:1], predictions).numpy())

# Configure training: Adam optimizer, the loss defined above, and accuracy as the
# reported metric.
model.compile(optimizer='adam',
              loss=loss_fn,
              metrics=['accuracy'])

In [10]:
# The Model.fit method adjusts the model parameters to minimize the loss:

In [11]:
# Train on the full training split for five epochs.
model.fit(x=x_train, y=y_train, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x183b04eb8>

In [12]:
# The Model.evaluate method checks the model's performance, usually on a "Validation-set" or "Test-set".

In [13]:
# Score the trained model on the test split; returns [loss, accuracy].
model.evaluate(x=x_test, y=y_test, verbose=2)

313/313 - 1s - loss: 0.0695 - accuracy: 0.9778


[0.0695451870560646, 0.9778000116348267]

In [14]:
# The image classifier is now trained to ~98% accuracy on this dataset. To learn more, read the TensorFlow tutorials.

# If you want your model to return a probability, you can wrap the trained model, and attach the softmax to it:

In [15]:
# Chain the trained model with a Softmax layer so that calling the combined
# model yields class probabilities instead of raw logits.
probability_model = tf.keras.Sequential(
    layers=[model, tf.keras.layers.Softmax()]
)

In [16]:
# Class probabilities for the first five test images (one row per image).
probability_model(x_test[0:5])


<tf.Tensor: shape=(5, 10), dtype=float32, numpy=
array([[3.09053632e-07, 3.28332050e-07, 1.43766747e-05, 8.74276855e-04,
        2.57519700e-12, 2.89719139e-07, 2.02882619e-13, 9.99105632e-01,
        1.22735833e-06, 3.52019310e-06],
       [1.08900666e-09, 2.48847664e-05, 9.99951005e-01, 2.37835084e-05,
        4.93490077e-16, 4.77456474e-09, 1.29304245e-09, 8.81708010e-13,
        3.34216310e-07, 3.73109448e-13],
       [4.95600574e-08, 9.99057472e-01, 6.45012915e-05, 1.51665645e-05,
        1.31420275e-05, 5.09219262e-06, 6.05317518e-06, 5.82593028e-04,
        2.54572864e-04, 1.31739432e-06],
       [9.99633193e-01, 9.64233990e-08, 1.49386164e-04, 3.42449589e-08,
        6.16966827e-06, 7.22652317e-07, 1.91709201e-04, 1.32535806e-05,
        1.04821551e-09, 5.47903392e-06],
       [1.07822063e-06, 1.38656020e-07, 7.19884247e-06, 9.72823045e-07,
        9.96646464e-01, 1.07687178e-07, 3.86033253e-06, 2.31311114e-05,
        9.14309794e-07, 3.31609766e-03]], dtype=float32)>