In [1]:
# 12)
# Train a deep MLP on the MNIST dataset
# and see if you can get over 98% precision

# Just like in the last exercise of Chapter 9, 
# try adding all the bells and whistles
#  - save checkpoints
#  - restore the last checkpoint in case of an interruption,
#  - add summaries, 
#  - plot learning curves using TensorBoard

In [2]:
from __future__ import absolute_import, division, print_function, unicode_literals

import tensorflow as tf

In [56]:
# Load MNIST through the Keras dataset helper; the result is unpacked as a
# (train, test) pair of (inputs, labels) tuples into the four working arrays.
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()

In [57]:
# Rescale both image arrays by 1/255 (presumably 8-bit pixel intensities, so
# the result lies in [0, 1] -- confirm against the dataset documentation).
# NOTE: the names are rebound to the scaled arrays, so running this cell a
# second time divides the already-scaled data by 255 again.
X_train = X_train / 255.0
X_test = X_test / 255.0

In [5]:
def create_model(n_hidden=5, n_units=100, activation='relu', dropout_rate=0.2):
    """Build and compile a fully connected classifier for 28x28 inputs.

    The network flattens each 28x28 input, applies ``n_hidden`` Dense layers
    with He-normal initialised kernels, one Dropout layer and a 10-way
    softmax output layer. Called without arguments it yields the same
    architecture as before: five 100-unit ReLU layers and dropout 0.2.

    Args:
        n_hidden: number of hidden Dense layers.
        n_units: number of units in every hidden layer.
        activation: activation (name or callable) used by the hidden layers.
        dropout_rate: rate passed to the Dropout layer that precedes the
            output layer.

    Returns:
        A ``tf.keras.models.Sequential`` model compiled with the 'adam'
        optimizer, sparse categorical cross-entropy loss and an accuracy
        metric.
    """
    layers = [tf.keras.layers.Flatten(input_shape=(28, 28))]
    for _ in range(n_hidden):
        # Each layer gets its own initializer object. Sharing one unseeded
        # instance across layers is fragile: newer Keras releases warn that a
        # reused unseeded instance yields identical values on each call,
        # which would start same-shaped layers from the same weights.
        layers.append(tf.keras.layers.Dense(
            n_units,
            activation=activation,
            kernel_initializer=tf.keras.initializers.he_normal(seed=None),
        ))
    layers.append(tf.keras.layers.Dropout(dropout_rate))
    layers.append(tf.keras.layers.Dense(10, activation='softmax'))

    model = tf.keras.models.Sequential(layers)
    # Sparse loss: labels are passed as integer class ids, not one-hot rows.
    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    return model

In [6]:
# Build a fresh, untrained network from the create_model defined above.
model = create_model()
# Print the layer-by-layer table (layer type, output shape, parameter count).
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten (Flatten)            (None, 784)               0         
_________________________________________________________________
dense (Dense)                (None, 100)               78500     
_________________________________________________________________
dense_1 (Dense)              (None, 100)               10100     
_________________________________________________________________
dense_2 (Dense)              (None, 100)               10100     
_________________________________________________________________
dense_3 (Dense)              (None, 100)               10100     
_________________________________________________________________
dense_4 (Dense)              (None, 100)               10100     
_________________________________________________________________
dropout (Dropout)            (None, 100)               0

In [7]:
# Baseline run: 10 epochs on the training arrays, with no callbacks and no
# validation data. The History object returned by fit() is the cell output.
model.fit(x=X_train, y=y_train, epochs=10)

Train on 60000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x15161ad10>

In [8]:
# Score the trained network on the test arrays; evaluate() is unpacked as the
# loss followed by the single compiled metric (accuracy).
loss, acc = model.evaluate(x=X_test, y=y_test, verbose=2)
print("Initial model, accuracy: {:5.2f}%".format(100 * acc))

10000/1 - 1s - loss: 0.0514 - accuracy: 0.9770
Initial model, accuracy: 97.70%


In [9]:
import os
# Path handed to the checkpoint callback, and the directory that contains it
# (the head component of the path).
checkpoint_path = "training_1/cp.ckpt"
checkpoint_dir = os.path.split(checkpoint_path)[0]

In [10]:
# Checkpoint callback for the next training run. No save_freq is given, and
# the training log of that run shows one save per epoch.
cp_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_path,
    save_weights_only=True,  # store the weights only, not the whole model
    verbose=1,               # print a "saving model to ..." line per save
)

In [21]:
# Train for 10 epochs with the checkpoint callback attached, so the weights
# are written to checkpoint_path while training runs.
# NOTE(review): the test arrays are also passed as validation data here, so
# test-set scores reported afterwards are not a fully independent estimate.
model.fit(
    x=X_train,
    y=y_train,
    epochs=10,
    validation_data=(X_test, y_test),
    callbacks=[cp_callback],
)

Train on 60000 samples, validate on 10000 samples
Epoch 1/10
Epoch 00001: saving model to training_1/cp.ckpt
Epoch 2/10
Epoch 00002: saving model to training_1/cp.ckpt
Epoch 3/10
Epoch 00003: saving model to training_1/cp.ckpt
Epoch 4/10
Epoch 00004: saving model to training_1/cp.ckpt
Epoch 5/10
Epoch 00005: saving model to training_1/cp.ckpt
Epoch 6/10
Epoch 00006: saving model to training_1/cp.ckpt
Epoch 7/10
Epoch 00007: saving model to training_1/cp.ckpt
Epoch 8/10
Epoch 00008: saving model to training_1/cp.ckpt
Epoch 9/10
Epoch 00009: saving model to training_1/cp.ckpt
Epoch 10/10
Epoch 00010: saving model to training_1/cp.ckpt


<tensorflow.python.keras.callbacks.History at 0x13870d290>

In [11]:
# List the files written by the checkpoint callback. os.listdir is used
# instead of a shell `ls` so the cell does not depend on the host shell.
sorted(os.listdir(checkpoint_dir))

checkpoint                  cp.ckpt.index
cp.ckpt.data-00000-of-00001


In [12]:
# Second, untrained instance of the same architecture; it is the target for
# the checkpoint restore that follows.
model_2 = create_model()

# Reference score before any weights are loaded.
loss, acc = model_2.evaluate(x=X_test, y=y_test, verbose=2)
print("Untrained model, accuracy: {:5.2f}%".format(100 * acc))

10000/1 - 1s - loss: 2.4328 - accuracy: 0.1203
Untrained model, accuracy: 12.03%


In [13]:
# Replace model_2's freshly initialised weights with the checkpointed ones.
model_2.load_weights(checkpoint_path)

# Score the restored network on the same test arrays.
loss, acc = model_2.evaluate(x=X_test, y=y_test, verbose=2)
print("Restored model, accuracy: {:5.2f}%".format(100 * acc))

10000/1 - 1s - loss: 0.0955 - accuracy: 0.9504
Restored model, accuracy: 95.04%


In [35]:
#=======================================#
# Doing it again, but with an integer   #
# save_freq (save every 2 epochs)       #
#=======================================#

In [59]:
def create_model(n_hidden=7, n_units=100, activation='elu', dropout_rate=0.4):
    """Build and compile the deeper ELU classifier for 28x28 inputs.

    This definition rebinds the name ``create_model`` used earlier in the
    notebook. The network flattens each 28x28 input, applies ``n_hidden``
    Dense layers with He-normal initialised kernels, one Dropout layer and a
    10-way softmax output layer. Called without arguments it yields the same
    architecture as before: seven 100-unit ELU layers and dropout 0.4.

    Args:
        n_hidden: number of hidden Dense layers.
        n_units: number of units in every hidden layer.
        activation: activation (name or callable) used by the hidden layers.
        dropout_rate: rate passed to the Dropout layer that precedes the
            output layer.

    Returns:
        A ``tf.keras.models.Sequential`` model compiled with the 'adam'
        optimizer, sparse categorical cross-entropy loss and an accuracy
        metric.
    """
    layers = [tf.keras.layers.Flatten(input_shape=(28, 28))]
    for _ in range(n_hidden):
        # Each layer gets its own initializer object. Sharing one unseeded
        # instance across layers is fragile: newer Keras releases warn that a
        # reused unseeded instance yields identical values on each call,
        # which would start same-shaped layers from the same weights.
        layers.append(tf.keras.layers.Dense(
            n_units,
            activation=activation,
            kernel_initializer=tf.keras.initializers.he_normal(seed=None),
        ))
    layers.append(tf.keras.layers.Dropout(dropout_rate))
    layers.append(tf.keras.layers.Dense(10, activation='softmax'))

    model = tf.keras.models.Sequential(layers)
    # Sparse loss: labels are passed as integer class ids, not one-hot rows.
    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    return model

In [60]:
# Rebind `model` to a fresh, untrained network built by the create_model
# definition directly above (7 ELU layers, dropout 0.4).
model = create_model()

In [61]:
# Separate checkpoint location for the second experiment, plus the directory
# that contains it (the head component of the path).
checkpoint_path = "training_2/cp.ckpt"
checkpoint_dir = os.path.split(checkpoint_path)[0]

In [62]:
# Checkpoint every two epochs instead of every epoch. An integer save_freq is
# not an epoch count: in the run logged below it is counted in training
# samples, so two passes over X_train (2 * 60000 = 120000) trigger one save.
# Deriving the value from len(X_train) keeps it in step with the data size.
# NOTE(review): some TensorFlow releases count batches rather than samples
# for an integer save_freq -- confirm against the installed version.
cp_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_path,
    save_weights_only=True,
    verbose=1,
    save_freq=2 * len(X_train),
)

In [63]:
# Train the deeper network for 15 epochs with the reduced-frequency
# checkpoint callback attached.
# NOTE(review): the test arrays are also passed as validation data here.
model.fit(
    x=X_train,
    y=y_train,
    epochs=15,
    validation_data=(X_test, y_test),
    callbacks=[cp_callback],
)

Train on 60000 samples, validate on 10000 samples
Epoch 1/15
Epoch 2/15
Epoch 00002: saving model to training_2/cp.ckpt
Epoch 3/15
Epoch 4/15
Epoch 00004: saving model to training_2/cp.ckpt
Epoch 5/15
Epoch 6/15
Epoch 00006: saving model to training_2/cp.ckpt
Epoch 7/15
Epoch 8/15
Epoch 00008: saving model to training_2/cp.ckpt
Epoch 9/15
Epoch 10/15
Epoch 00010: saving model to training_2/cp.ckpt
Epoch 11/15
Epoch 12/15
Epoch 00012: saving model to training_2/cp.ckpt
Epoch 13/15
Epoch 14/15
Epoch 00014: saving model to training_2/cp.ckpt
Epoch 15/15


<tensorflow.python.keras.callbacks.History at 0x1720a2e90>

In [42]:
# Fresh, untrained instance of the current create_model architecture; it is
# the target for the checkpoint restore below.
model_2 = create_model()

In [43]:
# Reference score for model_2 before any weights are loaded.
loss, acc = model_2.evaluate(x=X_test, y=y_test, verbose=2)
print("Untrained model, accuracy: {:5.2f}%".format(100 * acc))

10000/1 - 1s - loss: 2.3976 - accuracy: 0.0928
Untrained model, accuracy:  9.28%


In [44]:
# Load the weights stored at checkpoint_path into model_2. The call is the
# last expression in the cell, so its return value is shown as the output.
model_2.load_weights(checkpoint_path)

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x1525faf50>

In [45]:
# Score model_2 again now that the checkpointed weights are loaded.
loss, acc = model_2.evaluate(x=X_test, y=y_test, verbose=2)
print("Loaded weights, accuracy: {:5.2f}%".format(100 * acc))

10000/1 - 1s - loss: 0.0546 - accuracy: 0.9759
Loaded weights, accuracy: 97.59%


In [41]:
# Score the in-memory model that was trained directly, for comparison with
# the restored copy.
# NOTE(review): the 15-epoch training log in this notebook shows the last
# checkpoint being written after epoch 14, so this model may hold slightly
# newer weights than the restored model_2.
loss_1, acc_1 = model.evaluate(x=X_test, y=y_test, verbose=2)
print("model 1 accuracy: {:5.2f}%".format(100 * acc_1))

10000/1 - 1s - loss: 0.0546 - accuracy: 0.9759
model 1 accuracy: 97.59%


In [24]:
# List the files in the checkpoint directory. os.listdir is used instead of a
# shell `ls` so the cell does not depend on the host shell and its output
# carries no terminal colour escape codes.
sorted(os.listdir(checkpoint_dir))

checkpoint
cp.ckpt.data-00000-of-00001
cp.ckpt.index
[1m[36mcp.ckpt_temp_e48150c0b6074e959c8406465e76eb8c[m[m
