In [1]:
import os

import tensorflow as tf
from tensorflow import keras

  from ._conv import register_converters as _register_converters


# Setup

## Gather data

In [2]:
# load_data() hands back a (train, test) pair, each itself an (images, labels) pair.
train_split, test_split = tf.keras.datasets.mnist.load_data()

# Keep only the first 1000 examples of every array in each split.
X_train, y_train = (part[:1000] for part in train_split)
X_test, y_test = (part[:1000] for part in test_split)

# Flatten each 28x28 image into a 784-long row and divide by 255.0
# (presumably rescaling 8-bit pixel intensities into [0, 1] -- confirm).
X_train = X_train.reshape(-1, 28 * 28) / 255.0
X_test = X_test.reshape(-1, 28 * 28) / 255.0

## Define a simple model

In [3]:
def create_model():
    """Build and compile the small dense classifier used throughout this notebook.

    The network takes 784-feature inputs, applies a 512-unit ReLU layer,
    dropout with rate 0.2, and a 10-unit softmax output layer. It is compiled
    with the "adam" optimizer, sparse categorical cross-entropy loss, and the
    "accuracy" metric.

    Returns:
        A newly constructed, compiled Sequential model.
    """
    layer_stack = [
        keras.layers.Dense(512, activation=tf.nn.relu, input_shape=(784,)),
        keras.layers.Dropout(0.2),
        keras.layers.Dense(10, activation=tf.nn.softmax),
    ]
    network = tf.keras.models.Sequential(layer_stack)

    network.compile(
        loss=tf.keras.losses.sparse_categorical_crossentropy,
        optimizer="adam",
        metrics=["accuracy"],
    )

    return network


# Build the first model and print its layer-by-layer summary.
model1 = create_model()
model1.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 512)               401920    
_________________________________________________________________
dropout (Dropout)            (None, 512)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 10)                5130      
Total params: 407,050
Trainable params: 407,050
Non-trainable params: 0
_________________________________________________________________


# Save checkpoints during training

## Checkpoint callback usage

In [4]:
# Every epoch writes to this same path, so each save replaces the previous one.
checkpoint_path = "training_1/cp.ckpt"
checkpoint_dir = os.path.dirname(checkpoint_path)

# Callback that stores only the weights and reports each save (verbose=1).
cp_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_path,
    save_weights_only=True,
    verbose=1,
)

# Train for 10 epochs, validating on the test subset and checkpointing via the callback.
model1.fit(
    x=X_train,
    y=y_train,
    epochs=10,
    validation_data=(X_test, y_test),
    callbacks=[cp_callback],
)

Train on 1000 samples, validate on 1000 samples
Epoch 1/10

Epoch 00001: saving model to training_1/cp.ckpt
Epoch 2/10

Epoch 00002: saving model to training_1/cp.ckpt
Epoch 3/10

Epoch 00003: saving model to training_1/cp.ckpt
Epoch 4/10

Epoch 00004: saving model to training_1/cp.ckpt
Epoch 5/10

Epoch 00005: saving model to training_1/cp.ckpt
Epoch 6/10

Epoch 00006: saving model to training_1/cp.ckpt
Epoch 7/10

Epoch 00007: saving model to training_1/cp.ckpt
Epoch 8/10

Epoch 00008: saving model to training_1/cp.ckpt
Epoch 9/10

Epoch 00009: saving model to training_1/cp.ckpt
Epoch 10/10

Epoch 00010: saving model to training_1/cp.ckpt


<tensorflow.python.keras.callbacks.History at 0x25184ba7b00>

## Create a new, untrained model
The new model must have the same architecture as the model whose weights were saved.

In [8]:
# Fresh model that has not been fitted yet.
model2 = create_model()

# With 10 output classes, an untrained model should score roughly 10% accuracy.
untrained_scores = model2.evaluate(x=X_test, y=y_test)
loss, acc = untrained_scores
print("Untrained model, acc: {:.2f}".format(acc * 100))

Untrained model, acc: 10.30


## Load in weights from checkpoint and re-evaluate

In [9]:
# Overwrite model2's weights with the ones stored at checkpoint_path, then score it again.
model2.load_weights(filepath=checkpoint_path)
restored_scores = model2.evaluate(x=X_test, y=y_test)
loss, acc = restored_scores
print("Restored model, acc: {:.2f}".format(acc * 100))

Restored model, acc: 86.80


## Checkpoint callback options

In [32]:
# Embed the zero-padded epoch number in every checkpoint file name.
checkpoint_path = "training_2/cp-{epoch:04d}.ckpt"
checkpoint_dir = os.path.dirname(checkpoint_path)

# Note: the default TensorFlow format will only save the 5 most recent checkpoints

cp_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_path,
    save_weights_only=True,
    period=5,  # write the weights once every 5 epochs
    verbose=1,
)

# Train a fresh model for 50 epochs; verbose=0 silences the per-epoch progress
# output, so only the callback's save messages are printed.
model3 = create_model()
model3.fit(
    x=X_train,
    y=y_train,
    epochs=50,
    validation_data=(X_test, y_test),
    callbacks=[cp_callback],
    verbose=0,
)


Epoch 00005: saving model to training_2/cp-0005.ckpt

Epoch 00010: saving model to training_2/cp-0010.ckpt

Epoch 00015: saving model to training_2/cp-0015.ckpt

Epoch 00020: saving model to training_2/cp-0020.ckpt

Epoch 00025: saving model to training_2/cp-0025.ckpt

Epoch 00030: saving model to training_2/cp-0030.ckpt

Epoch 00035: saving model to training_2/cp-0035.ckpt

Epoch 00040: saving model to training_2/cp-0040.ckpt

Epoch 00045: saving model to training_2/cp-0045.ckpt

Epoch 00050: saving model to training_2/cp-0050.ckpt


<tensorflow.python.keras.callbacks.History at 0x251899575c0>

In [33]:
# Ask TensorFlow for the most recent checkpoint recorded under checkpoint_dir.
latest = tf.train.latest_checkpoint(checkpoint_dir)
# Bare expression so the notebook displays the resolved checkpoint path.
latest

'training_2\\cp-0050.ckpt'

In [36]:
# Build a new model and load the weights from the checkpoint path held in `latest`.
model4 = create_model()
model4.load_weights(filepath=latest)

# Score the restored model on the test subset.
latest_scores = model4.evaluate(x=X_test, y=y_test)
loss, acc = latest_scores
print("Restored model, acc: {:.2f}".format(acc * 100))

Restored model, acc: 87.50


# Manually save weights

In [40]:
# Save the weights
# Writes model4's current weights to "./checkpoints/my_checkpoint" directly,
# without going through a training callback.
model4.save_weights("./checkpoints/my_checkpoint")

In [43]:
# Restore the manually saved weights into a newly built model.
manual_checkpoint = "./checkpoints/my_checkpoint"
model5 = create_model()
model5.load_weights(filepath=manual_checkpoint)

# Score the restored model on the test subset.
manual_scores = model5.evaluate(x=X_test, y=y_test)
loss, acc = manual_scores
print("Restored model, acc: {:.2f}".format(acc * 100))

Restored model, acc: 87.50


# Save the entire model

## As an HDF5 file

In [44]:
model6 = create_model()

# Need to use a keras optimizer to restore optimizer state.
# (These are the same compile settings create_model() already applies.)
model6.compile(
    loss=tf.keras.losses.sparse_categorical_crossentropy,
    optimizer="adam",
    metrics=["accuracy"],
)

# Train for 5 epochs without validation data.
model6.fit(x=X_train, y=y_train, epochs=5)

# Write the entire model to a single HDF5 file.
model6.save(filepath="model6.h5")

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


## Recreate model from file

In [47]:
# Rebuild the model directly from the HDF5 file; create_model() is not needed here.
model6a = keras.models.load_model(filepath="model6.h5")
model6a.summary()

# Score the reloaded model on the test subset.
reloaded_scores = model6a.evaluate(x=X_test, y=y_test)
loss, acc = reloaded_scores
print("Restored model, acc: {:.2f}".format(acc * 100))

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_28 (Dense)             (None, 512)               401920    
_________________________________________________________________
dropout_14 (Dropout)         (None, 512)               0         
_________________________________________________________________
dense_29 (Dense)             (None, 10)                5130      
Total params: 407,050
Trainable params: 407,050
Non-trainable params: 0
_________________________________________________________________
Restored model, acc: 84.90


## As a **saved_model**
*Note: this doesn't work on my current TensorFlow version (1.10).*

In [53]:
# NOTE: this cell is left commented out. The traceback recorded for it reports
# that tensorflow.contrib.saved_model has no attribute `save_keras_model` in
# the TensorFlow version used for this notebook, so the export cannot run here.
# TODO(review): re-enable once running on a version that provides this function.
# model7 = create_model()
# model7.fit(X_train, y_train, epochs=5)

# # Create a saved model
# saved_model_path = tf.contrib.saved_model.save_keras_model(
#     model7,
#     "./saved_models"
# )

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


AttributeError: module 'tensorflow.contrib.saved_model' has no attribute 'save_keras_model'