Sounds kind of basic, but if you can't save and load the model, then what is the point in even training it?

In [1]:
# pip install -q pyyaml h5py

In [2]:
import os

import tensorflow as tf
from tensorflow import keras

print(tf.version.VERSION)

2.4.1


In [3]:
# Load MNIST, then keep only the first 1000 samples of each split to make it faster
(train_images, train_labels), (test_images, test_labels) = tf.keras.datasets.mnist.load_data()

# Flatten every 28x28 image into a 784-element row and divide by 255.0
train_images = train_images[:1000].reshape(-1, 784) / 255.0
test_images = test_images[:1000].reshape(-1, 784) / 255.0

# Trim the labels to the same 1000 samples so they stay aligned with the images
train_labels = train_labels[:1000]
test_labels = test_labels[:1000]

In [4]:
# Define a simple sequential model
def create_model():
    """Build and compile a small fully connected classifier.

    The network takes flattened 784-element inputs, applies a 512-unit ReLU
    layer followed by dropout at rate 0.2, and ends in a 10-unit layer with no
    activation. Because that last layer emits raw logits, the loss is
    configured with ``from_logits=True``.

    Returns:
        The compiled Sequential model (Adam optimizer, sparse categorical
        cross-entropy loss, sparse categorical accuracy metric).
    """
    layers = [
        keras.layers.Dense(512, activation='relu', input_shape=(784,)),
        keras.layers.Dropout(0.2),
        keras.layers.Dense(10),
    ]
    model = tf.keras.models.Sequential(layers)

    loss_fn = tf.losses.SparseCategoricalCrossentropy(from_logits=True)
    accuracy = tf.metrics.SparseCategoricalAccuracy()
    model.compile(optimizer='adam', loss=loss_fn, metrics=[accuracy])
    return model

# Create a basic model instance (fresh, untrained weights)
model = create_model()

# Display the model's architecture: one row per layer with output shape and parameter count
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 512)               401920    
_________________________________________________________________
dropout (Dropout)            (None, 512)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 10)                5130      
Total params: 407,050
Trainable params: 407,050
Non-trainable params: 0
_________________________________________________________________


In [5]:
# Save the model during and after training. Useful in case your computer crashes halfway through
checkpoint_path = "training_1/cp.ckpt"
checkpoint_dir = os.path.dirname(checkpoint_path)

# Callback that writes only the model's weights (not the full model) to
# `checkpoint_path`; verbose=1 logs a line each time it saves
cp_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_path,
    save_weights_only=True,
    verbose=1,
)

# Train the model with the callback attached
model.fit(
    train_images,
    train_labels,
    epochs=10,
    validation_data=(test_images, test_labels),
    callbacks=[cp_callback],  # Pass callback to training
)

# This may generate warnings related to saving the state of the optimizer.
# These warnings (and similar warnings throughout this notebook)
# are in place to discourage outdated usage, and can be ignored.

Epoch 1/10

Epoch 00001: saving model to training_1\cp.ckpt
Epoch 2/10

Epoch 00002: saving model to training_1\cp.ckpt
Epoch 3/10

Epoch 00003: saving model to training_1\cp.ckpt
Epoch 4/10

Epoch 00004: saving model to training_1\cp.ckpt
Epoch 5/10

Epoch 00005: saving model to training_1\cp.ckpt
Epoch 6/10

Epoch 00006: saving model to training_1\cp.ckpt
Epoch 7/10

Epoch 00007: saving model to training_1\cp.ckpt
Epoch 8/10

Epoch 00008: saving model to training_1\cp.ckpt
Epoch 9/10

Epoch 00009: saving model to training_1\cp.ckpt
Epoch 10/10

Epoch 00010: saving model to training_1\cp.ckpt


<tensorflow.python.keras.callbacks.History at 0x160213ef9a0>

In [9]:
ls {checkpoint_dir}

 Volume in drive C has no label.
 Volume Serial Number is DAEB-91B6

 Directory of C:\Users\antho\Desktop\programming\machine_learning_learning\tutorials\08_saving_loading\training_1

02/09/2021  07:38 AM    <DIR>          .
02/09/2021  07:38 AM    <DIR>          ..
02/09/2021  07:38 AM                71 checkpoint
02/09/2021  07:38 AM         4,886,673 cp.ckpt.data-00000-of-00001
02/09/2021  07:38 AM             1,222 cp.ckpt.index
               3 File(s)      4,887,966 bytes
               2 Dir(s)  227,544,252,416 bytes free


IMPORTANT - when loading the weights you have to create a model with the same architecture first. All the checkpoint
callback saves is the weights, not the model itself. So, let's load them.

In [11]:
# Create a basic model instance: same architecture, but freshly initialised weights
model = create_model()

# Evaluate the untrained model to get a baseline before any weights are restored
# (`acc` is a fraction, so it is multiplied by 100 to print a percentage)
loss, acc = model.evaluate(test_images, test_labels, verbose=2)
print("Untrained model, accuracy: {:5.2f}%".format(100 * acc))

32/32 - 0s - loss: 2.4033 - sparse_categorical_accuracy: 0.0850
Untrained model, accuracy:  8.50%


In [12]:
# Load the weights written by the checkpoint callback into the current model.
# Only weights were saved, so the model must already exist with a matching architecture.
model.load_weights(checkpoint_path)

# Re-evaluate the model now that it carries the restored weights
loss, acc = model.evaluate(test_images, test_labels, verbose=2)
print("Restored model, accuracy: {:5.2f}%".format(100 * acc))

32/32 - 0s - loss: 0.4258 - sparse_categorical_accuracy: 0.8660
Restored model, accuracy: 86.60%


But wait, there's more. 

We can also save with unique file names, for example by using the epoch in the name. We'll be saving every 5 epochs in this instance.

In [14]:
# Include the epoch in the file name (uses `str.format`)
checkpoint_path = "training_2/cp-{epoch:04d}.ckpt"
checkpoint_dir = os.path.dirname(checkpoint_path)

batch_size = 32

# An integer `save_freq` is counted in *batches*, not epochs, so "every 5 epochs"
# has to be converted: one epoch is ceil(n_samples / batch_size) batches.
# (The previous `5 * batch_size` only worked because 1000 samples / 32 happens
# to round up to 32 batches per epoch.)
batches_per_epoch = -(-len(train_images) // batch_size)  # ceiling division

# Create a callback that saves the model's weights every 5 epochs
cp_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_path,
    verbose=1,
    save_weights_only=True,
    save_freq=5 * batches_per_epoch)

# Create a new model instance
model = create_model()

# Save the initial (untrained) weights using the `checkpoint_path` format as epoch 0
model.save_weights(checkpoint_path.format(epoch=0))

# Train the model with the new callback.
# `batch_size` is passed explicitly so the batches-per-epoch computed above
# always matches what `fit` actually uses.
model.fit(train_images,
          train_labels,
          epochs=50,
          batch_size=batch_size,
          callbacks=[cp_callback],
          validation_data=(test_images, test_labels),
          verbose=0)


Epoch 00005: saving model to training_2\cp-0005.ckpt

Epoch 00010: saving model to training_2\cp-0010.ckpt

Epoch 00015: saving model to training_2\cp-0015.ckpt

Epoch 00020: saving model to training_2\cp-0020.ckpt

Epoch 00025: saving model to training_2\cp-0025.ckpt

Epoch 00030: saving model to training_2\cp-0030.ckpt

Epoch 00035: saving model to training_2\cp-0035.ckpt

Epoch 00040: saving model to training_2\cp-0040.ckpt

Epoch 00045: saving model to training_2\cp-0045.ckpt

Epoch 00050: saving model to training_2\cp-0050.ckpt


<tensorflow.python.keras.callbacks.History at 0x1614b5c6cd0>

In [15]:
ls {checkpoint_dir}

 Volume in drive C has no label.
 Volume Serial Number is DAEB-91B6

 Directory of C:\Users\antho\Desktop\programming\machine_learning_learning\tutorials\08_saving_loading\training_2

02/09/2021  07:42 AM    <DIR>          .
02/09/2021  07:42 AM    <DIR>          ..
02/09/2021  07:42 AM                81 checkpoint
02/09/2021  07:42 AM         1,628,726 cp-0000.ckpt.data-00000-of-00001
02/09/2021  07:42 AM               402 cp-0000.ckpt.index
02/09/2021  07:42 AM         4,886,685 cp-0005.ckpt.data-00000-of-00001
02/09/2021  07:42 AM             1,222 cp-0005.ckpt.index
02/09/2021  07:42 AM         4,886,685 cp-0010.ckpt.data-00000-of-00001
02/09/2021  07:42 AM             1,222 cp-0010.ckpt.index
02/09/2021  07:42 AM         4,886,685 cp-0015.ckpt.data-00000-of-00001
02/09/2021  07:42 AM             1,222 cp-0015.ckpt.index
02/09/2021  07:42 AM         4,886,685 cp-0020.ckpt.data-00000-of-00001
02/09/2021  07:42 AM             1,222 cp-0020.ckpt.index
02/09/2021  07:42 AM         4,88

In [16]:
# Look at all them files!

In [17]:
# Handy method to only get the latest checkpoint found in `checkpoint_dir`
latest = tf.train.latest_checkpoint(checkpoint_dir)
latest  # bare last expression so the notebook displays the returned path

'training_2\\cp-0050.ckpt'

In [19]:
# Test out the latest checkpoint
# Create a new model instance (fresh weights, same architecture)
model = create_model()

# Load the previously saved weights from the path returned by `latest_checkpoint`
model.load_weights(latest)

# Re-evaluate the model with the restored weights
loss, acc = model.evaluate(test_images, test_labels, verbose=2)
print("Restored model, accuracy: {:5.2f}%".format(100 * acc))

32/32 - 0s - loss: 0.4938 - sparse_categorical_accuracy: 0.8740
Restored model, accuracy: 87.40%


### Manually Saving the Weights

We can also manually save the model weights, as shown here. All this does is save the weights to a given file. Nothing too fancy here.

In [20]:
# Save the current model's weights by hand to an explicit path
model.save_weights('./checkpoints/my_checkpoint')

# Create a new model instance (its weights start out freshly initialised)
model = create_model()

# Restore the weights that were just written
model.load_weights('./checkpoints/my_checkpoint')

# Evaluate the model; it now carries the same weights as the model that was saved
loss, acc = model.evaluate(test_images, test_labels, verbose=2)
print("Restored model, accuracy: {:5.2f}%".format(100 * acc))

32/32 - 0s - loss: 0.4938 - sparse_categorical_accuracy: 0.8740
Restored model, accuracy: 87.40%


### Save the Whole Thing
Instead of having to create the exact model before we load the weights, we could just load the entire model from the file along with the weights. That would be nice if we wanted to share our model without having to tell the person exactly what architecture we used. So, let's do that.

In [21]:
# Create and train a new model instance.
model = create_model()
model.fit(train_images, train_labels, epochs=5)

# Save the entire model as a SavedModel.
# The parent directory is created from Python instead of shelling out to
# `mkdir -p`: that flag is not understood by the Windows shell (which creates
# a directory literally named "-p"), and `exist_ok=True` keeps re-runs quiet.
os.makedirs('saved_model', exist_ok=True)
model.save('saved_model/my_model')

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
INFO:tensorflow:Assets written to: saved_model/my_model\assets


In [30]:
# Let's see the damage: list the top-level `saved_model` directory...
print(os.listdir('./saved_model/'))

# ...and then the contents of the saved model's own directory
print(os.listdir('./saved_model/my_model/'))

['my_model']
['assets', 'saved_model.pb', 'variables']


In [29]:
# NOTE(review): this repeats the second listing printed in the previous cell
# and was executed out of order (In [29] after In [30]); candidate for removal.
os.listdir('./saved_model/my_model/')

['assets', 'saved_model.pb', 'variables']

In [31]:
# Load that model back in as its own object; no call to `create_model()` is needed
# because the architecture is read from the saved files.
new_model = tf.keras.models.load_model('saved_model/my_model')

# Check its architecture
new_model.summary()

Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_12 (Dense)             (None, 512)               401920    
_________________________________________________________________
dropout_6 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_13 (Dense)             (None, 10)                5130      
Total params: 407,050
Trainable params: 407,050
Non-trainable params: 0
_________________________________________________________________


In [32]:
# Hey, that looks like the model that we started with!

In [33]:
# Evaluate the restored model on the test subset
loss, acc = new_model.evaluate(test_images, test_labels, verbose=2)
print('Restored model, accuracy: {:5.2f}%'.format(100 * acc))

# Sanity-check that the loaded model can also predict: print the shape of its output
print(new_model.predict(test_images).shape)

32/32 - 0s - loss: 0.4283 - sparse_categorical_accuracy: 0.8610
Restored model, accuracy: 86.10%
(1000, 10)


In [34]:
# HDF5 file format: an alternative that stores the whole model in one `.h5` file
# rather than in a SavedModel directory.
# Create and train a new model instance.
model = create_model()
model.fit(train_images, train_labels, epochs=5)

# Save the entire model to a HDF5 file.
# The '.h5' extension indicates that the model should be saved to HDF5.
model.save('my_model.h5')

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [35]:
# Recreate the exact same model, including its weights and the optimizer,
# directly from the HDF5 file (no `create_model()` call required)
new_model = tf.keras.models.load_model('my_model.h5')

# Show the model architecture
new_model.summary()

Model: "sequential_7"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_14 (Dense)             (None, 512)               401920    
_________________________________________________________________
dropout_7 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_15 (Dense)             (None, 10)                5130      
Total params: 407,050
Trainable params: 407,050
Non-trainable params: 0
_________________________________________________________________


In [36]:
# Evaluate the model loaded from the HDF5 file on the test subset
loss, acc = new_model.evaluate(test_images, test_labels, verbose=2)
print('Restored model, accuracy: {:5.2f}%'.format(100 * acc))

32/32 - 0s - loss: 0.4386 - sparse_categorical_accuracy: 0.8570
Restored model, accuracy: 85.70%


## Conclusion
Pretty basic, but very important.