# Load and save models

A model consists of:
- An architecture
- The weights (the state of the model)
- An optimizer (this lets you resume training where you left off)
- A set of losses and metrics

Using the Keras API you can:
- Save everything (TF SavedModel) (standard practice)
- Save the architecture (JSON)
- Save the weights (when training)

In [1]:
# Setup
import numpy as np
import tensorflow as tf
from tensorflow import keras

## 1. Saving everything
You can use:
- TensorFlow SavedModel format.
- Keras H5 format (old).

### TensorFlow SavedModel:

In [2]:
# Let's define a model architecture
def get_model():
    """Build and compile a minimal single-layer regression model.

    The network maps a 32-feature input to one output through a single
    ``Dense(1)`` layer and is compiled with the Adam optimizer and a
    mean-squared-error loss.

    Returns:
        The compiled ``keras.Model``.
    """
    features = keras.Input(shape=(32,))
    prediction = keras.layers.Dense(1)(features)

    compiled = keras.Model(features, prediction)
    compiled.compile(optimizer="adam", loss="mean_squared_error")
    return compiled


In [3]:
# Imagine that now you train the model
model = get_model()

test_input = np.random.random((128, 32))
test_target = np.random.random((128, 1))
model.fit(test_input, test_target)



<keras.callbacks.History at 0x7f45e775b750>

In [4]:
# Once the model is trained, you can save everything in SavedModel format as
# follows:
model.save("my_model")  # creates a folder 'my_model'

INFO:tensorflow:Assets written to: my_model/assets


In [5]:
# To load the saved model, you can do:
reconstructed_model = keras.models.load_model("my_model")

# Test:
np.testing.assert_allclose(
    model.predict(test_input), reconstructed_model.predict(test_input)
)


In [6]:
# Since the optimizer has been saved too, you can resume training:
reconstructed_model.fit(test_input, test_target)




<keras.callbacks.History at 0x7f45e7435cd0>

In [7]:
# Let's inspect what is in the folder:
%ls my_model

[0m[01;34massets[0m/  keras_metadata.pb  saved_model.pb  [01;34mvariables[0m/


- saved_model.pb -> model architecture + training configuration (optimizer, losses, metrics)
- variables/ -> model weights

#### Saving custom models:
When saving the model and its layers, the SavedModel format stores the class name, call function, losses, and weights (and the config, if implemented). It is always a good practice to define the get_config and from_config methods when writing a custom model or layer class.

In [8]:
# Create a custom model
class CustomModel(keras.Model):
    """Subclassed model that chains a stack of Dense layers.

    Args:
        hidden_units: Iterable with the number of units of each Dense layer,
            in the order in which the layers are applied.
    """

    def __init__(self, hidden_units):
        # Zero-argument super() does not look up the global name
        # 'CustomModel', so the class keeps working through a retained
        # reference even after the name is deleted from the namespace.
        super().__init__()
        self.hidden_units = hidden_units
        self.dense_layers = [keras.layers.Dense(u) for u in hidden_units]

    def call(self, inputs):
        """The call defines the computation graph of the model"""
        x = inputs
        for layer in self.dense_layers:
            x = layer(x)
        return x

    # Let's define 'get_config' and 'from_config' methods to load and save this
    # custom model
    def get_config(self):
        """Return the constructor arguments needed to rebuild the model."""
        return {"hidden_units": self.hidden_units}

    @classmethod
    def from_config(cls, config):
        """Create a model from a dict produced by ``get_config``."""
        return cls(**config)


# Try the model with an input
model = CustomModel([16, 16, 10])
# Build the model by calling it
input_arr = tf.random.uniform((1, 5))
outputs = model(input_arr)

# And save the model
model.save("my_model")


INFO:tensorflow:Assets written to: my_model/assets


In [9]:
# Option 1: Load the model with 'custom_objects':
loaded_1 = keras.models.load_model(
    "my_model", custom_objects={"CustomModel": CustomModel}
)



In [10]:
# Option 2: Load the model without the CustomClass
del CustomModel
loaded_2 = keras.models.load_model("my_model")



In [11]:
# Test both models
np.testing.assert_allclose(loaded_1(input_arr), outputs)
np.testing.assert_allclose(loaded_2(input_arr), outputs)

### Keras H5 format (old)

In [12]:
model = get_model()

test_input = np.random.random((128, 32))
test_target = np.random.random((128, 1))
model.fit(test_input, test_target)

# Since we are giving the 'h5' extension, the model will be saved using HDF5.
model.save("my_h5_model.h5")

# Again, we can load the model from disk
reconstructed_model = keras.models.load_model("my_h5_model.h5")

# Test
np.testing.assert_allclose(
    model.predict(test_input), reconstructed_model.predict(test_input)
)

# Again, training can be resumed:
reconstructed_model.fit(test_input, test_target)



<keras.callbacks.History at 0x7f45e4a2b650>

It's recommended to use TF SavedModel instead of Keras H5.

## 2. Saving the architecture

This applies to models defined using the Sequential or Functional APIs, not to subclassed models.

### get_config(), from_config()

In [13]:
# 1. Sequential model

# Let's define a sequential model
model = keras.Sequential([keras.Input((32,)), keras.layers.Dense(1)])

# Get the config (architecture)
config = model.get_config()

# From that config, create a new identical architecture
new_model = keras.Sequential.from_config(config)

config

{'layers': [{'class_name': 'InputLayer',
   'config': {'batch_input_shape': (None, 32),
    'dtype': 'float32',
    'name': 'input_3',
    'ragged': False,
    'sparse': False}},
  {'class_name': 'Dense',
   'config': {'activation': 'linear',
    'activity_regularizer': None,
    'bias_constraint': None,
    'bias_initializer': {'class_name': 'Zeros', 'config': {}},
    'bias_regularizer': None,
    'dtype': 'float32',
    'kernel_constraint': None,
    'kernel_initializer': {'class_name': 'GlorotUniform',
     'config': {'seed': None}},
    'kernel_regularizer': None,
    'name': 'dense_8',
    'trainable': True,
    'units': 1,
    'use_bias': True}}],
 'name': 'sequential'}

In [14]:
# 2. Functional model

# Let's define a model
inputs = keras.Input((32,))
outputs = keras.layers.Dense(1)(inputs)
model = keras.Model(inputs, outputs)

# Get the config (architecture)
config = model.get_config()

# From that config, create a new identical architecture
new_model = keras.Model.from_config(config)

config

{'input_layers': [['input_4', 0, 0]],
 'layers': [{'class_name': 'InputLayer',
   'config': {'batch_input_shape': (None, 32),
    'dtype': 'float32',
    'name': 'input_4',
    'ragged': False,
    'sparse': False},
   'inbound_nodes': [],
   'name': 'input_4'},
  {'class_name': 'Dense',
   'config': {'activation': 'linear',
    'activity_regularizer': None,
    'bias_constraint': None,
    'bias_initializer': {'class_name': 'Zeros', 'config': {}},
    'bias_regularizer': None,
    'dtype': 'float32',
    'kernel_constraint': None,
    'kernel_initializer': {'class_name': 'GlorotUniform',
     'config': {'seed': None}},
    'kernel_regularizer': None,
    'name': 'dense_9',
    'trainable': True,
    'units': 1,
    'use_bias': True},
   'inbound_nodes': [[['input_4', 0, 0, {}]]],
   'name': 'dense_9'}],
 'name': 'model_2',
 'output_layers': [['dense_9', 0, 0]]}

In [15]:
# 3. Layer


# Let's define a layer
layer = keras.layers.Dense(3, activation="relu")

# Get the config (architecture)
layer_config = layer.get_config()

# From that config, create a new identical architecture
new_layer = keras.layers.Dense.from_config(layer_config)

layer_config

{'activation': 'relu',
 'activity_regularizer': None,
 'bias_constraint': None,
 'bias_initializer': {'class_name': 'Zeros', 'config': {}},
 'bias_regularizer': None,
 'dtype': 'float32',
 'kernel_constraint': None,
 'kernel_initializer': {'class_name': 'GlorotUniform',
  'config': {'seed': None}},
 'kernel_regularizer': None,
 'name': 'dense_10',
 'trainable': True,
 'units': 3,
 'use_bias': True}

### JSON

This is similar to get_config and from_config, but the model can be loaded without the original model class.

In [16]:
# Define a model, for example a Sequential one
model = keras.Sequential([keras.Input((32,)), keras.layers.Dense(1)])

# Export the model architecture to JSON
json_config = model.to_json()

# Load the architecture from the JSON. You don't need to know the model is a 
# Sequential one.
new_model = keras.models.model_from_json(json_config)

json_config

'{"class_name": "Sequential", "config": {"name": "sequential_1", "layers": [{"class_name": "InputLayer", "config": {"batch_input_shape": [null, 32], "dtype": "float32", "sparse": false, "ragged": false, "name": "input_5"}}, {"class_name": "Dense", "config": {"name": "dense_11", "trainable": true, "dtype": "float32", "units": 1, "activation": "linear", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}]}, "keras_version": "2.7.0", "backend": "tensorflow"}'

#### Custom objects

TODO: When studying https://www.tensorflow.org/guide/keras/custom_layers_and_models

## 3. Saving the weights

This is useful for:
- Inference
- Transfer Learning


In [17]:
# Example 1: Transferring weights from one layer to another

def create_layer():
    """Return a built 64-unit ReLU Dense layer named "dense_2"."""
    dense = keras.layers.Dense(64, activation="relu", name="dense_2")
    # Build with an explicit input shape (784 features, any batch size).
    dense.build((None, 784))
    return dense

layer1 = create_layer()
layer2 = create_layer()

layer2.set_weights(layer1.get_weights())

In [18]:
# Example 2: Transferring weights from one model to another

def create_model():
    """Build an uncompiled functional MLP with two hidden layers.

    The model takes a 784-feature input named "digits", applies two 64-unit
    ReLU Dense layers ("dense_1" and "dense_12") and ends with a 10-unit
    Dense layer named "predictions".

    Returns:
        The ``keras.Model`` named "3_layer_mlp".
    """
    digits = keras.Input(shape=(784,), name="digits")
    hidden = keras.layers.Dense(64, activation="relu", name="dense_1")(digits)
    hidden = keras.layers.Dense(64, activation="relu", name="dense_12")(hidden)
    predictions = keras.layers.Dense(10, name="predictions")(hidden)
    return keras.Model(inputs=digits, outputs=predictions, name="3_layer_mlp")

model1 = create_model()
model2 = create_model()

# Call one model to create the weights
dummy_input = tf.ones((1,784))
model1(dummy_input)

# Copy weights
model2.set_weights(model1.get_weights())

assert len(model1.weights) == len(model2.weights)
for a, b in zip(model1.weights, model2.weights):
    np.testing.assert_allclose(a.numpy(), b.numpy())

Stateless layers do not affect weight transfer.

In [19]:
def create_model_with_stateless_layer():
    """Build the same MLP as ``create_model`` plus a Dropout layer.

    A ``Dropout(0.5)`` layer is inserted between the second hidden layer and
    the "predictions" layer; every Dense layer keeps the same size and name.

    Returns:
        The ``keras.Model`` named "3_layer_mlp".
    """
    digits = keras.Input(shape=(784,), name="digits")
    hidden = keras.layers.Dense(64, activation="relu", name="dense_1")(digits)
    hidden = keras.layers.Dense(64, activation="relu", name="dense_12")(hidden)
    dropped = keras.layers.Dropout(0.5)(hidden)
    predictions = keras.layers.Dense(10, name="predictions")(dropped)
    return keras.Model(inputs=digits, outputs=predictions, name="3_layer_mlp")

model1 = create_model()
model2 = create_model_with_stateless_layer()

# Call one model to create the weights
dummy_input = tf.ones((1,784))
model1(dummy_input)

# Copy weights
model2.set_weights(model1.get_weights())

assert len(model1.weights) == len(model2.weights)
for a, b in zip(model1.weights, model2.weights):
    np.testing.assert_allclose(a.numpy(), b.numpy())

We can save a model's weights in two different formats:
- TensorFlow checkpoint
- HDF5

We can use the 'save_format' argument or the output file extension to specify the format.

### TensorFlow checkpoints

In [20]:
# TF Checkpoint format

model = create_model()

model.save_weights("ckpt")  # save weights
load_status = model.load_weights("ckpt")  # load weights

# Assert that all variables have been restored from 
# the checkpoint
load_status.assert_consumed()

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7f45e4a97990>

In [21]:
# Let's see a Transfer Learning example

# First, we instantiate our previously used model
model = create_model()

# Then, we create another similar model that excludes the final layer
pretrained_model = keras.Model(model.inputs, model.layers[-1].input, name="pretrained_model")

# And assign random weights (to simulate pretraining)
for w in pretrained_model.weights:
  w.assign(tf.random.normal(w.shape))
pretrained_model.save_weights("pretrained_ckpt")
pretrained_model.summary()

# Now, we have a new model that shares the backbone with the pretrained one
inputs = keras.Input(shape=(784,), name="digits")
x = keras.layers.Dense(64, activation="relu", name="dense_1")(inputs)
x = keras.layers.Dense(64, activation="relu", name="dense_2")(x)
outputs = keras.layers.Dense(5, name="predictions")(x)
new_model = keras.Model(inputs=inputs, outputs=outputs, name="new_model")

# Since the only different layer is the last one, which is not saved on the
# pretrained checkpoint, we can load the pretrained weights
new_model.load_weights("pretrained_ckpt")
new_model.summary()

# assert all of the pretrained weights have been loaded
for a, b in zip(pretrained_model.weights, new_model.weights):
    np.testing.assert_allclose(a.numpy(), b.numpy())

Model: "pretrained_model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 digits (InputLayer)         [(None, 784)]             0         
                                                                 
 dense_1 (Dense)             (None, 64)                50240     
                                                                 
 dense_12 (Dense)            (None, 64)                4160      
                                                                 
Total params: 54,400
Trainable params: 54,400
Non-trainable params: 0
_________________________________________________________________
Model: "new_model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 digits (InputLayer)         [(None, 784)]             0         
                                                                 
 dense_1 (Dense)           

In [22]:
# Same example but using the Sequential API

# Define a sequential model that shares the backbone with the pretrained one
model = keras.Sequential([pretrained_model, keras.layers.Dense(5, name="predictions")])

# Now, load the weights. Note here that we are using the pretrained_model
pretrained_model.load_weights("pretrained_ckpt")
# model.load_weights("pretrained_ckpt") -> this won't work

# assert the weights
# assert all of the pretrained weights have been loaded
for a, b in zip(pretrained_model.weights, model.weights):
    np.testing.assert_allclose(a.numpy(), b.numpy())

pretrained_model.summary()
model.summary()

Model: "pretrained_model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 digits (InputLayer)         [(None, 784)]             0         
                                                                 
 dense_1 (Dense)             (None, 64)                50240     
                                                                 
 dense_12 (Dense)            (None, 64)                4160      
                                                                 
Total params: 54,400
Trainable params: 54,400
Non-trainable params: 0
_________________________________________________________________
Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 pretrained_model (Functiona  (None, 64)               54400     
 l)                                                              
                        

It is generally recommended to stick to the same API for building models.

We can use tf.train.Checkpoint to save and restore exact layers/variables.

In [24]:
# Create a model, and save the weights of the first and last layers.
init_model = create_model()
first_dense = init_model.layers[1]
last_dense = init_model.layers[-1]
ckpt_path = tf.train.Checkpoint(
    dense=first_dense,
    kernel=last_dense.kernel,
    bias=last_dense.bias
).save("ckpt")

# Define another model with the same first and last layers
class ContrivedModel(keras.Model):
    """Model made of a Dense first layer and a hand-written final layer.

    The final layer is expressed with raw ``kernel`` and ``bias`` variables
    of shapes ``(64, 10)`` and ``(10,)`` instead of a ``Dense`` layer.
    """

    def __init__(self):
        super().__init__()
        self.first_dense = keras.layers.Dense(64)
        # 'add_weight' replaces the deprecated 'add_variable' alias. Keyword
        # arguments keep the call independent of the positional order.
        self.kernel = self.add_weight(name="kernel", shape=(64, 10))
        self.bias = self.add_weight(name="bias", shape=(10,))

    def call(self, inputs):
        """Apply the first Dense layer, then the manual affine projection."""
        x = self.first_dense(inputs)
        return tf.matmul(x, self.kernel) + self.bias

model = ContrivedModel()
_ = model(tf.ones((1, 784)))  # create the variables of the dense layer

# And load the previous weights
tf.train.Checkpoint(
    dense=model.first_dense,
    kernel=model.kernel,
    bias=model.bias
).restore(ckpt_path)

  app.launch_new_instance()


<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7f45e0ce1790>

### HDF5 format

A model can use an HDF5 checkpoint if it has the same layers and trainable statuses as saved in the checkpoint.

In [25]:
# Define a sequential model
sequential_model = keras.Sequential(
    [
        keras.Input(shape=(784,), name="digits"),
        keras.layers.Dense(64, activation="relu", name="dense_1"),
        keras.layers.Dense(64, activation="relu", name="dense_2"),
        keras.layers.Dense(10, name="predictions"),
    ]
)

# To load and save the weights in HDF5 we use:
sequential_model.save_weights("weights.h5")
sequential_model.load_weights("weights.h5")

Changing layer.trainable may result in a different layer.weights ordering when the model contains nested layers.

In [31]:
class NestedDenseLayer(keras.layers.Layer):
    """Layer that wraps two Dense sub-layers applied one after the other.

    Args:
        units: Number of units used by both inner Dense layers.
        name: Optional name forwarded to the base ``Layer`` constructor.
    """

    def __init__(self, units, name=None):
        super().__init__(name=name)
        self.dense_1 = keras.layers.Dense(units, name="dense_1")
        self.dense_2 = keras.layers.Dense(units, name="dense_2")

    def call(self, inputs):
        """Feed ``inputs`` through ``dense_1`` and then ``dense_2``."""
        hidden = self.dense_1(inputs)
        return self.dense_2(hidden)

# Print model's weights
nested_model = keras.Sequential([keras.Input((784,)), NestedDenseLayer(10, "nested")])
variable_names = [v.name for v in nested_model.weights]
print(variable_names)

# Set one layer as not trainable, and print model's weights again
nested_model.get_layer("nested").dense_1.trainable = False
variable_names_2 = [v.name for v in nested_model.weights]
print(variable_names_2)
print("variable ordering changed:", variable_names != variable_names_2)


['nested/dense_1/kernel:0', 'nested/dense_1/bias:0', 'nested/dense_2/kernel:0', 'nested/dense_2/bias:0']
['nested/dense_2/kernel:0', 'nested/dense_2/bias:0', 'nested/dense_1/kernel:0', 'nested/dense_1/bias:0']
variable ordering changed: True


In [34]:
# Transfer Learning example
def create_functional_model():
    """Build an uncompiled functional MLP for the HDF5 examples.

    The model takes a 784-feature input named "digits", applies two 64-unit
    ReLU Dense layers ("dense_1" and "dense_2") and ends with a 10-unit Dense
    layer named "predictions".

    Returns:
        The ``keras.Model`` named "3_layer_mlp".
    """
    digits = keras.Input(shape=(784,), name="digits")
    hidden = keras.layers.Dense(64, activation="relu", name="dense_1")(digits)
    hidden = keras.layers.Dense(64, activation="relu", name="dense_2")(hidden)
    predictions = keras.layers.Dense(10, name="predictions")(hidden)

    mlp = keras.Model(inputs=digits, outputs=predictions, name="3_layer_mlp")
    return mlp

# Create a model and save its weights in HDF5
functional_model = create_functional_model()
functional_model.save_weights("pretrained_weights.h5")

# Now create a model with the same architecture and load the weights
pretrained_model = create_functional_model()
pretrained_model.load_weights("pretrained_weights.h5")

# Remove the last layer and append a new one
extracted_layers = pretrained_model.layers[:-1]
extracted_layers.append(keras.layers.Dense(5, name="dense_3"))
model = keras.Sequential(extracted_layers)
model.summary()

Model: "sequential_10"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_1 (Dense)             (None, 64)                50240     
                                                                 
 dense_2 (Dense)             (None, 64)                4160      
                                                                 
 dense_3 (Dense)             (None, 5)                 325       
                                                                 
Total params: 54,725
Trainable params: 54,725
Non-trainable params: 0
_________________________________________________________________
