In [1]:
import os
import numpy as np
from collections import OrderedDict

import tensorflow as tf
from tensorflow.python.keras.datasets import boston_housing
from tensorflow.contrib.eager.python import tfe

  from ._conv import register_converters as _register_converters


In [2]:
# enable eager mode
tf.enable_eager_execution()
tf.set_random_seed(0)
np.random.seed(0)

In [3]:
if not os.path.exists('weights/'):
    os.makedirs('weights/')

# constants
batch_size = 128
epochs = 26

# dataset loading
(x_train, y_train), (x_test, y_test) = boston_housing.load_data()

# normalization of dataset
mean = x_train.mean(axis=0)
std = x_train.std(axis=0)

x_train = (x_train - mean) / (std + 1e-8)
x_test = (x_test - mean) / (std + 1e-8)

x_train = x_train.astype('float32')
x_test = x_test.astype('float32')

print('x train', x_train.shape, x_train.mean(), x_train.std())
print('y train', y_train.shape, y_train.mean(), y_train.std())
print('x test', x_test.shape, x_test.mean(), x_test.std())
print('y test', y_test.shape, y_test.mean(), y_test.std())

x train (404, 13) 3.6316616e-10 1.0
y train (404,) 22.395049504950492 9.199035423364862
x test (102, 13) 0.02082699 0.98360837
y test (102,) 23.07843137254902 9.123806690181466


# Hybrid Models
Here, we are going to try to mix Keras Layers with Tensorflow Variables. 

Keras has a nice function - "add_variable" and "add_weights" which can be used in custom layers or models to register weight matrices generated from the backend independent of Keras. However, this feature has not yet been implemented in tf.keras for Eager mode. 

Therefore, we are going to do this completely in base Eager style code - defining our own gradients function, our own train and evaluate loop and model saving and loading.

Hopefully, this can be fixed in later versions of Tensorflow, so that we can just use the "self.add_variable(..)" inside our Model classes and go back to using Model.fit() and Model.predict().

In [4]:
# model definition
class CustomRegressor(tf.keras.Model):
    def __init__(self):
        super(CustomRegressor, self).__init__()
        # self.add_variable and self.add_weight are not yet supported
        self.custom_variables = OrderedDict()

        # we also use a keras layer along with a custom weight matrix
        self.hidden2 = tf.keras.layers.Dense(1)

    def call(self, inputs, training=None, mask=None):
        if 'hidden' not in self.custom_variables:
            # this is equivalent to a Dense layer from Keras (same as hidden2)
            hidden = tf.get_variable('hidden1', shape=[inputs.shape[-1], 1], dtype=tf.float32,
                                     initializer=tf.keras.initializers.he_normal())
            self.custom_variables['hidden'] = hidden

        output1 = tf.matmul(inputs, self.custom_variables['hidden'])
        output1 = tf.nn.relu(output1)

        output2 = self.hidden2(inputs)

        output = output1 + output2  # goofy layer ; just for demonstration purposes
        return output

# Gradients
Normally, Keras handles the gradient computation for us. However, for our hybrid model, only the Keras Dense layer is registered. The weights stored in the `custom_variables` dictionary are not managed by Keras at all, so we cannot depend on Keras' Model.fit() to update those weights.

Therefore, we go around this by using tf.GradientTape() to monitor and compute the gradients of all the variables inside the model, and then pass the loss and the grad_vars as the result.

## Note
It is important to remember that we need to explicitely pass a list of custom variables to the `tape.gradients()` function.

Here, we use an OrderedDict to manage our custom variables, and therefore we can access the variables using OrderedDict.values(). We then append these custom variables to the variables that are managed by Keras (inside `model.variables`).

In [5]:
def gradients(model, x, y):
    with tf.GradientTape() as tape:
        outputs = model(x)
        loss = tf.losses.mean_squared_error(y, outputs[:, 0])
        loss = tf.reduce_mean(loss)

    gradients = tape.gradient(loss, model.variables + list(model.custom_variables.values()))
    grad_vars = zip(gradients, model.variables + list(model.custom_variables.values()))
    return loss, grad_vars

# Train Loop

Since Keras does not directly manage all of the variables, we can't rely on the Model.fit() method anymore. So we create a training loop using the best practices of Eager execution.

These best practices are : 
- Using tf.data.Dataset() apis to manage the train and test sets
- Using Eager metrics (tfe.metrics) to calculate the per iteration loss
- Use tfe.Saver() to save the weights of the model

## Training Loop Phases

### Training Phase

In the training phase, we loop over the entire training dataset, compute the gradients, use the Optimizer to apply those gradients and then update our metrics.

For generating batches using Dataset API, it is beneficial to use an infinite generator and manage the cutoff points of the inner loop ourselves. Also, preprocessing can be done with .map() functions, which aren't needed here.

For metrics, it is useful to use tfe.metrics.Accuracy() or tfe.metrics.Mean() to compute the categorical accuracy or mean over the entire training set.

### Test Phase

In the test phase, we loop over the test dataset exactly one time. This is ensured by using Dataset.make_one_shot_iterator(). In Eager mode, when you use this, you can loop over the provided iterator just like normal, and it will generate the batches for the entire set exactly once, so no need to bother with clipping off the loop.

## Saving Weights
When saving weights of a hybrid model, several important points must be remembered.

- Keras maintains an automatic internal naming scheme. Therefore, when you create 2 models, they **will have different names for the Keras Layers**. Custom Variables are not affected by this thankfully, but this causes an issue when loading weights.
    - Since checkpoints depend on the layer names for saving and restoring, creating saving a model (with say subscript `_1` for all layers) and then attempting to restore a second instance of this model (with subscript `_2` for all layers) will throw an Error. The checkpoint cannot find the weights with the layer names (since it stores the layers with name subscript `_1`)
    - Easy fix = Use **tf.keras.backend.clear_session()** before restoring and creating the second model. This will reset the name counter to `_1`.


- Model weights and custom Variables are built only after the 1st call. 
    - When training, variables building is done automatically when we pass the batch of training samples so we dont have to worry when saving the model.
    - However, when we create a second model to restore, the weights are not yet built. We need to pass a dummy batch to force the model to finish building all of its weights with correct shapes.
    - This is also true for Keras models with **only Keras layers/submodels** when in Eager mode. A Keras model with only Keras layers is also not completely built unless you use Model._set_input(dummy_batch) on it or call it with a dummy batch before restoring weights to it.




In [6]:
device = '/cpu:0' if tfe.num_gpus() == 0 else '/gpu:0'

with tf.device(device):
    # build model and optimizer
    model = CustomRegressor()
    
    dummy_x = tf.zeros([1] + [x_train.shape[-1]])
    model._set_inputs(dummy_x)

    # Can no longer use Keras utility functions since we could not register the variable to keras properly
    # Whenever TF allows the addition of variables using Keras APIs, this will become easier like before
    optimizer = tf.train.AdamOptimizer(1.0)

    # wrap with datasets to make life slightly easier
    train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
    train_dataset = train_dataset.batch(batch_size).shuffle(100).repeat().prefetch(20)

    test_dataset = tf.data.Dataset.from_tensor_slices((x_test, y_test))
    test_dataset = test_dataset.batch(batch_size)

    # train the model
    num_batch_per_epoch = len(x_train) // batch_size + 1
    for e in range(epochs):

        # measure the losses
        train_loss = tfe.metrics.Mean()
        test_loss = tfe.metrics.Mean()

        for b, (x, y) in enumerate(train_dataset):
            loss, grads = gradients(model, x, y)
            optimizer.apply_gradients(grads, tf.train.get_or_create_global_step())

            # update the running training loss
            train_loss(loss)

            if b >= num_batch_per_epoch:
                break

        # evaluate after epoch
        iterator = test_dataset.make_one_shot_iterator()  # dont repeat any values from test set
        for x, y in iterator:
            preds = model(x)
            loss = tf.losses.mean_squared_error(y, preds[:, 0])

            test_loss(loss)

        print("Epoch %d: Train Loss = %0.4f | Test Loss = %0.4f\n" % (e + 1, train_loss.result(), test_loss.result()))

    # Make sure to add not just the "model" variables, but also the custom variables we added !
    saver = tfe.Saver(model.variables + list(model.custom_variables.values()))
    saver.save('weights/10_01_custom_models/weights.ckpt')
    print("Model saved")

Epoch 1: Train Loss = 304.1588 | Test Loss = 215.8727

Epoch 2: Train Loss = 224.8164 | Test Loss = 98.4446

Epoch 3: Train Loss = 78.5605 | Test Loss = 76.8298

Epoch 4: Train Loss = 75.1571 | Test Loss = 82.2543

Epoch 5: Train Loss = 129.3626 | Test Loss = 76.2721

Epoch 6: Train Loss = 105.4899 | Test Loss = 56.4735

Epoch 7: Train Loss = 42.5363 | Test Loss = 54.0988

Epoch 8: Train Loss = 44.8493 | Test Loss = 30.6633

Epoch 9: Train Loss = 63.8818 | Test Loss = 26.2789

Epoch 10: Train Loss = 28.5036 | Test Loss = 33.6216

Epoch 11: Train Loss = 25.2791 | Test Loss = 16.5017

Epoch 12: Train Loss = 39.0379 | Test Loss = 18.6802

Epoch 13: Train Loss = 31.4691 | Test Loss = 19.9136

Epoch 14: Train Loss = 20.5009 | Test Loss = 17.1845

Epoch 15: Train Loss = 24.8141 | Test Loss = 15.9176

Epoch 16: Train Loss = 28.9241 | Test Loss = 15.6662

Epoch 17: Train Loss = 22.7698 | Test Loss = 15.2226

Epoch 18: Train Loss = 20.3520 | Test Loss = 14.0791

Epoch 19: Train Loss = 23.5152 |

# Restore the weights to the model

- As suggested above, we clear the backend to reset the naming counter in tf.keras, then we generate the model.
- We build the model using a dummy batch
- Load a new Saver object, and pass the custom variables and Layer variables to this Saver and restore
- After this, we can use the model just as during training

In [7]:
# clear the previous session
tf.keras.backend.clear_session()

with tf.device(device):    
    # Now we restore the model and predict again on test set
    model2 = CustomRegressor()

    # we need to run the model at least once to build all of the variables and the custom variables
    # make sure to build the model the same way, otherwise it wont find the weights in the checkpoints properlyoperly
    # safest option is to call model._set_inputs(tf_input_batch) explicitly
    dummy_x = tf.zeros([1] + [x_train.shape[-1]])
    model2._set_inputs(dummy_x)
    
    # ensure that you are loading both the Keras variables AND the custom variables
    saver2 = tfe.Saver(model2.variables + list(model2.custom_variables.values()))
    saver2.restore('weights/10_01_custom_models/weights.ckpt')
    print("Weights restored")

    # evaluate the results
    iterator = test_dataset.make_one_shot_iterator()  # dont repeat any values from test set
    test_loss = tfe.metrics.Mean()

    for x, y in iterator:
        preds = model2(x)
        loss = tf.losses.mean_squared_error(y, preds[:, 0])

        test_loss(loss)

    print("Test Loss = %0.4f\n" % (test_loss.result()))

INFO:tensorflow:Restoring parameters from weights/10_custom_models/weights.ckpt
Weights restored
Test Loss = 14.1264

