In [2]:
import tensorflow as tf
import numpy as np
from datetime import datetime
%load_ext tensorboard

# Modules, Layers and Models

A model is abstractly:
- a function that computes something on tensors (in a forward pass)
- Some variable that can be updated in a response to training

### Defining Models and Layers in Tensorflow
- Most models are made of layers
- Layers are functions with a known mathematical structure that can be reused and have trainable variables
- in Tensorflow most high-level implementations of layers and models, such as keras and sonnet, or built on the same foundational class **tf.Module**


Here's an example of a very simple tf.Module that operates on a scalar tensor:

In [4]:
class SimpleModule(tf.Module):
    def __init__(self, name=None):
        super(SimpleModule, self).__init__(name=name)
        self.a_variable = tf.Variable(5.0, name="train_me")
        self.non_trainable_variable = tf.Variable(5.0,trainable=False, name="do_not_train_me")
    def __call__(self,x):
        return self.a_variable * x + self.non_trainable_variable
    
simple_module = SimpleModule(name="simple")
simple_module(tf.constant(5.0))


<tf.Tensor: shape=(), dtype=float32, numpy=30.0>

- Modules and, by extension, layers are deep-learning terminology for "objects": they have internal state, and methods that use that state.
- You can set the trainability of variables on and off for any reason, including freezing layers and variables during fine-tuning.
- **Note: tf.Module is the base class for both tf.keras.layers.Layer and tf.keras.Model, so everything you come across here also applies in Keras. For historical compatibility reasons Keras layers do not collect variables from modules, so your models should use only modules or only Keras layers. However, the methods shown below for inspecting variables are the same in either case.**
- By subclassing tf.Module, any tf.Variable or tf.Module instances assigned to this object's properties are automatically collected. This allows you to save and load variables, and also create collections of tf.Modules.

In [5]:
# All trainable variables
print("Trainable Variables: ",simple_module.trainable_variables)
# Every Varaible
print("All Variables:", simple_module.variables)

Trainable Variables:  (<tf.Variable 'train_me:0' shape=() dtype=float32, numpy=5.0>,)
All Variables: (<tf.Variable 'train_me:0' shape=() dtype=float32, numpy=5.0>, <tf.Variable 'do_not_train_me:0' shape=() dtype=float32, numpy=5.0>)


In [6]:
# This is an example of a two-layer linear layer model made out of modules.
# First lets build a dense (linear) layer
class Dense(tf.Module):
    def __init__(self, in_features, out_features,name=None):
        super(Dense, self).__init__(name=name)
        self.w = tf.Variable(tf.random.normal([in_features, out_features]), name='w')
        self.b = tf.Variable(tf.zeros([out_features]), name='b')
    
    def __call__(self,x):
        y = tf.matmul(x,self.w) + self.b
        return tf.nn.relu(y)

#And then the complete model, which makes two layer instances and applies them:

class SequentialModule(tf.Module):
    def __init__(self, name=None):
        super(SequentialModule, self).__init__(name=name)
        self.dense1 = Dense(in_features=3, out_features=3)
        self.dense2 = Dense(in_features=3, out_features=2)

    def __call__(self,x):
        x = self.dense1(x)
        return self.dense2(x)
    
# You have made a model!
my_model = SequentialModule(name="the_model")
# Call it, with random results
print("Model results:", my_model(tf.constant([[2.0, 2.0, 2.0]])))

Model results: tf.Tensor([[0.4566276 2.815871 ]], shape=(1, 2), dtype=float32)


In [7]:
# tf.Module instances will automatically collect, recursively, any tf.Variable or tf.Module instances assigned to it. 
# This allows you to manage collections of tf.Modules with a single model instance, and save and load whole models.
print("Submodules: ", my_model.submodules)

Submodules:  (<__main__.Dense object at 0x000001C305BA62B0>, <__main__.Dense object at 0x000001C306287D90>)


In [8]:
for var in my_model.variables:
    print(var,"\n")

<tf.Variable 'b:0' shape=(3,) dtype=float32, numpy=array([0., 0., 0.], dtype=float32)> 

<tf.Variable 'w:0' shape=(3, 3) dtype=float32, numpy=
array([[-1.8656597 , -0.92340726,  1.6337233 ],
       [ 1.3419585 ,  0.32734588, -2.011836  ],
       [ 0.6673131 ,  1.4529397 ,  0.6999783 ]], dtype=float32)> 

<tf.Variable 'b:0' shape=(2,) dtype=float32, numpy=array([0., 0.], dtype=float32)> 

<tf.Variable 'w:0' shape=(3, 2) dtype=float32, numpy=
array([[-1.3162576 , -2.4800007 ],
       [ 0.750268  ,  1.9627956 ],
       [-0.70074105,  0.255436  ]], dtype=float32)> 



In [11]:
# Waiting to Create variables
# By deferring variable creation to the first time the module is called with a specific input shape, 
# you do not need specify the input size up front.
# This flexibility is why TensorFlow layers often only need to specify the shape of their outputs, 
# such as in tf.keras.layers.Dense, rather than both the input and output size.
class FlexibleDenseModule(tf.Module):
    def __init__(self, out_features, name=None):
        super(FlexibleDenseModule, self).__init__(name=name)
        self.is_built = False
        self.out_features = out_features
    def __call__(self, x):
        if not self.is_built:
            self.w = tf.Variable(tf.random.normal([x.shape[-1], self.out_features]), name='w')
            self.b = tf.zeros([self.out_features], name='b')
            self.is_built = True
        y = tf.matmul(x,self.w)+self.b
        return tf.nn.relu(y)

# Lets use this in a model
class MySequentialModule(tf.Module):
    def __init__(self, name=None):
        super(MySequentialModule, self).__init__(name=name)
        self.dense1 = FlexibleDenseModule(out_features=3)
        self.dense2 = FlexibleDenseModule(out_features=2)

    def __call__(self, x):
        x = self.dense1(x)
        return self.dense2(x)
    
my_model = MySequentialModule(name="my_model")
print("Model Results: ", my_model(tf.constant([[2.0, 2.0, 2.0]])))



Model Results:  tf.Tensor([[1.6514297 0.       ]], shape=(1, 2), dtype=float32)


### Saving Weights

- We can save a tf.Module as both a checkpoint and a SavedModel.
- Checkpoints are just the weights (that is, the values of the set of variables inside the module and its submodules)
- Checkpoints consist of two kinds of files: the data itself and an index file for metadata. 
- The index file keeps track of what is actually saved and the numbering of checkpoints, while the checkpoint data contains the variable values and their attribute lookup paths.
- We can look inside a checkpoint to be sure the whole collection of variables is saved, sorted by the Python object that contains them.

In [12]:
chkp_path = "my_checkpoint"
checkpoint = tf.train.Checkpoint(model=my_model)
checkpoint.write(chkp_path)

'my_checkpoint'

In [13]:
!ls my_checkpoint*

my_checkpoint.data-00000-of-00001
my_checkpoint.index


In [14]:
tf.train.list_variables(chkp_path)

[('_CHECKPOINTABLE_OBJECT_GRAPH', []),
 ('model/dense1/w/.ATTRIBUTES/VARIABLE_VALUE', [3, 3]),
 ('model/dense2/w/.ATTRIBUTES/VARIABLE_VALUE', [3, 2])]

In [16]:
# During distributed (multi-machine) training they can be sharded, which is why they are numbered (e.g., '00000-of-00001'). In this case, though, there is only have one shard.
# When you load models back in, you overwrite the values in your Python object.
# Note: As checkpoints are at the heart of long training workflows tf.checkpoint.CheckpointManager is a helper class that makes checkpoint management much easier. 
# Refer to the Training checkpoints guide for more details.
new_model = MySequentialModule()
new_checkpoint = tf.train.Checkpoint(model=new_model)
new_checkpoint.restore("my_checkpoint")

# Should be the same result as above
new_model(tf.constant([[2.0, 2.0, 2.0]]))

<tf.Tensor: shape=(1, 2), dtype=float32, numpy=array([[1.6514297, 0.       ]], dtype=float32)>

### Saving Functions
- TensorFlow can run models without the original Python objects, as demonstrated by TensorFlow Serving and TensorFlow Lite, even when you download a trained model from TensorFlow Hub.
- TensorFlow needs to know how to do the computations described in Python, but without the original code. To do this, you can make a graph, which is described in the Introduction to graphs and functions guide.
- This graph contains operations, or ops, that implement the function.
- We can define a graph in the model above by adding the @tf.function decorator to indicate that this code should run as a graph.

In [17]:
class MySequentialModule(tf.Module):
  def __init__(self, name=None):
    super().__init__(name=name)

    self.dense_1 = Dense(in_features=3, out_features=3)
    self.dense_2 = Dense(in_features=3, out_features=2)

  @tf.function
  def __call__(self, x):
    x = self.dense_1(x)
    return self.dense_2(x)

# You have made a model with a graph!
my_model = MySequentialModule(name="the_model")

# The module you have made works exactly the same as before. Each unique signature passed into the function creates a separate graph

In [18]:
print(my_model([[2.0, 2.0, 2.0]]))
print(my_model([[[2.0, 2.0, 2.0], [2.0, 2.0, 2.0]]]))

tf.Tensor([[7.049494 0.      ]], shape=(1, 2), dtype=float32)
tf.Tensor(
[[[7.049494 0.      ]
  [7.049494 0.      ]]], shape=(1, 2, 2), dtype=float32)


In [21]:
# We can visualize the graph by tracing it within a TensorBoard summary.

# Set up Logging
stamp = datetime.now().date()
logdir = "logs/func/%s" % stamp
writer = tf.summary.create_file_writer(logdir)
# Create a new model to get a fresh trace
# Otherwise the summary will not see the graph
new_mode = MySequentialModule()
# bracket the function call with tf.summary.trace_on() and tf.summary.trace_export()
tf.summary.trace_on(graph=True)
tf.profiler.experimental.start(logdir)
# Call only one tf.function when tracing
z = print(new_mode(tf.constant([[2.0, 2.0, 2.0]])))
with writer.as_default():
    tf.summary.trace_export(name="my_func_trace", step=0, profiler_outdir=logdir)


tf.Tensor([[0.        3.6446092]], shape=(1, 2), dtype=float32)


In [22]:
# Launch TensorBoard to view the resulting trace:
#docs_infra: no_execute
%tensorboard --logdir logs/func

### Creating a SavedModel
The recommended way of sharing completely trained models is to use SavedModel. SavedModel contains both a collection of functions and a collection of weights.

You can save the model you have just trained as follows:

In [23]:
tf.saved_model.save(my_model, "the_saved_model")

INFO:tensorflow:Assets written to: the_saved_model\assets


In [24]:
# Inspect the SavedModel in the directory
!ls -l the_saved_model

total 16
drwxr-xr-x 1 F85SJ00 1049089     0 Mar  1 12:06 assets
-rw-r--r-- 1 F85SJ00 1049089 14315 Mar  1 12:06 saved_model.pb
drwxr-xr-x 1 F85SJ00 1049089     0 Mar  1 12:06 variables


In [25]:
# The variables/ directory contains a checkpoint of the variables
!ls -l the_saved_model/variables

total 2
-rw-r--r-- 1 F85SJ00 1049089 490 Mar  1 12:06 variables.data-00000-of-00001
-rw-r--r-- 1 F85SJ00 1049089 356 Mar  1 12:06 variables.index


In [26]:
# The saved_model.pb file is a protocol buffer describing the functional tf.Graph.
# Models and layers can be loaded from this representation without actually making an instance of the class that created it. 
# This is desired in situations where you do not have (or want) a Python interpreter, such as serving at scale or on an edge device, 
# or in situations where the original Python code is not available or practical to use.
# You can load the model as new object:
new_model = tf.saved_model.load("the_saved_model")

In [27]:
# new_model created from loading a saved model, is an internal Tensorflow user object without any of the class knowledge. It is not of type SequentialModule
isinstance(new_model, SequentialModule)

False

In [29]:
# This new model works on the already-defined input signatures. You can't add more signatures to a model restored like this.
print(my_model([[2.0, 2.0, 2.0]]))
print(my_model([[[2.0, 2.0, 2.0], [2.0, 2.0, 2.0]]]))
# Thus, using SavedModel, you are able to save TensorFlow weights and graphs using tf.Module, and then load them again.

tf.Tensor([[7.049494 0.      ]], shape=(1, 2), dtype=float32)
tf.Tensor(
[[[7.049494 0.      ]
  [7.049494 0.      ]]], shape=(1, 2, 2), dtype=float32)
