In [2]:
import tensorflow as tf
import numpy as np
from datetime import datetime
%load_ext tensorboard

# Modules, Layers and Models

A model is abstractly:
- a function that computes something on tensors (in a forward pass)
- some variables that can be updated in response to training

### Defining Models and Layers in Tensorflow
- Most models are made of layers
- Layers are functions with a known mathematical structure that can be reused and have trainable variables
- In TensorFlow, most high-level implementations of layers and models, such as Keras and Sonnet, are built on the same foundational class: **tf.Module**


Here's an example of a very simple tf.Module that operates on a scalar tensor:

In [4]:
class SimpleModule(tf.Module):
    """Toy module that computes ``a_variable * x + non_trainable_variable``.

    It owns two scalar variables, both initialised to 5.0: one trainable
    and one created with ``trainable=False``.
    """

    def __init__(self, name=None):
        super().__init__(name=name)
        self.a_variable = tf.Variable(5.0, name="train_me")
        # Created with trainable=False, so it is left out of `trainable_variables`.
        self.non_trainable_variable = tf.Variable(
            5.0, trainable=False, name="do_not_train_me"
        )

    def __call__(self, x):
        """Scale ``x`` by the trainable variable, then add the non-trainable one."""
        scaled = self.a_variable * x
        return scaled + self.non_trainable_variable
    
# Instantiate the module and call it on a scalar constant; as the last
# expression in the cell, the returned tensor is shown as the cell output.
simple_module = SimpleModule(name="simple")
simple_module(tf.constant(5.0))


<tf.Tensor: shape=(), dtype=float32, numpy=30.0>

- Modules and, by extension, layers are deep-learning terminology for "objects": they have internal state, and methods that use that state.
- You can set the trainability of variables on and off for any reason, including freezing layers and variables during fine-tuning.
- **Note: tf.Module is the base class for both tf.keras.layers.Layer and tf.keras.Model, so everything you come across here also applies in Keras. For historical compatibility reasons Keras layers do not collect variables from modules, so your models should use only modules or only Keras layers. However, the methods shown below for inspecting variables are the same in either case.**
- By subclassing tf.Module, any tf.Variable or tf.Module instances assigned to this object's properties are automatically collected. This allows you to save and load variables, and also create collections of tf.Modules.

In [5]:
# All trainable variables
print("Trainable Variables: ",simple_module.trainable_variables)
# Every variable, trainable or not
print("All Variables:", simple_module.variables)

Trainable Variables:  (<tf.Variable 'train_me:0' shape=() dtype=float32, numpy=5.0>,)
All Variables: (<tf.Variable 'train_me:0' shape=() dtype=float32, numpy=5.0>, <tf.Variable 'do_not_train_me:0' shape=() dtype=float32, numpy=5.0>)


In [6]:
# This is an example of a two-layer linear layer model made out of modules.
# First, let's build a dense (linear) layer
class Dense(tf.Module):
    """Fully connected layer followed by a ReLU, built directly on tf.Module.

    Args:
        in_features: size of the last dimension of the inputs.
        out_features: number of output units.
        name: optional module name forwarded to tf.Module.
    """

    def __init__(self, in_features, out_features, name=None):
        super().__init__(name=name)
        weight_shape = [in_features, out_features]
        # Weights are drawn from a normal distribution; biases start at zero.
        self.w = tf.Variable(tf.random.normal(weight_shape), name='w')
        self.b = tf.Variable(tf.zeros([out_features]), name='b')

    def __call__(self, x):
        """Return ``relu(matmul(x, w) + b)``."""
        pre_activation = tf.matmul(x, self.w) + self.b
        return tf.nn.relu(pre_activation)

# And then the complete model, which creates two layer instances and applies them in sequence:

class SequentialModule(tf.Module):
    """Two Dense layers applied back to back (3 -> 3 -> 2 features)."""

    def __init__(self, name=None):
        super().__init__(name=name)
        # Layers are stored as attributes so the module can collect them as submodules.
        self.dense1 = Dense(in_features=3, out_features=3)
        self.dense2 = Dense(in_features=3, out_features=2)

    def __call__(self, x):
        """Run ``x`` through dense1, then feed that result to dense2."""
        hidden = self.dense1(x)
        return self.dense2(hidden)
    
# You have made a model!
my_model = SequentialModule(name="the_model")
# Call it on a single 3-feature row. The weights are randomly initialized
# (no seed is set), so the printed values differ from run to run.
print("Model results:", my_model(tf.constant([[2.0, 2.0, 2.0]])))

Model results: tf.Tensor([[0.4566276 2.815871 ]], shape=(1, 2), dtype=float32)


In [7]:
# A tf.Module instance automatically and recursively collects any tf.Variable or tf.Module instances assigned to it.
# This allows you to manage collections of tf.Modules with a single model instance, and save and load whole models.
print("Submodules: ", my_model.submodules)

Submodules:  (<__main__.Dense object at 0x000001C305BA62B0>, <__main__.Dense object at 0x000001C306287D90>)


In [8]:
# Print every variable the model tracks, including those owned by its Dense submodules.
for tracked_variable in my_model.variables:
    print(tracked_variable, "\n")

<tf.Variable 'b:0' shape=(3,) dtype=float32, numpy=array([0., 0., 0.], dtype=float32)> 

<tf.Variable 'w:0' shape=(3, 3) dtype=float32, numpy=
array([[-1.8656597 , -0.92340726,  1.6337233 ],
       [ 1.3419585 ,  0.32734588, -2.011836  ],
       [ 0.6673131 ,  1.4529397 ,  0.6999783 ]], dtype=float32)> 

<tf.Variable 'b:0' shape=(2,) dtype=float32, numpy=array([0., 0.], dtype=float32)> 

<tf.Variable 'w:0' shape=(3, 2) dtype=float32, numpy=
array([[-1.3162576 , -2.4800007 ],
       [ 0.750268  ,  1.9627956 ],
       [-0.70074105,  0.255436  ]], dtype=float32)> 



In [11]:
# Waiting to Create variables
# By deferring variable creation to the first time the module is called with a specific input shape,
# you do not need to specify the input size up front.
# This flexibility is why TensorFlow layers often only need to specify the shape of their outputs, 
# such as in tf.keras.layers.Dense, rather than both the input and output size.
class FlexibleDenseModule(tf.Module):
    """Dense + ReLU layer that infers its input size on the first call.

    Args:
        out_features: number of output units.
        name: optional module name forwarded to tf.Module.
    """

    def __init__(self, out_features, name=None):
        super().__init__(name=name)
        self.is_built = False
        self.out_features = out_features

    def __call__(self, x):
        """Create the parameters on first use, then return ``relu(matmul(x, w) + b)``.

        Args:
            x: input tensor; its last dimension fixes the number of input
                features the first time the module is called.
        """
        if not self.is_built:
            # The input width is only known at call time, so the variables are created here.
            self.w = tf.Variable(
                tf.random.normal([x.shape[-1], self.out_features]), name='w')
            # The bias must be a tf.Variable rather than a plain tensor, so that the
            # module tracks it in `variables` / `trainable_variables` alongside `w`.
            self.b = tf.Variable(tf.zeros([self.out_features]), name='b')
            self.is_built = True
        y = tf.matmul(x, self.w) + self.b
        return tf.nn.relu(y)

# Let's use this in a model
class MySequentialModule(tf.Module):
    """Two FlexibleDenseModule layers chained together (3 units, then 2 units)."""

    def __init__(self, name=None):
        super().__init__(name=name)
        # Only output sizes are given; each layer sizes its weights on its first call.
        self.dense1 = FlexibleDenseModule(out_features=3)
        self.dense2 = FlexibleDenseModule(out_features=2)

    def __call__(self, x):
        """Run ``x`` through dense1, then feed that result to dense2."""
        hidden = self.dense1(x)
        return self.dense2(hidden)
    
# NOTE: this rebinds `my_model`, replacing the SequentialModule instance created earlier.
# The first call below is what triggers variable creation inside each flexible layer.
my_model = MySequentialModule(name="my_model")
print("Model Results: ", my_model(tf.constant([[2.0, 2.0, 2.0]])))



Model Results:  tf.Tensor([[1.6514297 0.       ]], shape=(1, 2), dtype=float32)
