In [1]:
import tensorflow as tf
from tensorflow import keras

import numpy as np

## Use a Keras layer

In [2]:
# A stock fully connected layer: 2 output units, ReLU activation and a
# zero-initialized bias.
layer = keras.layers.Dense(units=2, activation="relu", bias_initializer="zeros")
# A random batch of 3 rows with 3 features each.
data = tf.random.uniform(shape=(3, 3))
# Applying the layer returns the (3, 2) activations, displayed as the cell output.
layer(data)

<tf.Tensor: shape=(3, 2), dtype=float32, numpy=
array([[0.29601812, 0.        ],
       [0.14150175, 0.        ],
       [0.22652026, 0.29924074]], dtype=float32)>

## Write your custom layer

In [3]:
class Linear(keras.layers.Layer):
    """Fully connected layer without activation: ``inputs @ w + b``.

    Args:
        units: Size of the last dimension of the output.
        **kwargs: Standard ``keras.layers.Layer`` keyword arguments
            (for example ``name`` or ``dtype``), forwarded to the base class.
    """

    def __init__(self, units, **kwargs):
        # Forward base-class options so the layer can be named/configured
        # like any built-in layer.
        super().__init__(**kwargs)
        self.units = units

    def build(self, input_shape):
        """Create the weights once the size of the input's last axis is known."""
        input_dim = input_shape[-1]
        self.w = self.add_weight(
            shape=[input_dim, self.units], initializer="glorot_uniform",
        )
        self.b = self.add_weight(
            shape=[self.units,], initializer="zeros"
        )

    def call(self, inputs):
        """Return ``inputs @ w + b``."""
        return tf.matmul(inputs, self.w) + self.b

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super().get_config()
        config.update({"units": self.units})
        return config

# Instantiate the custom layer with 2 output units and apply it to the batch;
# the result is displayed as the cell output.
layer = Linear(units=2)
layer(data)

<tf.Tensor: shape=(3, 2), dtype=float32, numpy=
array([[0.46128497, 0.5139472 ],
       [0.85526496, 0.23158368],
       [0.6606808 , 0.566612  ]], dtype=float32)>

# Different ways of building the model

- Sequential API
- [**Preferred**] Functional API
- Subclassing model

In [4]:
# Sequential model: layers are listed in the order they are applied.
sequential_model = keras.Sequential([
    keras.Input([28, 28, 3]),
    keras.layers.Conv2D(8, 2),
    keras.layers.MaxPool2D(2),
    keras.layers.Flatten(),
    keras.layers.Dense(2),
    keras.layers.Softmax(),
])

# `summary()` prints the table itself and returns None, so it is called
# directly; wrapping it in `print` would add a stray "None" line.
sequential_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 27, 27, 8)         104       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 13, 13, 8)        0         
 )                                                               
                                                                 
 flatten (Flatten)           (None, 1352)              0         
                                                                 
 dense_1 (Dense)             (None, 2)                 2706      
                                                                 
 softmax (Softmax)           (None, 2)                 0         
                                                                 
Total params: 2,810
Trainable params: 2,810
Non-trainable params: 0
______________________________________________________

In [5]:
# Functional model: each layer is called on the output of the previous one,
# starting from a symbolic input.
inputs = keras.Input([28, 28, 3])
x = keras.layers.Conv2D(8, 2)(inputs)
x = keras.layers.MaxPool2D(2)(x)
x = keras.layers.Flatten()(x)
x = keras.layers.Dense(2)(x)
outputs = keras.layers.Softmax()(x)

functional_model = keras.Model(inputs=inputs, outputs=outputs)
# `summary()` prints the table itself and returns None, so it is called
# directly; wrapping it in `print` would add a stray "None" line.
functional_model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 28, 28, 3)]       0         
                                                                 
 conv2d_1 (Conv2D)           (None, 27, 27, 8)         104       
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 13, 13, 8)        0         
 2D)                                                             
                                                                 
 flatten_1 (Flatten)         (None, 1352)              0         
                                                                 
 dense_2 (Dense)             (None, 2)                 2706      
                                                                 
 softmax_1 (Softmax)         (None, 2)                 0         
                                                             

In [6]:
# Subclassing `keras.Model`

class MyModel(keras.Model):
    """Conv2D -> MaxPool2D -> Flatten -> Dense -> Softmax, built by subclassing."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Layers are created once here and reused by every call.
        self._conv = keras.layers.Conv2D(filters=8, kernel_size=2)
        self._max_pool = keras.layers.MaxPool2D(pool_size=2)
        self._flatten = keras.layers.Flatten()
        self._dense = keras.layers.Dense(units=2)
        self._softmax = keras.layers.Softmax()

    def call(self, inputs):
        """Pass `inputs` through the layers in order and return the softmax output."""
        stages = (
            self._conv,
            self._max_pool,
            self._flatten,
            self._dense,
            self._softmax,
        )
        x = inputs
        for stage in stages:
            x = stage(x)
        return x

# The subclassed model declares no input layer, so it is built explicitly for
# batches of 28x28x3 inputs before the summary is requested.
subclass_model = MyModel()
subclass_model.build(input_shape=(None, 28, 28, 3))
subclass_model.summary()

Model: "my_model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_2 (Conv2D)           multiple                  104       
                                                                 
 max_pooling2d_2 (MaxPooling  multiple                 0         
 2D)                                                             
                                                                 
 dense_3 (Dense)             multiple                  2706      
                                                                 
 softmax_2 (Softmax)         multiple                  0         
                                                                 
Total params: 2,810
Trainable params: 2,810
Non-trainable params: 0
_________________________________________________________________


# Models are callable.

`keras.Model` instances are also callable. You can call a model as if it were a function, no matter which way you used to build it.

In [7]:
# One random 28x28x3 input (batch of 1), fed to each of the three models.
sample_data = tf.random.uniform(shape=(1, 28, 28, 3))

for built_model in (sequential_model, functional_model, subclass_model):
    print(built_model(sample_data))

tf.Tensor([[0.1127926 0.8872074]], shape=(1, 2), dtype=float32)
tf.Tensor([[0.46518022 0.5348197 ]], shape=(1, 2), dtype=float32)
tf.Tensor([[0.48656997 0.51343006]], shape=(1, 2), dtype=float32)


In [8]:
# Collect the name of every trainable variable and print one per line.
variable_names = []
for variable in functional_model.trainable_variables:
    variable_names.append(variable.name)
print("\n".join(variable_names))

conv2d_1/kernel:0
conv2d_1/bias:0
dense_2/kernel:0
dense_2/bias:0


## Models can be sliced

In [10]:
# Reuse the functional model's inputs and stop at the output of its
# convolution layer to obtain a truncated model.
conv_layer = functional_model.get_layer(name="conv2d_1")
sliced_model = keras.Model(
    inputs=functional_model.inputs, outputs=conv_layer.output
)
sliced_model.summary()

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 28, 28, 3)]       0         
                                                                 
 conv2d_1 (Conv2D)           (None, 27, 27, 8)         104       
                                                                 
Total params: 104
Trainable params: 104
Non-trainable params: 0
_________________________________________________________________


# Train the model

Now we have defined the model. For actual training, we need the following pieces:
- Data
- Loss/Target function
- Optimizer

In [11]:
# Synthetic data for a 2-class classification: 20 random 28x28x3 inputs,
# each paired with a random label in {0, 1}.
data = tf.random.uniform(shape=(20, 28, 28, 3))
label = tf.convert_to_tensor(np.random.randint(0, 2, size=20))

In [12]:
# Let's use the functional model defined above.
# Note: this binds a second name to the same object (no copy is made), so
# anything done to `model` below is also done to `functional_model`.
model = functional_model

In [13]:
# Configure training: Adam with learning rate 0.001, sparse categorical
# cross-entropy as the loss, and sparse categorical accuracy as the metric.
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss=keras.losses.SparseCategoricalCrossentropy(),
    metrics=[keras.metrics.SparseCategoricalAccuracy()],
)

In [14]:
# Train for 2 epochs on the in-memory tensors, 5 examples per batch.
model.fit(x=data, y=label, batch_size=5, epochs=2)

Epoch 1/2
Epoch 2/2


<keras.callbacks.History at 0x7fed5ea512d0>

# Eager mode and graph mode

Debug in eager mode; run the actual job in graph mode.

In [15]:
# Use the first example only, keeping the batch dimension.
first_example = data[0:1, ...]

# Eager mode: calling the model directly.
print(model(first_example))

# Graph mode: going through `predict`.
print(model.predict(first_example))

tf.Tensor([[0.6383353  0.36166468]], shape=(1, 2), dtype=float32)
[[0.6383353  0.36166468]]


In [16]:
@tf.function(jit_compile=True)
def forward_pass(x):
    """Run the model on `x` inside a `tf.function`.

    The Python `print` is kept on purpose: it displays the symbolic tensor
    seen while the function is traced, not a concrete value.
    """
    predictions = model(x)
    print(predictions)
    return predictions


# The outer print shows the concrete result returned by the call.
print(forward_pass(data[0:1, ...]))

Tensor("model/softmax_1/Softmax:0", shape=(1, 2), dtype=float32)
tf.Tensor([[0.6383352  0.36166477]], shape=(1, 2), dtype=float32)


In [17]:
# You can also run `fit`, `predict` in eager mode.
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss=keras.losses.SparseCategoricalCrossentropy(),
    metrics=[keras.metrics.SparseCategoricalAccuracy()],
    run_eagerly=True,
)

# Run `predict` in eager mode by setting `run_eagerly` above.
eager_predictions = model.predict(data[0:1, ...])
print(eager_predictions)

[[0.6383353  0.36166468]]


# Write your custom training loop

We encourage using `fit` for training/fine-tuning as much as possible, but it's still important to know how to write your own custom training loop. For example, if you want to debug the training loop, or have better control over each step, you can use a custom training loop.

In [18]:
# Pair each example with its label, then batch by 5, cache and prefetch.
train_data = (
    tf.data.Dataset.from_tensor_slices((data, label))
    .batch(5)
    .cache()
    .prefetch(tf.data.AUTOTUNE)
)

In [19]:
# Stand-alone pieces used by the custom training loop.
loss_fn = keras.losses.SparseCategoricalCrossentropy()  # loss to minimize
optimizer = keras.optimizers.Adam(learning_rate=0.001)  # applies the gradients
metric = keras.metrics.SparseCategoricalAccuracy()  # accumulates accuracy across batches

In [20]:
@tf.function
def train_step(data):
    """Run one optimization step on a single batch.

    Args:
        data: An `(inputs, labels)` pair for one batch.

    Returns:
        The loss computed on this batch.
    """
    x, y = data
    with tf.GradientTape() as tape:
        # `training=True` makes layers that behave differently during training
        # run in training mode inside the training step.
        outputs = model(x, training=True)
        loss = loss_fn(y, outputs)

    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    # The metric accumulates over calls, so its result is a running value.
    metric.update_state(y, outputs)

    return loss


# The loop variable is named `batch` rather than `data` so that it does not
# overwrite the global `data` tensor that other cells slice.
for step, batch in enumerate(train_data):
    loss = train_step(batch)
    print(f"Batch {step}, loss: {loss:.3f}, accuracy: {metric.result():.3f}")

Batch 0, loss: 0.468, accuracy: 0.800
Batch 1, loss: 0.753, accuracy: 0.700
Batch 2, loss: 0.492, accuracy: 0.733
Batch 3, loss: 0.794, accuracy: 0.650


# Save your model.

In [21]:
# Write the model's weights to the path prefix "weights".
model.save_weights(filepath="weights")

In [22]:
# Restore the saved weights into the sequential model, which was defined with
# the same stack of layers as the functional one.
sequential_model.load_weights(filepath="weights")



<tensorflow.python.checkpoint.checkpoint.CheckpointLoadStatus at 0x7fed5df5ec50>

In [25]:
# Largest absolute element-wise difference between the two convolution kernels.
# A plain sum of signed differences could be 0 even when the kernels differ,
# because positive and negative differences cancel out.
tf.reduce_max(
    tf.abs(
        sequential_model.get_layer("conv2d").kernel
        - model.get_layer("conv2d_1").kernel
    )
)

<tf.Tensor: shape=(), dtype=float32, numpy=0.0>

# Use callbacks to control your training

`keras.callbacks` provides the ability to control your training loop. For example, you can use callbacks to save your checkpoints at a certain frequency.

In [26]:
# `-p` makes this a no-op when the directory already exists, so the cell can be re-run.
!mkdir -p checkpoints

In [29]:
# Save the model's weights (not the full model) under this path during training.
checkpoint_filepath = 'checkpoints/checkpoint'
model_checkpoint_callback = keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_filepath,
    save_weights_only=True,
    monitor="loss",
)

# `train_data` is a `tf.data.Dataset` that is already batched (5 per batch),
# so `batch_size` must not be passed to `fit` as well.
model.fit(
    train_data,
    epochs=3,
    callbacks=[model_checkpoint_callback]
)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x7fed5e025ed0>