# Keras: Sequential API

A Sequential model is appropriate for a plain stack of layers where each layer has exactly one input tensor and one output tensor.

In [1]:
# Import the libraries
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [2]:
# Build a Sequential model made of three Dense layers. Each layer is given
# an explicit name, which can be used to annotate TensorBoard graphs.
model = keras.Sequential(
    [
        layers.Dense(units=2, activation="relu", name="layer1"),
        layers.Dense(units=3, activation="relu", name="layer2"),
        layers.Dense(units=4, name="layer3"),
    ]
)

In [3]:
# Input tensor filled with ones: 3 rows of 3 values each
x = tf.ones(shape=(3, 3))
x

<tf.Tensor: shape=(3, 3), dtype=float32, numpy=
array([[1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.]], dtype=float32)>

In [5]:
# And test the model: calling it on the input runs the three layers in order.
# The last layer has 4 units, so the (3, 3) input produces a (3, 4) output.
y = model(x)
y

<tf.Tensor: shape=(3, 4), dtype=float32, numpy=
array([[ 0.04578663, -0.6100886 , -0.40908617, -0.43029374],
       [ 0.04578663, -0.6100886 , -0.40908617, -0.43029374],
       [ 0.04578663, -0.6100886 , -0.40908617, -0.43029374]],
      dtype=float32)>

A Sequential model is not appropriate when:

- Your model has multiple inputs or multiple outputs
- Any of your layers has multiple inputs or multiple outputs
- You need to do layer sharing
- You want non-linear topology (e.g. a residual connection, a multi-branch model)

In [6]:
# To access the layers: `model.layers` is a list holding the layer objects
# of the model
model.layers

[<keras.layers.core.dense.Dense at 0x7fdfe0c34950>,
 <keras.layers.core.dense.Dense at 0x7fdfe0c14a10>,
 <keras.layers.core.dense.Dense at 0x7fdfe0e09e90>]

In [7]:
# A Sequential model can also be grown incrementally: start with an empty
# model and append one layer at a time with 'add()'
model = keras.Sequential()
model.add(layers.Dense(units=2, activation="relu"))
model.add(layers.Dense(units=3, activation="relu"))
model.add(layers.Dense(units=4))

In [8]:
# You can remove a layer using 'pop': it drops the last layer of the stack.
# The layer list is printed before and after so the effect is visible.
print(model.layers)

model.pop()

print(model.layers)
print(len(model.layers))

[<keras.layers.core.dense.Dense object at 0x7fdfdbd28090>, <keras.layers.core.dense.Dense object at 0x7fdfdbd28b10>, <keras.layers.core.dense.Dense object at 0x7fdfdbd32a10>]
[<keras.layers.core.dense.Dense object at 0x7fdfdbd28090>, <keras.layers.core.dense.Dense object at 0x7fdfdbd28b10>]
2


In [9]:
# A freshly constructed layer has no weights yet: their shape depends on
# the input shape, which is still unknown at this point
layer = layers.Dense(units=3)
layer.weights

[]

In [10]:
# Calling the layer on a test input creates its weights.
# A (1, 4) input gives a (4, 3) kernel and a (3,) bias.
x = tf.ones(shape=(1, 4))
y = layer(x)
layer.weights

[<tf.Variable 'dense_3/kernel:0' shape=(4, 3) dtype=float32, numpy=
 array([[-0.6007376 , -0.06637585,  0.4098016 ],
        [-0.9226789 ,  0.5341182 , -0.12492877],
        [ 0.61663806, -0.10880071,  0.82164776],
        [-0.52269375, -0.29569155,  0.15987003]], dtype=float32)>,
 <tf.Variable 'dense_3/bias:0' shape=(3,) dtype=float32, numpy=array([0., 0., 0.], dtype=float32)>]

In [14]:
# The same applies to model weights. Without seeing an input, the model has
# not been built, so there are no weights to show.
model = keras.Sequential(
    [
        layers.Dense(2, activation="relu"),
        layers.Dense(3, activation="relu"),
        layers.Dense(4),
    ]
)  # No weights at this stage!

# Accessing `model.weights` before the model is built raises a ValueError in
# the Keras version used here. Catch and print it so that the demonstration
# still shows the error without aborting a "Restart & Run All".
try:
    print(model.weights)
except ValueError as err:
    print(f"ValueError: {err}")

ValueError: ignored

In [15]:
# After seeing an input (i.e., "building" the model), the weights exist.
# `x` is the (1, 4) tensor created in an earlier cell, which is why the
# first kernel has 4 rows.
y = model(x)
model.weights

[<tf.Variable 'dense_4/kernel:0' shape=(4, 2) dtype=float32, numpy=
 array([[ 0.58612585, -0.56917024],
        [-0.35078835,  0.5479853 ],
        [ 0.40503788,  0.4647112 ],
        [ 0.4070437 ,  0.22992492]], dtype=float32)>,
 <tf.Variable 'dense_4/bias:0' shape=(2,) dtype=float32, numpy=array([0., 0.], dtype=float32)>,
 <tf.Variable 'dense_5/kernel:0' shape=(2, 3) dtype=float32, numpy=
 array([[-1.0920233 ,  0.22439897, -0.12472177],
        [ 0.2574823 , -0.15595067,  0.46388662]], dtype=float32)>,
 <tf.Variable 'dense_5/bias:0' shape=(3,) dtype=float32, numpy=array([0., 0., 0.], dtype=float32)>,
 <tf.Variable 'dense_6/kernel:0' shape=(3, 4) dtype=float32, numpy=
 array([[ 0.70583594, -0.5479173 , -0.48999706,  0.19975698],
        [ 0.7353419 , -0.27570838, -0.8089179 ,  0.6360003 ],
        [-0.25185823,  0.50565934,  0.8248184 ,  0.37599814]],
       dtype=float32)>,
 <tf.Variable 'dense_6/bias:0' shape=(4,) dtype=float32, numpy=array([0., 0., 0., 0.], dtype=float32)>]

In [16]:
# After building the model, we can call its summary()
model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_4 (Dense)             (1, 2)                    10        
                                                                 
 dense_5 (Dense)             (1, 3)                    9         
                                                                 
 dense_6 (Dense)             (1, 4)                    16        
                                                                 
Total params: 35
Trainable params: 35
Non-trainable params: 0
_________________________________________________________________


In [17]:
# To inspect the model while it is still being assembled, start with an
# "Input" object so that the model knows its input shape in advance
model = keras.Sequential()
model.add(keras.Input(shape=(4,)))
model.add(layers.Dense(units=2, activation="relu"))
model.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_7 (Dense)             (None, 2)                 10        
                                                                 
Total params: 10
Trainable params: 10
Non-trainable params: 0
_________________________________________________________________


In [18]:
# The input is not treated like a layer, because it isn't:
# only the Dense layer shows up in `model.layers`
model.layers

[<keras.layers.core.dense.Dense at 0x7fdfdbbfff10>]

In [19]:
# Passing an 'input_shape' argument to the first layer has the same effect
# as starting the model with an Input object
model = keras.Sequential()
model.add(layers.Dense(units=2, activation="relu", input_shape=(4,)))
model.summary()

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_8 (Dense)             (None, 2)                 10        
                                                                 
Total params: 10
Trainable params: 10
Non-trainable params: 0
_________________________________________________________________


In [20]:
# Models initialized with a predefined input shape always
# have weights, even before being called on any data
model.weights

[<tf.Variable 'dense_8/kernel:0' shape=(4, 2) dtype=float32, numpy=
 array([[ 0.46502137,  0.04955745],
        [-0.36202693,  0.13694692],
        [ 0.01382732,  0.6414015 ],
        [ 0.78624225,  0.57746816]], dtype=float32)>,
 <tf.Variable 'dense_8/bias:0' shape=(2,) dtype=float32, numpy=array([0., 0.], dtype=float32)>]

In general, it's a recommended best practice to always specify the input shape of a Sequential model in advance if you know what it is.

In [22]:
# add() + summary() is a common debugging workflow: add a few layers,
# print the summary, and check how the feature maps shrink

# Let's build a ConvNet
model = keras.Sequential()
model.add(keras.Input(shape=(250, 250, 3)))  # 250x250 RGB images
# 32 filters of size 5x5, applied with a stride of 2 in both directions
model.add(layers.Conv2D(filters=32, kernel_size=5, strides=2, activation="relu"))
model.add(layers.Conv2D(filters=32, kernel_size=3, activation="relu"))
model.add(layers.MaxPooling2D(pool_size=3))

# Print the summary for now
model.summary()

Model: "sequential_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 123, 123, 32)      2432      
                                                                 
 conv2d_1 (Conv2D)           (None, 121, 121, 32)      9248      
                                                                 
 max_pooling2d (MaxPooling2D  (None, 40, 40, 32)       0         
 )                                                               
                                                                 
Total params: 11,680
Trainable params: 11,680
Non-trainable params: 0
_________________________________________________________________


In [23]:
# Keep stacking: two more groups of (conv, conv, max-pool)
model.add(layers.Conv2D(filters=32, kernel_size=3, activation="relu"))
model.add(layers.Conv2D(filters=32, kernel_size=3, activation="relu"))
model.add(layers.MaxPooling2D(pool_size=3))
model.add(layers.Conv2D(filters=32, kernel_size=3, activation="relu"))
model.add(layers.Conv2D(filters=32, kernel_size=3, activation="relu"))
model.add(layers.MaxPooling2D(pool_size=2))

model.summary()

Model: "sequential_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 123, 123, 32)      2432      
                                                                 
 conv2d_1 (Conv2D)           (None, 121, 121, 32)      9248      
                                                                 
 max_pooling2d (MaxPooling2D  (None, 40, 40, 32)       0         
 )                                                               
                                                                 
 conv2d_2 (Conv2D)           (None, 38, 38, 32)        9248      
                                                                 
 conv2d_3 (Conv2D)           (None, 36, 36, 32)        9248      
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 12, 12, 32)       0         
 2D)                                                  

In [24]:
# We now have 4x4x32 feature maps, time to apply global max pooling
model.add(layers.GlobalMaxPooling2D())
model.summary()

Model: "sequential_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 123, 123, 32)      2432      
                                                                 
 conv2d_1 (Conv2D)           (None, 121, 121, 32)      9248      
                                                                 
 max_pooling2d (MaxPooling2D  (None, 40, 40, 32)       0         
 )                                                               
                                                                 
 conv2d_2 (Conv2D)           (None, 38, 38, 32)        9248      
                                                                 
 conv2d_3 (Conv2D)           (None, 36, 36, 32)        9248      
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 12, 12, 32)       0         
 2D)                                                  

In [25]:
# Finish with a Dense classification layer with 10 output units
model.add(layers.Dense(units=10))
model.summary()

Model: "sequential_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 123, 123, 32)      2432      
                                                                 
 conv2d_1 (Conv2D)           (None, 121, 121, 32)      9248      
                                                                 
 max_pooling2d (MaxPooling2D  (None, 40, 40, 32)       0         
 )                                                               
                                                                 
 conv2d_2 (Conv2D)           (None, 38, 38, 32)        9248      
                                                                 
 conv2d_3 (Conv2D)           (None, 36, 36, 32)        9248      
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 12, 12, 32)       0         
 2D)                                                  

## Feature extraction with a Sequential model

Once the model has been built, every layer has an `input` and an `output` attribute. These attributes can be used to turn the model into a feature extractor.

In [26]:
# Model whose layers will be used as feature sources: three stacked
# convolutions on 250x250 RGB inputs
initial_model = keras.Sequential(
    [
        keras.Input(shape=(250, 250, 3)),
        layers.Conv2D(filters=32, kernel_size=5, strides=2, activation="relu"),
        layers.Conv2D(filters=32, kernel_size=3, activation="relu"),
        layers.Conv2D(filters=32, kernel_size=3, activation="relu"),
    ]
)

In [28]:
# Build a feature extractor: a model mapping the inputs of `initial_model`
# to the list of outputs of each of its layers
feature_extractor = keras.Model(
    initial_model.inputs,
    [lyr.output for lyr in initial_model.layers],
)
feature_extractor.summary()

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_4 (InputLayer)        [(None, 250, 250, 3)]     0         
                                                                 
 conv2d_6 (Conv2D)           (None, 123, 123, 32)      2432      
                                                                 
 conv2d_7 (Conv2D)           (None, 121, 121, 32)      9248      
                                                                 
 conv2d_8 (Conv2D)           (None, 119, 119, 32)      9248      
                                                                 
Total params: 20,928
Trainable params: 20,928
Non-trainable params: 0
_________________________________________________________________


In [32]:
# Test the feature extractor on a dummy batch holding one 250x250 RGB image.
# The extractor was defined with one output per layer, hence 3 results.
x = tf.ones((1, 250, 250, 3))
features = feature_extractor(x)
len(features)

3

In [33]:
# The same idea works for a single intermediate layer: give the layer a
# name, then look it up by that name to get its output
initial_model = keras.Sequential(
    [
        keras.Input(shape=(250, 250, 3)),
        layers.Conv2D(filters=32, kernel_size=5, strides=2, activation="relu"),
        layers.Conv2D(
            filters=32,
            kernel_size=3,
            activation="relu",
            name="my_intermediate_layer",
        ),
        layers.Conv2D(filters=32, kernel_size=3, activation="relu"),
    ]
)

feature_extractor = keras.Model(
    initial_model.inputs,
    initial_model.get_layer("my_intermediate_layer").output,
)

In [34]:
# Test the feature extractor.
# NOTE: this extractor has a single output, so `features` is one tensor and
# len() reports the size of its first (batch) axis -- 1 for this input --
# rather than a number of feature maps. Use `features.shape` to see the
# full shape.
x = tf.ones((1, 250, 250, 3))
features = feature_extractor(x)
len(features)

1

## Transfer Learning

Transfer learning consists of freezing the bottom layers in a model and only training the top layers.

- Blueprint 1: Freeze the bottom layers.

In [36]:
# First, let's define a model: a stack of Dense layers on a 784-feature input.
# `shape` must be a tuple, hence the trailing comma: `(784)` would just be
# the integer 784.
model = keras.Sequential([
  keras.Input(shape=(784,)),
  layers.Dense(32, activation="relu"),
  layers.Dense(32, activation="relu"),
  layers.Dense(32, activation="relu"),
  layers.Dense(10),
])

In [40]:
# Presumably, here you would load the pretrained weights
# model.load_weights(...)

In [38]:
# Let's freeze all the layers except the last one:
# the slice [:-1] skips the final Dense layer, which stays trainable
for layer in model.layers[:-1]:
  layer.trainable = False

In [39]:
# Now, you would compile and train (this will only update the weights in the
# last layer)
# model.compile(...)
# model.fit(...)

- Blueprint 2: Stack a pre-trained model + some new classification layers.

In [41]:
# Let's load a standard ConvNet like Xception as the base model.
# The pretrained weights file is downloaded when this cell runs
# (see the download URL in the cell output).
base_model = keras.applications.Xception(
    weights="imagenet",  # use the ImageNet-pretrained weights
    include_top=False,  # fetches the "notop" weights file, i.e. no classifier head
    pooling="avg"  # presumably global average pooling of the last feature maps -- confirm in the Keras docs
)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/xception/xception_weights_tf_dim_ordering_tf_kernels_notop.h5


In [42]:
# Now, freeze the whole base model by setting `trainable` on the model itself
base_model.trainable = False

In [43]:
# Stack a new Dense classifier (1000 output units) on top of the base model
model = keras.Sequential(
    [
        base_model,
        layers.Dense(units=1000),
    ]
)

In [44]:
# Now, again you would compile and train the model
# model.compile(...)
# model.fit(...)