In [2]:
import numpy as np

from keras.models import Sequential
from keras.layers import LSTM, Dense, Flatten

# First example

What? An LSTM feeding its output to a dense layer, without a specified batch shape.

* We only specify the `input_shape` (and not the full `batch_input_shape`)
* We do not return the output sequences

In [3]:
# Dimensions of the toy problem.
# NOTE: `batch_size` is assigned here but is not passed to the model or to `fit` in this cell.
batch_size = 5
n_timesteps, n_input_features, n_output_features = 10, 20, 30

# LSTM that emits only its last output, followed by a single-unit Dense layer.
# Only the per-sample `input_shape` is given (no batch dimension).
model = Sequential([
    LSTM(
        units=n_output_features,
        input_shape=(n_timesteps, n_input_features),
        return_sequences=False,
        name="lstm_layer",
    ),
    Dense(1, name="dense_layer"),
])
model.compile(optimizer="adam", loss="mse")
model.summary()

# Random features plus one random 0/1 label per sample.
sample_size = 100
sample_features = np.random.random((sample_size, n_timesteps, n_input_features))
sample_labels = np.random.randint(0, 2, size=(sample_size, 1))

print("Shape of feature tensor:", sample_features.shape)
print("Shape of labels:", sample_labels.shape)

# No `batch_size` argument is passed to `fit` here.
_ = model.fit(x=sample_features, y=sample_labels)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_layer (LSTM)            (None, 30)                6120      
_________________________________________________________________
dense_layer (Dense)          (None, 1)                 31        
Total params: 6,151
Trainable params: 6,151
Non-trainable params: 0
_________________________________________________________________
Shape of feature tensor: (100, 10, 20)
Shape of labels: (100, 1)
Epoch 1/1


# First example (ct'd)

What? Same as first example but the batch size is explicitly specified.

* Specify `batch_input_shape` (and not just `input_shape`)
* Do not return the output sequences

In [4]:
# Dimensions of the toy problem; here the batch size is baked into the model.
batch_size = 5
n_timesteps, n_input_features, n_output_features = 10, 20, 30

# LSTM that emits only its last output, followed by a single-unit Dense layer.
# The full `batch_input_shape` (including the batch dimension) is specified.
model = Sequential([
    LSTM(
        units=n_output_features,
        batch_input_shape=(batch_size, n_timesteps, n_input_features),
        return_sequences=False,
        name="lstm_layer",
    ),
    Dense(1, name="dense_layer"),
])
model.compile(optimizer="adam", loss="mse")
model.summary()

# The sample count is a multiple of the batch size:
# if `sample_size % batch_size != 0` then error!
sample_size = 10 * batch_size
sample_features = np.random.random((sample_size, n_timesteps, n_input_features))
sample_labels = np.random.randint(0, 2, size=(sample_size, 1))

print("Shape of feature tensor:", sample_features.shape)
print("Shape of labels:", sample_labels.shape)

# Train with the same batch size that was fixed in `batch_input_shape`.
_ = model.fit(x=sample_features, y=sample_labels, batch_size=batch_size)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_layer (LSTM)            (5, 30)                   6120      
_________________________________________________________________
dense_layer (Dense)          (5, 1)                    31        
Total params: 6,151
Trainable params: 6,151
Non-trainable params: 0
_________________________________________________________________
Shape of feature tensor: (50, 10, 20)
Shape of labels: (50, 1)
Epoch 1/1


# Second example

What? An LSTM returning the full output sequence and not only the last output. Thus the output first has to be processed further in another layer.

* We set `return_sequences=True` and now we return tensors of shape `(n_timesteps, n_output_features)` from the LSTM layer. This cannot be processed as is by our `Dense` layer. In this example we add a `Flatten` layer that flattens the LSTM output to a vector that can be fed to the `Dense` layer. (Another option is to add another LSTM layer instead of the `Flatten` layer.)

In [5]:
# Dimensions of the toy problem.
# NOTE: `batch_size` is assigned here but is not passed to the model or to `fit` in this cell.
batch_size = 5
n_timesteps, n_input_features, n_output_features = 10, 20, 30

# LSTM that returns its output at every timestep; the sequence is flattened
# before it reaches the single-unit Dense layer.
# Only the per-sample `input_shape` is given (no batch dimension).
model = Sequential([
    LSTM(
        units=n_output_features,
        input_shape=(n_timesteps, n_input_features),
        return_sequences=True,
        name="lstm_layer",
    ),
    Flatten(name="flatten_layer"),
    Dense(1, name="dense_layer"),
])
model.compile(optimizer="adam", loss="mse")
model.summary()

# Random features plus one random 0/1 label per sample.
sample_size = 100
sample_features = np.random.random((sample_size, n_timesteps, n_input_features))
sample_labels = np.random.randint(0, 2, size=(sample_size, 1))

print("Shape of feature tensor:", sample_features.shape)
print("Shape of labels:", sample_labels.shape)

# No `batch_size` argument is passed to `fit` here.
_ = model.fit(x=sample_features, y=sample_labels)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_layer (LSTM)            (None, 10, 30)            6120      
_________________________________________________________________
flatten_layer (Flatten)      (None, 300)               0         
_________________________________________________________________
dense_layer (Dense)          (None, 1)                 301       
Total params: 6,421
Trainable params: 6,421
Non-trainable params: 0
_________________________________________________________________
Shape of feature tensor: (100, 10, 20)
Shape of labels: (100, 1)
Epoch 1/1


# Second example (ct'd)

In [6]:
# Dimensions of the toy problem; here the batch size is baked into the model.
batch_size = 5
n_timesteps, n_input_features, n_output_features = 10, 20, 30

# LSTM that returns its output at every timestep; the sequence is flattened
# before it reaches the single-unit Dense layer.
# The full `batch_input_shape` (including the batch dimension) is specified.
model = Sequential([
    LSTM(
        units=n_output_features,
        batch_input_shape=(batch_size, n_timesteps, n_input_features),
        return_sequences=True,
        name="lstm_layer",
    ),
    Flatten(name="flatten_layer"),
    Dense(1, name="dense_layer"),
])
model.compile(optimizer="adam", loss="mse")
model.summary()

# The sample count is a multiple of the batch size.
sample_size = 10 * batch_size
sample_features = np.random.random((sample_size, n_timesteps, n_input_features))
sample_labels = np.random.randint(0, 2, size=(sample_size, 1))

print("Shape of feature tensor:", sample_features.shape)
print("Shape of labels:", sample_labels.shape)

# Note: If `batch_size` is not specified here --> error!
_ = model.fit(x=sample_features, y=sample_labels, batch_size=batch_size)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_layer (LSTM)            (5, 10, 30)               6120      
_________________________________________________________________
flatten_layer (Flatten)      (5, 300)                  0         
_________________________________________________________________
dense_layer (Dense)          (5, 1)                    301       
Total params: 6,421
Trainable params: 6,421
Non-trainable params: 0
_________________________________________________________________
Shape of feature tensor: (50, 10, 20)
Shape of labels: (50, 1)
Epoch 1/1


Regarding the note above: if `batch_size` is not given, it defaults to 32 in `fit`, which does not match the batch size of 5 fixed in `batch_input_shape`.