In [2]:
import numpy as np
from tensorflow import keras

In [3]:
# function to create a periodic noisy time series
def generate_time_series(batch_size, n_steps):
    """Build a batch of noisy univariate series, each the sum of two sine waves.

    Parameters
    ----------
    batch_size : int
        Number of independent series to draw.
    n_steps : int
        Number of samples per series, evenly spaced over [0, 1].

    Returns
    -------
    numpy.ndarray
        float32 array of shape (batch_size, n_steps, 1).
    """
    # A single uniform draw provides, for every series, two frequency factors
    # and two phase offsets (each of shape (batch_size, 1)).
    params = np.random.rand(4, batch_size, 1)
    freq_a, freq_b, shift_a, shift_b = params
    t = np.linspace(0, 1, n_steps)

    primary = 0.5 * np.sin((t - shift_a) * (freq_a * 10 + 10))
    secondary = 0.2 * np.sin((t - shift_b) * (freq_b * 20 + 20))
    # uniform noise in [-0.05, 0.05)
    jitter = 0.1 * (np.random.rand(batch_size, n_steps) - 0.5)

    noisy = primary + secondary + jitter
    # append a trailing feature axis of size 1
    return noisy[..., np.newaxis].astype(np.float32)

In [3]:
# Generate 10,000 series of n_steps + 1 samples each: the first n_steps samples
# are the model input, the final sample is the one-step-ahead target.
n_steps = 50
series = generate_time_series(10000, n_steps=n_steps + 1)

# Rows 0-6999 -> training, 7000-8999 -> validation, 9000-9999 -> test.
X_train, X_valid, X_test = (
    part[:, :n_steps] for part in np.split(series, [7000, 9000])
)
y_train, y_valid, y_test = (
    part[:, -1] for part in np.split(series, [7000, 9000])
)

In [22]:
# sanity check: (number of series, n_steps + 1, 1)
series.shape

(10000, 51, 1)

In [4]:
# sanity check: the inputs keep only the first n_steps samples of each training series
X_train.shape

(7000, 50, 1)

# Naive Forecasting

In [8]:
# Baseline: forecast the next value as a copy of the last observed value.
y_pred = X_valid[:, -1, :]
np.mean(keras.losses.mean_squared_error(y_true=y_valid, y_pred=y_pred))

0.02138044

# Linear Forecasting

In [9]:
# Linear baseline: flatten each (n_steps, 1) window and feed it to a single
# dense unit, so the forecast is a learned linear combination of the inputs.
model = keras.models.Sequential([
    # the window length comes from n_steps instead of a hard-coded 50, so the
    # model stays in sync with the data-generation cell
    keras.layers.Flatten(input_shape=[n_steps, 1]),
    keras.layers.Dense(1)
])

In [10]:
# Configure training: Adam optimizer, mean squared error loss.
model.compile(optimizer="adam", loss="mse")

In [11]:
# Train for 20 epochs, passing the validation split for monitoring.
model.fit(
    X_train,
    y_train,
    epochs=20,
    validation_data=(X_valid, y_valid),
)

Epoch 1/20


2023-02-28 10:23:33.540879: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz
2023-02-28 10:23:33.692683: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Epoch 2/20
  1/219 [..............................] - ETA: 1s - loss: 0.0422

2023-02-28 10:23:35.227563: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x287826830>

In [13]:
# predict the next value for every series in the validation set
y_pred = model.predict(X_valid)



2023-02-28 10:24:46.833297: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


In [14]:
# Validation MSE of the linear model, averaged over all validation series.
np.mean(keras.losses.mean_squared_error(y_true=y_valid, y_pred=y_pred))

0.0036552807

# RNN Forecasting

In [17]:
# Minimal recurrent model: one SimpleRNN layer holding a single unit.
# The time dimension is None because a recurrent layer accepts sequences of any length.
model = keras.models.Sequential([
    keras.layers.SimpleRNN(units=1, activation="tanh", input_shape=[None, 1]),
])

In [None]:
# Configure (Adam, MSE) and train the single-unit RNN for 20 epochs; this takes a long time.
model.compile(optimizer="adam", loss="mse")
model.fit(X_train, y_train, epochs=20, validation_data=(X_valid, y_valid))

In [None]:
# Validation MSE of the single-unit RNN (should come out at roughly 0.014).
y_pred = model.predict(X_valid)
np.mean(keras.losses.mean_squared_error(y_true=y_valid, y_pred=y_pred))

# Deep RNN

In [20]:
# Deep RNN: three stacked SimpleRNN layers.
# The first two return their full output sequence so that the following
# recurrent layer receives one input per time step.
model = keras.models.Sequential([
    keras.layers.SimpleRNN(units=20, return_sequences=True, input_shape=[None, 1]),
    keras.layers.SimpleRNN(units=20, return_sequences=True),
    # Illustration only: SimpleRNN is rarely used as the output layer because
    # its hidden state is not expected to be used much.
    keras.layers.SimpleRNN(units=1),
])

In [None]:
# Configure (Adam, MSE) and train the stacked RNN for 20 epochs; this takes a long time.
model.compile(optimizer="adam", loss="mse")
model.fit(X_train, y_train, epochs=20, validation_data=(X_valid, y_valid))

In [None]:
# Validation MSE of the stacked RNN (should come out at roughly 0.003).
y_pred = model.predict(X_valid)
np.mean(keras.losses.mean_squared_error(y_true=y_valid, y_pred=y_pred))

In [None]:
# More realistic stacked RNN: the second recurrent layer returns only its
# final output and a Dense layer produces the one-step forecast.
# It converges fast.
model = keras.models.Sequential([
    keras.layers.SimpleRNN(20, return_sequences=True, input_shape=[None, 1]),
    keras.layers.SimpleRNN(20),
    keras.layers.Dense(1)
])

# Compile and train exactly like the previous models. The multi-step forecast
# cell that follows calls model.predict on this model, which would otherwise
# run on untrained, randomly initialized weights.
model.compile(loss="mse", optimizer="adam")
model.fit(X_train, y_train, validation_data=(X_valid, y_valid), epochs=20)

# Predictions More than 1 Step Ahead

In [None]:
# Forecast 10 steps ahead one step at a time: predict the next value, append
# it to the inputs, slide the window forward by one and repeat.
series = generate_time_series(1, n_steps=n_steps + 10)
X_new = series[:, :n_steps]
y_new = series[:, n_steps:]
X = X_new
for step_ahead in range(10):
    # X[:, step_ahead:] always holds the latest n_steps values (observed + predicted)
    y_pred_one = np.expand_dims(model.predict(X[:, step_ahead:]), axis=1)
    X = np.concatenate([X, y_pred_one], axis=1)

# everything after the first n_steps samples is forecast
Y_pred = X[:, n_steps:]

In [None]:
# Predict the next 10 steps all at once: the targets are the last 10 values of
# each series and the model emits a 10-dimensional vector.
series = generate_time_series(10000, n_steps=n_steps + 10)
X_train, Y_train = series[:7000, :n_steps], series[:7000, -10:, 0]
X_valid, Y_valid = series[7000:9000, :n_steps], series[7000:9000, -10:, 0]
X_test, Y_test = series[9000:, :n_steps], series[9000:, -10:, 0]

model = keras.models.Sequential([
    keras.layers.SimpleRNN(20, return_sequences=True, input_shape=[None, 1]),
    keras.layers.SimpleRNN(20),
    keras.layers.Dense(10)  # one output per forecast step
])

# Train before predicting; without this step the forecast below would come
# from randomly initialized weights.
model.compile(loss="mse", optimizer="adam")
model.fit(X_train, Y_train, validation_data=(X_valid, Y_valid), epochs=20)

# X_new is the single n_steps-long window built in the one-step-at-a-time cell
Y_pred = model.predict(X_new)

In [None]:
# Sequence-to-sequence variant: forecast the next 10 values at every time
# step, not only at the last one.
# Each target is a sequence of 10D vectors: at time step t the target holds
# the values at t+1 ... t+10 of the same series.
Y = np.empty((10000, n_steps, 10))
for step_ahead in range(1, 10 + 1):
    Y[:, :, step_ahead - 1] = series[:, step_ahead:step_ahead + n_steps, 0]

Y_train = Y[:7000]
Y_valid = Y[7000:9000]
Y_test = Y[9000:]

# Fixed: the constructor lives in keras.models (plural); keras.model does not
# exist and raised AttributeError.
model = keras.models.Sequential([
    keras.layers.SimpleRNN(20, return_sequences=True, input_shape=[None, 1]),
    keras.layers.SimpleRNN(20, return_sequences=True),  # it returns an output for each time step
    keras.layers.TimeDistributed(keras.layers.Dense(10))  # it takes one input per time step and will output a sequence
])

In [10]:
# Every time step contributes to the training loss, but model quality is
# judged only on the forecast made at the final time step (itself a sequence
# of 10 values), hence this custom metric.
def last_time_step_mse(Y_true, Y_pred):
    """Mean squared error restricted to the last time step of each sequence."""
    final_true = Y_true[:, -1]
    final_pred = Y_pred[:, -1]
    return keras.metrics.mean_squared_error(final_true, final_pred)

# Adam with a learning rate of 0.01; report the last-step MSE alongside the loss.
optimizer = keras.optimizers.Adam(learning_rate=0.01)
model.compile(
    optimizer=optimizer,
    loss="mse",
    metrics=[last_time_step_mse],
)

# SimpleRNN Cell Layer Normalization

In [5]:
# SimpleRNN memory cell that applies layer normalization before the activation
class LNSimpleRNNCell(keras.layers.Layer):
    """Recurrent cell computing: linear SimpleRNNCell step -> LayerNormalization -> activation."""

    def __init__(self, units, activation="tanh", **kwargs) -> None:
        super().__init__(**kwargs)
        # the cell's state and its output have the same width
        self.state_size = self.output_size = units
        # inner cell performs only the linear combination (no activation)
        self.simple_rnn_cell = keras.layers.SimpleRNNCell(units, activation=None)
        # normalization is applied to the result of the linear combination
        self.layer_norm = keras.layers.LayerNormalization()
        # the activation is applied last, after normalization
        self.activation = keras.activations.get(activation)

    def call(self, inputs, states):
        """Run one time step on `inputs` given the previous hidden `states`.

        Returns the normalized, activated output together with the new state
        list, which contains that same tensor.
        """
        # the inner cell's own new state is discarded: the normalized output
        # is used as the state instead
        linear_out, _ = self.simple_rnn_cell(inputs, states)
        normalized = self.layer_norm(linear_out)
        activated = self.activation(normalized)
        return activated, [activated]

In [7]:
# Wrap the custom memory cell in keras.layers.RNN (the base class for
# recurrent layers) to obtain full recurrent layers, then stack them.
model = keras.models.Sequential([
    keras.layers.RNN(
        cell=LNSimpleRNNCell(units=20),
        return_sequences=True,
        input_shape=[None, 1],
    ),
    keras.layers.RNN(cell=LNSimpleRNNCell(units=20), return_sequences=True),
    keras.layers.TimeDistributed(layer=keras.layers.Dense(units=10)),
])

Metal device set to: Apple M1

systemMemory: 16.00 GB
maxCacheSize: 5.33 GB



2023-02-28 12:45:41.898013: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2023-02-28 12:45:41.898157: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


# LSTM Cells

In [None]:
# Sequence-to-sequence model built from the ready-made LSTM layer.
model = keras.models.Sequential([
    keras.layers.LSTM(units=20, return_sequences=True, input_shape=[None, 1]),
    keras.layers.LSTM(units=20, return_sequences=True),
    keras.layers.TimeDistributed(layer=keras.layers.Dense(units=10)),
])

# The same architecture expressed by wrapping LSTM cells in the generic RNN
# layer. Both definitions are assigned to `model`, so only this second one
# remains after the cell runs.
model = keras.models.Sequential([
    keras.layers.RNN(
        cell=keras.layers.LSTMCell(units=20),
        return_sequences=True,
        input_shape=[None, 1],
    ),
    keras.layers.RNN(cell=keras.layers.LSTMCell(units=20), return_sequences=True),
    keras.layers.TimeDistributed(layer=keras.layers.Dense(units=10)),
])

# RNN with Convolutional Layer

In [None]:
# A 1D convolution extracts features from the input series before two GRU
# layers process them. With kernel_size=4, strides=2 and "valid" padding the
# convolution shortens and downsamples the sequence, so the targets are
# sliced with [:, 3::2] (skip the first 3 steps, keep every 2nd) to line up
# with the convolution's outputs.
model = keras.models.Sequential([
    keras.layers.Conv1D(
        filters=20,
        kernel_size=4,
        strides=2,
        padding="valid",
        input_shape=[None, 1],
    ),
    keras.layers.GRU(units=20, return_sequences=True),
    keras.layers.GRU(units=20, return_sequences=True),
    keras.layers.TimeDistributed(layer=keras.layers.Dense(units=10)),
])
model.compile(optimizer="adam", loss="mse", metrics=[last_time_step_mse])
history = model.fit(
    X_train,
    Y_train[:, 3::2],
    validation_data=(X_valid, Y_valid[:, 3::2]),
    epochs=20,
)