# Deep learning for timeseries with Recurrent Neural Networks

To start, we are going to look at a very simple case: predicting values along a sine curve.

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, Dense, LSTM, Dropout

tf.get_logger().setLevel('ERROR')

Our time series will consist simply of a sine wave:

In [None]:
n_steps = 150
time_steps = np.linspace(0, 30, n_steps)
sine_wave = np.sin(time_steps)

In [None]:
plt.plot(sine_wave, label='Original Sequence')
plt.xlabel('Time Step')
plt.ylabel('Value')
plt.title('Original Sequence')
plt.show()

We'll manually split this up into a training and a test set, where the test set consists of points in the future after the "times" of our training data.

In [None]:
train_size = int(n_steps * 0.75)
train = sine_wave[:train_size]
test = sine_wave[train_size:]

Each sample for time series prediction is itself a (short) time series.  So far our data is one long time series, but we can break it up into many samples, each a shorter window of consecutive points paired with the value that follows it.

In [None]:
def create_dataset(dataset, look_back=1):
    """Slice a series into (input window, next value) pairs.

    Each input is `look_back` consecutive entries of `dataset`; its target
    is the entry immediately following that window.

    Returns a tuple `(X, Y)` of NumPy arrays built from the windows and
    from their targets, in order of window start position.
    """
    n_samples = len(dataset) - look_back
    windows = [dataset[start:start + look_back] for start in range(n_samples)]
    targets = [dataset[start + look_back] for start in range(n_samples)]
    return np.array(windows), np.array(targets)

look_back = 2  # Number of previous time steps to use as input features
X_train, y_train = create_dataset(train, look_back)
X_test, y_test = create_dataset(test, look_back)

# Not strictly necessary here since we only have 1 feature
# but reshape into [samples, time steps, features]
X_train = X_train.reshape(X_train.shape[0],X_train.shape[1],1)
X_test = X_test.reshape(X_test.shape[0],X_test.shape[1],1)

In [None]:
X_train.shape

In [None]:
y_train.shape

In [None]:
for i in range(4):
    print(X_train[i], y_train[i])

Using a simple RNN model:

In [None]:
network = Sequential([
    SimpleRNN(units=10, activation='relu', return_sequences=True, input_shape=(2, 1)),
    SimpleRNN(units=10, activation='relu'),
    Dense(units=1)
])

In [None]:
network.summary()

Compile the model with an appropriate optimizer, loss function, and metric.  This is a regression problem, since the predictions are continuously valued.

In [None]:
network.compile(optimizer='adam', loss='mse', metrics=['mae'])

Train the model:

In [None]:
model = network.fit(X_train, y_train, 
                    epochs=100, 
                    verbose=0)

In [None]:
import matplotlib.pyplot as plt

history_dict = model.history
loss_values = history_dict['loss']
mae_values = history_dict['mae']

epochs = range(1, len(mae_values) + 1)

plt.plot(epochs, loss_values, 'bo', label='Training Loss')
plt.plot(epochs, mae_values, 'b', label='Training MAE')
plt.title('Training Loss and MAE')
plt.xlabel('Epochs')
plt.ylabel('Loss/MAE')
plt.legend()
plt.show()

Assessing the model's predictions on the training data:

In [None]:
X_train.shape

In [None]:
# Generate predictions
predicted_sequence = network.predict(X_train)

predicted_sequence.shape

In [None]:
# Plot the original and predicted sequences
plt.plot(y_train, label='Original Sequence')
plt.plot(predicted_sequence, label='Predicted Sequence')
plt.xlabel('Time Step')
plt.ylabel('Value')
plt.title('Original vs. Predicted Sequence')
plt.legend()
plt.show()

In [None]:
X_test[0]

In [None]:
network.predict(X_test[0].reshape(-1,2,1))

In [None]:
network.evaluate(X_test, y_test)

In [None]:
y_predict = network.predict(X_test)

In [None]:
mean_squared_error(y_test, y_predict)

# Improving the predictions

There are a few easy ways to try to tweak this model:
* change the length of the time series
  * try going from 2 to 5, and note the places you need to change
* increase the amount of data sampled within a given time range (may not be possible for real data)
  * try using 750 data points total instead of 150 (but keeping the sample time series length at 2 points)
* increase the number of layers
  * what happens with more? what happens with fewer?
* tune hyperparameters and/or add regularization constraints
  * do you think the model is overfitting?  and how would you assess that?

# LSTM

LSTMs (Long Short-Term Memory layers) are another kind of RNN.  They can be better suited for capturing long-range dependencies in sequences. The rest of the code remains largely the same, including preprocessing, data preparation, model building, training, and evaluation.

In [None]:
# Build the RNN model using LSTM
network = Sequential([
    LSTM(units=10, activation='relu', input_shape=(2, 1)),
    Dense(units=1)
])

In [None]:
network.summary()

In [None]:
# Compile the model
network.compile(optimizer='adam', loss='mse', metrics=['mae'])

In [None]:
# Train the model
model = network.fit(X_train, y_train, epochs=100, verbose=0)

In [None]:
import matplotlib.pyplot as plt

history_dict = model.history
loss_values = history_dict['loss']
mae_values = history_dict['mae']

epochs = range(1, len(mae_values) + 1)

plt.plot(epochs, loss_values, 'bo', label='Training Loss')
plt.plot(epochs, mae_values, 'b', label='Training MAE')
plt.title('Training Loss and MAE')
plt.xlabel('Epochs')
plt.ylabel('Loss/MAE')
plt.legend()
plt.show()

In [None]:
# Generate predictions
predicted_sequence = network.predict(X_train)

# Plot the original and predicted sequences
plt.plot(y_train, label='Original Sequence')
plt.plot(predicted_sequence, label='Predicted Sequence')
plt.xlabel('Time Step')
plt.ylabel('Value')
plt.title('Original vs. Predicted Sequence')
plt.legend()
plt.show()

In [None]:
X_test[0]

In [None]:
network.predict(X_test[0].reshape(-1,2,1))

In [None]:
network.evaluate(X_test, y_test)

In [None]:
y_predict = network.predict(X_test)

In [None]:
mean_squared_error(y_test, y_predict)

What happens if we increase the number of units and toss in another layer?

In [None]:
# Build the RNN model using LSTM
network = Sequential([
    LSTM(units=32, activation='relu', return_sequences=True, input_shape=(2, 1)),
    LSTM(units=32, activation='relu'),
    Dense(units=1)
])

In [None]:
network.summary()

In [None]:
# Compile the model
network.compile(optimizer='adam', loss='mse', metrics=['mae'])

In [None]:
# Train the model
model = network.fit(X_train, y_train, epochs=100, verbose=0)

In [None]:
import matplotlib.pyplot as plt

history_dict = model.history
loss_values = history_dict['loss']
mae_values = history_dict['mae']

epochs = range(1, len(mae_values) + 1)

plt.plot(epochs, loss_values, 'bo', label='Training Loss')
plt.plot(epochs, mae_values, 'b', label='Training MAE')
plt.title('Training Loss and MAE')
plt.xlabel('Epochs')
plt.ylabel('Loss/MAE')
plt.legend()
plt.show()

In [None]:
# Generate predictions
predicted_sequence = network.predict(X_train)

# Plot the original and predicted sequences
plt.plot(y_train, label='Original Sequence')
plt.plot(predicted_sequence, label='Predicted Sequence')
plt.xlabel('Time Step')
plt.ylabel('Value')
plt.title('Original vs. Predicted Sequence')
plt.legend()
plt.show()

In [None]:
X_test[0]

In [None]:
network.predict(X_test[0].reshape(-1,2,1))

In [None]:
network.evaluate(X_test, y_test)

In [None]:
y_predict = network.predict(X_test)

In [None]:
mean_squared_error(y_test, y_predict)

But perhaps we're overfitting?

In [None]:
# Split chronologically into train (first 50% of the points), validation
# (next 25%) and test (all remaining points) sets
train_size = int(n_steps * 0.5)
val_size = int(n_steps * 0.25)
train = sine_wave[:train_size]
val = sine_wave[train_size:train_size+val_size]
test = sine_wave[train_size+val_size:]

In [None]:
# Create our "samples", i.e., our sets of time series

def create_dataset(dataset, look_back=1):
    """Build supervised samples from a series with a sliding window.

    For every start position, the input is the `look_back` consecutive
    entries of `dataset` beginning there, and the target is the entry
    right after that window.

    Returns `(X, Y)` as NumPy arrays of the inputs and of the targets.
    """
    starts = range(len(dataset) - look_back)
    pairs = [(dataset[s:s + look_back], dataset[s + look_back]) for s in starts]
    X = np.array([window for window, _ in pairs])
    Y = np.array([target for _, target in pairs])
    return X, Y

look_back = 2  # Number of previous time steps to use as input features
X_train, y_train = create_dataset(train, look_back)
X_val, y_val = create_dataset(val, look_back)
X_test, y_test = create_dataset(test, look_back)

# Not strictly necessary here since we only have 1 feature
# but reshape into [samples, time steps, features]
X_train = X_train.reshape(X_train.shape[0],X_train.shape[1],1)
X_val = X_val.reshape(X_val.shape[0],X_val.shape[1],1)
X_test = X_test.reshape(X_test.shape[0],X_test.shape[1],1)

In [None]:
# Build the RNN model using LSTM
network = Sequential([
    LSTM(units=32, activation='relu', return_sequences=True, input_shape=(2, 1)),
    LSTM(units=32, activation='relu'),
    Dense(units=1)
])

In [None]:
network.summary()

In [None]:
# Compile the model
network.compile(optimizer='adam', loss='mse', metrics=['mae'])

In [None]:
# Train the model
model = network.fit(X_train, y_train, 
                    validation_data = (X_val, y_val),
                    epochs=100, 
                    verbose=0)

In [None]:
mae = model.history["mae"]
val_mae = model.history["val_mae"]
loss = model.history["loss"]
val_loss = model.history["val_loss"]
epochs = range(1, len(mae) + 1)
plt.plot(epochs, mae, "bo", label="Training mae")
plt.plot(epochs, val_mae, "b", label="Validation mae")
plt.title("Training and validation mae")
plt.legend()
plt.figure()
plt.plot(epochs, loss, "bo", label="Training loss")
plt.plot(epochs, val_loss, "b", label="Validation loss")
plt.title("Training and validation loss")
plt.legend()
plt.show()

In [None]:
# Generate predictions
predicted_sequence = network.predict(X_train)

# Plot the original and predicted sequences
plt.plot(y_train, label='Original Sequence')
plt.plot(predicted_sequence, label='Predicted Sequence')
plt.xlabel('Time Step')
plt.ylabel('Value')
plt.title('Original vs. Predicted Sequence')
plt.legend()
plt.show()

In [None]:
X_test[0]

In [None]:
network.predict(X_test[0].reshape(-1,2,1))

In [None]:
network.evaluate(X_test, y_test)

In [None]:
y_predict = network.predict(X_test)

In [None]:
mean_squared_error(y_test, y_predict)

To demonstrate adding some regularization, here we introduce a Dropout layer too:

In [None]:
# Build the RNN model using LSTM
network = Sequential([
    LSTM(units=32, activation='relu', return_sequences=True, input_shape=(2, 1)),
    Dropout(0.2),
    LSTM(units=32, activation='relu'),
    Dense(units=1)
])

In [None]:
network.summary()

In [None]:
# Compile the model
network.compile(optimizer='adam', loss='mse', metrics=['mae'])

In [None]:
# Train the model
model = network.fit(X_train, y_train, 
                    validation_data = (X_val, y_val),
                    epochs=100,
                    verbose=0)

In [None]:
mae = model.history["mae"]
val_mae = model.history["val_mae"]
loss = model.history["loss"]
val_loss = model.history["val_loss"]
epochs = range(1, len(mae) + 1)
plt.plot(epochs, mae, "bo", label="Training mae")
plt.plot(epochs, val_mae, "b", label="Validation mae")
plt.title("Training and validation mae")
plt.legend()
plt.figure()
plt.plot(epochs, loss, "bo", label="Training loss")
plt.plot(epochs, val_loss, "b", label="Validation loss")
plt.title("Training and validation loss")
plt.legend()
plt.show()

In [None]:
# Generate predictions
predicted_sequence = network.predict(X_train)

# Plot the original and predicted sequences
plt.plot(y_train, label='Original Sequence')
plt.plot(predicted_sequence, label='Predicted Sequence')
plt.xlabel('Time Step')
plt.ylabel('Value')
plt.title('Original vs. Predicted Sequence')
plt.legend()
plt.show()

In [None]:
X_test[0]

In [None]:
network.predict(X_test[0].reshape(-1,2,1))

In [None]:
network.evaluate(X_test, y_test)

In [None]:
y_predict = network.predict(X_test)

In [None]:
mean_squared_error(y_test, y_predict)

# New Time Series:  Airline Passenger Dataset

In [None]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

In [None]:
# Load the dataset
url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/airline-passengers.csv"
data = pd.read_csv(url)
passengers = data['Passengers'].values.astype(float)

In [None]:
data

In [None]:
# Normalize the data
# Fit the scaler on the training portion only (the first 75% of the series,
# the same fraction used for the train/test split) so that statistics of the
# held-out test range do not leak into preprocessing. The fitted scaling is
# then applied to the whole series, so values outside the training range may
# fall outside [0, 1].
scaler = MinMaxScaler(feature_range=(0, 1))
passengers = passengers.reshape(-1, 1)
n_fit = int(len(passengers) * 0.75)
scaler.fit(passengers[:n_fit])
passengers = scaler.transform(passengers)

In [None]:
# Prepare the data
train_size = int(len(passengers) * 0.75)
train = passengers[:train_size]
test = passengers[train_size:]

In [None]:
plt.figure(figsize=(10, 6))
plt.plot(scaler.inverse_transform(passengers))
plt.xlabel('Time')
plt.ylabel('Number of Passengers')
plt.title('Original Data for Air Passenger Forecasting using RNN')
plt.show()

In [None]:
def create_dataset(dataset, look_back=1):
    """Slice column 0 of a 2-D series into (input window, next value) pairs.

    Each input is `look_back` consecutive rows of column 0 of `dataset`;
    its target is the column-0 value of the row right after that window.

    Returns a tuple `(X, Y)` of NumPy arrays built from the windows and
    from their targets, in order of window start position.
    """
    n_samples = len(dataset) - look_back
    windows = [dataset[start:start + look_back, 0] for start in range(n_samples)]
    targets = [dataset[start + look_back, 0] for start in range(n_samples)]
    return np.array(windows), np.array(targets)

look_back = 12  # Number of previous time steps to use as input features
X_train, y_train = create_dataset(train, look_back)
X_test, y_test = create_dataset(test, look_back)

# Reshape the input to [samples, time steps, features]
# though here again we only have 1 feature
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

In [None]:
X_train.shape

In [None]:
# Build the RNN model
network = Sequential([
    SimpleRNN(units=4, activation='relu', input_shape=(look_back, 1)),
    Dense(units=1)
])

In [None]:
# Compile the model
network.compile(optimizer='adam', loss='mse', metrics=['mae'])

In [None]:
# Train the model
network.fit(X_train, y_train, 
            epochs=100, 
            verbose=0)

In [None]:
# Make predictions
train_predict = network.predict(X_train)
test_predict = network.predict(X_test)

In [None]:
# Inverse transform predictions and targets to the original scale.
# The scaler was fit on a single feature column, so the 1-D targets are
# reshaped to (n_samples, 1) before inverse-transforming instead of relying on
# broadcasting over a (1, n_samples) row. The result is transposed back to
# shape (1, n_samples) because the RMSE computation indexes y_train[0] and
# y_test[0].
train_predict = scaler.inverse_transform(train_predict)
y_train = scaler.inverse_transform(y_train.reshape(-1, 1)).T
test_predict = scaler.inverse_transform(test_predict)
y_test = scaler.inverse_transform(y_test.reshape(-1, 1)).T

In [None]:
# Calculate RMSE (Root Mean Squared Error)
train_rmse = np.sqrt(mean_squared_error(y_train[0], train_predict[:,0]))
test_rmse = np.sqrt(mean_squared_error(y_test[0], test_predict[:,0]))
print(f'Train RMSE: {train_rmse:.2f}')
print(f'Test RMSE: {test_rmse:.2f}')

In [None]:
# Plot the predicted vs. actual values
# There is one training prediction per training window, and the first target
# sits look_back steps into the series, so the predictions are written to
# rows [look_back, look_back + len(train_predict)); every other row stays NaN.
train_plot = np.empty_like(passengers)
train_plot[:,:] = np.nan
train_plot[look_back:len(train_predict)+look_back, :] = train_predict

# The test split begins at row len(train_predict) + look_back (the length of
# the training split), and its first target is a further look_back rows in,
# hence the look_back*2 offset.
test_plot = np.empty_like(passengers)
test_plot[:,:] = np.nan
test_plot[len(train_predict)+(look_back*2):len(passengers), :] = test_predict

plt.figure(figsize=(10, 6))
plt.plot(scaler.inverse_transform(passengers), label='Original Data')
plt.plot(train_plot, label='Training Predictions')
plt.plot(test_plot, label='Test Predictions')
plt.xlabel('Time')
plt.ylabel('Number of Passengers')
plt.title('Air Passenger Forecasting using RNN')
plt.legend()
plt.show()

## Trying with an LSTM version

Restarting with the original data again to be sure the processing steps are the same.

In [None]:
# Load the dataset
url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/airline-passengers.csv"
data = pd.read_csv(url)
passengers = data['Passengers'].values.astype(float)

In [None]:
# Normalize the data
# Fit the scaler on the training portion only (the first 75% of the series,
# the same fraction used for the train/test split) so that statistics of the
# held-out test range do not leak into preprocessing. The fitted scaling is
# then applied to the whole series, so values outside the training range may
# fall outside [0, 1].
scaler = MinMaxScaler(feature_range=(0, 1))
passengers = passengers.reshape(-1, 1)
n_fit = int(len(passengers) * 0.75)
scaler.fit(passengers[:n_fit])
passengers = scaler.transform(passengers)

In [None]:
# Prepare the data
train_size = int(len(passengers) * 0.75)
train = passengers[:train_size]
test = passengers[train_size:]

In [None]:
plt.figure(figsize=(10, 6))
plt.plot(scaler.inverse_transform(passengers))
plt.xlabel('Time')
plt.ylabel('Number of Passengers')
plt.title('Original Data for Air Passenger Forecasting using RNN')
plt.show()

In [None]:
def create_dataset(dataset, look_back=1):
    """Build supervised samples from column 0 of a 2-D series.

    For every start position, the input is the `look_back` consecutive
    column-0 values beginning there, and the target is the column-0 value
    of the row right after that window.

    Returns `(X, Y)` as NumPy arrays of the inputs and of the targets.
    """
    starts = range(len(dataset) - look_back)
    pairs = [
        (dataset[s:s + look_back, 0], dataset[s + look_back, 0])
        for s in starts
    ]
    X = np.array([window for window, _ in pairs])
    Y = np.array([target for _, target in pairs])
    return X, Y

look_back = 12  # Number of previous time steps to use as input features
X_train, y_train = create_dataset(train, look_back)
X_test, y_test = create_dataset(test, look_back)

# Reshape the input to [samples, time steps, features]
# though here again we only have 1 feature
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

In [None]:
# Build the LSTM model: a single LSTM layer over windows of look_back steps
# with one feature each, followed by a one-unit Dense output layer
network = Sequential([
    LSTM(units=50, activation='relu', input_shape=(look_back, 1)),
    Dense(units=1)
])

In [None]:
# Compile the model
network.compile(optimizer='adam', loss='mse', metrics=['mae'])

In [None]:
# Train the model
network.fit(X_train, y_train, 
            epochs=100, 
            verbose=0)

In [None]:
# Make predictions
train_predict = network.predict(X_train)
test_predict = network.predict(X_test)

In [None]:
# Inverse transform predictions and targets to the original scale.
# The scaler was fit on a single feature column, so the 1-D targets are
# reshaped to (n_samples, 1) before inverse-transforming instead of relying on
# broadcasting over a (1, n_samples) row. The result is transposed back to
# shape (1, n_samples) because the RMSE computation indexes y_train[0] and
# y_test[0].
train_predict = scaler.inverse_transform(train_predict)
y_train = scaler.inverse_transform(y_train.reshape(-1, 1)).T
test_predict = scaler.inverse_transform(test_predict)
y_test = scaler.inverse_transform(y_test.reshape(-1, 1)).T

In [None]:
# Calculate RMSE (Root Mean Squared Error)
train_rmse = np.sqrt(mean_squared_error(y_train[0], train_predict[:,0]))
test_rmse = np.sqrt(mean_squared_error(y_test[0], test_predict[:,0]))
print(f'Train RMSE: {train_rmse:.2f}')
print(f'Test RMSE: {test_rmse:.2f}')

In [None]:
# Plot the predicted vs. actual values
# There is one training prediction per training window, and the first target
# sits look_back steps into the series, so the predictions are written to
# rows [look_back, look_back + len(train_predict)); every other row stays NaN.
train_plot = np.empty_like(passengers)
train_plot[:,:] = np.nan
train_plot[look_back:len(train_predict)+look_back, :] = train_predict

# The test split begins at row len(train_predict) + look_back (the length of
# the training split), and its first target is a further look_back rows in,
# hence the look_back*2 offset.
test_plot = np.empty_like(passengers)
test_plot[:,:] = np.nan
test_plot[len(train_predict)+(look_back*2):len(passengers), :] = test_predict

plt.figure(figsize=(10, 6))
plt.plot(scaler.inverse_transform(passengers), label='Original Data')
plt.plot(train_plot, label='Training Predictions')
plt.plot(test_plot, label='Test Predictions')
plt.xlabel('Time')
plt.ylabel('Number of Passengers')
plt.title('Air Passenger Forecasting using RNN')
plt.legend()
plt.show()

## One more time, with a multi-layered LSTM version

Restarting with the original data again to be sure the processing steps are the same.

In [None]:
# Load the dataset
url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/airline-passengers.csv"
data = pd.read_csv(url)
passengers = data['Passengers'].values.astype(float)

In [None]:
# Normalize the data
# Fit the scaler on the training portion only (the first 75% of the series,
# the same fraction used for the train/test split) so that statistics of the
# held-out test range do not leak into preprocessing. The fitted scaling is
# then applied to the whole series, so values outside the training range may
# fall outside [0, 1].
scaler = MinMaxScaler(feature_range=(0, 1))
passengers = passengers.reshape(-1, 1)
n_fit = int(len(passengers) * 0.75)
scaler.fit(passengers[:n_fit])
passengers = scaler.transform(passengers)

In [None]:
# Prepare the data
train_size = int(len(passengers) * 0.75)
train = passengers[:train_size]
test = passengers[train_size:]

In [None]:
plt.figure(figsize=(10, 6))
plt.plot(scaler.inverse_transform(passengers))
plt.xlabel('Time')
plt.ylabel('Number of Passengers')
plt.title('Original Data for Air Passenger Forecasting using RNN')
plt.show()

In [None]:
def create_dataset(dataset, look_back=1):
    """Turn column 0 of a 2-D series into sliding-window samples.

    Inputs are runs of `look_back` consecutive column-0 values; the target
    for each run is the column-0 value of the row that follows it.

    Returns `(X, Y)`: NumPy arrays of the input windows and of the
    matching targets.
    """
    n_samples = len(dataset) - look_back
    X = np.array([dataset[k:k + look_back, 0] for k in range(n_samples)])
    # Targets are the rows from index look_back onward, one per window.
    Y = np.array([dataset[k, 0] for k in range(look_back, look_back + n_samples)])
    return X, Y

look_back = 12  # Number of previous time steps to use as input features
X_train, y_train = create_dataset(train, look_back)
X_test, y_test = create_dataset(test, look_back)

# Reshape the input to [samples, time steps, features]
# though here again we only have 1 feature
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

In [None]:
# Build the multi-layer LSTM model with dropout
network = Sequential([
    LSTM(units=50, activation='relu', return_sequences=True, input_shape=(look_back, 1)),
    Dropout(0.2),
    LSTM(units=50, activation='relu', return_sequences=True),
    Dropout(0.2),
    LSTM(units=50, activation='relu'),
    Dropout(0.2),
    Dense(units=1)
])

In [None]:
# Compile the model
network.compile(optimizer='adam', loss='mse', metrics=['mae'])

In [None]:
# Train the model
network.fit(X_train, y_train, 
            epochs=100,
            verbose=0)

In [None]:
# Make predictions
train_predict = network.predict(X_train)
test_predict = network.predict(X_test)

In [None]:
# Inverse transform predictions and targets to the original scale.
# The scaler was fit on a single feature column, so the 1-D targets are
# reshaped to (n_samples, 1) before inverse-transforming instead of relying on
# broadcasting over a (1, n_samples) row. The result is transposed back to
# shape (1, n_samples) because the RMSE computation indexes y_train[0] and
# y_test[0].
train_predict = scaler.inverse_transform(train_predict)
y_train = scaler.inverse_transform(y_train.reshape(-1, 1)).T
test_predict = scaler.inverse_transform(test_predict)
y_test = scaler.inverse_transform(y_test.reshape(-1, 1)).T

In [None]:
# Calculate RMSE (Root Mean Squared Error)
train_rmse = np.sqrt(mean_squared_error(y_train[0], train_predict[:,0]))
test_rmse = np.sqrt(mean_squared_error(y_test[0], test_predict[:,0]))
print(f'Train RMSE: {train_rmse:.2f}')
print(f'Test RMSE: {test_rmse:.2f}')

In [None]:
# Plot the predicted vs. actual values
# There is one training prediction per training window, and the first target
# sits look_back steps into the series, so the predictions are written to
# rows [look_back, look_back + len(train_predict)); every other row stays NaN.
train_plot = np.empty_like(passengers)
train_plot[:,:] = np.nan
train_plot[look_back:len(train_predict)+look_back, :] = train_predict

# The test split begins at row len(train_predict) + look_back (the length of
# the training split), and its first target is a further look_back rows in,
# hence the look_back*2 offset.
test_plot = np.empty_like(passengers)
test_plot[:,:] = np.nan
test_plot[len(train_predict)+(look_back*2):len(passengers), :] = test_predict

plt.figure(figsize=(10, 6))
plt.plot(scaler.inverse_transform(passengers), label='Original Data')
plt.plot(train_plot, label='Training Predictions')
plt.plot(test_plot, label='Test Predictions')
plt.xlabel('Time')
plt.ylabel('Number of Passengers')
plt.title('Air Passenger Forecasting using RNN')
plt.legend()
plt.show()