# Forecasting methods (II/II)

Last week, we analyzed simulated data. Today, we will focus on the dataset from the Kaggle competition.

We will compare various neural network architectures, including multilayer perceptrons (MLP), convolutional neural networks (CNN) and recurrent neural networks (RNN).

Your task is to understand the role of the different hyperparameters for each architecture. Then, you should identify the best hyperparameters to generate out-of-sample forecasts.

Some important hyperparameters include: LAG (the number of lagged values), LATENT_DIM (the number of units in the layer), BATCH_SIZE (number of samples per mini-batch), EPOCHS (the number of epochs), the optimizer and the early stop strategy.


In [None]:
# Third-party numerical / plotting stack used throughout the notebook
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Default size applied to every matplotlib figure created below
plt.rcParams['figure.figsize'] = [10, 5]
import os
# Show the working directory: the relative paths used below
# ("../compet_data/..." and "../work/...") are resolved against it
print(os.getcwd())
import datetime as dt

# Wildcard imports from the project helpers.
# NOTE(review): names used below without a local definition (clean, X_y,
# embed_data, plot_learning_curves, mse, ...) presumably come from these two
# modules — verify against main/utils.
from main.utils.utils_methods import *
from main.utils.utils import *

import keras
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.layers import GRU, SimpleRNN, LSTM


from main.module.mlp_multioutput import mlp_multioutput
from main.module.cnn_dilated import cnn_dilated
from main.module.rnn_vector_output import rnn_vector_output
from main.module.rnn_encoder_decoder import rnn_encoder_decoder

%load_ext autoreload
%autoreload 2



In [None]:
# Read the Kaggle training set, using the parsed "Day" column as index,
# and put it on a daily frequency grid
data_all = pd.read_csv(
    "../compet_data/public/train.csv",
    index_col="Day",
    parse_dates=True,
).asfreq("D")
data_all.head()



In [None]:
# Pick the series to work with ("series-1", "series-2", ..., "series-90")
# and expose it as a one-column frame named "traffic"
which_series = "series-1"
selected_series = data_all[which_series]
data_raw = selected_series.rename("traffic").to_frame()
data_raw.plot()

In [None]:
# First day of the validation period and of the test period
valid_start_dt = '2017-04-01'
test_start_dt = '2017-07-15'

# Clean the training and validation data, i.e. everything before the test period
before_test = data_raw.index < test_start_dt
learn_raw = data_raw.loc[before_test, ['traffic']].copy()

# Two successive passes of clean(); its second return value is not used here
learn_cleaned_1, _ = clean(learn_raw.squeeze())
learn_cleaned_2, _ = clean(learn_cleaned_1)
learn_cleaned = learn_cleaned_2.copy().to_frame()
learn_cleaned.plot()

# Full series in which only the pre-test rows are replaced by their cleaned
# values; the test period keeps the raw observations
data_cleaned = data_raw.copy()
data_cleaned.loc[before_test] = learn_cleaned

In [None]:
# HORIZON: number of forecast steps (columns t+1 ... t+HORIZON below)
# LAG: number of lagged values given to the models as input
HORIZON, LAG = 7, 4

In [None]:
# Build and keep the input-output pairs of the raw (untransformed) test data

# Start LAG days before the test period so that the first test window
# has its lagged inputs available
test_start = dt.datetime.strptime(test_start_dt, '%Y-%m-%d')
look_back_dt = test_start - dt.timedelta(days=LAG)
in_test_window = data_raw.index >= look_back_dt
test_raw = data_raw.copy()[in_test_window][['traffic']]

# Time offsets (relative to the forecast origin) and columns of each tensor
tensor_structure = {
    'encoder_input': (range(-LAG + 1, 1), ['traffic']),
    'decoder_input': (range(0, HORIZON), ['traffic']),
}
test_inputs_raw, _, _ = X_y(test_raw, HORIZON, tensor_structure)

In [None]:
# What kind of transformations do you want to apply?
log_transform = True
seasonal_difference = True

# Optional log transform of the cleaned series
data_before_diff = data_cleaned.copy()
if log_transform:
    # NOTE(review): np.log gives -inf for zero values — confirm the series is strictly positive
    data_before_diff = np.log(data_before_diff)
    data_before_diff.plot()

# Optional seasonal differencing with period m
data_final = data_before_diff.copy()
if seasonal_difference:
    m = 7  # seasonal period, in number of (daily) observations
    # The seasonal reference below repeats whole seasons, so the horizon
    # must cover an integer number of them (simpler to implement)
    if HORIZON % m != 0:
        raise ValueError("HORIZON must be a multiple of the seasonal period m")
    nb_season = HORIZON // m
    data_final = data_final.diff(m)  # careful: pd.diff vs np.diff

    # One forecast origin per test sample: from the day before the test period
    # up to the last date that still leaves HORIZON observed values after it
    first_origin = dt.datetime.strptime(test_start_dt, '%Y-%m-%d') - dt.timedelta(days=1)
    last_origin = data_cleaned.index[-1] - dt.timedelta(days=HORIZON)
    list_dates = pd.date_range(first_origin, last_origin)

    # For each origin, the last m undifferenced values (ending at the origin),
    # repeated nb_season times; these are added back to the forecasts later
    # to undo the seasonal differencing
    reference_data = [
        np.tile(
            data_before_diff.loc[
                pd.date_range(origin_dt - dt.timedelta(days=m - 1), origin_dt)
            ]["traffic"].values,
            nb_season,
        )
        for origin_dt in list_dates
    ]
    reference_season = np.vstack(reference_data)


data_final.plot()

In [None]:
# TRAINING/VALIDATION/TEST DATA (TRANSFORMED)

# Training data: everything before the validation period
train = data_final.copy()[data_final.index < valid_start_dt][['traffic']]

# Validation data: starts LAG days before valid_start_dt so that the first
# validation window has its lagged inputs, and stops before test_start_dt so
# that the validation set (used for early stopping and checkpoint selection)
# does not overlap the test period
look_back_dt = dt.datetime.strptime(valid_start_dt, '%Y-%m-%d') - dt.timedelta(days=LAG)
valid = data_final.copy()[(data_final.index >= look_back_dt) & (data_final.index < test_start_dt)][['traffic']]

# Test data: from LAG days before test_start_dt to the end of the series
look_back_dt = dt.datetime.strptime(test_start_dt, '%Y-%m-%d') - dt.timedelta(days=LAG)
test = data_final.copy()[(data_final.index >= look_back_dt)][['traffic']]

In [None]:
# Overlay the three (transformed) splits on a single figure
for split in (train, valid, test):
    plt.plot(split)
plt.show()

In [None]:
# EMBEDDING
# Turn each split into model inputs: tensor-style inputs (used by the CNN/RNN
# cells) and X/y pairs (used by the MLP cells)
(
    train_inputs, valid_inputs, test_inputs,
    X_train, y_train,
    X_valid, y_valid,
    X_test, y_test,
) = embed_data(train, valid, test, HORIZON, LAG)

# Preview the first rows of each embedded split
# (a notebook only renders the last expression of the cell)
for inputs in (train_inputs, valid_inputs):
    inputs.dataframe.head()
test_inputs.dataframe.head()

Choose which loss function you want to experiment with. It is used later in the code to fit and evaluate a neural network model.

In [None]:
# Loss minimised when fitting the model parameters ('mae' is the alternative)
loss_fct = 'mse'
# Function used to score the test predictions (alternatives: mae, mape, smape)
accuracy_measure = mse

# True values, one column per forecast step t+1 ... t+HORIZON
horizon_labels = [f"t+{step}" for step in range(1, HORIZON + 1)]
true_values = pd.DataFrame(test_inputs["target"], columns=horizon_labels)


## Recursive MLP


In [None]:
#########################
# Hyperparameters of the recursive MLP
file_header = "model_" + "mlp_recursive"
verbose = 0

optimizer_adam = keras.optimizers.Adam(learning_rate=0.01)
# NOTE: with patience equal to EPOCHS, early stopping can never trigger;
# lower the patience to actually enable it
earlystop = EarlyStopping(monitor='val_loss', min_delta=0, patience=100)

LATENT_DIM = 5   # number of units in the layer (see mlp_multioutput)
BATCH_SIZE = 32  # number of samples per mini-batch
EPOCHS = 100     # maximum number of times the training algorithm will cycle through all samples
loss = loss_fct

# One checkpoint file per improving epoch. The deprecated `period=1` argument
# is omitted: saving at every epoch is already the default
best_val = ModelCheckpoint('../work/' + file_header + '_{epoch:02d}.h5', save_best_only=True, mode='min')
#########################

# Train a one-step-ahead model (horizon 1) on the same LAG inputs
_, _, _, X_train_onestep, y_train_onestep, X_valid_onestep, y_valid_onestep, _, _ = embed_data(train, valid, test, 1, LAG, freq=None, variable='traffic')
model_mlp_recursive, history_mlp_recursive = mlp_multioutput(X_train_onestep, y_train_onestep, X_valid_onestep, y_valid_onestep,
                        LATENT_DIM=LATENT_DIM,
                        BATCH_SIZE=BATCH_SIZE,
                        EPOCHS=EPOCHS,
                        LAG=LAG,
                        HORIZON=1,
                        loss=loss,
                        optimizer=optimizer_adam,
                        earlystop=earlystop,
                        best_val=best_val,
                        verbose=verbose)
plot_learning_curves(history_mlp_recursive)

# Reload the weights of the epoch with the lowest validation loss
# (history is 0-indexed, checkpoint file names are 1-indexed)
best_epoch = np.argmin(np.array(history_mlp_recursive.history['val_loss'])) + 1
filepath = '../work/' + file_header + '_{:02d}.h5'
model_mlp_recursive.load_weights(filepath.format(best_epoch))

# Recursive forecasting: predict one step, drop the first input column, append
# the prediction as the last column, and repeat HORIZON times.
# This works on a copy so that X_test itself is left untouched: the
# multi-output MLP cell below predicts from X_test as well.
# NOTE(review): assumes the columns of X_test are ordered from the oldest to
# the most recent lag — confirm against embed_data.
X_recursive = X_test.copy()
step_predictions = []
for h in range(HORIZON):
    pred = model_mlp_recursive.predict(X_recursive)
    shifted_inputs = np.hstack((np.delete(X_recursive.to_numpy(), 0, 1), pred))
    X_recursive = pd.DataFrame(shifted_inputs, index=X_recursive.index, columns=X_recursive.columns)
    step_predictions.append(pred)
predictions = np.hstack(step_predictions)

# Undo the transformations in reverse order: seasonal differencing, then log
if seasonal_difference:
    predictions = predictions + reference_season

if log_transform:
    predictions = np.exp(predictions)

predictions_mlp_recursive = predictions

predictions_mlp_recursive = pd.DataFrame(predictions_mlp_recursive, columns=['t+' + str(t) for t in range(1, HORIZON + 1)])

In [None]:
# Observed test values on the original (untransformed) scale,
# one column per forecast step
horizon_labels = [f"t+{step}" for step in range(1, HORIZON + 1)]
true_values = pd.DataFrame(test_inputs_raw["target"], columns=horizon_labels)

# Score every forecast step separately, then average over the horizon
results_mlp_recursive = [
    accuracy_measure(true_values[label], predictions_mlp_recursive[label])
    for label in horizon_labels
]

print(results_mlp_recursive)
print(np.mean(results_mlp_recursive))

In [None]:
# Observed values vs. recursive-MLP forecasts at step t+1
plt.plot(true_values["t+1"], linestyle="-", marker="o")
plt.plot(predictions_mlp_recursive["t+1"], linestyle="-", marker="o")

# Multioutput MLP

In [None]:
#########################
# Hyperparameters of the multi-output MLP
file_header = "model_" + "mlp_multioutput"
verbose = 0

optimizer_adam = keras.optimizers.Adam(learning_rate=0.01)
# NOTE: with patience equal to EPOCHS, early stopping can never trigger;
# lower the patience to actually enable it
earlystop = EarlyStopping(monitor='val_loss', min_delta=0, patience=100)

LATENT_DIM = 5   # number of units in the layer (see mlp_multioutput)
BATCH_SIZE = 32  # number of samples per mini-batch
EPOCHS = 100     # maximum number of times the training algorithm will cycle through all samples
loss = loss_fct

# One checkpoint file per improving epoch. The deprecated `period=1` argument
# is omitted: saving at every epoch is already the default
best_val = ModelCheckpoint('../work/' + file_header + '_{epoch:02d}.h5', save_best_only=True, mode='min')
#########################

# Train a single model that outputs all HORIZON steps at once
model_mlp_multioutput, history_mlp_multioutput = mlp_multioutput(X_train, y_train, X_valid, y_valid,
                        LATENT_DIM=LATENT_DIM,
                        BATCH_SIZE=BATCH_SIZE,
                        EPOCHS=EPOCHS,
                        LAG=LAG,
                        HORIZON=HORIZON,
                        loss=loss,
                        optimizer=optimizer_adam,
                        earlystop=earlystop,
                        best_val=best_val,
                        verbose=verbose)
plot_learning_curves(history_mlp_multioutput)

# Reload the weights of the epoch with the lowest validation loss
# (history is 0-indexed, checkpoint file names are 1-indexed)
best_epoch = np.argmin(np.array(history_mlp_multioutput.history['val_loss'])) + 1
filepath = '../work/' + file_header + '_{:02d}.h5'
model_mlp_multioutput.load_weights(filepath.format(best_epoch))




In [None]:
# Forecast on the transformed scale, then undo the transformations
# in reverse order (seasonal differencing first, log second)
forecast = model_mlp_multioutput.predict(X_test)
if seasonal_difference:
    forecast = forecast + reference_season
if log_transform:
    forecast = np.exp(forecast)

# One column per forecast step t+1 ... t+HORIZON
horizon_labels = [f"t+{step}" for step in range(1, HORIZON + 1)]
predictions_mlp_multioutput = pd.DataFrame(forecast, columns=horizon_labels)



In [None]:
# Score every forecast step separately, then average over the horizon
results_mlp_multioutput = [
    accuracy_measure(true_values[f"t+{step}"], predictions_mlp_multioutput[f"t+{step}"])
    for step in range(1, HORIZON + 1)
]

print(results_mlp_multioutput)
print(np.mean(results_mlp_multioutput))



In [None]:
# Observed values vs. multi-output-MLP forecasts at step t+1
plt.plot(true_values["t+1"], linestyle="-", marker="o")
plt.plot(predictions_mlp_multioutput["t+1"], linestyle="-", marker="o")

# 1-D Convolutional Neural Networks (CNN)

Read and try to understand the function *cnn_dilated*. You can try different number of filters and filter sizes.

In [None]:
#########################
# Hyperparameters of the dilated CNN
file_header = "model_" + "cnn"
verbose = 0

# Optimizer given by name; keras.optimizers.Adam(learning_rate=0.01) is an
# alternative worth trying
optimizer_rmsprop = 'RMSprop'

# NOTE: with patience equal to EPOCHS, early stopping can never trigger;
# lower the patience to actually enable it
earlystop = EarlyStopping(monitor='val_loss', min_delta=0, patience=100)

# NOTE(review): presumably the number of convolution filters — confirm in cnn_dilated
LATENT_DIM = 5

KERNEL_SIZE = 2  # for CNN

BATCH_SIZE = 32  # number of samples per mini-batch
EPOCHS = 100     # maximum number of times the training algorithm will cycle through all samples
loss = loss_fct

# One checkpoint file per improving epoch. The deprecated `period=1` argument
# is omitted: saving at every epoch is already the default
best_val = ModelCheckpoint('../work/' + file_header + '_{epoch:02d}.h5', save_best_only=True, mode='min')
#########################

model_cnn, history_cnn = cnn_dilated(train_inputs, valid_inputs,
                        LATENT_DIM=LATENT_DIM,
                        KERNEL_SIZE=KERNEL_SIZE,
                        BATCH_SIZE=BATCH_SIZE,
                        EPOCHS=EPOCHS,
                        LAG=LAG,
                        HORIZON=HORIZON,
                        loss=loss,
                        optimizer=optimizer_rmsprop,
                        earlystop=earlystop,
                        best_val=best_val,
                        verbose=verbose)

plot_learning_curves(history_cnn)

# Reload the weights of the epoch with the lowest validation loss
# (history is 0-indexed, checkpoint file names are 1-indexed)
best_epoch = np.argmin(np.array(history_cnn.history['val_loss'])) + 1
filepath = '../work/' + file_header + '_{:02d}.h5'
model_cnn.load_weights(filepath.format(best_epoch))




In [None]:
# Forecast on the transformed scale, then undo the transformations
# in reverse order (seasonal differencing first, log second)
forecast = model_cnn.predict(test_inputs['encoder_input'])
if seasonal_difference:
    forecast = forecast + reference_season
if log_transform:
    forecast = np.exp(forecast)

# One column per forecast step t+1 ... t+HORIZON
horizon_labels = [f"t+{step}" for step in range(1, HORIZON + 1)]
predictions_cnn = pd.DataFrame(forecast, columns=horizon_labels)



In [None]:
# Score every forecast step separately, then average over the horizon
results_cnn = [
    accuracy_measure(true_values[f"t+{step}"], predictions_cnn[f"t+{step}"])
    for step in range(1, HORIZON + 1)
]

print(results_cnn)
print(np.mean(results_cnn))

In [None]:
# Observed values vs. CNN forecasts at step t+1
plt.plot(true_values["t+1"], linestyle="-", marker="o")
plt.plot(predictions_cnn["t+1"], linestyle="-", marker="o")

# RNN vector-output

Read and try to understand the function *rnn_vector_output*. You can try different RNN architectures (GRU, LSTM).

In [None]:

#########################
# Hyperparameters of the vector-output RNN
file_header = "model_" + "rnn_vector_output"
verbose = 0

optimizer_adam = keras.optimizers.Adam(learning_rate=0.01)

# NOTE: with patience equal to EPOCHS, early stopping can never trigger;
# lower the patience to actually enable it
earlystop = EarlyStopping(monitor='val_loss', min_delta=0, patience=100)

RECURRENT_MODEL = GRU  # recurrent layer class; alternatives: SimpleRNN, LSTM

LATENT_DIM = 5   # number of units in the RNN layer
BATCH_SIZE = 32  # number of samples per mini-batch
EPOCHS = 100     # maximum number of times the training algorithm will cycle through all samples
loss = loss_fct

# One checkpoint file per improving epoch. The deprecated `period=1` argument
# is omitted: saving at every epoch is already the default
best_val = ModelCheckpoint('../work/' + file_header + '_{epoch:02d}.h5', save_best_only=True, mode='min')
#########################

model_rnn_vector_output, history_rnn_vector_output = rnn_vector_output(train_inputs, valid_inputs,
                        RECURRENT_MODEL=RECURRENT_MODEL,
                        LATENT_DIM=LATENT_DIM,
                        BATCH_SIZE=BATCH_SIZE,
                        EPOCHS=EPOCHS,
                        LAG=LAG,
                        HORIZON=HORIZON,
                        loss=loss,
                        optimizer=optimizer_adam,
                        earlystop=earlystop,
                        best_val=best_val,
                        verbose=verbose)

plot_learning_curves(history_rnn_vector_output)

# Reload the weights of the epoch with the lowest validation loss
# (history is 0-indexed, checkpoint file names are 1-indexed)
best_epoch = np.argmin(np.array(history_rnn_vector_output.history['val_loss'])) + 1
filepath = '../work/' + file_header + '_{:02d}.h5'
model_rnn_vector_output.load_weights(filepath.format(best_epoch))

In [None]:
# Forecast on the transformed scale, then undo the transformations
# in reverse order (seasonal differencing first, log second)
forecast = model_rnn_vector_output.predict(test_inputs['encoder_input'])
if seasonal_difference:
    forecast = forecast + reference_season
if log_transform:
    forecast = np.exp(forecast)

# One column per forecast step t+1 ... t+HORIZON
horizon_labels = [f"t+{step}" for step in range(1, HORIZON + 1)]
predictions_rnn_vector_output = pd.DataFrame(forecast, columns=horizon_labels)


In [None]:
# Score every forecast step separately, then average over the horizon
results_rnn_vector_output = [
    accuracy_measure(true_values[f"t+{step}"], predictions_rnn_vector_output[f"t+{step}"])
    for step in range(1, HORIZON + 1)
]

print(results_rnn_vector_output)
print(np.mean(results_rnn_vector_output))

In [None]:
# Observed values vs. vector-output-RNN forecasts at step t+1
plt.plot(true_values["t+1"], linestyle="-", marker="o")
plt.plot(predictions_rnn_vector_output["t+1"], linestyle="-", marker="o")


# RNN encoder-decoder

Read and try to understand the function *rnn_encoder_decoder*. You can try different RNN architectures (GRU, LSTM).

In [None]:

#########################
# Hyperparameters of the encoder-decoder RNN
file_header = "model_" + "rnn_encoder_decoder"
verbose = 0

# Alternative optimizer, not used below; pass it as `optimizer` to try it
optimizer_adam = keras.optimizers.Adam(learning_rate=0.01)
# Optimizer actually used, given by name. It is defined here so that this cell
# does not depend on the CNN cell having been run first
optimizer_rmsprop = 'RMSprop'

# NOTE: with patience equal to EPOCHS, early stopping can never trigger;
# lower the patience to actually enable it
earlystop = EarlyStopping(monitor='val_loss', min_delta=0, patience=100)

RECURRENT_MODEL = GRU  # recurrent layer class; alternatives: SimpleRNN, LSTM

LATENT_DIM = 5   # number of units in the RNN layer
BATCH_SIZE = 32  # number of samples per mini-batch
EPOCHS = 100     # maximum number of times the training algorithm will cycle through all samples
loss = loss_fct

# One checkpoint file per improving epoch. The deprecated `period=1` argument
# is omitted: saving at every epoch is already the default
best_val = ModelCheckpoint('../work/' + file_header + '_{epoch:02d}.h5', save_best_only=True, mode='min')
#########################

model_rnn_encoder_decoder, history_rnn_encoder_decoder = rnn_encoder_decoder(train_inputs, valid_inputs,
                        RECURRENT_MODEL=RECURRENT_MODEL,
                        LATENT_DIM=LATENT_DIM,
                        BATCH_SIZE=BATCH_SIZE,
                        EPOCHS=EPOCHS,
                        LAG=LAG,
                        HORIZON=HORIZON,
                        loss=loss,
                        optimizer=optimizer_rmsprop,
                        earlystop=earlystop,
                        best_val=best_val,
                        verbose=verbose)

plot_learning_curves(history_rnn_encoder_decoder)

# Reload the weights of the epoch with the lowest validation loss
# (history is 0-indexed, checkpoint file names are 1-indexed)
best_epoch = np.argmin(np.array(history_rnn_encoder_decoder.history['val_loss'])) + 1
filepath = '../work/' + file_header + '_{:02d}.h5'
model_rnn_encoder_decoder.load_weights(filepath.format(best_epoch))

In [None]:
# Forecast on the transformed scale, then undo the transformations
# in reverse order (seasonal differencing first, log second)
forecast = model_rnn_encoder_decoder.predict(test_inputs['encoder_input'])
if seasonal_difference:
    forecast = forecast + reference_season
if log_transform:
    forecast = np.exp(forecast)

# One column per forecast step t+1 ... t+HORIZON
horizon_labels = [f"t+{step}" for step in range(1, HORIZON + 1)]
predictions_rnn_encoder_decoder = pd.DataFrame(forecast, columns=horizon_labels)

In [None]:
# Score every forecast step separately, then average over the horizon
results_rnn_encoder_decoder = [
    accuracy_measure(true_values[f"t+{step}"], predictions_rnn_encoder_decoder[f"t+{step}"])
    for step in range(1, HORIZON + 1)
]

print(results_rnn_encoder_decoder)
print(np.mean(results_rnn_encoder_decoder))

In [None]:
# Observed values vs. encoder-decoder-RNN forecasts at step t+1
plt.plot(true_values["t+1"], linestyle="-", marker="o")
plt.plot(predictions_rnn_encoder_decoder["t+1"], linestyle="-", marker="o")

In [None]:
# Forecast errors over the forecast horizon, one labelled curve per model
error_curves = [
    ("MLP (recursive)", results_mlp_recursive, 'green'),
    ("MLP (multi-output)", results_mlp_multioutput, 'blue'),
    ("CNN", results_cnn, 'orange'),
    ("RNN (vector output)", results_rnn_vector_output, 'red'),
    ("RNN (encoder-decoder)", results_rnn_encoder_decoder, 'black'),
]
for label, errors, color in error_curves:
    # x axis is the forecast step h (1 for t+1, 2 for t+2, ...)
    plt.plot(range(1, len(errors) + 1), errors, color=color, marker='o', label=label)
plt.xlabel("forecast step h (t+h)")
plt.ylabel("forecast error")
plt.legend()
plt.show()



We encourage you to try other combinations of transformations and forecasting strategies. Note that certain series might need different transformations.