****Import required packages****

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

****Import Tensorflow****

In [None]:
import tensorflow as tf
# Ask TensorFlow to grow GPU memory on demand for every visible GPU.
# The notebook needs this to work around an issue with cuDNN.
gpu_devices = tf.config.experimental.list_physical_devices('GPU')
for device in gpu_devices:
    tf.config.experimental.set_memory_growth(device, True)

In [None]:
tf.random.set_seed(42)

****Import Keras****

In [None]:
from tensorflow import keras

In [None]:
data_filename = '../data/yahoo_stock.csv'

In [None]:
yahoo_stock_prices = pd.read_csv(data_filename)

In [None]:
yahoo_stock_prices.shape

In [None]:
yahoo_stock_prices_columns = list(yahoo_stock_prices.columns)

In [None]:
print(yahoo_stock_prices_columns)

In [None]:
yahoo_stock_prices.head()

In [None]:
type(yahoo_stock_prices['Date'][0])

In [None]:
import datetime

In [None]:
start_date_str = yahoo_stock_prices['Date'][0]
start_date_str

In [None]:
print(type(start_date_str))

In [None]:
end_date_str = yahoo_stock_prices['Date'][yahoo_stock_prices.shape[0] - 1]
end_date_str

In [None]:
print(type(end_date_str))

In [None]:
start_date = datetime.datetime.strptime(start_date_str, '%Y-%m-%d')

In [None]:
print(start_date)

In [None]:
end_date = datetime.datetime.strptime(end_date_str, '%Y-%m-%d')

In [None]:
print(end_date)

In [None]:
duration = end_date - start_date

In [None]:
print(duration)

In [None]:
yahoo_stock_prices.describe()

In [None]:
yahoo_stock_prices.dtypes

****Plot the time series of the closing price, i.e. the target variable****

In [None]:
closing_prices = yahoo_stock_prices['Close'].values

In [None]:
# First differences: each closing price minus the one before it
# (the result is one element shorter than closing_prices).
differenced_closing_prices = np.diff(closing_prices)

In [None]:
# Log-differences: difference of the logs, not the log of the differences.
# A first difference is negative whenever the series decreases, and np.log10
# yields NaN for negative input (and -inf for zero), so taking the log of
# the differenced series would blank out those points in the plot.
log_differenced_closing_prices = np.diff(np.log10(closing_prices))

In [None]:
plt.plot(closing_prices)

In [None]:
plt.plot(differenced_closing_prices)

In [None]:
plt.plot(log_differenced_closing_prices)

****Check for missing values****

In [None]:
yahoo_stock_prices.isnull().any()

In [None]:
timeseries = yahoo_stock_prices.iloc[:, 1:]

In [None]:
timeseries.head()

In [None]:
timeseries.describe()

****Normalization****

In [None]:
# Estimate the normalization statistics on the leading (training) part of the
# series only, so that no information from the held-out test rows leaks into
# the model inputs.
# NOTE: train_fraction must stay in sync with test_size=0.1 of the
# chronological train/test split performed later in this notebook; the row
# boundary is approximate because that split is applied to the sequences.
train_fraction = 0.9
num_train_rows = int(train_fraction * timeseries.shape[0])
train_rows = timeseries.iloc[:num_train_rows]
timeseries_mean = train_rows.mean(axis=0)
timeseries_std = train_rows.std(axis=0)

# Standardize every column with the training statistics. A new DataFrame is
# built instead of modifying the slice of yahoo_stock_prices in place.
timeseries = (timeseries - timeseries_mean) / timeseries_std

In [None]:
timeseries.describe()

In [None]:
timeseries = timeseries.values

In [None]:
print(type(timeseries))

In [None]:
timeseries.shape

In [None]:
timeseries.dtype

****Transform the initial data into a batch of sequences****

In [None]:
def create_sequences(timeseries, pred_var_index, p, h=1):
    """Slice a multivariate time series into supervised (window, target) pairs.

    Sample ``k`` consists of the ``p`` consecutive rows ``k .. k + p - 1`` and
    the value of column ``pred_var_index`` located ``h`` rows after the end of
    that window, i.e. at row ``k + p + h - 1``.

    Args:
        timeseries: 2-D array of shape ``(n, d)``, one row per time step.
        pred_var_index: Column index of the variable to predict.
        p: Number of past time steps in each input window (>= 1).
        h: Forecast horizon in time steps (>= 1); 1 selects the row directly
            after the window.

    Returns:
        Tuple ``(sequences, targets)`` of float32 arrays with shapes
        ``(n - p - h + 1, p, d)`` and ``(n - p - h + 1,)``.

    Raises:
        ValueError: If ``timeseries`` is not 2-D, if ``p`` or ``h`` is smaller
            than 1, or if the series has fewer than ``p + h`` rows.
    """
    timeseries = np.asarray(timeseries)
    n, _ = timeseries.shape  # raises ValueError unless the input is 2-D
    if p < 1 or h < 1:
        raise ValueError(
            'p and h must both be >= 1 (got p={}, h={})'.format(p, h))
    if n < p + h:
        raise ValueError(
            'timeseries has {} rows but at least p + h = {} are required'
            .format(n, p + h))

    # The last window may end at row n - h - 1, so that its target is the
    # final row (index n - 1); this gives n - p - h + 1 samples in total.
    num_samples = n - p - h + 1
    starts = np.arange(num_samples)
    # Row indices of every window, shape (num_samples, p); indexing the 2-D
    # series with it yields an array of shape (num_samples, p, d).
    window_rows = starts[:, None] + np.arange(p)[None, :]
    sequences = timeseries[window_rows].astype('float32')
    targets = timeseries[starts + p + h - 1, pred_var_index].astype('float32')
    return sequences, targets

In [None]:
past = 3

In [None]:
horizon = 1

In [None]:
pred_var_index = 3

In [None]:
sequences, targets = create_sequences(timeseries, pred_var_index, past, horizon)

In [None]:
print(type(sequences))

In [None]:
print(sequences.shape)

In [None]:
print(sequences.dtype)

In [None]:
print(sequences[0,:,:])

In [None]:
print(type(targets))

In [None]:
print(targets.shape)

In [None]:
print(targets.dtype)

In [None]:
print(targets[0])

****Split data****

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
X_train, X_test, y_train, y_test = train_test_split(sequences, targets, test_size=0.1, shuffle=False)

In [None]:
print(X_train.shape)

In [None]:
print(y_train.shape)

In [None]:
print(X_test.shape)

In [None]:
print(y_test.shape)

****Function for plotting training history****

In [None]:
def plot_training_history(history):
    """Plot the per-epoch training loss and, when available, validation loss.

    Args:
        history: Object whose ``history`` attribute is a mapping from metric
            name to a per-epoch list of values (as returned by ``fit``). It
            must contain ``'loss'``; ``'val_loss'`` is optional, so a model
            fitted without validation data is plotted instead of raising
            ``KeyError``.
    """
    metrics = history.history
    loss = metrics['loss']
    val_loss = metrics.get('val_loss')
    # Epochs are numbered from 1 on the x-axis.
    epochs = range(1, len(loss) + 1)
    plt.figure()
    plt.plot(epochs, loss, label='Training loss')
    if val_loss is not None:
        plt.plot(epochs, val_loss, label='Validation loss')
    plt.legend()
    plt.show()

****Classic RNN****

In [None]:
# One SimpleRNN layer (16 units) feeding a single linear output unit.
rnn = keras.Sequential([
    keras.layers.SimpleRNN(16),
    keras.layers.Dense(1, activation='linear'),
])

In [None]:
rnn.compile(optimizer='rmsprop', loss=keras.losses.MeanSquaredError(), metrics=[keras.metrics.MeanAbsolutePercentageError()])

In [None]:
history_rnn = rnn.fit(X_train, y_train, epochs=20, validation_split=0.1)

In [None]:
rnn.summary()

In [None]:
plot_training_history(history_rnn)

In [None]:
rnn_results = rnn.evaluate(X_test, y_test)

In [None]:
print('Simple RNN test error (%): ', round(rnn_results[1], 3))

****Stacked RNN****

In [None]:
# Two stacked SimpleRNN layers: the first returns the full sequence so the
# second can consume it step by step; only the second layer's final output
# reaches the linear output unit.
stacked_rnn = keras.Sequential([
    keras.layers.SimpleRNN(16, return_sequences=True),
    keras.layers.SimpleRNN(16, return_sequences=False),
    keras.layers.Dense(1, activation='linear'),
])

In [None]:
stacked_rnn.compile(optimizer='rmsprop', loss=keras.losses.MeanSquaredError(), metrics=[keras.metrics.MeanAbsolutePercentageError()])

In [None]:
history_stacked_rnn = stacked_rnn.fit(X_train, y_train, epochs=20, validation_split=0.1)

In [None]:
stacked_rnn.summary()

In [None]:
plot_training_history(history_stacked_rnn)

In [None]:
stacked_rnn_results = stacked_rnn.evaluate(X_test, y_test)

In [None]:
print('Stacked RNN test error (%): ', round(stacked_rnn_results[1], 3))

****Regularized stacked RNN****

In [None]:
# Same two-layer SimpleRNN stack, with dropout (0.2) applied to both the
# layer inputs and the recurrent connections of each recurrent layer.
reg_stacked_rnn = keras.Sequential([
    keras.layers.SimpleRNN(16, return_sequences=True, dropout=0.2, recurrent_dropout=0.2),
    keras.layers.SimpleRNN(16, return_sequences=False, dropout=0.2, recurrent_dropout=0.2),
    keras.layers.Dense(1, activation='linear'),
])

In [None]:
reg_stacked_rnn.compile(optimizer='rmsprop', loss=keras.losses.MeanSquaredError(), metrics=[keras.metrics.MeanAbsolutePercentageError()])

In [None]:
history_reg_stacked_rnn = reg_stacked_rnn.fit(X_train, y_train, epochs=20, validation_split=0.1)

In [None]:
reg_stacked_rnn.summary()

In [None]:
plot_training_history(history_reg_stacked_rnn)

In [None]:
reg_stacked_rnn_results = reg_stacked_rnn.evaluate(X_test, y_test)

In [None]:
print('Regularized stacked RNN test error (%): ', round(reg_stacked_rnn_results[1], 3))

****LSTM****

In [None]:
# One LSTM layer (16 units) feeding a single linear output unit.
lstm = keras.Sequential([
    keras.layers.LSTM(16),
    keras.layers.Dense(1, activation='linear'),
])

In [None]:
lstm.compile(optimizer='rmsprop', loss=keras.losses.MeanSquaredError(), metrics=[keras.metrics.MeanAbsolutePercentageError()])

In [None]:
history_lstm = lstm.fit(X_train, y_train, epochs=20, validation_split=0.1)

In [None]:
lstm.summary()

In [None]:
plot_training_history(history_lstm)

In [None]:
lstm_results = lstm.evaluate(X_test, y_test)

In [None]:
print('LSTM test error (%): ', round(lstm_results[1], 3))

****GRU****

In [None]:
# One GRU layer (16 units) feeding a single linear output unit.
gru = keras.Sequential([
    keras.layers.GRU(16),
    keras.layers.Dense(1, activation='linear'),
])

In [None]:
gru.compile(optimizer='rmsprop', loss=keras.losses.MeanSquaredError(), metrics=[keras.metrics.MeanAbsolutePercentageError()])

In [None]:
history_gru = gru.fit(X_train, y_train, epochs=20, validation_split=0.1)

In [None]:
gru.summary()

In [None]:
plot_training_history(history_gru)

In [None]:
gru_results = gru.evaluate(X_test, y_test)

In [None]:
print('GRU test error (%): ', round(gru_results[1], 3))

****Bidirectional RNN, LSTM and GRU****

In [None]:
component_type = 'gru'

In [None]:
# Recurrent layer class selected by each supported component_type value.
recurrent_layer_types = {
    'rnn': keras.layers.SimpleRNN,
    'lstm': keras.layers.LSTM,
    'gru': keras.layers.GRU,
}
if component_type not in recurrent_layer_types:
    # Fail right here instead of leaving bidirectional_model undefined (or
    # pointing at a model built in an earlier run of this cell).
    raise ValueError(
        "Unknown component_type {!r}; expected one of {}".format(
            component_type, sorted(recurrent_layer_types)))

# Bidirectional wrapper around the chosen 16-unit recurrent layer, followed
# by a single linear output unit.
bidirectional_model = keras.Sequential()
bidirectional_model.add(
    keras.layers.Bidirectional(recurrent_layer_types[component_type](16)))
bidirectional_model.add(keras.layers.Dense(1, activation='linear'))

In [None]:
bidirectional_model.compile(optimizer='rmsprop', loss=keras.losses.MeanSquaredError(), metrics=[keras.metrics.MeanAbsolutePercentageError()])

In [None]:
history_bidirectional_model = bidirectional_model.fit(X_train, y_train, epochs=20, validation_split=0.1)

In [None]:
plot_training_history(history_bidirectional_model)

In [None]:
bidirectional_model_results = bidirectional_model.evaluate(X_test, y_test)

In [None]:
print('Bidirectional model test error (%): ', round(bidirectional_model_results[1], 3))

****1D convolutional****

In [None]:
# Conv1D followed by MaxPooling1D still produces a 3-D tensor
# (batch, steps, filters). Flatten it before the Dense head so the model
# emits one value per sample, shape (batch, 1), matching the scalar targets;
# a Dense layer applied directly to the 3-D tensor would instead output
# (batch, steps, 1).
conv1D_seq = keras.Sequential()
conv1D_seq.add(keras.layers.Conv1D(16, 2, activation='relu'))
conv1D_seq.add(keras.layers.MaxPooling1D(2))
conv1D_seq.add(keras.layers.Flatten())
conv1D_seq.add(keras.layers.Dense(1, activation='linear'))

In [None]:
conv1D_seq.compile(optimizer='rmsprop', loss=keras.losses.MeanSquaredError(), metrics=[keras.metrics.MeanAbsolutePercentageError()])

In [None]:
history_conv1D_seq = conv1D_seq.fit(X_train, y_train, epochs=20, validation_split=0.1)

In [None]:
plot_training_history(history_conv1D_seq)

In [None]:
conv1D_seq_results = conv1D_seq.evaluate(X_test, y_test)

In [None]:
print('1D convolutional model test error (%): ', round(conv1D_seq_results[1], 3))

****Plot actual values and predictions for the best-performing model****

In [None]:
y_pred = bidirectional_model.predict(X_test)

In [None]:
plt.figure()
plt.plot(y_test, label='Real')
plt.plot(y_pred, label='Best RNN predictions')
plt.legend()
plt.show()

In [None]:
mape = keras.metrics.MeanAbsolutePercentageError()

In [None]:
mape.update_state(y_true = y_test, y_pred = y_pred)

In [None]:
mape.result().numpy()

****Baseline models****

****Persistence model****

In [None]:
def baseline_predict(X, pred_var_index):
    """Persistence forecast: repeat the most recent value of the target.

    ``X`` is indexed as ``X[sample, step, feature]``. For every sample the
    prediction is the value of feature ``pred_var_index`` at the final time
    step of that sample's window.

    Returns:
        1-D NumPy array holding one prediction per sample in ``X``.
    """
    last_step = X.shape[1] - 1
    return np.array([window[last_step, pred_var_index] for window in X])

In [None]:
baseline_preds = baseline_predict(X_test, pred_var_index)

In [None]:
mape = keras.metrics.MeanAbsolutePercentageError()

In [None]:
mape.update_state(y_true = y_test, y_pred = baseline_preds)

In [None]:
mape.result().numpy()

In [None]:
plt.figure()
plt.plot(y_test, label='Real')
plt.plot(baseline_preds, label='Predictions')
plt.legend()
plt.show()

****Feedforward DNN****

In [None]:
# Take the per-sample input shape (time steps, features) from the training
# windows instead of hard-coding the number of features, so the model stays
# valid when the window length or the set of input columns changes.
dnn = keras.Sequential()
dnn.add(keras.layers.Flatten(input_shape=X_train.shape[1:]))
dnn.add(keras.layers.Dense(16, activation='relu'))
dnn.add(keras.layers.Dense(1, activation='linear'))

In [None]:
dnn.compile(optimizer='rmsprop', loss=keras.losses.MeanSquaredError(), metrics=[keras.metrics.MeanAbsolutePercentageError()])

In [None]:
history_dnn = dnn.fit(X_train, y_train, epochs=20, validation_split=0.1)

In [None]:
dnn.summary()

In [None]:
plot_training_history(history_dnn)

In [None]:
dnn_results = dnn.evaluate(X_test, y_test)

In [None]:
print('Feedforward DNN test error (%): ', round(dnn_results[1], 3))

In [None]:
dnn_preds = dnn.predict(X_test)

In [None]:
plt.figure()
plt.plot(y_test, label='Real')
plt.plot(dnn_preds, label='Predictions')
plt.legend()
plt.show()