# Compare TensorFlow RNN, ARIMA & Linear Regression Approaches

## Generate & Plot Test Data Set

Also do all the necessary imports.

In [None]:
import matplotlib.pyplot as plt
import matplotlib as mpl
import pandas as pd
import numpy as np
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LinearRegression
import tensorflow as tf
from tqdm import tqdm
import util
import json

%matplotlib inline

# Global matplotlib defaults: large figures, grid lines switched off.
mpl.rcParams.update({
    'figure.figsize': (16, 12),
    'axes.grid': False,
})

# How many days' worth of data to generate.
DAYS = 1

# The project helper hands back the raw samples as a 2-D numpy array.
d = util.gen_data(days=DAYS)

# Label the first two columns and wrap them in a pandas data frame:
# column 0 becomes 'Time', column 1 becomes 'ACEs'.
df = pd.DataFrame({
    'Time': d[:, 0],
    'ACEs': d[:, 1],
})

# Quick matplotlib view of the ACEs series (left disabled).
# plt.plot(df['ACEs'])

In [None]:
#
# Interactive bokeh line plot of the raw test data.
#
from bokeh.io import output_notebook
from bokeh.plotting import figure, show

# render bokeh output inline in the notebook
output_notebook()

# one line: ACEs over Time, taken straight from the data frame
p = figure(width=1000)
p.line(
    x=df['Time'],
    y=df['ACEs'],
    color='steelblue',
    line_width=2,
    legend_label="Test Data",
)

show(p)

## Loop over Data & Predict

In [None]:
#
# Walk-forward comparison of four one-step-ahead predictors (ARIMA, linear
# regression, single-layer RNN, multi-layer RNN) on the 'ACEs' series.
#
# The first HIST samples seed the history.  For every remaining sample each
# model predicts the next value from the history seen so far, after which the
# true value is appended to the history.  Finally the MSE of every model
# against the true values is printed.
#

# number of historical data points to start with
HIST = 100

# number of historical datapoints to consider for LR & ARIMA
POINTS_ARIMA = 5
POINTS_LR = 5
POLY_ORDER = 1

# number of most recent data points fed to the RNNs as one input window
BREADTH = 30

# the very first RNN window is cut from the initial history, so the history
# has to be at least one window long
if HIST < BREADTH:
    raise ValueError('HIST (%d) must be at least BREADTH (%d)' % (HIST, BREADTH))

# the x axis is simply the sample index; derive its length from the data
# (instead of hard-coding 1440) so x and y always stay aligned
y = df['ACEs'].values
x = list(range(len(y)))

x_train, x_test = x[:HIST], x[HIST:]
y_train, y_test = y[:HIST], y[HIST:]

# running history; grows by one sample per loop iteration
history_x = list(x_train)
history_y = list(y_train)

predictions_lr = list()
predictions_arima = list()
predictions_rnn_single = list()
predictions_rnn_multi = list()

#
# create and compile a single-layer model
#
# def create_model_single(breadth=BREADTH, input_shape=None):
#     assert input_shape is not None
#     retval = tf.keras.models.Sequential([
#         tf.keras.layers.LSTM(BREADTH, input_shape=input_shape),
#         tf.keras.layers.Dense(1)
#     ])
#     retval.compile(optimizer='adam', loss='mae')
#     return retval

#
# create and compile a multi-layer model
#
# def create_model_multi(breadth=BREADTH, input_shape=None):
#     assert input_shape is not None
#     retval = tf.keras.models.Sequential([
#         tf.keras.layers.LSTM(BREADTH, return_sequences=True, input_shape=input_shape),
#         tf.keras.layers.LSTM(BREADTH, return_sequences=True),
#         tf.keras.layers.LSTM(BREADTH),
#         tf.keras.layers.Dense(1)
#     ])
#     retval.compile(optimizer='adam', loss='mae')
#     return retval

#
# load the normalization parameters (mean / standard deviation) that are
# applied to the RNN inputs and undone on the RNN outputs
# NOTE(review): presumably computed on the RNN training data -- confirm
#
with open('mean_stddev.json', 'r') as f:
    data = json.load(f)
TRAIN_MEAN = data['mean']
TRAIN_STDDEV = data['stddev']

#
# instantiate the pre-trained RNNs
#
# the model filenames are derived from these parameters
LOAD_EPOCHS              = 5
LOAD_EVALUATION_INTERVAL = 200
LOAD_VALIDATION_STEPS    = 50
model_suffix = '%03d-%03d-%03d' % (LOAD_EPOCHS, LOAD_EVALUATION_INTERVAL, LOAD_VALIDATION_STEPS)

# single-layer model with weights
# model_rnn_single = create_model_single(input_shape=(BREADTH, 1))
# model_rnn_single.load_weights('weights-prerun/lstm_weights-single-%03d-%03d-%03d.weights.h5' % (LOAD_EPOCHS, LOAD_EVALUATION_INTERVAL, LOAD_VALIDATION_STEPS))
# model_rnn_single.load_weights('lstm_weights-single-%03d-%03d-%03d.weights.h5' % (LOAD_EPOCHS, LOAD_EVALUATION_INTERVAL, LOAD_VALIDATION_STEPS))

# single layer model load
model_rnn_single = tf.keras.models.load_model('lstm_model-single-%s.keras' % model_suffix)

# multi-layer model with weights
# model_rnn_multi = create_model_multi(input_shape=(BREADTH, 1))
# model_rnn_multi.load_weights('weights-prerun/lstm_weights-multi-%03d-%03d-%03d.weights.h5' % (LOAD_EPOCHS, LOAD_EVALUATION_INTERVAL, LOAD_VALIDATION_STEPS))
# model_rnn_multi.load_weights('lstm_weights-multi-%03d-%03d-%03d.weights.h5' % (LOAD_EPOCHS, LOAD_EVALUATION_INTERVAL, LOAD_VALIDATION_STEPS))

# multi-layer model load
model_rnn_multi = tf.keras.models.load_model('lstm_model-multi-%s.keras' % model_suffix)

# loop over the test samples; dedicated loop names keep the full x / y series
# intact, and an explicit total lets tqdm show a real progress bar (zip has
# no length of its own)
for x_now, y_now in tqdm(zip(x_test, y_test), total=len(y_test)):

    # do ARIMA model & prediction: refit on the whole history, forecast 1 step
    model_arima = ARIMA(history_y, order=(POINTS_ARIMA, 1, 0))
    model_arima_fit = model_arima.fit()
    output = model_arima_fit.forecast()
    yhat_ar = output[0]
    predictions_arima.append(yhat_ar)

    # do LR: fit a polynomial of order POLY_ORDER through the last POINTS_LR
    # samples and evaluate it at the current x position
    x_arr = history_x[-POINTS_LR:]
    y_arr = history_y[-POINTS_LR:]
    fit = np.polyfit(x_arr, y_arr, POLY_ORDER, full=True)
    yhat_lr = np.polyval(fit[0], x_now)
    predictions_lr.append(yhat_lr)

    # do RNN predictions on the last BREADTH data points
    # NOTE(review): the batch holds 256 cyclically shifted copies of the
    # window, but only row 0 (shift 0, i.e. the unshifted window) is read
    # from the prediction -- a single-row batch would probably do; confirm
    # that the saved models accept other batch sizes before changing this
    y_window = np.array(history_y[-BREADTH:])
    rnn_input = np.concatenate([np.roll(y_window, i) for i in range(0, 256)])
    rnn_input = (rnn_input - TRAIN_MEAN) / TRAIN_STDDEV
    rnn_input = rnn_input.reshape(-1, BREADTH, 1)

    # single-layer RNN (verbose=0: no per-call progress bar inside the loop);
    # the output is de-normalized back to the scale of the data
    yhat_rnn_single = model_rnn_single.predict(rnn_input, verbose=0)[0][0] * TRAIN_STDDEV + TRAIN_MEAN
    predictions_rnn_single.append(yhat_rnn_single)

    # multi-layer RNN
    yhat_rnn_multi = model_rnn_multi.predict(rnn_input, verbose=0)[0][0] * TRAIN_STDDEV + TRAIN_MEAN
    predictions_rnn_multi.append(yhat_rnn_multi)

    # update history with the true observation for this step
    history_x.append(x_now)
    history_y.append(y_now)


#
# does some MSE calculations
#
error_arima = mean_squared_error(y_test, predictions_arima)
error_lr = mean_squared_error(y_test, predictions_lr)
error_rnn_single = mean_squared_error(y_test, predictions_rnn_single)
error_rnn_multi = mean_squared_error(y_test, predictions_rnn_multi)

print('Test MSE (ARIMA)      : %.3f' % error_arima)
print('Test MSE (LR)         : %.3f' % error_lr)
print('Test MSE (RNN single) : %.3f' % error_rnn_single)
print('Test MSE (RNN multi)  : %.3f' % error_rnn_multi)

In [None]:
#
# Overlay the true test data and every model's predictions in one bokeh plot.
#
from bokeh.io import output_notebook
from bokeh.plotting import figure, show

# render bokeh output inline in the notebook
output_notebook()

# every series shares the x positions recorded after the initial history
test_x = history_x[HIST:]

# (y values, line styling) per series, in drawing order
series = [
    (history_y[HIST:], {'color': 'green', 'line_width': 4, 'alpha': 0.6, 'legend_label': 'True Data'}),
    (predictions_lr, {'color': 'blue', 'legend_label': 'LR Predictions'}),
    (predictions_arima, {'color': 'orange', 'legend_label': 'ARIMA Predictions'}),
    (predictions_rnn_single, {'color': 'red', 'legend_label': 'Single-Layer RNN', 'line_width': 2}),
    (predictions_rnn_multi, {'color': 'purple', 'legend_label': 'Multi-Layer RNN'}),
]

p = figure(min_width=1000, min_height=600)
for values, style in series:
    p.line(x=test_x, y=values, **style)

show(p)

# Scratch Area

In [None]:
#plt.plot(history_y[HIST:], color='green')
#plt.plot(predictions_arima, color='blue')
#plt.plot(predictions_lr, color='orange')
#plt.plot(predictions_rnn_single, color='red')
#plt.plot(predictions_rnn_multi, color='purple')
#plt.show()