# Univariate Modeling Using RNN

#### (Meant to be run within Google Colab)

In [1]:
import pandas as pd
import numpy as np
import pickle

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, mean_squared_error, mean_absolute_error

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM, SimpleRNN
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator
from tensorflow.keras.utils import timeseries_dataset_from_array
from tensorflow.keras.callbacks import EarlyStopping

import matplotlib.pyplot as plt

import random
random.seed(42)
np.random.seed(42)
import tensorflow as tf
tf.random.set_seed(42)

In [None]:
#mount drive
# Makes Google Drive available under /content/drive (Colab-only import).
# NOTE(review): no cell visible in this notebook reads from the mounted drive;
# the CSV is brought in through files.upload() instead. Confirm whether this
# mount is still needed.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [121]:
#import campus5
from google.colab import files
import io

# Opens Colab's file picker; the result maps each uploaded file name to its bytes.
uploaded = files.upload()

if not uploaded:
    raise ValueError("No file was uploaded; expected campus5.csv")

# Colab renames a file that already exists in the session (for example
# "campus5 (1).csv"), and the returned dict may then be keyed by the renamed
# file. Prefer the expected name, otherwise fall back to the first upload.
csv_name = 'campus5.csv' if 'campus5.csv' in uploaded else next(iter(uploaded))

campus5 = io.BytesIO(uploaded[csv_name])
df = pd.read_csv(campus5)

Saving campus5.csv to campus5 (1).csv


In [122]:
# Index the frame by the parsed 'Timestamp' column so the time-based resampling
# done later in the notebook works; this statement leaves the column itself in place.
df.index = pd.to_datetime(df['Timestamp'])

In [92]:
# Univariate setup: SolarGeneration is both the model input and the value being
# predicted, so every other column is dropped.
df = df[['SolarGeneration']]

# Chronological split (no shuffling): the last 15% of rows are held out.
y_train, y_test = train_test_split(
    df['SolarGeneration'],
    test_size = 0.15,
    shuffle = False
)

In [93]:
def print_last(res):
  '''
  Print the final-epoch numbers of a fitted model: the training and validation
  loss (labelled Mean Absolute Error), then the square root of the training
  and validation 'mse' metric (labelled Root Mean Squared Error).

  res must expose a `history` dict with the keys 'loss', 'val_loss', 'mse'
  and 'val_mse', each holding one value per epoch. Values are rounded to
  5 decimal places. Nothing is returned.
  '''
  def final_epoch(key, rooted):
    # Last recorded value for `key`, square-rooted on request, then rounded.
    latest = res.history[key][-1]
    if rooted:
      latest = np.sqrt(latest)
    return np.round(latest, 5)

  print(f"Mean Absolute Error - Training: {final_epoch('loss', False)}")
  print(f"Mean Absolute Error - Testing: {final_epoch('val_loss', False)}")
  print(f"Root Mean Squared Error - Training: {final_epoch('mse', True)}")
  print(f"Root Mean Squared Error - Testing: {final_epoch('val_mse', True)}")

In [94]:
# Shared callback: halt a fit once validation loss has gone 5 epochs without improving.
early_stop = EarlyStopping(patience = 5, monitor = 'val_loss')

First, investigating windows of 12 consecutive 15-minute readings, i.e. 3-hour intervals.  

Examining the different types of models with various numbers of SimpleRNN layers, Dense layers, and nodes for each.

In [95]:
seq_length = 12

# Training windows: every run of seq_length consecutive readings is paired with
# the reading that immediately follows it (targets are offset by seq_length).
train_ds = timeseries_dataset_from_array(
    data = y_train.to_numpy(),
    targets = y_train[seq_length:],
    sequence_length = seq_length,
    shuffle = True,
    seed = 42,
    batch_size = 32
)

# Validation windows are built the same way from the held-out tail, unshuffled.
val_ds = timeseries_dataset_from_array(
    data = y_test.to_numpy(),
    targets = y_test[seq_length:],
    sequence_length = seq_length,
    batch_size = 32
)

In [96]:
# Smallest baseline: a single one-unit SimpleRNN feeding a linear output node.
model = Sequential([
    SimpleRNN(1, input_shape = [None,1]),
    Dense(1, activation = 'linear'),
])

# MAE is the training loss; MSE is tracked so print_last can report RMSE.
model.compile(
    optimizer = Adam(learning_rate = 0.001),
    loss = 'mae',
    metrics = ['mse']
)

res = model.fit(
    train_ds,
    validation_data = val_ds,
    epochs = 50,
    callbacks = [early_stop],
    verbose = 0
)

print_last(res)

Mean Absolute Error - Training: 0.24034
Mean Absolute Error - Testing: 0.23555
Root Mean Squared Error - Training: 0.67961
Root Mean Squared Error - Testing: 0.6221


In [97]:
# One SimpleRNN layer with 32 units and a linear output node.
model = Sequential([
    SimpleRNN(32, input_shape = [None,1]),
    Dense(1, activation = 'linear'),
])

# MAE is the training loss; MSE is tracked so print_last can report RMSE.
model.compile(
    optimizer = Adam(learning_rate = 0.001),
    loss = 'mae',
    metrics = ['mse']
)

res = model.fit(
    train_ds,
    validation_data = val_ds,
    epochs = 50,
    callbacks = [early_stop],
    verbose = 0
)

print_last(res)

Mean Absolute Error - Training: 0.19899
Mean Absolute Error - Testing: 0.18984
Root Mean Squared Error - Training: 0.64007
Root Mean Squared Error - Testing: 0.57907


In [98]:
# Two stacked SimpleRNN layers (32 units each) and a linear output node.
# Only the first layer declares input_shape; the second takes its input from
# the layer before it, so repeating input_shape there had no effect.
model = Sequential([
    SimpleRNN(32, input_shape = [None,1], return_sequences = True),
    SimpleRNN(32),
    Dense(1, activation = 'linear'),
])

# MAE is the training loss; MSE is tracked so print_last can report RMSE.
model.compile(
    optimizer = Adam(learning_rate = 0.001),
    loss = 'mae',
    metrics = ['mse']
)

res = model.fit(
    train_ds,
    validation_data = val_ds,
    epochs = 50,
    callbacks = [early_stop],
    verbose = 0
)

print_last(res)

Mean Absolute Error - Training: 0.19957
Mean Absolute Error - Testing: 0.19663
Root Mean Squared Error - Training: 0.63343
Root Mean Squared Error - Testing: 0.57871


In [99]:
# Three stacked SimpleRNN layers (32 units each) and a linear output node.
# Only the first layer declares input_shape; the later layers take their input
# from the layer before them, so repeating input_shape there had no effect.
model = Sequential([
    SimpleRNN(32, input_shape = [None,1], return_sequences = True),
    SimpleRNN(32, return_sequences = True),
    SimpleRNN(32),
    Dense(1, activation = 'linear'),
])

# MAE is the training loss; MSE is tracked so print_last can report RMSE.
model.compile(
    optimizer = Adam(learning_rate = 0.001),
    loss = 'mae',
    metrics = ['mse']
)

res = model.fit(
    train_ds,
    validation_data = val_ds,
    epochs = 50,
    callbacks = [early_stop],
    verbose = 0
)

print_last(res)

Mean Absolute Error - Training: 0.20633
Mean Absolute Error - Testing: 0.19282
Root Mean Squared Error - Training: 0.63873
Root Mean Squared Error - Testing: 0.58261


In [100]:
# Three stacked SimpleRNN layers (32 units each), then a 32-unit ReLU Dense
# layer before the linear output node.
# Only the first layer declares input_shape; the later layers take their input
# from the layer before them, so repeating input_shape there had no effect.
model = Sequential([
    SimpleRNN(32, input_shape = [None,1], return_sequences = True),
    SimpleRNN(32, return_sequences = True),
    SimpleRNN(32),
    Dense(32, activation = 'relu'),
    Dense(1, activation = 'linear'),
])

# MAE is the training loss; MSE is tracked so print_last can report RMSE.
model.compile(
    optimizer = Adam(learning_rate = 0.001),
    loss = 'mae',
    metrics = ['mse']
)

res = model.fit(
    train_ds,
    validation_data = val_ds,
    epochs = 50,
    callbacks = [early_stop],
    verbose = 0
)

print_last(res)

Mean Absolute Error - Training: 0.2013
Mean Absolute Error - Testing: 0.24388
Root Mean Squared Error - Training: 0.63756
Root Mean Squared Error - Testing: 0.59736


---

Next, investigating windows of 16 consecutive 15-minute readings, i.e. 4-hour intervals.  

Examining the different types of models with various numbers of SimpleRNN layers, Dense layers, and nodes for each.

In [101]:
seq_length = 16

# Training windows: every run of seq_length consecutive readings is paired with
# the reading that immediately follows it (targets are offset by seq_length).
train_ds = timeseries_dataset_from_array(
    data = y_train.to_numpy(),
    targets = y_train[seq_length:],
    sequence_length = seq_length,
    shuffle = True,
    seed = 42,
    batch_size = 96
)

# Validation windows are built the same way from the held-out tail, unshuffled.
val_ds = timeseries_dataset_from_array(
    data = y_test.to_numpy(),
    targets = y_test[seq_length:],
    sequence_length = seq_length,
    batch_size = 96
)

In [102]:
# One SimpleRNN layer with 32 units and a linear output node.
model = Sequential([
    SimpleRNN(32, input_shape = [None,1]),
    Dense(1, activation = 'linear'),
])

# MAE is the training loss; MSE is tracked so print_last can report RMSE.
model.compile(
    optimizer = Adam(learning_rate = 0.001),
    loss = 'mae',
    metrics = ['mse']
)

res = model.fit(
    train_ds,
    validation_data = val_ds,
    epochs = 50,
    callbacks = [early_stop],
    verbose = 0
)

print_last(res)

Mean Absolute Error - Training: 0.19621
Mean Absolute Error - Testing: 0.18977
Root Mean Squared Error - Training: 0.63986
Root Mean Squared Error - Testing: 0.58625


In [103]:
# Two stacked SimpleRNN layers (32 units each) and a linear output node.
# Only the first layer declares input_shape; the second takes its input from
# the layer before it, so repeating input_shape there had no effect.
model = Sequential([
    SimpleRNN(32, input_shape = [None,1], return_sequences = True),
    SimpleRNN(32),
    Dense(1, activation = 'linear'),
])

# MAE is the training loss; MSE is tracked so print_last can report RMSE.
model.compile(
    optimizer = Adam(learning_rate = 0.001),
    loss = 'mae',
    metrics = ['mse']
)

res = model.fit(
    train_ds,
    validation_data = val_ds,
    epochs = 50,
    callbacks = [early_stop],
    verbose = 0
)

print_last(res)

Mean Absolute Error - Training: 0.1993
Mean Absolute Error - Testing: 0.19424
Root Mean Squared Error - Training: 0.63645
Root Mean Squared Error - Testing: 0.57289


---

Investigating windows of 96 consecutive 15-minute readings, i.e. 24-hour intervals.  

Examining the different types of models with various numbers of SimpleRNN layers, Dense layers, and nodes for each.

In [104]:
seq_length = 96

# Training windows: every run of seq_length consecutive readings is paired with
# the reading that immediately follows it (targets are offset by seq_length).
train_ds = timeseries_dataset_from_array(
    data = y_train.to_numpy(),
    targets = y_train[seq_length:],
    sequence_length = seq_length,
    shuffle = True,
    seed = 42,
    batch_size = 672
)

# Validation windows are built the same way from the held-out tail, unshuffled.
val_ds = timeseries_dataset_from_array(
    data = y_test.to_numpy(),
    targets = y_test[seq_length:],
    sequence_length = seq_length,
    batch_size = 672
)

In [105]:
# One SimpleRNN layer with 32 units and a linear output node.
model = Sequential([
    SimpleRNN(32, input_shape = [None,1]),
    Dense(1, activation = 'linear'),
])

# MAE is the training loss; MSE is tracked so print_last can report RMSE.
model.compile(
    optimizer = Adam(learning_rate = 0.001),
    loss = 'mae',
    metrics = ['mse']
)

res = model.fit(
    train_ds,
    validation_data = val_ds,
    epochs = 50,
    callbacks = [early_stop],
    verbose = 0
)

print_last(res)

Mean Absolute Error - Training: 0.20041
Mean Absolute Error - Testing: 0.18551
Root Mean Squared Error - Training: 0.64097
Root Mean Squared Error - Testing: 0.58078


In [106]:
# Two stacked SimpleRNN layers (32 units each) and a linear output node.
# Only the first layer declares input_shape; the second takes its input from
# the layer before it, so repeating input_shape there had no effect.
model = Sequential([
    SimpleRNN(32, input_shape = [None,1], return_sequences = True),
    SimpleRNN(32),
    Dense(1, activation = 'linear'),
])

# MAE is the training loss; MSE is tracked so print_last can report RMSE.
model.compile(
    optimizer = Adam(learning_rate = 0.001),
    loss = 'mae',
    metrics = ['mse']
)

res = model.fit(
    train_ds,
    validation_data = val_ds,
    epochs = 50,
    callbacks = [early_stop],
    verbose = 0
)

print_last(res)

Mean Absolute Error - Training: 0.20062
Mean Absolute Error - Testing: 0.18666
Root Mean Squared Error - Training: 0.64015
Root Mean Squared Error - Testing: 0.58121


In [107]:
# One wider SimpleRNN layer (64 units) and a linear output node.
model = Sequential([
    SimpleRNN(64, input_shape = [None,1]),
    Dense(1, activation = 'linear'),
])

# MAE is the training loss; MSE is tracked so print_last can report RMSE.
model.compile(
    optimizer = Adam(learning_rate = 0.001),
    loss = 'mae',
    metrics = ['mse']
)

res = model.fit(
    train_ds,
    validation_data = val_ds,
    epochs = 50,
    callbacks = [early_stop],
    verbose = 0
)

print_last(res)

Mean Absolute Error - Training: 0.19532
Mean Absolute Error - Testing: 0.19348
Root Mean Squared Error - Training: 0.64109
Root Mean Squared Error - Testing: 0.5844


In [108]:
# One SimpleRNN layer with 96 units and a linear output node.
model = Sequential([
    SimpleRNN(96, input_shape = [None,1]),
    Dense(1, activation = 'linear'),
])

# MAE is the training loss; MSE is tracked so print_last can report RMSE.
model.compile(
    optimizer = Adam(learning_rate = 0.001),
    loss = 'mae',
    metrics = ['mse']
)

res = model.fit(
    train_ds,
    validation_data = val_ds,
    epochs = 50,
    callbacks = [early_stop],
    verbose = 0
)

print_last(res)

Mean Absolute Error - Training: 0.19598
Mean Absolute Error - Testing: 0.18363
Root Mean Squared Error - Training: 0.64333
Root Mean Squared Error - Testing: 0.58579


# Tuning the Adam Learning Rate of the Top Two Models
### (with corresponding sequence lengths)


In [119]:
seq_length = 96

# Rebuild the 96-reading windows for the learning-rate runs: every run of
# seq_length consecutive readings is paired with the reading that follows it.
train_ds = timeseries_dataset_from_array(
    data = y_train.to_numpy(),
    targets = y_train[seq_length:],
    sequence_length = seq_length,
    shuffle = True,
    seed = 42,
    batch_size = 672
)

# Validation windows are built the same way from the held-out tail, unshuffled.
val_ds = timeseries_dataset_from_array(
    data = y_test.to_numpy(),
    targets = y_test[seq_length:],
    sequence_length = seq_length,
    batch_size = 672
)

In [110]:
# Learning-rate run: SimpleRNN(96) with Adam at 0.0008.
model = Sequential([
    SimpleRNN(96, input_shape = [None,1]),
    Dense(1, activation = 'linear'),
])

# MAE is the training loss; MSE is tracked so print_last can report RMSE.
model.compile(
    optimizer = Adam(learning_rate = 0.0008),
    loss = 'mae',
    metrics = ['mse']
)

res = model.fit(
    train_ds,
    validation_data = val_ds,
    epochs = 50,
    callbacks = [early_stop],
    verbose = 0
)

print_last(res)

Mean Absolute Error - Training: 0.19748
Mean Absolute Error - Testing: 0.18931
Root Mean Squared Error - Training: 0.64481
Root Mean Squared Error - Testing: 0.58588


In [111]:
# Learning-rate run: SimpleRNN(96) with Adam at 0.001.
model = Sequential([
    SimpleRNN(96, input_shape = [None,1]),
    Dense(1, activation = 'linear'),
])

# MAE is the training loss; MSE is tracked so print_last can report RMSE.
model.compile(
    optimizer = Adam(learning_rate = 0.001),
    loss = 'mae',
    metrics = ['mse']
)

res = model.fit(
    train_ds,
    validation_data = val_ds,
    epochs = 50,
    callbacks = [early_stop],
    verbose = 0
)

print_last(res)

Mean Absolute Error - Training: 0.20201
Mean Absolute Error - Testing: 0.19343
Root Mean Squared Error - Training: 0.64533
Root Mean Squared Error - Testing: 0.58699


In [112]:
# Learning-rate run: SimpleRNN(96) with Adam at 0.0015.
model = Sequential([
    SimpleRNN(96, input_shape = [None,1]),
    Dense(1, activation = 'linear'),
])

# MAE is the training loss; MSE is tracked so print_last can report RMSE.
model.compile(
    optimizer = Adam(learning_rate = 0.0015),
    loss = 'mae',
    metrics = ['mse']
)

res = model.fit(
    train_ds,
    validation_data = val_ds,
    epochs = 50,
    callbacks = [early_stop],
    verbose = 0
)

print_last(res)

Mean Absolute Error - Training: 0.19604
Mean Absolute Error - Testing: 0.20313
Root Mean Squared Error - Training: 0.64105
Root Mean Squared Error - Testing: 0.58618


In [113]:
# Learning-rate run: SimpleRNN(96) with Adam at 0.002.
model = Sequential([
    SimpleRNN(96, input_shape = [None,1]),
    Dense(1, activation = 'linear'),
])

# MAE is the training loss; MSE is tracked so print_last can report RMSE.
model.compile(
    optimizer = Adam(learning_rate = 0.002),
    loss = 'mae',
    metrics = ['mse']
)

res = model.fit(
    train_ds,
    validation_data = val_ds,
    epochs = 50,
    callbacks = [early_stop],
    verbose = 0
)

print_last(res)

Mean Absolute Error - Training: 0.19904
Mean Absolute Error - Testing: 0.18806
Root Mean Squared Error - Training: 0.64129
Root Mean Squared Error - Testing: 0.5882


In [120]:
# Learning-rate run: SimpleRNN(96) with Adam at 0.0025.
model = Sequential([
    SimpleRNN(96, input_shape = [None,1]),
    Dense(1, activation = 'linear'),
])

# MAE is the training loss; MSE is tracked so print_last can report RMSE.
model.compile(
    optimizer = Adam(learning_rate = 0.0025),
    loss = 'mae',
    metrics = ['mse']
)

res = model.fit(
    train_ds,
    validation_data = val_ds,
    epochs = 50,
    callbacks = [early_stop],
    verbose = 0
)

print_last(res)

Mean Absolute Error - Training: 0.19681
Mean Absolute Error - Testing: 0.18769
Root Mean Squared Error - Training: 0.64021
Root Mean Squared Error - Testing: 0.58013


---
---
---

In [114]:
seq_length = 16

# Rebuild the 16-reading windows for the learning-rate runs: every run of
# seq_length consecutive readings is paired with the reading that follows it.
train_ds = timeseries_dataset_from_array(
    data = y_train.to_numpy(),
    targets = y_train[seq_length:],
    sequence_length = seq_length,
    shuffle = True,
    seed = 42,
    batch_size = 96
)

# Validation windows are built the same way from the held-out tail, unshuffled.
val_ds = timeseries_dataset_from_array(
    data = y_test.to_numpy(),
    targets = y_test[seq_length:],
    sequence_length = seq_length,
    batch_size = 96
)

In [115]:
# Learning-rate run: two stacked SimpleRNN(32) layers with Adam at 0.0008.
# Only the first layer declares input_shape; the second takes its input from
# the layer before it, so repeating input_shape there had no effect.
model = Sequential([
    SimpleRNN(32, input_shape = [None,1], return_sequences = True),
    SimpleRNN(32),
    Dense(1, activation = 'linear'),
])

# MAE is the training loss; MSE is tracked so print_last can report RMSE.
model.compile(
    optimizer = Adam(learning_rate = 0.0008),
    loss = 'mae',
    metrics = ['mse']
)

res = model.fit(
    train_ds,
    validation_data = val_ds,
    epochs = 50,
    callbacks = [early_stop],
    verbose = 0
)

print_last(res)

Mean Absolute Error - Training: 0.2003
Mean Absolute Error - Testing: 0.18711
Root Mean Squared Error - Training: 0.64188
Root Mean Squared Error - Testing: 0.57652


In [116]:
# Learning-rate run: two stacked SimpleRNN(32) layers with Adam at 0.001.
# Only the first layer declares input_shape; the second takes its input from
# the layer before it, so repeating input_shape there had no effect.
model = Sequential([
    SimpleRNN(32, input_shape = [None,1], return_sequences = True),
    SimpleRNN(32),
    Dense(1, activation = 'linear'),
])

# MAE is the training loss; MSE is tracked so print_last can report RMSE.
model.compile(
    optimizer = Adam(learning_rate = 0.001),
    loss = 'mae',
    metrics = ['mse']
)

res = model.fit(
    train_ds,
    validation_data = val_ds,
    epochs = 50,
    callbacks = [early_stop],
    verbose = 0
)

print_last(res)

Mean Absolute Error - Training: 0.20041
Mean Absolute Error - Testing: 0.18713
Root Mean Squared Error - Training: 0.63707
Root Mean Squared Error - Testing: 0.57442


In [117]:
# Learning-rate run: two stacked SimpleRNN(32) layers with Adam at 0.0015.
# Only the first layer declares input_shape; the second takes its input from
# the layer before it, so repeating input_shape there had no effect.
model = Sequential([
    SimpleRNN(32, input_shape = [None,1], return_sequences = True),
    SimpleRNN(32),
    Dense(1, activation = 'linear'),
])

# MAE is the training loss; MSE is tracked so print_last can report RMSE.
model.compile(
    optimizer = Adam(learning_rate = 0.0015),
    loss = 'mae',
    metrics = ['mse']
)

res = model.fit(
    train_ds,
    validation_data = val_ds,
    epochs = 50,
    callbacks = [early_stop],
    verbose = 0
)

print_last(res)

Mean Absolute Error - Training: 0.20164
Mean Absolute Error - Testing: 0.19037
Root Mean Squared Error - Training: 0.64363
Root Mean Squared Error - Testing: 0.58374


In [118]:
# Learning-rate run: two stacked SimpleRNN(32) layers with Adam at 0.002.
# Only the first layer declares input_shape; the second takes its input from
# the layer before it, so repeating input_shape there had no effect.
model = Sequential([
    SimpleRNN(32, input_shape = [None,1], return_sequences = True),
    SimpleRNN(32),
    Dense(1, activation = 'linear'),
])

# MAE is the training loss; MSE is tracked so print_last can report RMSE.
model.compile(
    optimizer = Adam(learning_rate = 0.002),
    loss = 'mae',
    metrics = ['mse']
)

res = model.fit(
    train_ds,
    validation_data = val_ds,
    epochs = 50,
    callbacks = [early_stop],
    verbose = 0
)

print_last(res)

Mean Absolute Error - Training: 0.20048
Mean Absolute Error - Testing: 0.19652
Root Mean Squared Error - Training: 0.63161
Root Mean Squared Error - Testing: 0.57564


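Among the learning-rate tuning runs, the BEST model by validation Mean Absolute Error is two stacked SimpleRNN layers with 32 nodes each, the Adam optimizer with a 0.0008 learning rate, and windows of 16 fifteen-minute readings (4 hours).

Best MAE - 0.1871

Note: an earlier run of the single SimpleRNN(96) model on 96-reading windows logged a lower validation MAE (0.18363), but the same configuration scored 0.19343 when it was refit in the tuning section, so differences in the third decimal place are within run-to-run variation.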

---
# Hourly Modeling
---

In [16]:
# Downsample to hourly means.
hourly = df.resample('H').mean()
# Carry the last known value forward over hours that have no readings.
# .ffill() is the direct spelling of interpolate(method = 'pad'), which newer
# pandas releases deprecate, and it matches the daily resampling cell.
hourly = hourly.ffill()

In [17]:
# Chronological split of the hourly frame (no shuffling): the last 15% is held out.
y_train, y_test = train_test_split(
    hourly,
    test_size = 0.15,
    shuffle = False
)

Investigating at 13-hour intervals

In [18]:
seq_length = 13

# Training windows: every run of seq_length consecutive hourly rows is paired
# with the row that immediately follows it (targets are offset by seq_length).
train_ds = timeseries_dataset_from_array(
    data = y_train.to_numpy(),
    targets = y_train[seq_length:],
    sequence_length = seq_length,
    shuffle = True,
    seed = 42,
    batch_size = 48
)

# Validation windows are built the same way from the held-out tail, unshuffled.
val_ds = timeseries_dataset_from_array(
    data = y_test.to_numpy(),
    targets = y_test[seq_length:],
    sequence_length = seq_length,
    batch_size = 48
)

In [19]:
# Smallest baseline for the hourly series: a single one-unit SimpleRNN feeding
# a linear output node.
model = Sequential([
    SimpleRNN(1, input_shape = [None,1]),
    Dense(1, activation = 'linear'),
])

# MAE is the training loss; MSE is tracked so print_last can report RMSE.
model.compile(
    optimizer = Adam(learning_rate = 0.001),
    loss = 'mae',
    metrics = ['mse']
)

# Use the shared early_stop callback like every other fit in this notebook;
# it was missing here, so this baseline was not trained under the same
# stopping rule as the models it is compared with.
res = model.fit(
    train_ds,
    validation_data = val_ds,
    epochs = 50,
    callbacks = [early_stop],
    verbose = 0
)

print_last(res)

Mean Absolute Error - Training: 1.09033
Mean Absolute Error - Testing: 1.40736
Root Mean Squared Error - Training: 2.00456
Root Mean Squared Error - Testing: 2.40214


In [20]:
# One SimpleRNN layer with 48 units and a linear output node.
model = Sequential([
    SimpleRNN(48, input_shape = [None,1]),
    Dense(1, activation = 'linear'),
])

# MAE is the training loss; MSE is tracked so print_last can report RMSE.
model.compile(
    optimizer = Adam(learning_rate = 0.001),
    loss = 'mae',
    metrics = ['mse']
)

res = model.fit(
    train_ds,
    validation_data = val_ds,
    epochs = 50,
    callbacks = [early_stop],
    verbose = 0
)

print_last(res)

Mean Absolute Error - Training: 0.25123
Mean Absolute Error - Testing: 0.27612
Root Mean Squared Error - Training: 0.60023
Root Mean Squared Error - Testing: 0.60838


In [21]:
# Two stacked SimpleRNN layers (48 units each) and a linear output node.
# Only the first layer declares input_shape; the second takes its input from
# the layer before it, so repeating input_shape there had no effect.
model = Sequential([
    SimpleRNN(48, input_shape = [None,1], return_sequences = True),
    SimpleRNN(48),
    Dense(1, activation = 'linear'),
])

# MAE is the training loss; MSE is tracked so print_last can report RMSE.
model.compile(
    optimizer = Adam(learning_rate = 0.001),
    loss = 'mae',
    metrics = ['mse']
)

res = model.fit(
    train_ds,
    validation_data = val_ds,
    epochs = 50,
    callbacks = [early_stop],
    verbose = 0
)

print_last(res)

Mean Absolute Error - Training: 0.25297
Mean Absolute Error - Testing: 0.26065
Root Mean Squared Error - Training: 0.59523
Root Mean Squared Error - Testing: 0.59247


Investigating 24-hour intervals

In [22]:
seq_length = 24

# Training windows: every run of seq_length consecutive hourly rows is paired
# with the row that immediately follows it (targets are offset by seq_length).
train_ds = timeseries_dataset_from_array(
    data = y_train.to_numpy(),
    targets = y_train[seq_length:],
    sequence_length = seq_length,
    shuffle = True,
    seed = 42,
    batch_size = 48
)

# Validation windows are built the same way from the held-out tail, unshuffled.
val_ds = timeseries_dataset_from_array(
    data = y_test.to_numpy(),
    targets = y_test[seq_length:],
    sequence_length = seq_length,
    batch_size = 48
)

In [23]:
# One SimpleRNN layer with 48 units and a linear output node.
model = Sequential([
    SimpleRNN(48, input_shape = [None,1]),
    Dense(1, activation = 'linear'),
])

# MAE is the training loss; MSE is tracked so print_last can report RMSE.
model.compile(
    optimizer = Adam(learning_rate = 0.001),
    loss = 'mae',
    metrics = ['mse']
)

res = model.fit(
    train_ds,
    validation_data = val_ds,
    epochs = 50,
    callbacks = [early_stop],
    verbose = 0
)

print_last(res)

Mean Absolute Error - Training: 0.2373
Mean Absolute Error - Testing: 0.23883
Root Mean Squared Error - Training: 0.58628
Root Mean Squared Error - Testing: 0.56281


In [24]:
# Two stacked SimpleRNN layers (48 units each) and a linear output node.
# Only the first layer declares input_shape; the second takes its input from
# the layer before it, so repeating input_shape there had no effect.
model = Sequential([
    SimpleRNN(48, input_shape = [None,1], return_sequences = True),
    SimpleRNN(48),
    Dense(1, activation = 'linear'),
])

# MAE is the training loss; MSE is tracked so print_last can report RMSE.
model.compile(
    optimizer = Adam(learning_rate = 0.001),
    loss = 'mae',
    metrics = ['mse']
)

res = model.fit(
    train_ds,
    validation_data = val_ds,
    epochs = 50,
    callbacks = [early_stop],
    verbose = 0
)

print_last(res)

Mean Absolute Error - Training: 0.23527
Mean Absolute Error - Testing: 0.23566
Root Mean Squared Error - Training: 0.57025
Root Mean Squared Error - Testing: 0.55458


In [25]:
# One wider SimpleRNN layer (72 units) and a linear output node.
model = Sequential([
    SimpleRNN(72, input_shape = [None,1]),
    Dense(1, activation = 'linear'),
])

# MAE is the training loss; MSE is tracked so print_last can report RMSE.
model.compile(
    optimizer = Adam(learning_rate = 0.001),
    loss = 'mae',
    metrics = ['mse']
)

res = model.fit(
    train_ds,
    validation_data = val_ds,
    epochs = 50,
    callbacks = [early_stop],
    verbose = 0
)

print_last(res)

Mean Absolute Error - Training: 0.23691
Mean Absolute Error - Testing: 0.23858
Root Mean Squared Error - Training: 0.57434
Root Mean Squared Error - Testing: 0.54871


In [26]:
# Two stacked SimpleRNN layers, narrowing from 72 to 48 units, with a linear
# output node.
# Only the first layer declares input_shape; the second takes its input from
# the layer before it, so repeating input_shape there had no effect.
model = Sequential([
    SimpleRNN(72, input_shape = [None,1], return_sequences = True),
    SimpleRNN(48),
    Dense(1, activation = 'linear'),
])

# MAE is the training loss; MSE is tracked so print_last can report RMSE.
model.compile(
    optimizer = Adam(learning_rate = 0.001),
    loss = 'mae',
    metrics = ['mse']
)

res = model.fit(
    train_ds,
    validation_data = val_ds,
    epochs = 50,
    callbacks = [early_stop],
    verbose = 0
)

print_last(res)

Mean Absolute Error - Training: 0.22803
Mean Absolute Error - Testing: 0.23764
Root Mean Squared Error - Training: 0.5555
Root Mean Squared Error - Testing: 0.55176


In [27]:
# Two stacked SimpleRNN layers, widening from 48 to 72 units, with a linear
# output node.
# Only the first layer declares input_shape; the second takes its input from
# the layer before it, so repeating input_shape there had no effect.
model = Sequential([
    SimpleRNN(48, input_shape = [None,1], return_sequences = True),
    SimpleRNN(72),
    Dense(1, activation = 'linear'),
])

# MAE is the training loss; MSE is tracked so print_last can report RMSE.
model.compile(
    optimizer = Adam(learning_rate = 0.001),
    loss = 'mae',
    metrics = ['mse']
)

res = model.fit(
    train_ds,
    validation_data = val_ds,
    epochs = 50,
    callbacks = [early_stop],
    verbose = 0
)

print_last(res)

Mean Absolute Error - Training: 0.24563
Mean Absolute Error - Testing: 0.25973
Root Mean Squared Error - Training: 0.58067
Root Mean Squared Error - Testing: 0.57408


---
# Daily Modeling
---

In [48]:
# Daily series: the maximum reading of each calendar day.
daily = df.resample('D').max()
# Days with no value inherit the previous day's value.
daily = daily.ffill()

In [49]:
# Chronological split of the daily frame (no shuffling): the last 15% is held out.
y_train, y_test = train_test_split(
    daily,
    test_size = 0.15,
    shuffle = False
)

Investigating 1-week intervals

In [50]:
seq_length = 7

# Training windows: every run of seq_length consecutive daily rows is paired
# with the row that immediately follows it (targets are offset by seq_length).
train_ds = timeseries_dataset_from_array(
    data = y_train.to_numpy(),
    targets = y_train[seq_length:],
    sequence_length = seq_length,
    shuffle = True,
    seed = 42,
    batch_size = 28
)

# Validation windows are built the same way from the held-out tail, unshuffled.
val_ds = timeseries_dataset_from_array(
    data = y_test.to_numpy(),
    targets = y_test[seq_length:],
    sequence_length = seq_length,
    batch_size = 28
)

In [51]:
# Smallest baseline for the daily series: a single one-unit SimpleRNN feeding
# a linear output node.
model = Sequential([
    SimpleRNN(1, input_shape = [None,1]),
    Dense(1, activation = 'linear'),
])

# MAE is the training loss; MSE is tracked so print_last can report RMSE.
model.compile(
    optimizer = Adam(learning_rate = 0.001),
    loss = 'mae',
    metrics = ['mse']
)

res = model.fit(
    train_ds,
    validation_data = val_ds,
    epochs = 50,
    callbacks = [early_stop],
    verbose = 0
)

print_last(res)

Mean Absolute Error - Training: 2.63027
Mean Absolute Error - Testing: 3.08085
Root Mean Squared Error - Training: 2.85879
Root Mean Squared Error - Testing: 3.16829


In [52]:
# One SimpleRNN layer with 7 units and a linear output node.
model = Sequential([
    SimpleRNN(7, input_shape = [None,1]),
    Dense(1, activation = 'linear'),
])

# MAE is the training loss; MSE is tracked so print_last can report RMSE.
model.compile(
    optimizer = Adam(learning_rate = 0.001),
    loss = 'mae',
    metrics = ['mse']
)

res = model.fit(
    train_ds,
    validation_data = val_ds,
    epochs = 50,
    callbacks = [early_stop],
    verbose = 0
)

print_last(res)

Mean Absolute Error - Training: 0.61969
Mean Absolute Error - Testing: 0.45712
Root Mean Squared Error - Training: 1.06081
Root Mean Squared Error - Testing: 0.89066


In [53]:
# One SimpleRNN layer with 12 units and a linear output node.
model = Sequential([
    SimpleRNN(12, input_shape = [None,1]),
    Dense(1, activation = 'linear'),
])

# MAE is the training loss; MSE is tracked so print_last can report RMSE.
model.compile(
    optimizer = Adam(learning_rate = 0.001),
    loss = 'mae',
    metrics = ['mse']
)

res = model.fit(
    train_ds,
    validation_data = val_ds,
    epochs = 50,
    callbacks = [early_stop],
    verbose = 0
)

print_last(res)

Mean Absolute Error - Training: 0.63252
Mean Absolute Error - Testing: 0.48603
Root Mean Squared Error - Training: 1.06603
Root Mean Squared Error - Testing: 0.9151


In [54]:
# One SimpleRNN layer with 31 units and a linear output node.
model = Sequential([
    SimpleRNN(31, input_shape = [None,1]),
    Dense(1, activation = 'linear'),
])

# MAE is the training loss; MSE is tracked so print_last can report RMSE.
model.compile(
    optimizer = Adam(learning_rate = 0.001),
    loss = 'mae',
    metrics = ['mse']
)

res = model.fit(
    train_ds,
    validation_data = val_ds,
    epochs = 50,
    callbacks = [early_stop],
    verbose = 0
)

print_last(res)

Mean Absolute Error - Training: 0.61306
Mean Absolute Error - Testing: 0.46041
Root Mean Squared Error - Training: 1.05342
Root Mean Squared Error - Testing: 0.89714


In [55]:
# One very wide SimpleRNN layer (365 units) and a linear output node.
model = Sequential([
    SimpleRNN(365, input_shape = [None,1]),
    Dense(1, activation = 'linear'),
])

# MAE is the training loss; MSE is tracked so print_last can report RMSE.
model.compile(
    optimizer = Adam(learning_rate = 0.001),
    loss = 'mae',
    metrics = ['mse']
)

res = model.fit(
    train_ds,
    validation_data = val_ds,
    epochs = 50,
    callbacks = [early_stop],
    verbose = 0
)

print_last(res)

Mean Absolute Error - Training: 0.72348
Mean Absolute Error - Testing: 0.47793
Root Mean Squared Error - Training: 1.10367
Root Mean Squared Error - Testing: 0.86883


In [56]:
# Two stacked SimpleRNN layers (7 units each) and a linear output node.
# Only the first layer declares input_shape; the second takes its input from
# the layer before it, so repeating input_shape there had no effect.
model = Sequential([
    SimpleRNN(7, input_shape = [None,1], return_sequences = True),
    SimpleRNN(7),
    Dense(1, activation = 'linear'),
])

# MAE is the training loss; MSE is tracked so print_last can report RMSE.
model.compile(
    optimizer = Adam(learning_rate = 0.001),
    loss = 'mae',
    metrics = ['mse']
)

res = model.fit(
    train_ds,
    validation_data = val_ds,
    epochs = 50,
    callbacks = [early_stop],
    verbose = 0
)

print_last(res)

Mean Absolute Error - Training: 0.62544
Mean Absolute Error - Testing: 0.47534
Root Mean Squared Error - Training: 1.06193
Root Mean Squared Error - Testing: 0.93186


In [57]:
# Two stacked SimpleRNN layers (30 then 365 units) and a linear output node.
# Only the first layer declares input_shape; the second takes its input from
# the layer before it, so repeating input_shape there had no effect.
model = Sequential([
    SimpleRNN(30, input_shape = [None,1], return_sequences = True),
    SimpleRNN(365),
    Dense(1, activation = 'linear'),
])

# MAE is the training loss; MSE is tracked so print_last can report RMSE.
model.compile(
    optimizer = Adam(learning_rate = 0.001),
    loss = 'mae',
    metrics = ['mse']
)

res = model.fit(
    train_ds,
    validation_data = val_ds,
    epochs = 50,
    callbacks = [early_stop],
    verbose = 0
)

print_last(res)

Mean Absolute Error - Training: 0.64554
Mean Absolute Error - Testing: 0.45283
Root Mean Squared Error - Training: 1.04567
Root Mean Squared Error - Testing: 0.90361


In [58]:
# Three stacked SimpleRNN layers (30, 90, then 365 units) and a linear output node.
# Only the first layer declares input_shape; the later layers take their input
# from the layer before them, so repeating input_shape there had no effect.
model = Sequential([
    SimpleRNN(30, input_shape = [None,1], return_sequences = True),
    SimpleRNN(90, return_sequences = True),
    SimpleRNN(365),
    Dense(1, activation = 'linear'),
])

# MAE is the training loss; MSE is tracked so print_last can report RMSE.
model.compile(
    optimizer = Adam(learning_rate = 0.001),
    loss = 'mae',
    metrics = ['mse']
)

res = model.fit(
    train_ds,
    validation_data = val_ds,
    epochs = 50,
    callbacks = [early_stop],
    verbose = 0
)

print_last(res)

Mean Absolute Error - Training: 0.64174
Mean Absolute Error - Testing: 0.49721
Root Mean Squared Error - Training: 1.05389
Root Mean Squared Error - Testing: 0.97802


In [59]:
# Three stacked SimpleRNN layers (30, 365, then 365 units) and a linear output node.
# Only the first layer declares input_shape; the later layers take their input
# from the layer before them, so repeating input_shape there had no effect.
model = Sequential([
    SimpleRNN(30, input_shape = [None,1], return_sequences = True),
    SimpleRNN(365, return_sequences = True),
    SimpleRNN(365),
    Dense(1, activation = 'linear'),
])

# MAE is the training loss; MSE is tracked so print_last can report RMSE.
model.compile(
    optimizer = Adam(learning_rate = 0.001),
    loss = 'mae',
    metrics = ['mse']
)

res = model.fit(
    train_ds,
    validation_data = val_ds,
    epochs = 50,
    callbacks = [early_stop],
    verbose = 0
)

print_last(res)

Mean Absolute Error - Training: 0.6237
Mean Absolute Error - Testing: 0.50574
Root Mean Squared Error - Training: 1.05238
Root Mean Squared Error - Testing: 0.85971


In [60]:
# Three stacked SimpleRNN layers (7, 30, then 365 units) and a linear output node.
# Only the first layer declares input_shape; the later layers take their input
# from the layer before them, so repeating input_shape there had no effect.
model = Sequential([
    SimpleRNN(7, input_shape = [None,1], return_sequences = True),
    SimpleRNN(30, return_sequences = True),
    SimpleRNN(365),
    Dense(1, activation = 'linear'),
])

# MAE is the training loss; MSE is tracked so print_last can report RMSE.
model.compile(
    optimizer = Adam(learning_rate = 0.001),
    loss = 'mae',
    metrics = ['mse']
)

res = model.fit(
    train_ds,
    validation_data = val_ds,
    epochs = 50,
    callbacks = [early_stop],
    verbose = 0
)

print_last(res)

Mean Absolute Error - Training: 0.68168
Mean Absolute Error - Testing: 0.44837
Root Mean Squared Error - Training: 1.08119
Root Mean Squared Error - Testing: 0.89723


Investigating 30-day intervals


In [61]:
seq_length = 30

# Training windows: every run of seq_length consecutive daily rows is paired
# with the row that immediately follows it (targets are offset by seq_length).
train_ds = timeseries_dataset_from_array(
    data = y_train.to_numpy(),
    targets = y_train[seq_length:],
    sequence_length = seq_length,
    shuffle = True,
    seed = 42,
    batch_size = 90
)

# Validation windows are built the same way from the held-out tail, unshuffled.
val_ds = timeseries_dataset_from_array(
    data = y_test.to_numpy(),
    targets = y_test[seq_length:],
    sequence_length = seq_length,
    batch_size = 90
)

In [62]:
# Two stacked SimpleRNN layers (30 then 365 units) and a linear output node.
# Only the first layer declares input_shape; the second takes its input from
# the layer before it, so repeating input_shape there had no effect.
model = Sequential([
    SimpleRNN(30, input_shape = [None,1], return_sequences = True),
    SimpleRNN(365),
    Dense(1, activation = 'linear'),
])

# MAE is the training loss; MSE is tracked so print_last can report RMSE.
model.compile(
    optimizer = Adam(learning_rate = 0.001),
    loss = 'mae',
    metrics = ['mse']
)

res = model.fit(
    train_ds,
    validation_data = val_ds,
    epochs = 50,
    callbacks = [early_stop],
    verbose = 0
)

print_last(res)

Mean Absolute Error - Training: 0.65454
Mean Absolute Error - Testing: 0.49901
Root Mean Squared Error - Training: 1.06471
Root Mean Squared Error - Testing: 0.9847


In [63]:
# Three stacked SimpleRNN layers (30, 90, then 365 units) and a linear output node.
# Only the first layer declares input_shape; the later layers take their input
# from the layer before them, so repeating input_shape there had no effect.
model = Sequential([
    SimpleRNN(30, input_shape = [None,1], return_sequences = True),
    SimpleRNN(90, return_sequences = True),
    SimpleRNN(365),
    Dense(1, activation = 'linear'),
])

# MAE is the training loss; MSE is tracked so print_last can report RMSE.
model.compile(
    optimizer = Adam(learning_rate = 0.001),
    loss = 'mae',
    metrics = ['mse']
)

res = model.fit(
    train_ds,
    validation_data = val_ds,
    epochs = 50,
    callbacks = [early_stop],
    verbose = 0
)

print_last(res)

Mean Absolute Error - Training: 0.63348
Mean Absolute Error - Testing: 0.4699
Root Mean Squared Error - Training: 1.05082
Root Mean Squared Error - Testing: 0.9077


Increasing the interval length didn't seem to help, so next investigating shorter intervals (3 days).

In [64]:
seq_length = 3

# Training windows: every run of seq_length consecutive daily rows is paired
# with the row that immediately follows it (targets are offset by seq_length).
train_ds = timeseries_dataset_from_array(
    data = y_train.to_numpy(),
    targets = y_train[seq_length:],
    sequence_length = seq_length,
    shuffle = True,
    seed = 42,
    batch_size = 21
)

# Validation windows are built the same way from the held-out tail, unshuffled.
val_ds = timeseries_dataset_from_array(
    data = y_test.to_numpy(),
    targets = y_test[seq_length:],
    sequence_length = seq_length,
    batch_size = 21
)

In [65]:
# Two stacked SimpleRNN layers (30 then 365 units) and a linear output node.
# Only the first layer declares input_shape; the second takes its input from
# the layer before it, so repeating input_shape there had no effect.
model = Sequential([
    SimpleRNN(30, input_shape = [None,1], return_sequences = True),
    SimpleRNN(365),
    Dense(1, activation = 'linear'),
])

# MAE is the training loss; MSE is tracked so print_last can report RMSE.
model.compile(
    optimizer = Adam(learning_rate = 0.001),
    loss = 'mae',
    metrics = ['mse']
)

res = model.fit(
    train_ds,
    validation_data = val_ds,
    epochs = 50,
    callbacks = [early_stop],
    verbose = 0
)

print_last(res)

Mean Absolute Error - Training: 0.69263
Mean Absolute Error - Testing: 0.45512
Root Mean Squared Error - Training: 1.09823
Root Mean Squared Error - Testing: 0.85814


In [66]:
# Two stacked SimpleRNN layers (30 then 365 units), then a 64-unit ReLU Dense
# layer with 15% dropout before the linear output node.
# Only the first layer declares input_shape; the second recurrent layer takes
# its input from the layer before it, so repeating input_shape there had no effect.
model = Sequential([
    SimpleRNN(30, input_shape = [None,1], return_sequences = True),
    SimpleRNN(365),
    Dense(64, activation = 'relu'),
    Dropout(0.15),
    Dense(1, activation = 'linear'),
])

# MAE is the training loss; MSE is tracked so print_last can report RMSE.
model.compile(
    optimizer = Adam(learning_rate = 0.001),
    loss = 'mae',
    metrics = ['mse']
)

res = model.fit(
    train_ds,
    validation_data = val_ds,
    epochs = 50,
    callbacks = [early_stop],
    verbose = 0
)

print_last(res)

Mean Absolute Error - Training: 0.86451
Mean Absolute Error - Testing: 0.65837
Root Mean Squared Error - Training: 1.19629
Root Mean Squared Error - Testing: 0.9091


In [67]:
# Two stacked SimpleRNN layers (30 then 365 units), then two 32-unit ReLU Dense
# layers, each followed by 15% dropout, before the linear output node.
# Only the first layer declares input_shape; the second recurrent layer takes
# its input from the layer before it, so repeating input_shape there had no effect.
model = Sequential([
    SimpleRNN(30, input_shape = [None,1], return_sequences = True),
    SimpleRNN(365),
    Dense(32, activation = 'relu'),
    Dropout(0.15),
    Dense(32, activation = 'relu'),
    Dropout(0.15),
    Dense(1, activation = 'linear'),
])

# MAE is the training loss; MSE is tracked so print_last can report RMSE.
model.compile(
    optimizer = Adam(learning_rate = 0.001),
    loss = 'mae',
    metrics = ['mse']
)

res = model.fit(
    train_ds,
    validation_data = val_ds,
    epochs = 50,
    callbacks = [early_stop],
    verbose = 0
)

print_last(res)

Mean Absolute Error - Training: 1.10241
Mean Absolute Error - Testing: 1.55852
Root Mean Squared Error - Training: 1.42781
Root Mean Squared Error - Testing: 1.61258
