# Univariate Modeling Using RNN

#### (Meant to be run within Google Colab)

In [1]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, mean_squared_error, mean_absolute_error

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM, SimpleRNN
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator
from tensorflow.keras.utils import timeseries_dataset_from_array
from tensorflow.keras.callbacks import EarlyStopping

import matplotlib.pyplot as plt
np.random.seed(42)

In [None]:
#mount drive
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
#import campus5
from google.colab import files
import io

uploaded = files.upload()

campus5 = io.BytesIO(uploaded['campus5.csv'])
df = pd.read_csv(campus5)

Saving campus5.csv to campus5.csv


In [4]:
df.index = pd.to_datetime(df['Timestamp'])

In [5]:
#Looking at just Solar Generation to Predict Solar Generation. And train/test/splitting
df = df[['SolarGeneration']]
y_train, y_test = train_test_split(df['SolarGeneration'], shuffle = False, test_size = 0.15)

In [6]:
def print_last(res):
  '''
  Helper function to just print out the last training and testing loss 
  function values as well as the last metrics of a fitted model
  '''
  print(f"Mean Absolute Error - Training: {np.round(res.history['loss'][-1], 5)}")
  print(f"Mean Absolute Error - Testing: {np.round(res.history['val_loss'][-1], 5)}")
  print(f"Root Mean Squared Error - Training: {np.round(np.sqrt(res.history['mse'][-1]), 5)}")
  print(f"Root Mean Squared Error - Testing: {np.round(np.sqrt(res.history['val_mse'][-1]), 5)}")

In [7]:
early_stop = EarlyStopping(monitor = 'val_loss', patience = 5)

First investigating 12 lengths (15min) at a time to look at 3-hour intervals  

Examining the different types of models with various numbers of SimpleRNN layers, Dense layers, and nodes for each.

In [8]:
seq_length = 12
train_ds = timeseries_dataset_from_array(
    y_train.to_numpy(),
    targets = y_train[seq_length:],
    sequence_length = seq_length,
    batch_size = 32,
    shuffle = True,
    seed = 42
)

val_ds = timeseries_dataset_from_array(
    y_test.to_numpy(),
    targets = y_test[seq_length:],
    sequence_length = seq_length,
    batch_size = 32,
)

In [9]:
model = Sequential()
model.add(SimpleRNN(1, input_shape = [None,1]))
model.add(Dense(1, activation = 'linear'))

model.compile(loss = 'mae',
              optimizer = Adam(learning_rate = 0.001), 
              metrics = ['mse'])

res = model.fit(train_ds, 
                validation_data = val_ds, 
                epochs = 50, 
                verbose = 0,
                callbacks = [early_stop])

print_last(res)

Mean Absolute Error - Training: 0.23916
Mean Absolute Error - Testing: 0.23521
Root Mean Squared Error - Training: 0.67582
Root Mean Squared Error - Testing: 0.61915


In [None]:
model = Sequential()

model.add(SimpleRNN(32, input_shape = [None,1]))
model.add(Dense(1, activation = 'linear'))

model.compile(loss = 'mae',
              optimizer = Adam(learning_rate = 0.001), 
              metrics = ['mse'])

res = model.fit(train_ds, 
                validation_data = val_ds, 
                epochs = 50, 
                verbose = 0,
                callbacks = [early_stop])

print_last(res)

Mean Absolute Error - Training: 0.19295
Mean Absolute Error - Testing: 0.18596
Root Mean Squared Error - Training: 0.62909
Root Mean Squared Error - Testing: 0.58414


In [10]:
model = Sequential()

model.add(SimpleRNN(32, input_shape = [None,1], return_sequences = True))
model.add(SimpleRNN(32, input_shape = [None,1]))
model.add(Dense(1, activation = 'linear'))

model.compile(loss = 'mae',
              optimizer = Adam(learning_rate = 0.001), 
              metrics = ['mse'])

res = model.fit(train_ds, 
                validation_data = val_ds, 
                epochs = 50, 
                verbose = 0,
                callbacks = [early_stop])

print_last(res)

Mean Absolute Error - Training: 0.2033
Mean Absolute Error - Testing: 0.18795
Root Mean Squared Error - Training: 0.6388
Root Mean Squared Error - Testing: 0.58395


In [None]:
model = Sequential()

model.add(SimpleRNN(32, input_shape = [None,1], return_sequences = True))
model.add(SimpleRNN(32, input_shape = [None,1], return_sequences = True))
model.add(SimpleRNN(32, input_shape = [None,1]))
model.add(Dense(1, activation = 'linear'))

model.compile(loss = 'mae',
              optimizer = Adam(learning_rate = 0.001), 
              metrics = ['mse'])

res = model.fit(train_ds, 
                validation_data = val_ds, 
                epochs = 50, 
                verbose = 0,
                callbacks = [early_stop])

print_last(res)

Mean Absolute Error - Training: 0.1934
Mean Absolute Error - Testing: 0.20146
Root Mean Squared Error - Training: 0.61372
Root Mean Squared Error - Testing: 0.58956


In [None]:
model = Sequential()

model.add(SimpleRNN(32, input_shape = [None,1], return_sequences = True))
model.add(SimpleRNN(32, input_shape = [None,1], return_sequences = True))
model.add(SimpleRNN(32, input_shape = [None,1]))
model.add(Dense(32, activation = 'relu'))
model.add(Dense(1, activation = 'linear'))

model.compile(loss = 'mae',
              optimizer = Adam(learning_rate = 0.001), 
              metrics = ['mse'])

res = model.fit(train_ds, 
                validation_data = val_ds, 
                epochs = 50, 
                verbose = 0,
                callbacks = [early_stop])

print_last(res)

Mean Absolute Error - Training: 0.19636
Mean Absolute Error - Testing: 0.19464
Root Mean Squared Error - Training: 0.62842
Root Mean Squared Error - Testing: 0.58935


---

Next investigating 16 lengths (15min) at a time to look at 4-hour intervals  

Examining the different types of models with various numbers of SimpleRNN layers, Dense layers, and nodes for each.

In [8]:
seq_length = 16
train_ds = timeseries_dataset_from_array(
    y_train.to_numpy(),
    targets = y_train[seq_length:],
    sequence_length = seq_length,
    batch_size = 96,
    shuffle = True,
    seed = 42
)

val_ds = timeseries_dataset_from_array(
    y_test.to_numpy(),
    targets = y_test[seq_length:],
    sequence_length = seq_length,
    batch_size = 96,
)

In [None]:
model = Sequential()

model.add(SimpleRNN(32, input_shape = [None,1]))
model.add(Dense(1, activation = 'linear'))

model.compile(loss = 'mae',
              optimizer = Adam(learning_rate = 0.001), 
              metrics = ['mse'])

res = model.fit(train_ds, 
                validation_data = val_ds, 
                epochs = 50, 
                verbose = 0,
                callbacks = [early_stop])

print_last(res)

Mean Absolute Error - Training: 0.19298
Mean Absolute Error - Testing: 0.18917
Root Mean Squared Error - Training: 0.63863
Root Mean Squared Error - Testing: 0.5929


In [None]:
model = Sequential()

model.add(SimpleRNN(32, input_shape = [None,1], return_sequences = True))
model.add(SimpleRNN(32, input_shape = [None,1]))
model.add(Dense(1, activation = 'linear'))

model.compile(loss = 'mae',
              optimizer = Adam(learning_rate = 0.001), 
              metrics = ['mse'])

res = model.fit(train_ds, 
                validation_data = val_ds, 
                epochs = 50, 
                verbose = 0,
                callbacks = [early_stop])

print_last(res)

Mean Absolute Error - Training: 0.19299
Mean Absolute Error - Testing: 0.18684
Root Mean Squared Error - Training: 0.62877
Root Mean Squared Error - Testing: 0.5807


---

Investigating 96 lengths (15min) at a time to look at 24-hour intervals  

Examining the different types of models with various numbers of SimpleRNN layers, Dense layers, and nodes for each.

In [None]:
seq_length = 96
train_ds = timeseries_dataset_from_array(
    y_train.to_numpy(),
    targets = y_train[seq_length:],
    sequence_length = seq_length,
    batch_size = 672,
    shuffle = True,
    seed = 42
)

val_ds = timeseries_dataset_from_array(
    y_test.to_numpy(),
    targets = y_test[seq_length:],
    sequence_length = seq_length,
    batch_size = 672,
)

In [None]:
model = Sequential()

model.add(SimpleRNN(32, input_shape = [None,1]))
model.add(Dense(1, activation = 'linear'))

model.compile(loss = 'mae',
              optimizer = Adam(learning_rate = 0.001), 
              metrics = ['mse'])

res = model.fit(train_ds, 
                validation_data = val_ds, 
                epochs = 50, 
                verbose = 0,
                callbacks = [early_stop])

print_last(res)

Mean Absolute Error - Training: 0.19713
Mean Absolute Error - Testing: 0.19164
Root Mean Squared Error - Training: 0.63455
Root Mean Squared Error - Testing: 0.58515


In [None]:
model = Sequential()

model.add(SimpleRNN(32, input_shape = [None,1], return_sequences = True))
model.add(SimpleRNN(32, input_shape = [None,1]))
model.add(Dense(1, activation = 'linear'))

model.compile(loss = 'mae',
              optimizer = Adam(learning_rate = 0.001), 
              metrics = ['mse'])

res = model.fit(train_ds, 
                validation_data = val_ds, 
                epochs = 50, 
                verbose = 0,
                callbacks = [early_stop])

print_last(res)

Mean Absolute Error - Training: 0.19312
Mean Absolute Error - Testing: 0.19007
Root Mean Squared Error - Training: 0.63242
Root Mean Squared Error - Testing: 0.586


In [None]:
model = Sequential()

model.add(SimpleRNN(64, input_shape = [None,1]))
model.add(Dense(1, activation = 'linear'))

model.compile(loss = 'mae',
              optimizer = Adam(learning_rate = 0.001), 
              metrics = ['mse'])

res = model.fit(train_ds, 
                validation_data = val_ds, 
                epochs = 50, 
                verbose = 0,
                callbacks = [early_stop])

print_last(res)

Mean Absolute Error - Training: 0.19078
Mean Absolute Error - Testing: 0.18271
Root Mean Squared Error - Training: 0.63263
Root Mean Squared Error - Testing: 0.58737


In [None]:
model = Sequential()

model.add(SimpleRNN(96, input_shape = [None,1]))
model.add(Dense(1, activation = 'linear'))

model.compile(loss = 'mae',
              optimizer = Adam(learning_rate = 0.001), 
              metrics = ['mse'])

res = model.fit(train_ds, 
                validation_data = val_ds, 
                epochs = 50, 
                verbose = 0,
                callbacks = [early_stop])

print_last(res)

Mean Absolute Error - Training: 0.19423
Mean Absolute Error - Testing: 0.19427
Root Mean Squared Error - Training: 0.63656
Root Mean Squared Error - Testing: 0.59407


# Tuning Optimizer of the Top Two Models
### (with corresponding sequence lengths)


In [17]:
seq_length = 96
train_ds = timeseries_dataset_from_array(
    y_train.to_numpy(),
    targets = y_train[seq_length:],
    sequence_length = seq_length,
    batch_size = 672,
    shuffle = True,
    seed = 42
)

val_ds = timeseries_dataset_from_array(
    y_test.to_numpy(),
    targets = y_test[seq_length:],
    sequence_length = seq_length,
    batch_size = 672,
)

In [10]:
model = Sequential()

model.add(SimpleRNN(64, input_shape = [None,1]))
model.add(Dense(1, activation = 'linear'))

model.compile(loss = 'mae',
              optimizer = Adam(learning_rate = 0.0005), 
              metrics = ['mse'])

res = model.fit(train_ds, 
                validation_data = val_ds, 
                epochs = 50, 
                verbose = 0,
                callbacks = [early_stop])

print_last(res)

Mean Absolute Error - Training: 0.19456
Mean Absolute Error - Testing: 0.19201
Root Mean Squared Error - Training: 0.63696
Root Mean Squared Error - Testing: 0.58862


In [11]:
model = Sequential()

model.add(SimpleRNN(64, input_shape = [None,1]))
model.add(Dense(1, activation = 'linear'))

model.compile(loss = 'mae',
              optimizer = Adam(learning_rate = 0.001), 
              metrics = ['mse'])

res = model.fit(train_ds, 
                validation_data = val_ds, 
                epochs = 50, 
                verbose = 0,
                callbacks = [early_stop])

print_last(res)

Mean Absolute Error - Training: 0.18968
Mean Absolute Error - Testing: 0.18396
Root Mean Squared Error - Training: 0.63455
Root Mean Squared Error - Testing: 0.58532


In [18]:
model = Sequential()

model.add(SimpleRNN(64, input_shape = [None,1]))
model.add(Dense(1, activation = 'linear'))

model.compile(loss = 'mae',
              optimizer = Adam(learning_rate = 0.0015), 
              metrics = ['mse'])

res = model.fit(train_ds, 
                validation_data = val_ds, 
                epochs = 50, 
                verbose = 0,
                callbacks = [early_stop])

print_last(res)

Mean Absolute Error - Training: 0.19287
Mean Absolute Error - Testing: 0.18656
Root Mean Squared Error - Training: 0.63784
Root Mean Squared Error - Testing: 0.58541


In [12]:
model = Sequential()

model.add(SimpleRNN(64, input_shape = [None,1]))
model.add(Dense(1, activation = 'linear'))

model.compile(loss = 'mae',
              optimizer = Adam(learning_rate = 0.002), 
              metrics = ['mse'])

res = model.fit(train_ds, 
                validation_data = val_ds, 
                epochs = 50, 
                verbose = 0,
                callbacks = [early_stop])

print_last(res)

Mean Absolute Error - Training: 0.19061
Mean Absolute Error - Testing: 0.18338
Root Mean Squared Error - Training: 0.63215
Root Mean Squared Error - Testing: 0.5831


In [19]:
model = Sequential()

model.add(SimpleRNN(64, input_shape = [None,1]))
model.add(Dense(1, activation = 'linear'))

model.compile(loss = 'mae',
              optimizer = Adam(learning_rate = 0.0025), 
              metrics = ['mse'])

res = model.fit(train_ds, 
                validation_data = val_ds, 
                epochs = 50, 
                verbose = 0,
                callbacks = [early_stop])

print_last(res)

Mean Absolute Error - Training: 0.19108
Mean Absolute Error - Testing: 0.1847
Root Mean Squared Error - Training: 0.63076
Root Mean Squared Error - Testing: 0.58235


In [20]:
model = Sequential()

model.add(SimpleRNN(64, input_shape = [None,1]))
model.add(Dense(1, activation = 'linear'))

model.compile(loss = 'mae',
              optimizer = Adam(learning_rate = 0.003), 
              metrics = ['mse'])

res = model.fit(train_ds, 
                validation_data = val_ds, 
                epochs = 50, 
                verbose = 0,
                callbacks = [early_stop])

print_last(res)

Mean Absolute Error - Training: 0.19729
Mean Absolute Error - Testing: 0.18889
Root Mean Squared Error - Training: 0.64011
Root Mean Squared Error - Testing: 0.59081


The BEST model for Mean Absolute Error is a SimpleRNN with 64 nodes, adam optimizer with 0.002 learning rate, and with 15-min intervals of 96 (1 day)

Best MAE - 0.18338

---

In [21]:
seq_length = 16
train_ds = timeseries_dataset_from_array(
    y_train.to_numpy(),
    targets = y_train[seq_length:],
    sequence_length = seq_length,
    batch_size = 96,
    shuffle = True,
    seed = 42
)

val_ds = timeseries_dataset_from_array(
    y_test.to_numpy(),
    targets = y_test[seq_length:],
    sequence_length = seq_length,
    batch_size = 96,
)

In [14]:
model = Sequential()

model.add(SimpleRNN(32, input_shape = [None,1], return_sequences = True))
model.add(SimpleRNN(32, input_shape = [None,1]))
model.add(Dense(1, activation = 'linear'))

model.compile(loss = 'mae',
              optimizer = Adam(learning_rate = 0.0009), 
              metrics = ['mse'])

res = model.fit(train_ds, 
                validation_data = val_ds, 
                epochs = 50, 
                verbose = 0,
                callbacks = [early_stop])

print_last(res)

Mean Absolute Error - Training: 0.19509
Mean Absolute Error - Testing: 0.19187
Root Mean Squared Error - Training: 0.63242
Root Mean Squared Error - Testing: 0.58409


In [15]:
model = Sequential()

model.add(SimpleRNN(32, input_shape = [None,1], return_sequences = True))
model.add(SimpleRNN(32, input_shape = [None,1]))
model.add(Dense(1, activation = 'linear'))

model.compile(loss = 'mae',
              optimizer = Adam(learning_rate = 0.001), 
              metrics = ['mse'])

res = model.fit(train_ds, 
                validation_data = val_ds, 
                epochs = 50, 
                verbose = 0,
                callbacks = [early_stop])

print_last(res)

Mean Absolute Error - Training: 0.19472
Mean Absolute Error - Testing: 0.19506
Root Mean Squared Error - Training: 0.63036
Root Mean Squared Error - Testing: 0.58376


In [22]:
model = Sequential()

model.add(SimpleRNN(32, input_shape = [None,1], return_sequences = True))
model.add(SimpleRNN(32, input_shape = [None,1]))
model.add(Dense(1, activation = 'linear'))

model.compile(loss = 'mae',
              optimizer = Adam(learning_rate = 0.0015), 
              metrics = ['mse'])

res = model.fit(train_ds, 
                validation_data = val_ds, 
                epochs = 50, 
                verbose = 0,
                callbacks = [early_stop])

print_last(res)

Mean Absolute Error - Training: 0.19264
Mean Absolute Error - Testing: 0.19154
Root Mean Squared Error - Training: 0.62282
Root Mean Squared Error - Testing: 0.57525


In [16]:
model = Sequential()

model.add(SimpleRNN(32, input_shape = [None,1], return_sequences = True))
model.add(SimpleRNN(32, input_shape = [None,1]))
model.add(Dense(1, activation = 'linear'))

model.compile(loss = 'mae',
              optimizer = Adam(learning_rate = 0.002), 
              metrics = ['mse'])

res = model.fit(train_ds, 
                validation_data = val_ds, 
                epochs = 50, 
                verbose = 0,
                callbacks = [early_stop])

print_last(res)

Mean Absolute Error - Training: 0.1975
Mean Absolute Error - Testing: 0.2135
Root Mean Squared Error - Training: 0.62853
Root Mean Squared Error - Testing: 0.5804


In [23]:
model = Sequential()

model.add(SimpleRNN(32, input_shape = [None,1], return_sequences = True))
model.add(SimpleRNN(32, input_shape = [None,1]))
model.add(Dense(1, activation = 'linear'))

model.compile(loss = 'mae',
              optimizer = Adam(learning_rate = 0.0025), 
              metrics = ['mse'])

res = model.fit(train_ds, 
                validation_data = val_ds, 
                epochs = 50, 
                verbose = 0,
                callbacks = [early_stop])

print_last(res)

Mean Absolute Error - Training: 0.19755
Mean Absolute Error - Testing: 0.21749
Root Mean Squared Error - Training: 0.62439
Root Mean Squared Error - Testing: 0.57543


In [24]:
model = Sequential()

model.add(SimpleRNN(32, input_shape = [None,1], return_sequences = True))
model.add(SimpleRNN(32, input_shape = [None,1]))
model.add(Dense(1, activation = 'linear'))

model.compile(loss = 'mae',
              optimizer = Adam(learning_rate = 0.003), 
              metrics = ['mse'])

res = model.fit(train_ds, 
                validation_data = val_ds, 
                epochs = 50, 
                verbose = 0,
                callbacks = [early_stop])

print_last(res)

Mean Absolute Error - Training: 0.19979
Mean Absolute Error - Testing: 0.20776
Root Mean Squared Error - Training: 0.62893
Root Mean Squared Error - Testing: 0.5804


In [25]:
model = Sequential()

model.add(SimpleRNN(32, input_shape = [None,1], return_sequences = True))
model.add(SimpleRNN(32, input_shape = [None,1]))
model.add(Dense(1, activation = 'linear'))

model.compile(loss = 'mae',
              optimizer = Adam(learning_rate = 0.0035), 
              metrics = ['mse'])

res = model.fit(train_ds, 
                validation_data = val_ds, 
                epochs = 50, 
                verbose = 0,
                callbacks = [early_stop])

print_last(res)

Mean Absolute Error - Training: 0.20235
Mean Absolute Error - Testing: 0.20066
Root Mean Squared Error - Training: 0.62587
Root Mean Squared Error - Testing: 0.57981


The BEST model for Root Mean Squared Error is two SimpleRNNs with 32 nodes each, adam optimizer with 0.0015 learning rate, and with 15-min intervals of 96 (1 day)

Best RMSE - 0.57525

---
# Hourly Modeling
---

In [18]:
hourly = df.resample('H').mean()
hourly = hourly.interpolate(method = 'pad')

In [20]:
y_train, y_test = train_test_split(hourly, shuffle = False, test_size = 0.15)

Investigating at 13-hour intervals

In [21]:
seq_length = 13
train_ds = timeseries_dataset_from_array(
    y_train.to_numpy(),
    targets = y_train[seq_length:],
    sequence_length = seq_length,
    batch_size = 48,
    shuffle = True,
    seed = 42
)

val_ds = timeseries_dataset_from_array(
    y_test.to_numpy(),
    targets = y_test[seq_length:],
    sequence_length = seq_length,
    batch_size = 48,
)

In [22]:
model = Sequential()
model.add(SimpleRNN(1, input_shape = [None,1]))
model.add(Dense(1, activation = 'linear'))

model.compile(loss = 'mae',
              optimizer = Adam(learning_rate = 0.001), 
              metrics = ['mse'])

res = model.fit(train_ds, 
                validation_data = val_ds, 
                epochs = 50, 
                verbose = 0)

print_last(res)

Mean Absolute Error - Training: 0.38861
Mean Absolute Error - Testing: 0.43767
Root Mean Squared Error - Training: 0.75304
Root Mean Squared Error - Testing: 0.78704


In [23]:
model = Sequential()

model.add(SimpleRNN(48, input_shape = [None,1]))
model.add(Dense(1, activation = 'linear'))

model.compile(loss = 'mae',
              optimizer = Adam(learning_rate = 0.001), 
              metrics = ['mse'])

res = model.fit(train_ds, 
                validation_data = val_ds, 
                epochs = 50, 
                verbose = 0,
                callbacks = [early_stop])

print_last(res)

Mean Absolute Error - Training: 0.26817
Mean Absolute Error - Testing: 0.26548
Root Mean Squared Error - Training: 0.6397
Root Mean Squared Error - Testing: 0.61109


In [24]:
model = Sequential()

model.add(SimpleRNN(48, input_shape = [None,1], return_sequences = True))
model.add(SimpleRNN(48, input_shape = [None,1]))
model.add(Dense(1, activation = 'linear'))

model.compile(loss = 'mae',
              optimizer = Adam(learning_rate = 0.001), 
              metrics = ['mse'])

res = model.fit(train_ds, 
                validation_data = val_ds, 
                epochs = 50, 
                verbose = 0,
                callbacks = [early_stop])

print_last(res)

Mean Absolute Error - Training: 0.24879
Mean Absolute Error - Testing: 0.25638
Root Mean Squared Error - Training: 0.58858
Root Mean Squared Error - Testing: 0.59543


Investigating 24-hour intervals

In [25]:
seq_length = 24
train_ds = timeseries_dataset_from_array(
    y_train.to_numpy(),
    targets = y_train[seq_length:],
    sequence_length = seq_length,
    batch_size = 48,
    shuffle = True,
    seed = 42
)

val_ds = timeseries_dataset_from_array(
    y_test.to_numpy(),
    targets = y_test[seq_length:],
    sequence_length = seq_length,
    batch_size = 48,
)

In [26]:
model = Sequential()

model.add(SimpleRNN(48, input_shape = [None,1]))
model.add(Dense(1, activation = 'linear'))

model.compile(loss = 'mae',
              optimizer = Adam(learning_rate = 0.001), 
              metrics = ['mse'])

res = model.fit(train_ds, 
                validation_data = val_ds, 
                epochs = 50, 
                verbose = 0,
                callbacks = [early_stop])

print_last(res)

Mean Absolute Error - Training: 0.23995
Mean Absolute Error - Testing: 0.24659
Root Mean Squared Error - Training: 0.58851
Root Mean Squared Error - Testing: 0.56296


In [27]:
model = Sequential()

model.add(SimpleRNN(48, input_shape = [None,1], return_sequences = True))
model.add(SimpleRNN(48, input_shape = [None,1]))
model.add(Dense(1, activation = 'linear'))

model.compile(loss = 'mae',
              optimizer = Adam(learning_rate = 0.001), 
              metrics = ['mse'])

res = model.fit(train_ds, 
                validation_data = val_ds, 
                epochs = 50, 
                verbose = 0,
                callbacks = [early_stop])

print_last(res)

Mean Absolute Error - Training: 0.22909
Mean Absolute Error - Testing: 0.2468
Root Mean Squared Error - Training: 0.56309
Root Mean Squared Error - Testing: 0.55242


In [28]:
model = Sequential()

model.add(SimpleRNN(72, input_shape = [None,1]))
model.add(Dense(1, activation = 'linear'))

model.compile(loss = 'mae',
              optimizer = Adam(learning_rate = 0.001), 
              metrics = ['mse'])

res = model.fit(train_ds, 
                validation_data = val_ds, 
                epochs = 50, 
                verbose = 0,
                callbacks = [early_stop])

print_last(res)

Mean Absolute Error - Training: 0.23542
Mean Absolute Error - Testing: 0.23287
Root Mean Squared Error - Training: 0.57461
Root Mean Squared Error - Testing: 0.55874


In [29]:
model = Sequential()

model.add(SimpleRNN(72, input_shape = [None,1], return_sequences = True))
model.add(SimpleRNN(48, input_shape = [None,1]))
model.add(Dense(1, activation = 'linear'))

model.compile(loss = 'mae',
              optimizer = Adam(learning_rate = 0.001), 
              metrics = ['mse'])

res = model.fit(train_ds, 
                validation_data = val_ds, 
                epochs = 50, 
                verbose = 0,
                callbacks = [early_stop])

print_last(res)

Mean Absolute Error - Training: 0.23594
Mean Absolute Error - Testing: 0.24107
Root Mean Squared Error - Training: 0.57315
Root Mean Squared Error - Testing: 0.54415


In [30]:
model = Sequential()

model.add(SimpleRNN(48, input_shape = [None,1], return_sequences = True))
model.add(SimpleRNN(72, input_shape = [None,1]))
model.add(Dense(1, activation = 'linear'))

model.compile(loss = 'mae',
              optimizer = Adam(learning_rate = 0.001), 
              metrics = ['mse'])

res = model.fit(train_ds, 
                validation_data = val_ds, 
                epochs = 50, 
                verbose = 0,
                callbacks = [early_stop])

print_last(res)

Mean Absolute Error - Training: 0.24842
Mean Absolute Error - Testing: 0.24889
Root Mean Squared Error - Training: 0.57893
Root Mean Squared Error - Testing: 0.5649


---
# Daily Modeling
---

In [None]:
daily = df.resample('D').max()

In [None]:
y_train, y_test = train_test_split(daily, shuffle = False, test_size = 0.15)

Investigating 1-week intervals

In [None]:
seq_length = 7
train_ds = timeseries_dataset_from_array(
    y_train.to_numpy(),
    targets = y_train[seq_length:],
    sequence_length = seq_length,
    batch_size = 28,
    shuffle = True,
    seed = 42
)

val_ds = timeseries_dataset_from_array(
    y_test.to_numpy(),
    targets = y_test[seq_length:],
    sequence_length = seq_length,
    batch_size = 28,
)

In [None]:
model = Sequential()

model.add(SimpleRNN(1, input_shape = [None,1]))
model.add(Dense(1, activation = 'linear'))

model.compile(loss = 'mae',
              optimizer = Adam(learning_rate = 0.001), 
              metrics = ['mse'])

res = model.fit(train_ds, 
                validation_data = val_ds, 
                epochs = 50, 
                verbose = 0,
                callbacks = [early_stop])

print_last(res)

Mean Absolute Error - Training: 2.97925
Mean Absolute Error - Testing: 3.46389
Root Mean Squared Error - Training: 3.21027
Root Mean Squared Error - Testing: 3.56108


In [None]:
model = Sequential()

model.add(SimpleRNN(7, input_shape = [None,1]))
model.add(Dense(1, activation = 'linear'))

model.compile(loss = 'mae',
              optimizer = Adam(learning_rate = 0.001), 
              metrics = ['mse'])

res = model.fit(train_ds, 
                validation_data = val_ds, 
                epochs = 50, 
                verbose = 0,
                callbacks = [early_stop])

print_last(res)

Mean Absolute Error - Training: 0.70374
Mean Absolute Error - Testing: 0.48941
Root Mean Squared Error - Training: 1.20097
Root Mean Squared Error - Testing: 0.92623


In [None]:
model = Sequential()

model.add(SimpleRNN(12, input_shape = [None,1]))
model.add(Dense(1, activation = 'linear'))

model.compile(loss = 'mae',
              optimizer = Adam(learning_rate = 0.001), 
              metrics = ['mse'])

res = model.fit(train_ds, 
                validation_data = val_ds, 
                epochs = 50, 
                verbose = 0,
                callbacks = [early_stop])

print_last(res)

Mean Absolute Error - Training: 0.71602
Mean Absolute Error - Testing: 0.49184
Root Mean Squared Error - Training: 1.1997
Root Mean Squared Error - Testing: 0.94834


In [None]:
model = Sequential()

model.add(SimpleRNN(31, input_shape = [None,1]))
model.add(Dense(1, activation = 'linear'))

model.compile(loss = 'mae',
              optimizer = Adam(learning_rate = 0.001), 
              metrics = ['mse'])

res = model.fit(train_ds, 
                validation_data = val_ds, 
                epochs = 50, 
                verbose = 0,
                callbacks = [early_stop])

print_last(res)

Mean Absolute Error - Training: 0.67558
Mean Absolute Error - Testing: 0.48527
Root Mean Squared Error - Training: 1.16008
Root Mean Squared Error - Testing: 0.93525


In [None]:
model = Sequential()

model.add(SimpleRNN(365, input_shape = [None,1]))
model.add(Dense(1, activation = 'linear'))

model.compile(loss = 'mae',
              optimizer = Adam(learning_rate = 0.001), 
              metrics = ['mse'])

res = model.fit(train_ds, 
                validation_data = val_ds, 
                epochs = 50, 
                verbose = 0,
                callbacks = [early_stop])

print_last(res)

Mean Absolute Error - Training: 0.71347
Mean Absolute Error - Testing: 0.45891
Root Mean Squared Error - Training: 1.17209
Root Mean Squared Error - Testing: 0.86097


In [None]:
model = Sequential()

model.add(SimpleRNN(7, input_shape = [None,1], return_sequences = True))
model.add(SimpleRNN(7, input_shape = [None,1]))
model.add(Dense(1, activation = 'linear'))

model.compile(loss = 'mae',
              optimizer = Adam(learning_rate = 0.001), 
              metrics = ['mse'])

res = model.fit(train_ds, 
                validation_data = val_ds, 
                epochs = 50, 
                verbose = 0,
                callbacks = [early_stop])

print_last(res)

Mean Absolute Error - Training: 0.66618
Mean Absolute Error - Testing: 0.45112
Root Mean Squared Error - Training: 1.16447
Root Mean Squared Error - Testing: 0.88664


In [None]:
model = Sequential()

model.add(SimpleRNN(30, input_shape = [None,1], return_sequences = True))
model.add(SimpleRNN(365, input_shape = [None,1]))
model.add(Dense(1, activation = 'linear'))

model.compile(loss = 'mae',
              optimizer = Adam(learning_rate = 0.001), 
              metrics = ['mse'])

res = model.fit(train_ds, 
                validation_data = val_ds, 
                epochs = 50, 
                verbose = 0,
                callbacks = [early_stop])

print_last(res)

Mean Absolute Error - Training: 0.7034
Mean Absolute Error - Testing: 0.46779
Root Mean Squared Error - Training: 1.17327
Root Mean Squared Error - Testing: 0.92003


In [None]:
model = Sequential()

model.add(SimpleRNN(30, input_shape = [None,1], return_sequences = True))
model.add(SimpleRNN(90, input_shape = [None,1], return_sequences = True))
model.add(SimpleRNN(365, input_shape = [None,1]))
model.add(Dense(1, activation = 'linear'))

model.compile(loss = 'mae',
              optimizer = Adam(learning_rate = 0.001), 
              metrics = ['mse'])

res = model.fit(train_ds, 
                validation_data = val_ds, 
                epochs = 50, 
                verbose = 0,
                callbacks = [early_stop])

print_last(res)

Mean Absolute Error - Training: 0.74808
Mean Absolute Error - Testing: 0.45809
Root Mean Squared Error - Training: 1.20196
Root Mean Squared Error - Testing: 0.90456


In [None]:
model = Sequential()

model.add(SimpleRNN(30, input_shape = [None,1], return_sequences = True))
model.add(SimpleRNN(365, input_shape = [None,1], return_sequences = True))
model.add(SimpleRNN(365, input_shape = [None,1]))
model.add(Dense(1, activation = 'linear'))

model.compile(loss = 'mae',
              optimizer = Adam(learning_rate = 0.001), 
              metrics = ['mse'])

res = model.fit(train_ds, 
                validation_data = val_ds, 
                epochs = 50, 
                verbose = 0,
                callbacks = [early_stop])

print_last(res)

Mean Absolute Error - Training: 0.70613
Mean Absolute Error - Testing: 0.51573
Root Mean Squared Error - Training: 1.18259
Root Mean Squared Error - Testing: 0.99389


In [None]:
model = Sequential()

model.add(SimpleRNN(7, input_shape = [None,1], return_sequences = True))
model.add(SimpleRNN(30, input_shape = [None,1], return_sequences = True))
model.add(SimpleRNN(365, input_shape = [None,1]))
model.add(Dense(1, activation = 'linear'))

model.compile(loss = 'mae',
              optimizer = Adam(learning_rate = 0.001), 
              metrics = ['mse'])

res = model.fit(train_ds, 
                validation_data = val_ds, 
                epochs = 50, 
                verbose = 0,
                callbacks = [early_stop])

print_last(res)

Mean Absolute Error - Training: 0.71206
Mean Absolute Error - Testing: 0.50099
Root Mean Squared Error - Training: 1.18455
Root Mean Squared Error - Testing: 0.95314


Investigating 30-day intervals


In [None]:
seq_length = 30
train_ds = timeseries_dataset_from_array(
    y_train.to_numpy(),
    targets = y_train[seq_length:],
    sequence_length = seq_length,
    batch_size = 90,
    shuffle = True,
    seed = 42
)

val_ds = timeseries_dataset_from_array(
    y_test.to_numpy(),
    targets = y_test[seq_length:],
    sequence_length = seq_length,
    batch_size = 90,
)

In [None]:
model = Sequential()

model.add(SimpleRNN(30, input_shape = [None,1], return_sequences = True))
model.add(SimpleRNN(365, input_shape = [None,1]))
model.add(Dense(1, activation = 'linear'))

model.compile(loss = 'mae',
              optimizer = Adam(learning_rate = 0.001), 
              metrics = ['mse'])

res = model.fit(train_ds, 
                validation_data = val_ds, 
                epochs = 50, 
                verbose = 0,
                callbacks = [early_stop])

print_last(res)

Mean Absolute Error - Training: 0.68509
Mean Absolute Error - Testing: 0.51687
Root Mean Squared Error - Training: 1.17812
Root Mean Squared Error - Testing: 1.00927


In [None]:
model = Sequential()

model.add(SimpleRNN(30, input_shape = [None,1], return_sequences = True))
model.add(SimpleRNN(90, input_shape = [None,1], return_sequences = True))
model.add(SimpleRNN(365, input_shape = [None,1]))
model.add(Dense(1, activation = 'linear'))

model.compile(loss = 'mae',
              optimizer = Adam(learning_rate = 0.001), 
              metrics = ['mse'])

res = model.fit(train_ds, 
                validation_data = val_ds, 
                epochs = 50, 
                verbose = 0,
                callbacks = [early_stop])

print_last(res)

Mean Absolute Error - Training: 0.64749
Mean Absolute Error - Testing: 0.47644
Root Mean Squared Error - Training: 1.14527
Root Mean Squared Error - Testing: 0.95801


Increasing the intervals didn't seem to help. So investigating smaller intervals (3-day)

In [None]:
seq_length = 3
train_ds = timeseries_dataset_from_array(
    y_train.to_numpy(),
    targets = y_train[seq_length:],
    sequence_length = seq_length,
    batch_size = 21,
    shuffle = True,
    seed = 42
)

val_ds = timeseries_dataset_from_array(
    y_test.to_numpy(),
    targets = y_test[seq_length:],
    sequence_length = seq_length,
    batch_size = 21,
)

In [None]:
model = Sequential()

model.add(SimpleRNN(30, input_shape = [None,1], return_sequences = True))
model.add(SimpleRNN(365, input_shape = [None,1]))
model.add(Dense(1, activation = 'linear'))

model.compile(loss = 'mae',
              optimizer = Adam(learning_rate = 0.001), 
              metrics = ['mse'])

res = model.fit(train_ds, 
                validation_data = val_ds, 
                epochs = 50, 
                verbose = 0,
                callbacks = [early_stop])

print_last(res)

Mean Absolute Error - Training: 0.77399
Mean Absolute Error - Testing: 0.66603
Root Mean Squared Error - Training: 1.23034
Root Mean Squared Error - Testing: 0.90997


In [None]:
model = Sequential()

model.add(SimpleRNN(30, input_shape = [None,1], return_sequences = True))
model.add(SimpleRNN(365, input_shape = [None,1]))
model.add(Dense(64, activation = 'relu'))
model.add(Dropout(0.15))

model.add(Dense(1, activation = 'linear'))

model.compile(loss = 'mae',
              optimizer = Adam(learning_rate = 0.001), 
              metrics = ['mse'])

res = model.fit(train_ds, 
                validation_data = val_ds, 
                epochs = 50, 
                verbose = 0,
                callbacks = [early_stop])

print_last(res)

Mean Absolute Error - Training: 0.87057
Mean Absolute Error - Testing: 0.46883
Root Mean Squared Error - Training: 1.31679
Root Mean Squared Error - Testing: 0.86415


In [None]:
model = Sequential()

model.add(SimpleRNN(30, input_shape = [None,1], return_sequences = True))
model.add(SimpleRNN(365, input_shape = [None,1]))
model.add(Dense(32, activation = 'relu'))
model.add(Dropout(0.15))
model.add(Dense(32, activation = 'relu'))
model.add(Dropout(0.15))
model.add(Dense(1, activation = 'linear'))

model.compile(loss = 'mae',
              optimizer = Adam(learning_rate = 0.001), 
              metrics = ['mse'])

res = model.fit(train_ds, 
                validation_data = val_ds, 
                epochs = 50, 
                verbose = 0,
                callbacks = [early_stop])

print_last(res)

Mean Absolute Error - Training: 0.99465
Mean Absolute Error - Testing: 1.01416
Root Mean Squared Error - Training: 1.38784
Root Mean Squared Error - Testing: 1.1301
