# Multivariate Modeling Using RNN

#### (Meant to be run within Google Colab)

In [13]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, mean_squared_error, mean_absolute_error


from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM, SimpleRNN
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator
from tensorflow.keras.utils import timeseries_dataset_from_array
from tensorflow.keras.callbacks import EarlyStopping

np.random.seed(42)

In [None]:
#mount drive
from google.colab import drive
drive.mount('/content/drive')

In [14]:
#import campus5
from google.colab import files
import io

uploaded = files.upload()

campus5 = io.BytesIO(uploaded['campus5.csv'])
df = pd.read_csv(campus5)

Saving campus5.csv to campus5 (1).csv


In [15]:
df.index = pd.to_datetime(df['Timestamp'])
df.drop(columns = ['Timestamp', 'CampusKey', 'time'], inplace = True)
df.shape

(79319, 7)

In [16]:
#ww stands for With Weather info. Which starts at index #33311
#Interpolating any missing weather interval data
ww = df.iloc[33311:]
ww = ww.interpolate(method = 'linear', limit_direction = 'forward')
ww.isnull().sum()

SolarGeneration        0
ApparentTemperature    0
AirTemperature         0
DewPointTemperature    0
RelativeHumidity       0
WindSpeed              0
WindDirection          0
dtype: int64

In [17]:
#Setting X and Y
features = [i for i in ww.columns if i != 'SolarGeneration']
X = ww[features]
y = ww['SolarGeneration']
X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle = False, test_size = 0.2)

In [None]:
X_train.iloc[-1]

ApparentTemperature     14.100000
AirTemperature          17.200000
DewPointTemperature     12.200000
RelativeHumidity        72.500000
WindSpeed               18.318182
WindDirection          220.363636
Name: 2022-01-18 03:15:00, dtype: float64

In [None]:
X_test.iloc[0]

ApparentTemperature     13.900000
AirTemperature          17.000000
DewPointTemperature     12.100000
RelativeHumidity        73.000000
WindSpeed               18.318182
WindDirection          220.363636
Name: 2022-01-18 03:30:00, dtype: float64

In [7]:
#Standard Scaling
ss = StandardScaler()
X_train_sc = ss.fit_transform(X_train)
X_test_sc = ss.transform(X_test)

In [8]:
def print_last(res):
  '''
  Helper function to just print out the last training and testing loss 
  function values as well as the last metrics of a fitted model
  '''
  print(f"Mean Absolute Error - Training: {np.round(res.history['loss'][-1], 5)}")
  print(f"Mean Absolute Error - Testing: {np.round(res.history['val_loss'][-1], 5)}")
  print(f"Root Mean Squared Error - Training: {np.round(np.sqrt(res.history['mse'][-1]), 5)}")
  print(f"Root Mean Squared Error - Testing: {np.round(np.sqrt(res.history['val_mse'][-1]), 5)}")

First investigating sequences 4 intervals long (1 hour)

In [9]:
#timeseries generator, input shape, and early stopping for modeling

train_sequences = TimeseriesGenerator(X_train_sc, y_train, length = 4, batch_size = 64)
test_sequences = TimeseriesGenerator(X_test_sc, y_test, length = 4, batch_size = 64)
input_shape = train_sequences[0][0][0].shape
early_stop = EarlyStopping(monitor = 'val_loss', patience = 5)

In [None]:
model = Sequential()

model.add(LSTM(96, input_shape = input_shape))
model.add(Dense(1, activation = 'linear'))

model.compile(loss = 'mae',
              optimizer = Adam(learning_rate = 0.001), 
              metrics = ['mse'])

res = model.fit(train_sequences, 
                validation_data = test_sequences, 
                epochs = 50, 
                verbose = 0,
                callbacks = [early_stop])

print_last(res)

Mean Absolute Error - Training: 0.95765
Mean Absolute Error - Testing: 0.76523
Root Mean Squared Error - Training: 1.77713
Root Mean Squared Error - Testing: 1.22163


In [None]:
model = Sequential()

model.add(LSTM(8, input_shape = input_shape, return_sequences = True))
model.add(LSTM(32, return_sequences = False))
model.add(Dense(8, activation = 'relu'))
model.add(Dense(1, activation = 'linear'))

model.compile(loss = 'mae',
              optimizer = Adam(learning_rate = 0.001), 
              metrics = ['mse'])

res = model.fit(train_sequences, 
                validation_data = test_sequences, 
                epochs = 50, 
                verbose = 0,
                callbacks = [early_stop])

print_last(res)

Mean Absolute Error - Training: 0.93588
Mean Absolute Error - Testing: 0.68782
Root Mean Squared Error - Training: 1.77775
Root Mean Squared Error - Testing: 1.24635


In [None]:
model = Sequential()

model.add(LSTM(8, input_shape = input_shape, return_sequences = False))
model.add(Dense(16, activation = 'relu'))
model.add(Dense(512, activation ='relu'))
model.add(Dense(512, activation ='relu'))
model.add(Dense(512, activation ='relu'))
model.add(Dense(1, activation = 'linear'))

model.compile(loss = 'mae',
              optimizer = Adam(learning_rate = 0.001), 
              metrics = ['mse'])

res = model.fit(train_sequences, 
                validation_data = test_sequences, 
                epochs = 50, 
                verbose = 0,
                callbacks = [early_stop])

print_last(res)

Mean Absolute Error - Training: 0.95496
Mean Absolute Error - Testing: 0.72572
Root Mean Squared Error - Training: 1.79568
Root Mean Squared Error - Testing: 1.33184


In [None]:
model = Sequential()

model.add(LSTM(32, input_shape = input_shape, return_sequences = True, ))
model.add(LSTM(32, input_shape = input_shape, return_sequences = True))
model.add(LSTM(64, input_shape = input_shape, return_sequences = False))
model.add(Dense(16, activation = 'relu'))
model.add(Dense(512, activation ='relu'))
model.add(Dense(512, activation ='relu'))
model.add(Dense(1, activation = 'linear'))

model.compile(loss = 'mae',
              optimizer = Adam(learning_rate = 0.001), 
              metrics = ['mse'])

res = model.fit(train_sequences, 
                validation_data = test_sequences, 
                epochs = 50, 
                verbose = 0,
                callbacks = [early_stop])

print_last(res)

Mean Absolute Error - Training: 0.95676
Mean Absolute Error - Testing: 0.71638
Root Mean Squared Error - Training: 1.80317
Root Mean Squared Error - Testing: 1.21187


In [None]:
model = Sequential()

model.add(LSTM(32, input_shape = input_shape, return_sequences = True))
model.add(LSTM(64, input_shape = input_shape, return_sequences = True))
model.add(LSTM(96, input_shape = input_shape, return_sequences = False))
model.add(Dense(16, activation = 'relu'))
model.add(Dense(512, activation ='relu'))
model.add(Dense(512, activation ='relu'))

model.add(Dense(1, activation = 'linear'))

model.compile(loss = 'mae',
              optimizer = Adam(learning_rate = 0.001), 
              metrics = ['mse'])

res = model.fit(train_sequences, 
                validation_data = test_sequences, 
                epochs = 50, 
                verbose = 0,
                callbacks = [early_stop])

print_last(res)

Mean Absolute Error - Training: 0.96606
Mean Absolute Error - Testing: 0.82582
Root Mean Squared Error - Training: 1.80315
Root Mean Squared Error - Testing: 1.45836


Investigating sequences of 16 intervals in length (4 hours)

In [None]:
train_sequences = TimeseriesGenerator(X_train_sc, y_train, length = 16, batch_size = 96)
test_sequences = TimeseriesGenerator(X_test_sc, y_test, length = 16, batch_size = 96)
input_shape = train_sequences[0][0][0].shape

In [None]:
model = Sequential()

model.add(LSTM(96, input_shape = input_shape))
model.add(Dense(1, activation = 'linear'))

model.compile(loss = 'mae',
              optimizer = Adam(learning_rate = 0.001), 
              metrics = ['mse'])

res = model.fit(train_sequences, 
                validation_data = test_sequences, 
                epochs = 50, 
                verbose = 0,
                callbacks = [early_stop])

print_last(res)

Mean Absolute Error - Training: 0.85519
Mean Absolute Error - Testing: 0.43458
Root Mean Squared Error - Training: 1.69813
Root Mean Squared Error - Testing: 0.84498


In [None]:
model = Sequential()

model.add(LSTM(8, input_shape = input_shape, return_sequences = True))
model.add(LSTM(32, return_sequences = False))
model.add(Dense(8, activation = 'relu'))

model.add(Dense(1, activation = 'linear'))

model.compile(loss = 'mae',
              optimizer = Adam(learning_rate = 0.001), 
              metrics = ['mse'])

res = model.fit(train_sequences, 
                validation_data = test_sequences, 
                epochs = 50, 
                verbose = 0,
                callbacks = [early_stop])

print_last(res)

Mean Absolute Error - Training: 0.87984
Mean Absolute Error - Testing: 0.47266
Root Mean Squared Error - Training: 1.72308
Root Mean Squared Error - Testing: 0.91996


In [None]:
model = Sequential()

model.add(LSTM(8, input_shape = input_shape, return_sequences = False))
model.add(Dense(16, activation = 'relu'))
model.add(Dense(512, activation ='relu'))
model.add(Dense(512, activation ='relu'))
model.add(Dense(512, activation ='relu'))

model.add(Dense(1, activation = 'linear'))

model.compile(loss = 'mae',
              optimizer = Adam(learning_rate = 0.001), 
              metrics = ['mse'])

res = model.fit(train_sequences, 
                validation_data = test_sequences, 
                epochs = 50, 
                verbose = 0,
                callbacks = [early_stop])

print_last(res)

Mean Absolute Error - Training: 0.92261
Mean Absolute Error - Testing: 0.58563
Root Mean Squared Error - Training: 1.76271
Root Mean Squared Error - Testing: 1.04964


In [None]:
model = Sequential()

model.add(LSTM(96, input_shape = input_shape, return_sequences = False))
model.add(Dense(16, activation = 'relu'))
model.add(Dense(512, activation ='relu'))
model.add(Dense(512, activation ='relu'))
model.add(Dense(512, activation ='relu'))

model.add(Dense(1, activation = 'linear'))

model.compile(loss = 'mae',
              optimizer = Adam(learning_rate = 0.001), 
              metrics = ['mse'])

res = model.fit(train_sequences, 
                validation_data = test_sequences, 
                epochs = 50, 
                verbose = 0,
                callbacks = [early_stop])

print_last(res)

Mean Absolute Error - Training: 0.85678
Mean Absolute Error - Testing: 0.48245
Root Mean Squared Error - Training: 1.70554
Root Mean Squared Error - Testing: 0.91855


In [None]:
model = Sequential()

model.add(LSTM(32, input_shape = input_shape, return_sequences = True, ))
model.add(LSTM(32, input_shape = input_shape, return_sequences = True))
model.add(LSTM(64, input_shape = input_shape, return_sequences = False))
model.add(Dense(16, activation = 'relu'))
model.add(Dense(512, activation ='relu'))
model.add(Dense(512, activation ='relu'))

model.add(Dense(1, activation = 'linear'))

model.compile(loss = 'mae',
              optimizer = Adam(learning_rate = 0.001), 
              metrics = ['mse'])

res = model.fit(train_sequences, 
                validation_data = test_sequences, 
                epochs = 50, 
                verbose = 0,
                callbacks = [early_stop])

print_last(res)

Mean Absolute Error - Training: 0.88247
Mean Absolute Error - Testing: 0.50426
Root Mean Squared Error - Training: 1.7286
Root Mean Squared Error - Testing: 0.88774


Investigating sequences of 96 intervals in length (1 Day)

In [10]:
train_sequences = TimeseriesGenerator(X_train_sc, y_train, length = 96, batch_size = 672)
test_sequences = TimeseriesGenerator(X_test_sc, y_test, length = 96, batch_size = 672)
input_shape = train_sequences[0][0][0].shape

In [11]:
model = Sequential()

model.add(LSTM(96, input_shape = input_shape))
model.add(Dense(1, activation = 'linear'))

model.compile(loss = 'mae',
              optimizer = Adam(learning_rate = 0.001), 
              metrics = ['mse'])

res = model.fit(train_sequences, 
                validation_data = test_sequences, 
                epochs = 50, 
                verbose = 0,
                callbacks = [early_stop])

print_last(res)

Mean Absolute Error - Training: 0.88815
Mean Absolute Error - Testing: 0.53892
Root Mean Squared Error - Training: 1.72244
Root Mean Squared Error - Testing: 0.99951


In [12]:
model = Sequential()

model.add(LSTM(192, input_shape = input_shape))
model.add(Dense(1, activation = 'linear'))

model.compile(loss = 'mae',
              optimizer = Adam(learning_rate = 0.001), 
              metrics = ['mse'])

res = model.fit(train_sequences, 
                validation_data = test_sequences, 
                epochs = 50, 
                verbose = 0,
                callbacks = [early_stop])

print_last(res)

Mean Absolute Error - Training: 0.8795
Mean Absolute Error - Testing: 0.57291
Root Mean Squared Error - Training: 1.71677
Root Mean Squared Error - Testing: 1.0447


In [None]:
model = Sequential()

model.add(LSTM(8, input_shape = input_shape, return_sequences = True))
model.add(LSTM(32, return_sequences = False))
model.add(Dense(8, activation = 'relu'))

model.add(Dense(1, activation = 'linear'))

model.compile(loss = 'mae',
              optimizer = Adam(learning_rate = 0.001), 
              metrics = ['mse'])

res = model.fit(train_sequences, 
                validation_data = test_sequences, 
                epochs = 50, 
                verbose = 0,
                callbacks = [early_stop])

print_last(res)

Mean Absolute Error - Training: 0.90929
Mean Absolute Error - Testing: 0.74056
Root Mean Squared Error - Training: 1.7613
Root Mean Squared Error - Testing: 1.35755


In [None]:
model = Sequential()

model.add(LSTM(8, input_shape = input_shape, return_sequences = False))
model.add(Dense(16, activation = 'relu'))
model.add(Dense(512, activation ='relu'))
model.add(Dense(512, activation ='relu'))
model.add(Dense(512, activation ='relu'))

model.add(Dense(1, activation = 'linear'))

model.compile(loss = 'mae',
              optimizer = Adam(learning_rate = 0.001), 
              metrics = ['mse'])

res = model.fit(train_sequences, 
                validation_data = test_sequences, 
                epochs = 50, 
                verbose = 0,
                callbacks = [early_stop])

print_last(res)

Mean Absolute Error - Training: 0.90837
Mean Absolute Error - Testing: 0.61511
Root Mean Squared Error - Training: 1.75613
Root Mean Squared Error - Testing: 1.1573


In [None]:
model = Sequential()

model.add(LSTM(32, input_shape = input_shape, return_sequences = True, ))
model.add(LSTM(32, input_shape = input_shape, return_sequences = True))
model.add(LSTM(64, input_shape = input_shape, return_sequences = False))
model.add(Dense(16, activation = 'relu'))
model.add(Dense(512, activation ='relu'))
model.add(Dense(512, activation ='relu'))

model.add(Dense(1, activation = 'linear'))

model.compile(loss = 'mae',
              optimizer = Adam(learning_rate = 0.001), 
              metrics = ['mse'])

res = model.fit(train_sequences, 
                validation_data = test_sequences, 
                epochs = 50, 
                verbose = 0,
                callbacks = [early_stop])

print_last(res)

Mean Absolute Error - Training: 0.89951
Mean Absolute Error - Testing: 0.52922
Root Mean Squared Error - Training: 1.73391
Root Mean Squared Error - Testing: 0.99087


It seems Univariate Modeling gives much better scores. Probably due to the weather data variability compounding to the variability of the Solar Generation

---
# Hourly Modeling
---

In [21]:
hourly = ww.resample('H').mean().ffill()

In [22]:
features = [i for i in hourly.columns if i != 'SolarGeneration']
X = hourly[features]
y = hourly['SolarGeneration']
X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle = False, test_size = 0.2)

In [23]:
ss = StandardScaler()
X_train_sc = ss.fit_transform(X_train)
X_test_sc = ss.transform(X_test)

First investigating 12-hour sequences

In [24]:
train_sequences = TimeseriesGenerator(X_train_sc, y_train, length = 12, batch_size = 48)
test_sequences = TimeseriesGenerator(X_test_sc, y_test, length = 12, batch_size = 48)
input_shape = train_sequences[0][0][0].shape

In [25]:
model = Sequential()

model.add(LSTM(24, input_shape = input_shape))
model.add(Dense(1, activation = 'linear'))

model.compile(loss = 'mae',
              optimizer = Adam(learning_rate = 0.001), 
              metrics = ['mse'])

res = model.fit(train_sequences, 
                validation_data = test_sequences, 
                epochs = 50, 
                verbose = 0,
                callbacks = [early_stop])

print_last(res)

Mean Absolute Error - Training: 0.81503
Mean Absolute Error - Testing: 0.49714
Root Mean Squared Error - Training: 1.60931
Root Mean Squared Error - Testing: 0.83278


In [26]:
model = Sequential()

model.add(LSTM(48, input_shape = input_shape))
model.add(Dense(1, activation = 'linear'))

model.compile(loss = 'mae',
              optimizer = Adam(learning_rate = 0.001), 
              metrics = ['mse'])

res = model.fit(train_sequences, 
                validation_data = test_sequences, 
                epochs = 50, 
                verbose = 0,
                callbacks = [early_stop])

print_last(res)

Mean Absolute Error - Training: 0.8147
Mean Absolute Error - Testing: 0.51314
Root Mean Squared Error - Training: 1.60308
Root Mean Squared Error - Testing: 0.85835


In [27]:
model = Sequential()

model.add(LSTM(144, input_shape = input_shape))
model.add(Dense(1, activation = 'linear'))

model.compile(loss = 'mae',
              optimizer = Adam(learning_rate = 0.001), 
              metrics = ['mse'])

res = model.fit(train_sequences, 
                validation_data = test_sequences, 
                epochs = 50, 
                verbose = 0,
                callbacks = [early_stop])

print_last(res)

Mean Absolute Error - Training: 0.81736
Mean Absolute Error - Testing: 0.484
Root Mean Squared Error - Training: 1.6084
Root Mean Squared Error - Testing: 0.81388


Investigating 24-hour sequences

In [28]:
train_sequences = TimeseriesGenerator(X_train_sc, y_train, length = 24, batch_size = 72)
test_sequences = TimeseriesGenerator(X_test_sc, y_test, length = 24, batch_size = 72)
input_shape = train_sequences[0][0][0].shape

In [29]:
model = Sequential()

model.add(LSTM(24, input_shape = input_shape))
model.add(Dense(1, activation = 'linear'))

model.compile(loss = 'mae',
              optimizer = Adam(learning_rate = 0.001), 
              metrics = ['mse'])

res = model.fit(train_sequences, 
                validation_data = test_sequences, 
                epochs = 50, 
                verbose = 0,
                callbacks = [early_stop])

print_last(res)

Mean Absolute Error - Training: 0.82697
Mean Absolute Error - Testing: 0.49691
Root Mean Squared Error - Training: 1.61382
Root Mean Squared Error - Testing: 0.80966


In [30]:
model = Sequential()

model.add(LSTM(48, input_shape = input_shape))
model.add(Dense(1, activation = 'linear'))

model.compile(loss = 'mae',
              optimizer = Adam(learning_rate = 0.001), 
              metrics = ['mse'])

res = model.fit(train_sequences, 
                validation_data = test_sequences, 
                epochs = 50, 
                verbose = 0,
                callbacks = [early_stop])

print_last(res)

Mean Absolute Error - Training: 0.83473
Mean Absolute Error - Testing: 0.54504
Root Mean Squared Error - Training: 1.61879
Root Mean Squared Error - Testing: 0.8791


In [31]:
model = Sequential()

model.add(LSTM(144, input_shape = input_shape))
model.add(Dense(1, activation = 'linear'))

model.compile(loss = 'mae',
              optimizer = Adam(learning_rate = 0.001), 
              metrics = ['mse'])

res = model.fit(train_sequences, 
                validation_data = test_sequences, 
                epochs = 50, 
                verbose = 0,
                callbacks = [early_stop])

print_last(res)

Mean Absolute Error - Training: 0.82998
Mean Absolute Error - Testing: 0.46425
Root Mean Squared Error - Training: 1.6222
Root Mean Squared Error - Testing: 0.77604


---
# Daily Investigation
---

In [9]:
daily = df.resample('D').max()

In [10]:
features = [i for i in ww.columns if i != 'SolarGeneration']
X = ww[features]
y = ww['SolarGeneration']
X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle = False, test_size = 0.2)

In [11]:
ss = StandardScaler()
X_train_sc = ss.fit_transform(X_train)
X_test_sc = ss.transform(X_test)

First looking at 4-day sequences

In [12]:
train_sequences = TimeseriesGenerator(X_train_sc, y_train, length = 4, batch_size = 12)
test_sequences = TimeseriesGenerator(X_test_sc, y_test, length = 4, batch_size = 12)
input_shape = train_sequences[0][0][0].shape

In [35]:
model = Sequential()

model.add(LSTM(10, input_shape = input_shape))
model.add(Dense(1, activation = 'linear'))

model.compile(loss = 'mae',
              optimizer = Adam(learning_rate = 0.001), 
              metrics = ['mse'])

res = model.fit(train_sequences, 
                validation_data = test_sequences, 
                epochs = 50, 
                verbose = 0,
                callbacks = [early_stop])

print_last(res)

Mean Absolute Error - Training: 0.95609
Mean Absolute Error - Testing: 0.78869
Root Mean Squared Error - Training: 1.77788
Root Mean Squared Error - Testing: 1.33441


In [36]:
model = Sequential()

model.add(LSTM(30, input_shape = input_shape))
model.add(Dense(1, activation = 'linear'))

model.compile(loss = 'mae',
              optimizer = Adam(learning_rate = 0.001), 
              metrics = ['mse'])

res = model.fit(train_sequences, 
                validation_data = test_sequences, 
                epochs = 50, 
                verbose = 0,
                callbacks = [early_stop])

print_last(res)

Mean Absolute Error - Training: 0.93276
Mean Absolute Error - Testing: 0.69935
Root Mean Squared Error - Training: 1.75706
Root Mean Squared Error - Testing: 1.1426


In [37]:
model = Sequential()

model.add(LSTM(10, input_shape = input_shape))
model.add(Dense(1, activation = 'linear'))

model.compile(loss = 'mae',
              optimizer = Adam(learning_rate = 0.001), 
              metrics = ['mse'])

res = model.fit(train_sequences, 
                validation_data = test_sequences, 
                epochs = 50, 
                verbose = 0,
                callbacks = [early_stop])

print_last(res)

Mean Absolute Error - Training: 0.95538
Mean Absolute Error - Testing: 0.73651
Root Mean Squared Error - Training: 1.77653
Root Mean Squared Error - Testing: 1.2406


In [15]:
 model = Sequential()

model.add(SimpleRNN(7, input_shape = input_shape, return_sequences = True))
model.add(SimpleRNN(7))
model.add(Dense(1, activation = 'linear'))

model.compile(loss = 'mae',
              optimizer = Adam(learning_rate = 0.001), 
              metrics = ['mse'])

res = model.fit(train_sequences, 
                validation_data = test_sequences, 
                epochs = 50, 
                verbose = 0,
                callbacks = [early_stop])

print_last(res)

Mean Absolute Error - Training: 0.9844
Mean Absolute Error - Testing: 0.86126
Root Mean Squared Error - Training: 1.77135
Root Mean Squared Error - Testing: 1.45495
