# Univariate Modeling of Other Campuses and Specific Sites Using the Best Models Found in the Campus 5 Investigation

#### (Meant to be run within Google Colab)

In [1]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, mean_squared_error, mean_absolute_error

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, SimpleRNN
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator
from tensorflow.keras.utils import timeseries_dataset_from_array
from tensorflow.keras.callbacks import EarlyStopping

import matplotlib.pyplot as plt
# Seed every random number generator the training cells can draw from.
# np.random.seed alone does not seed TensorFlow, which is what Keras uses for
# weight initialization; Python's `random` is seeded as well because some
# Keras versions derive default layer seeds from it.
# NOTE(review): this makes a fresh Restart & Run All repeatable for a fixed
# cell order; GPU kernels may still introduce small run-to-run differences.
import random
import tensorflow as tf

SEED = 42
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

In [2]:
# Mount Google Drive at /content/drive (Colab-only API).
# NOTE(review): nothing in the visible notebook reads from this mount -- the
# CSV files are obtained through files.upload() instead -- so this cell may be
# removable; confirm before deleting.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# Upload the campus-level and Campus 3 site-level CSV files, then load each
# one into a DataFrame indexed by its parsed 'Timestamp' column.
# NOTE(review): the recorded upload also included campus5.csv, but this cell
# does not read it.
from google.colab import files
import io

uploaded = files.upload()


def _load_uploaded_csv(uploaded_files, filename):
  '''
  Build a timestamp-indexed DataFrame from one uploaded CSV file.

  uploaded_files: mapping of file name -> raw file bytes, as used with the
                  result of files.upload() in this cell
  filename:       key of the CSV to read from that mapping

  Returns the parsed DataFrame whose index is pd.to_datetime of its
  'Timestamp' column (the column itself is kept).
  Raises KeyError when `filename` is not among the uploaded files.
  '''
  if filename not in uploaded_files:
    raise KeyError(f"{filename} was not uploaded (received: {sorted(uploaded_files)})")
  frame = pd.read_csv(io.BytesIO(uploaded_files[filename]))
  frame.index = pd.to_datetime(frame['Timestamp'])
  return frame


# Campus-level datasets
camp1 = _load_uploaded_csv(uploaded, 'campus1.csv')
camp2 = _load_uploaded_csv(uploaded, 'campus2.csv')
camp3 = _load_uploaded_csv(uploaded, 'campus3.csv')
camp4 = _load_uploaded_csv(uploaded, 'campus4.csv')

# Individual sites on Campus 3
site6 = _load_uploaded_csv(uploaded, 'campus3_site6.csv')
site8 = _load_uploaded_csv(uploaded, 'campus3_site8.csv')
site10 = _load_uploaded_csv(uploaded, 'campus3_site10.csv')
site12 = _load_uploaded_csv(uploaded, 'campus3_site12.csv')

Saving campus1.csv to campus1.csv
Saving campus2.csv to campus2.csv
Saving campus3.csv to campus3.csv
Saving campus3_site6.csv to campus3_site6.csv
Saving campus3_site8.csv to campus3_site8.csv
Saving campus3_site10.csv to campus3_site10.csv
Saving campus3_site12.csv to campus3_site12.csv
Saving campus4.csv to campus4.csv
Saving campus5.csv to campus5.csv


In [4]:
# Reduce every dataset to its univariate 'SolarGeneration' series. The names
# are rebound in place because the modeling cells refer to them, so the helper
# leaves an already-reduced Series untouched; without that guard, running this
# cell a second time would fail with a KeyError.
def _solar_generation(data):
  '''
  Return the 'SolarGeneration' column of a DataFrame; any other object
  (e.g. a Series produced by an earlier run of this cell) is returned as is.
  '''
  if isinstance(data, pd.DataFrame):
    return data['SolarGeneration']
  return data


camp1 = _solar_generation(camp1)
camp2 = _solar_generation(camp2)
camp3 = _solar_generation(camp3)
camp4 = _solar_generation(camp4)
site6 = _solar_generation(site6)
site8 = _solar_generation(site8)
site10 = _solar_generation(site10)
site12 = _solar_generation(site12)

In [5]:
def print_last(res):
  '''
  Report the final-epoch errors stored in a fitted model's history.

  `res` must expose a `history` mapping with the keys 'loss', 'val_loss',
  'mse' and 'val_mse', each holding one value per epoch. The last value of
  each is printed, rounded to 5 decimals. The loss entries are labelled as
  mean absolute error (accurate only when the model was compiled with an MAE
  loss), and the MSE entries are square-rooted so they print as RMSE.
  '''
  # (label, history key, whether the stored value is a squared error)
  report_rows = (
    ("Mean Absolute Error - Training", 'loss', False),
    ("Mean Absolute Error - Testing", 'val_loss', False),
    ("Root Mean Squared Error - Training", 'mse', True),
    ("Root Mean Squared Error - Testing", 'val_mse', True),
  )
  for label, key, is_squared in report_rows:
    final_value = res.history[key][-1]
    if is_squared:
      final_value = np.sqrt(final_value)
    print(f"{label}: {np.round(final_value, 5)}")

In [6]:
def best_model_1(df, *, seq_length = 96, batch_size = 672, units = 64,
                 learning_rate = 0.002, epochs = 50, patience = 5,
                 test_size = 0.15):
  '''
  Fit a single-layer SimpleRNN that predicts the next value of a univariate
  series from the preceding `seq_length` values, then print the last-epoch
  errors with `print_last`.

  The series is split in time order (no shuffling): the final `test_size`
  fraction is held out and passed to `fit` as validation data. The "Testing"
  figures that get printed are therefore validation metrics, and the same
  held-out data also drives early stopping.

  Parameters
  ----------
  df : pandas Series
      Univariate series in time order (despite the name, callers in this
      notebook pass a Series).
  seq_length : int
      Number of consecutive past values in each input window.
  batch_size : int
      Batch size of both the training and validation datasets.
  units : int
      Number of units in the SimpleRNN layer.
  learning_rate : float
      Adam learning rate.
  epochs : int
      Maximum number of training epochs.
  patience : int
      Epochs without val_loss improvement tolerated before stopping.
  test_size : float
      Fraction of the series held out (from the end) for validation.

  Returns
  -------
  None; results are printed.

  Raises
  ------
  ValueError
      If either split has too few observations to form a single
      (window, target) pair.
  '''
  y_train, y_test = train_test_split(df, shuffle = False, test_size = test_size)

  # Work on plain arrays so inputs and targets have the same type and are
  # sliced purely by position.
  train_values = y_train.to_numpy()
  val_values = y_test.to_numpy()

  for split_name, values in (('training', train_values), ('validation', val_values)):
    if len(values) <= seq_length:
      raise ValueError(
        f"The {split_name} split has {len(values)} observations; more than "
        f"seq_length = {seq_length} are needed to build one window and its target."
      )

  # No restore_best_weights: the figures printed at the end come from the last
  # epoch run, which can be later than the epoch with the lowest val_loss.
  early_stop = EarlyStopping(monitor = 'val_loss', patience = patience)

  # The window starting at position i covers values[i : i + seq_length]; its
  # target is targets[i] == values[i + seq_length], i.e. the next observation.
  train_ds = timeseries_dataset_from_array(
    train_values,
    targets = train_values[seq_length:],
    sequence_length = seq_length,
    batch_size = batch_size,
    shuffle = True,
    seed = 42
  )

  val_ds = timeseries_dataset_from_array(
    val_values,
    targets = val_values[seq_length:],
    sequence_length = seq_length,
    batch_size = batch_size,
  )

  model = Sequential()

  # NOTE(review): the windows are produced as (batch, seq_length) while the
  # layer declares (timesteps, 1); this relies on Keras adding the trailing
  # feature axis -- confirm on the installed version.
  model.add(SimpleRNN(units, input_shape = [None,1]))
  model.add(Dense(1, activation = 'linear'))

  # The loss is MAE, so 'loss'/'val_loss' in the history are MAE values; MSE
  # is tracked as a metric so print_last can derive the RMSE.
  model.compile(loss = 'mae',
                optimizer = Adam(learning_rate = learning_rate),
                metrics = ['mse'])

  res = model.fit(train_ds,
                  validation_data = val_ds,
                  epochs = epochs,
                  verbose = 0,
                  callbacks = [early_stop])

  print_last(res)

In [7]:
def best_model_2(df, *, seq_length = 16, batch_size = 96, units = 32,
                 learning_rate = 0.0015, epochs = 50, patience = 5,
                 test_size = 0.15):
  '''
  Fit a two-layer (stacked) SimpleRNN that predicts the next value of a
  univariate series from the preceding `seq_length` values, then print the
  last-epoch errors with `print_last`.

  The series is split in time order (no shuffling): the final `test_size`
  fraction is held out and passed to `fit` as validation data. The "Testing"
  figures that get printed are therefore validation metrics, and the same
  held-out data also drives early stopping.

  Parameters
  ----------
  df : pandas Series
      Univariate series in time order (despite the name, callers in this
      notebook pass a Series).
  seq_length : int
      Number of consecutive past values in each input window.
  batch_size : int
      Batch size of both the training and validation datasets.
  units : int
      Number of units in each of the two SimpleRNN layers.
  learning_rate : float
      Adam learning rate.
  epochs : int
      Maximum number of training epochs.
  patience : int
      Epochs without val_loss improvement tolerated before stopping.
  test_size : float
      Fraction of the series held out (from the end) for validation.

  Returns
  -------
  None; results are printed.

  Raises
  ------
  ValueError
      If either split has too few observations to form a single
      (window, target) pair.
  '''
  y_train, y_test = train_test_split(df, shuffle = False, test_size = test_size)

  # Work on plain arrays so inputs and targets have the same type and are
  # sliced purely by position.
  train_values = y_train.to_numpy()
  val_values = y_test.to_numpy()

  for split_name, values in (('training', train_values), ('validation', val_values)):
    if len(values) <= seq_length:
      raise ValueError(
        f"The {split_name} split has {len(values)} observations; more than "
        f"seq_length = {seq_length} are needed to build one window and its target."
      )

  # No restore_best_weights: the figures printed at the end come from the last
  # epoch run, which can be later than the epoch with the lowest val_loss.
  early_stop = EarlyStopping(monitor = 'val_loss', patience = patience)

  # The window starting at position i covers values[i : i + seq_length]; its
  # target is targets[i] == values[i + seq_length], i.e. the next observation.
  train_ds = timeseries_dataset_from_array(
    train_values,
    targets = train_values[seq_length:],
    sequence_length = seq_length,
    batch_size = batch_size,
    shuffle = True,
    seed = 42
  )

  val_ds = timeseries_dataset_from_array(
    val_values,
    targets = val_values[seq_length:],
    sequence_length = seq_length,
    batch_size = batch_size,
  )

  model = Sequential()

  # NOTE(review): the windows are produced as (batch, seq_length) while the
  # first layer declares (timesteps, 1); this relies on Keras adding the
  # trailing feature axis -- confirm on the installed version.
  # return_sequences = True hands the full hidden-state sequence to the
  # second recurrent layer.
  model.add(SimpleRNN(units, input_shape = [None,1], return_sequences = True))
  # Only the first layer of a Sequential model needs an input shape; the
  # second layer takes its input shape from the layer before it.
  model.add(SimpleRNN(units))
  model.add(Dense(1, activation = 'linear'))

  # The loss is MAE, so 'loss'/'val_loss' in the history are MAE values; MSE
  # is tracked as a metric so print_last can derive the RMSE.
  model.compile(loss = 'mae',
                optimizer = Adam(learning_rate = learning_rate),
                metrics = ['mse'])

  res = model.fit(train_ds,
                  validation_data = val_ds,
                  epochs = epochs,
                  verbose = 0,
                  callbacks = [early_stop])

  print_last(res)

## Campus 1 (27 Individual Sites)

In [19]:
# Fit model 1 (one SimpleRNN layer, 96-step input window) on the Campus 1
# series and print its last-epoch errors.
best_model_1(camp1)

Mean Absolute Error - Training: 16.52437
Mean Absolute Error - Testing: 24.43195
Root Mean Squared Error - Training: 41.72521
Root Mean Squared Error - Testing: 51.46141


In [23]:
# Fit model 2 (two stacked SimpleRNN layers, 16-step input window) on the
# Campus 1 series and print its last-epoch errors.
best_model_2(camp1)

Mean Absolute Error - Training: 14.36009
Mean Absolute Error - Testing: 19.40275
Root Mean Squared Error - Training: 36.19741
Root Mean Squared Error - Testing: 41.92194


In [8]:
# Summary statistics of the Campus 1 series; gives the data scale against
# which the MAE/RMSE values for this dataset can be read.
camp1.describe()

count    79805.000000
mean        65.356255
std        111.950191
min          0.000000
25%          0.000000
50%          0.000000
75%         83.318359
max        488.087891
Name: SolarGeneration, dtype: float64

## Campus 2 (5 Individual Sites)

In [24]:
# Fit model 1 (one SimpleRNN layer, 96-step input window) on the Campus 2
# series and print its last-epoch errors.
best_model_1(camp2)

Mean Absolute Error - Training: 1.74422
Mean Absolute Error - Testing: 1.85078
Root Mean Squared Error - Training: 4.64117
Root Mean Squared Error - Testing: 5.06055


In [25]:
# Fit model 2 (two stacked SimpleRNN layers, 16-step input window) on the
# Campus 2 series and print its last-epoch errors.
best_model_2(camp2)

Mean Absolute Error - Training: 1.78639
Mean Absolute Error - Testing: 1.95337
Root Mean Squared Error - Training: 4.68326
Root Mean Squared Error - Testing: 5.13774


In [9]:
# Summary statistics of the Campus 2 series; gives the data scale against
# which the MAE/RMSE values for this dataset can be read.
camp2.describe()

count    79319.000000
mean        10.708011
std         16.009950
min          0.000000
25%          0.000000
50%          0.000000
75%         18.267578
max         57.363281
Name: SolarGeneration, dtype: float64

## Campus 3 (8 Individual Sites)

In [26]:
# Fit model 1 (one SimpleRNN layer, 96-step input window) on the Campus 3
# series and print its last-epoch errors.
best_model_1(camp3)

Mean Absolute Error - Training: 3.98758
Mean Absolute Error - Testing: 8.07423
Root Mean Squared Error - Training: 11.32356
Root Mean Squared Error - Testing: 18.57514


In [27]:
# Fit model 2 (two stacked SimpleRNN layers, 16-step input window) on the
# Campus 3 series and print its last-epoch errors.
best_model_2(camp3)

Mean Absolute Error - Training: 3.83372
Mean Absolute Error - Testing: 7.29851
Root Mean Squared Error - Training: 11.04632
Root Mean Squared Error - Testing: 17.88953


In [10]:
# Summary statistics of the Campus 3 series; gives the data scale against
# which the MAE/RMSE values for this dataset can be read.
camp3.describe()

count    79319.000000
mean        23.040234
std         42.159412
min          0.000000
25%          0.000000
50%          0.000000
75%         25.937500
max        200.214844
Name: SolarGeneration, dtype: float64

## Campus 4 (1 Site)

In [28]:
# Fit model 1 (one SimpleRNN layer, 96-step input window) on the Campus 4
# series and print its last-epoch errors.
best_model_1(camp4)

Mean Absolute Error - Training: 0.53562
Mean Absolute Error - Testing: 0.79538
Root Mean Squared Error - Training: 1.7806
Root Mean Squared Error - Testing: 2.79865


In [29]:
# Fit model 2 (two stacked SimpleRNN layers, 16-step input window) on the
# Campus 4 series and print its last-epoch errors.
best_model_2(camp4)

Mean Absolute Error - Training: 0.54027
Mean Absolute Error - Testing: 0.83392
Root Mean Squared Error - Training: 1.75602
Root Mean Squared Error - Testing: 2.76372


In [11]:
# Summary statistics of the Campus 4 series; gives the data scale against
# which the MAE/RMSE values for this dataset can be read.
camp4.describe()

count    79319.000000
mean         4.021717
std          5.999975
min          0.000000
25%          0.000000
50%          0.000000
75%          7.250000
max         21.937500
Name: SolarGeneration, dtype: float64

---
# Individual Sites on Campus 3 Modeled:

### Site 6

In [30]:
# Fit model 1 (one SimpleRNN layer, 96-step input window) on the Campus 3,
# Site 6 series and print its last-epoch errors.
best_model_1(site6)

Mean Absolute Error - Training: 1.00163
Mean Absolute Error - Testing: 0.8089
Root Mean Squared Error - Training: 3.04606
Root Mean Squared Error - Testing: 2.16687


In [31]:
# Fit model 2 (two stacked SimpleRNN layers, 16-step input window) on the
# Campus 3, Site 6 series and print its last-epoch errors.
best_model_2(site6)

Mean Absolute Error - Training: 0.96492
Mean Absolute Error - Testing: 0.87338
Root Mean Squared Error - Training: 2.97394
Root Mean Squared Error - Testing: 2.14722


In [12]:
# Summary statistics of the Site 6 series; gives the data scale against
# which the MAE/RMSE values for this dataset can be read.
site6.describe()

count    42041.000000
mean         5.200881
std          7.994980
min          0.000000
25%          0.000000
50%          0.000000
75%          8.546875
max         28.734375
Name: SolarGeneration, dtype: float64

### Site 8

In [32]:
# Fit model 1 (one SimpleRNN layer, 96-step input window) on the Campus 3,
# Site 8 series and print its last-epoch errors.
best_model_1(site8)

Mean Absolute Error - Training: 1.08763
Mean Absolute Error - Testing: 0.83864
Root Mean Squared Error - Training: 3.1308
Root Mean Squared Error - Testing: 2.33301


In [33]:
# Fit model 2 (two stacked SimpleRNN layers, 16-step input window) on the
# Campus 3, Site 8 series and print its last-epoch errors.
best_model_2(site8)

Mean Absolute Error - Training: 0.9841
Mean Absolute Error - Testing: 0.82053
Root Mean Squared Error - Training: 2.96979
Root Mean Squared Error - Testing: 2.29772


In [13]:
# Summary statistics of the Site 8 series; gives the data scale against
# which the MAE/RMSE values for this dataset can be read.
site8.describe()

count    42041.000000
mean         4.808123
std          7.638348
min          0.000000
25%          0.000000
50%          0.000000
75%          7.453125
max         26.968750
Name: SolarGeneration, dtype: float64

### Site 10

In [34]:
# Fit model 1 (one SimpleRNN layer, 96-step input window) on the Campus 3,
# Site 10 series and print its last-epoch errors.
best_model_1(site10)

Mean Absolute Error - Training: 0.29328
Mean Absolute Error - Testing: 0.29072
Root Mean Squared Error - Training: 0.84639
Root Mean Squared Error - Testing: 0.88407


In [35]:
# Fit model 2 (two stacked SimpleRNN layers, 16-step input window) on the
# Campus 3, Site 10 series and print its last-epoch errors.
best_model_2(site10)

Mean Absolute Error - Training: 0.29991
Mean Absolute Error - Testing: 0.29836
Root Mean Squared Error - Training: 0.843
Root Mean Squared Error - Testing: 0.88562


In [14]:
# Summary statistics of the Site 10 series; gives the data scale against
# which the MAE/RMSE values for this dataset can be read.
site10.describe()

count    79319.000000
mean         1.669451
std          2.445814
min          0.000000
25%          0.000000
50%          0.000000
75%          3.046875
max          7.750000
Name: SolarGeneration, dtype: float64

### Site 12

In [36]:
# Fit model 1 (one SimpleRNN layer, 96-step input window) on the Campus 3,
# Site 12 series and print its last-epoch errors.
best_model_1(site12)

Mean Absolute Error - Training: 0.47677
Mean Absolute Error - Testing: 0.93893
Root Mean Squared Error - Training: 1.37274
Root Mean Squared Error - Testing: 2.67354


In [37]:
# Fit model 2 (two stacked SimpleRNN layers, 16-step input window) on the
# Campus 3, Site 12 series and print its last-epoch errors.
best_model_2(site12)

Mean Absolute Error - Training: 0.46846
Mean Absolute Error - Testing: 0.9102
Root Mean Squared Error - Training: 1.34081
Root Mean Squared Error - Testing: 2.64824


In [15]:
# Summary statistics of the Site 12 series; gives the data scale against
# which the MAE/RMSE values for this dataset can be read.
site12.describe()

count    79319.000000
mean         2.827579
std          4.491140
min          0.000000
25%          0.000000
50%          0.000000
75%          4.531250
max         17.593700
Name: SolarGeneration, dtype: float64