In [None]:
import importlib
import os
import random

import numpy as np
import pandas as pd
import pyreadr

from sklearn import preprocessing
from sklearn.linear_model import LinearRegression

import keras
from keras.models import Sequential
from keras.optimizers import SGD
from keras.layers import Dense, LSTM, TimeDistributed, Dropout, Conv1D, MaxPooling1D, Flatten, RepeatVector
from keras.callbacks import EarlyStopping
import tensorflow as tf

import dues_utilities as utils

In [None]:
# Single seed shared by every random number generator the notebook uses, so
# the value only has to be changed in one place.
SEED = 77

tf.random.set_seed(SEED)
# NOTE: Python reads PYTHONHASHSEED when the interpreter starts, so setting it
# here does not change hashing in the already-running kernel; it is only
# inherited by processes spawned afterwards.
os.environ['PYTHONHASHSEED'] = str(SEED)
np.random.seed(SEED)
random.seed(SEED)

# Data read and formatting

# Locations of the CSV inputs read in this cell.
PATH_ENERGY_FEATURIZED = "../data/building_energy_featurized.csv"
# NOTE(review): absolute, machine-specific path; the other inputs are relative
# to ../data. Confirm whether this file can live alongside them.
PATH_DISTANCES = "D:/smud/smud_distances.csv"
PATH_ENERGY_ACTUAL = "../data/building_energy_actual.csv"
PATH_ENERGY_SIM = "../data/building_energy_sim.csv"

# Column dtypes applied to both the actual and the simulated energy files:
# 'apn' is kept as a string, every other listed column is parsed as float32.
energy_dtype = {
    'apn': str,
    **{column: np.float32 for column in ('year', 'month', 'day', 'hour', 'kwh')},
}

# The featurized table is read entirely as float32; the distance table
# entirely as strings.
energy_featurized = pd.read_csv(PATH_ENERGY_FEATURIZED, dtype=np.float32)
dist = pd.read_csv(PATH_DISTANCES, dtype=str)
energy_actual = pd.read_csv(PATH_ENERGY_ACTUAL, dtype=energy_dtype)
energy_sim = pd.read_csv(PATH_ENERGY_SIM, dtype=energy_dtype)

## Supervised Experiments

In [None]:
# Length (in timesteps) of the window used when the inputs are later reshaped
# for the 3D-input models.
timesteps = 6
# Number of lagged steps passed to utils.preprocess as `n_in`.
# Derived from `timesteps` so the cell runs on a fresh kernel (the previous
# self-referential `N_PREV = N_PREV - 1` raised NameError because N_PREV was
# never defined).
# NOTE(review): presumably the window is the N_PREV lagged steps plus the
# current step 't' -- confirm against utils.preprocess / reshape_for_lstm.
N_PREV = timesteps - 1

# Combine simulation and observed data into a properly formatted dataframe
energy = utils.get_energy_df(energy_sim, energy_actual)

# Fit the standard scaler on the training rows only (year < 2018)
standard_scaler = utils.get_standard_scaler(energy, 'year < 2018', 'kwh_actual')

# Process the energy dataframe into train / validation / test splits:
#   train:      year < 2018
#   validation: 2018 onwards, months before July
#   test:       2018 onwards, July and later
train_x, train_y = utils.preprocess(energy, 'year < 2018', standard_scaler, n_in=N_PREV, df_name="Train", lstm=False)
val_x, val_y = utils.preprocess(energy, 'year >= 2018 and month < 7', standard_scaler, n_in=N_PREV, df_name="Validation", lstm=False)
test_x, test_y = utils.preprocess(energy, 'year >= 2018 and month >= 7', standard_scaler, n_in=N_PREV, df_name="Test", lstm=False)

# Size of the last axis of the training inputs; used as the model input width.
n_features = train_x.shape[-1]

# Callback shared by the model-fitting cells below: stop training after
# 3 epochs without improvement of the monitored quantity.
early_stopping_monitor = EarlyStopping(patience=3)

### Linear Regression

In [None]:
# Linear-regression baseline: fit on the training split, then report metrics
# on the validation split.
lr_model = LinearRegression().fit(train_x, train_y)

utils.print_metrics(lr_model, val_x, val_y)

### Support Vector Regression

In [None]:
# Development helper: re-import dues_utilities so that edits made to the
# module are picked up without restarting the kernel.
# NOTE(review): no support-vector model is fitted in this section yet.
importlib.reload(utils)

### ANN

In [None]:
# Fully connected network: two hidden ReLU layers (128 and 32 units) followed
# by a single linear output unit.
ann_model = Sequential()
ann_model.add(Dense(128, activation='relu', input_shape = [n_features]))
ann_model.add(Dense(32, activation='relu'))
ann_model.add(Dense(1))

ann_model.compile(optimizer='adam', loss='mean_absolute_percentage_error')
ann_model.summary()

ann_history = ann_model.fit(
    train_x, train_y,
    # Keras documents validation_data as a tuple (x_val, y_val); a list is
    # not reliably unpacked into inputs and targets across Keras versions.
    validation_data=(val_x, val_y),
    batch_size = 5000,
    shuffle = True,
    epochs=30,
    callbacks=[early_stopping_monitor]
)

utils.show_results(ann_history, ann_model, val_x, val_y, "ANN")

# Metrics recorded from an earlier run:
# MAPE: 51.44270658493042
# CV(RMSE): 56.72162091525358
# MBE: -18.218822

#### Reshaping for Models with 3D Input

In [None]:
# Reshape each split's inputs for the models that take 3D input.
# NOTE: the inputs are overwritten in place, so this cell must run exactly
# once after the supervised preprocessing cell.
train_x, val_x, test_x = (
    utils.reshape_for_lstm(split_x, timesteps, df_name=split_name)
    for split_x, split_name in (
        (train_x, "Train"),
        (val_x, "Validation"),
        (test_x, "Test"),
    )
)
# Feature width after reshaping (size of the last axis).
n_features = train_x.shape[-1]

### Vanilla LSTM

In [None]:
# Single-layer LSTM over the (timesteps, n_features) window, followed by a
# small dense head with one linear output unit.
# The earlier version of this cell was a verbatim copy of the CNN model
# (Conv1D + Flatten) and contained no recurrent layer, so the "Vanilla LSTM"
# results were really a second CNN run.
# NOTE(review): 128 units mirrors the filter count of the CNN cell; tune as needed.
lstm_model = Sequential()
lstm_model.add(LSTM(128, input_shape=(timesteps, n_features)))
lstm_model.add(Dense(32, activation='relu'))
lstm_model.add(Dense(1))

lstm_model.compile(optimizer='adam', loss='mape')
lstm_model.summary()

lstm_history = lstm_model.fit(
    train_x, train_y,
    # Keras documents validation_data as a tuple (x_val, y_val).
    validation_data=(val_x, val_y),
    batch_size = 5000,
    shuffle = True,
    epochs=30,
    callbacks=[early_stopping_monitor]
)

utils.show_results(lstm_history, lstm_model, val_x, val_y, "Vanilla LSTM")

### CNN

In [None]:
# 1D convolution (128 filters, kernel size 2) over the (timesteps, n_features)
# window, flattened into a dense head with one linear output unit.
cnn_model = Sequential()
cnn_model.add(Conv1D(128, kernel_size=2, input_shape = (timesteps, n_features)))
cnn_model.add(Flatten())
cnn_model.add(Dense(32, activation='relu'))
cnn_model.add(Dense(1))

cnn_model.compile(optimizer='adam', loss='mape')
cnn_model.summary()

cnn_history = cnn_model.fit(
    train_x, train_y,
    # Keras documents validation_data as a tuple (x_val, y_val); a list is
    # not reliably unpacked into inputs and targets across Keras versions.
    validation_data=(val_x, val_y),
    batch_size = 5000,
    shuffle = True,
    epochs=30,
    callbacks=[early_stopping_monitor]
)

utils.show_results(cnn_history, cnn_model, val_x, val_y, "CNN")

# Metrics recorded from an earlier run:
# MAPE: 32.95547664165497
# CV(RMSE): 43.65593410060542
# MBE: -2.0720825

## Sequence Experiments

In [None]:
# Rebuild the combined energy dataframe, this time without one-hot encoding.
# NOTE: this cell rebinds energy, standard_scaler and the train/val/test
# arrays used by the supervised experiments above.
energy = utils.get_energy_df(energy_sim, energy_actual, one_hot=False)

# Same year/month split as the supervised experiments.
energy_train = utils.prep_for_seq_lstm(energy, 'year < 2018', timesteps)
energy_val = utils.prep_for_seq_lstm(energy, 'year >= 2018 and month < 7', timesteps)
energy_test= utils.prep_for_seq_lstm(energy, 'year >= 2018 and month >= 7', timesteps)

# Fit the scaler on the training features only (target column dropped).
# Uses the `preprocessing` module imported from sklearn at the top of the
# notebook rather than reaching into dues_utilities' own namespace.
standard_scaler = preprocessing.StandardScaler()
standard_scaler.fit(energy_train.drop(columns='kwh_actual'))

train_x, train_y = utils.preprocess(energy_train, None, standard_scaler, n_in=timesteps, df_name="Train", remove_target=False, to_supervised=False)
val_x, val_y = utils.preprocess(energy_val, None, standard_scaler, n_in=timesteps, df_name="Validation", remove_target=False, to_supervised=False)
test_x, test_y = utils.preprocess(energy_test, None, standard_scaler, n_in=timesteps, df_name="Test", remove_target=False, to_supervised=False)

# Size of the last axis of the training inputs; used as the model input width.
n_features = train_x.shape[-1]

### Vanilla LSTM

In [None]:
# Two stacked LSTM layers that both return full sequences, followed by
# per-timestep dense layers, so the model emits one value per timestep.
# NOTE: rebinds `lstm_model` / `lstm_history` from the supervised section.
lstm_model = Sequential()
lstm_model.add(LSTM(4, activation='relu', input_shape=(timesteps, n_features), return_sequences=True))
lstm_model.add(LSTM(4, activation='relu', return_sequences=True))
lstm_model.add(TimeDistributed(Dense(32)))
lstm_model.add(TimeDistributed(Dense(1)))

lstm_model.compile(optimizer='adam', loss='mape')
lstm_model.summary()

lstm_history = lstm_model.fit(
    train_x,
    train_y,
    # Keras documents validation_data as a tuple (x_val, y_val); a list is
    # not reliably unpacked into inputs and targets across Keras versions.
    validation_data=(val_x, val_y),
    batch_size = 30,
    shuffle = True,
    epochs=30,
    callbacks=[early_stopping_monitor]
)

utils.show_results(lstm_history, lstm_model, val_x, val_y, "Sequential Vanilla LSTM")

### Seq2Seq LSTM

In [None]:
# Encoder-decoder LSTM: the first LSTM returns only its final output, which
# RepeatVector copies `timesteps` times as input to the second LSTM; the
# per-timestep dense layers then emit one value per timestep.
lstm_autoencoder_model = Sequential()
lstm_autoencoder_model.add(LSTM(4, activation='relu', input_shape=(timesteps, n_features), return_sequences=False))
lstm_autoencoder_model.add(RepeatVector(timesteps))
lstm_autoencoder_model.add(LSTM(4, activation='relu', return_sequences=True))
lstm_autoencoder_model.add(TimeDistributed(Dense(32)))
lstm_autoencoder_model.add(TimeDistributed(Dense(1)))

lstm_autoencoder_model.compile(optimizer='adam', loss='mape')
lstm_autoencoder_model.summary()

lstm_autoencoder_history = lstm_autoencoder_model.fit(
    train_x,
    train_y,
    # Keras documents validation_data as a tuple (x_val, y_val); a list is
    # not reliably unpacked into inputs and targets across Keras versions.
    validation_data=(val_x, val_y),
    batch_size = 30,
    shuffle = True,
    epochs=30,
    callbacks=[early_stopping_monitor]
)

# Pass the fit history as the first argument, matching the other model cells.
# (The previous call passed two undefined names, `lstm_autoencoder` and
# `history`, and raised NameError.)
utils.show_results(lstm_autoencoder_history, lstm_autoencoder_model, val_x, val_y, "Sequential Seq2Seq LSTM")

### CNN-LSTM

In [None]:
# CNN-LSTM: Conv1D + max pooling feed a flattened vector that RepeatVector
# copies `timesteps` times as input to an LSTM; the per-timestep dense layers
# emit one value per timestep. (The dropout variant lives in its own cell.)
cnn_lstm_model = Sequential()
cnn_lstm_model.add(Conv1D(8, activation='relu', kernel_size=2, input_shape=(timesteps, n_features)))
cnn_lstm_model.add(MaxPooling1D(pool_size=2))
cnn_lstm_model.add(Flatten())
cnn_lstm_model.add(RepeatVector(timesteps))
cnn_lstm_model.add(LSTM(8, activation='relu', return_sequences=True))
cnn_lstm_model.add(TimeDistributed(Dense(32, activation='relu')))
cnn_lstm_model.add(TimeDistributed(Dense(1)))
# Above achieves 22% validation MAPE

cnn_lstm_model.compile(optimizer='adam', loss='mape')

# NOTE: rebinds the shared callback with a longer patience (5 instead of 3);
# any earlier cell re-run after this one will pick up the new value.
early_stopping_monitor = EarlyStopping(patience=5)
cnn_lstm_model.summary()

cnn_lstm_history = cnn_lstm_model.fit(
    train_x,
    train_y,
    # Keras documents validation_data as a tuple (x_val, y_val); a list is
    # not reliably unpacked into inputs and targets across Keras versions.
    validation_data=(val_x, val_y),
    batch_size = 30,
    shuffle = True,
    epochs=30,
    callbacks=[early_stopping_monitor]
)

utils.show_results(cnn_lstm_history, cnn_lstm_model, val_x, val_y, "Sequential CNN-LSTM")

In [None]:
# CNN-LSTM variant with a Dropout(0.3) layer inserted after the convolution;
# otherwise the same layer stack as the base CNN-LSTM cell.
cnn_lstm_dropout_model = Sequential()
cnn_lstm_dropout_model.add(Conv1D(8, activation='relu', kernel_size=2, input_shape=(timesteps, n_features)))
cnn_lstm_dropout_model.add(Dropout(0.3))
cnn_lstm_dropout_model.add(MaxPooling1D(pool_size=2))
cnn_lstm_dropout_model.add(Flatten())
cnn_lstm_dropout_model.add(RepeatVector(timesteps))
cnn_lstm_dropout_model.add(LSTM(8, activation='relu', return_sequences=True))
cnn_lstm_dropout_model.add(TimeDistributed(Dense(32, activation='relu')))
cnn_lstm_dropout_model.add(TimeDistributed(Dense(1)))

cnn_lstm_dropout_model.compile(optimizer='adam', loss='mape')

early_stopping_monitor = EarlyStopping(patience=5)
cnn_lstm_dropout_model.summary()

cnn_lstm_dropout_history = cnn_lstm_dropout_model.fit(
    train_x,
    train_y,
    # Keras documents validation_data as a tuple (x_val, y_val); a list is
    # not reliably unpacked into inputs and targets across Keras versions.
    validation_data=(val_x, val_y),
    batch_size = 30,
    shuffle = True,
    epochs=30,
    callbacks=[early_stopping_monitor]
)

# Distinct label so these results are not confused with the base CNN-LSTM
# run, which previously used the identical string.
utils.show_results(cnn_lstm_dropout_history, cnn_lstm_dropout_model, val_x, val_y, "Sequential CNN-LSTM (Dropout)")

In [None]:
# CNN-LSTM variant trained with SGD and gradient value clipping instead of
# Adam; the layer stack matches the base CNN-LSTM cell.
cnn_lstm_grad_clipping_model = Sequential()
cnn_lstm_grad_clipping_model.add(Conv1D(8, activation='relu', kernel_size=2, input_shape=(timesteps, n_features)))
cnn_lstm_grad_clipping_model.add(MaxPooling1D(pool_size=2))
cnn_lstm_grad_clipping_model.add(Flatten())
cnn_lstm_grad_clipping_model.add(RepeatVector(timesteps))
cnn_lstm_grad_clipping_model.add(LSTM(8, activation='relu', return_sequences=True))
cnn_lstm_grad_clipping_model.add(TimeDistributed(Dense(32, activation='relu')))
cnn_lstm_grad_clipping_model.add(TimeDistributed(Dense(1)))

# `learning_rate` is the supported argument name; the old `lr` alias is
# deprecated and rejected by recent Keras releases.
# clipvalue=0.5 clips each gradient element to the range [-0.5, 0.5].
opt = SGD(learning_rate=0.001, momentum=0.9, clipvalue=0.5)
cnn_lstm_grad_clipping_model.compile(optimizer=opt, loss='mape')

early_stopping_monitor = EarlyStopping(patience=5)
cnn_lstm_grad_clipping_model.summary()

cnn_lstm_grad_clipping_history = cnn_lstm_grad_clipping_model.fit(
    train_x,
    train_y,
    # Keras documents validation_data as a tuple (x_val, y_val); a list is
    # not reliably unpacked into inputs and targets across Keras versions.
    validation_data=(val_x, val_y),
    batch_size = 30,
    shuffle = True,
    epochs=30,
    callbacks=[early_stopping_monitor]
)

# Distinct label so these results are not confused with the base CNN-LSTM
# run, which previously used the identical string.
utils.show_results(cnn_lstm_grad_clipping_history, cnn_lstm_grad_clipping_model, val_x, val_y, "Sequential CNN-LSTM (Gradient Clipping)")

In [None]:
# Gradient inspection with see_rnn: compute gradients for one layer of a
# trained model on the first 10 validation samples and plot them.
# The commented-out lines below are a stand-alone example (toy model on
# random data) kept for reference; only the uncommented calls run.
# import numpy as np
# from keras.layers import Input, LSTM
# from keras.models import Model
# from keras.optimizers import Adam
from see_rnn import get_gradients, features_0D, features_1D, features_2D

# def make_model(rnn_layer, batch_shape, units):
#     ipt = Input(batch_shape=batch_shape)
#     x   = rnn_layer(units, activation='tanh', return_sequences=True)(ipt)
#     out = rnn_layer(units, activation='tanh', return_sequences=False)(x)
#     model = Model(ipt, out)
#     model.compile(Adam(4e-3), 'mse')
#     return model
    
# def make_data(batch_shape):
#     return np.random.randn(*batch_shape), \
#            np.random.uniform(-1, 1, (batch_shape[0], units))

# def train_model(model, iterations, batch_shape):
#     x, y = make_data(batch_shape)
#     for i in range(iterations):
#         model.train_on_batch(x, y)
#         print(end='.')  # progbar
#         if i % 40 == 0:
#             x, y = make_data(batch_shape)

# units = 6
# batch_shape = (16, 100, 2*units)

# model = make_model(LSTM, batch_shape, units)
# train_model(model, 300, batch_shape)

# x, y  = make_data(batch_shape)
# NOTE(review): `lstm_model` is whichever model was last bound to that name
# (the sequence "Vanilla LSTM" cell when run top to bottom), and that model
# adds only four layers. Confirm that 5 is a valid layer id for it, or whether
# another model (e.g. cnn_lstm_model) was intended here.
grads_all  = get_gradients(lstm_model, 5, val_x[:10], val_y[:10])
# grads_last = get_gradients(model, 2, x, y)  # return_sequences=False, layer index 2

# Three views of the same gradients, as provided by see_rnn.
features_1D(grads_all, n_rows=2)
features_2D(grads_all, n_rows=10, norm=(-.01, .01))
features_0D(grads_all[1])


In [None]:
# Todos:

# Investigate exploding gradients, using clip values for gradient clipping
# Investigate visualization strategies for LSTM and Conv Layers