In [1]:
import sys

import keras
from keras.optimizers import *
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.callbacks import EarlyStopping
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler

from matplotlib import pyplot as plt
from numpy import array
from pandas import DataFrame
import pandas as pd
import numpy as np
import matplotlib
# Default every matplotlib figure in this notebook to 20x10 inches.
matplotlib.rcParams['figure.figsize'] = (20.0, 10.0)


import configparser
CONFIG_PATH = 'config/lstmconfig.ini'
config = configparser.ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed.  Check that list so a wrong working directory
# fails here with a clear message instead of as a KeyError on config['SETUP'].
if not config.read(CONFIG_PATH):
    raise FileNotFoundError(f"Could not read configuration file: {CONFIG_PATH!r}")

# Module-level scaler; the cells visible in this notebook create their own
# scalers inside scale_keras, so this one may be unused -- kept for compatibility.
scaler = MinMaxScaler(feature_range=(0,1))
# The ecotools package is not installed; its location comes from the config
# file and must be on sys.path BEFORE the import below.
eco_tools_path = config['SETUP']['eco_tools_path']
sys.path.append(eco_tools_path)
from ecotools.pi_client import pi_client
# NOTE(review): root='readonly' presumably selects a read-only PI connection -- confirm in ecotools.
pc = pi_client(root = 'readonly')

# Early stopping: halt training once val_loss has failed to improve by at
# least 0.001 for 5 consecutive epochs.
callbacks_list = [EarlyStopping(monitor='val_loss', patience=5, min_delta=0.001, verbose=2)]
# Disabled: TF1-style session that logs device placement.
#sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))


Using TensorFlow backend.


In [None]:
# Read the PI query settings from the [PI] section of the loaded config.
point_name, start, end, interval, calculation = (
    config['PI'][key]
    for key in ('point_name', 'start', 'end', 'interval', 'calculation')
)

In [None]:
def create_mulitvariable_df(data):
    """Build the model feature table from a time-indexed frame of readings.

    The input frame is left untouched; all columns are added to a copy.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with a ``DatetimeIndex`` (``.month``, ``.hour``, ``.weekday`` and
        a time-based ``rolling('24h')`` are used).  It must contain the column
        ``'aiTIT4045'`` (renamed to ``'OAT'``) or an existing ``'OAT'`` column.
        The FIRST column is the series used for the lag and rolling features.

    Returns
    -------
    pandas.DataFrame
        The input columns plus:

        * ``cdd`` / ``hdd``: ``OAT - 65`` and ``65 - OAT`` floored at zero, and
          their squares ``cdd2`` / ``hdd2``;
        * one-hot columns ``MONTH_2..12``, ``TOD_1..23``, ``DOW_1..6`` (the first
          level of each is dropped);
        * ``WEEKEND``: 1 where the index weekday is 5 or 6, else 0;
        * ``shift1``: the first column shifted by two rows;
        * ``rolling24_mean`` / ``rolling24_max`` / ``rolling24_min`` of the
          first column over a trailing 24-hour window.

        Rows containing any NaN (at least the first two, because of the
        shift) are dropped.

    Notes
    -----
    The calendar columns use FIXED category levels, so the set and order of
    dummy columns is the same for every input, whichever months, hours or
    weekdays it happens to contain.  A frame covering only part of a year can
    therefore be fed to a model trained on a full year.
    """
    # rename() returns a new frame, so the caller's object is never modified.
    features = data.rename(columns={'aiTIT4045': 'OAT'})

    # Degree-style terms around a 65-degree base, floored at zero.
    # NOTE(review): presumably cooling/heating degrees on an outdoor-air temperature -- confirm units.
    features["cdd"] = (features["OAT"] - 65.0).clip(lower=0)
    features["hdd"] = (65.0 - features["OAT"]).clip(lower=0)
    features["cdd2"] = features["cdd"] ** 2
    features["hdd2"] = features["hdd"] ** 2

    # Calendar categoricals with the full, fixed set of levels.
    calendar_levels = {
        "MONTH": (features.index.month, range(1, 13)),
        "TOD": (features.index.hour, range(24)),
        "DOW": (features.index.weekday, range(7)),
    }
    for column, (values, levels) in calendar_levels.items():
        features[column] = pd.Categorical(values, categories=list(levels))
    print(f"data2: {features['MONTH'].unique()}")

    # One-hot encode; unobserved levels become all-zero columns.
    features = pd.get_dummies(data=features, columns=["MONTH", "TOD", "DOW"], drop_first=True)

    # Weekend flag taken straight from the index (Saturday=5, Sunday=6).
    features["WEEKEND"] = (features.index.weekday >= 5).astype(int)

    # Lag and trailing-window statistics of the first column.
    # NOTE(review): the column is named "shift1" but the lag is two rows -- confirm which is intended.
    target = features.iloc[:, 0]
    features["shift1"] = target.shift(2)

    day_window = target.rolling('24h')
    features["rolling24_mean"] = day_window.mean()
    features["rolling24_max"] = day_window.max()
    features["rolling24_min"] = day_window.min()

    return features.dropna()

In [None]:
# Query the configured point together with 'aiTIT4045' from PI and keep only
# the timestamps where every requested point has a value.
point_list = [point_name, 'aiTIT4045']
df = pc.get_stream_by_point(
    point_list,
    start=start,
    end=end,
    calculation=calculation,
    interval=interval,
).dropna(how='any')
df.head()

In [None]:
def clean_train_data(df):
    """Return two row subsets of ``df``.

    Uses the notebook-level ``point_name`` to pick the value column.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(above, recent)`` where ``above`` holds the rows whose
        ``point_name`` value is greater than 2400 and ``recent`` holds the
        rows whose index year is 2019 or later.  Both are selected from the
        same input, so they may overlap.
    """
    # Earlier variants (disabled in the original cell) also restricted the
    # first subset to years before 2019 and returned the two concatenated.
    above_threshold = df[point_name] > 2400
    from_2019_on = df.index.year >= 2019
    return df.loc[above_threshold], df.loc[from_2019_on]

In [None]:
# Split into the two subsets returned by clean_train_data.  Note the rebinding:
# from here on `df` is the subset with readings above 2400 and `df1` is the
# 2019-onward subset, so re-running this cell filters the already filtered `df`.
df, df1 = clean_train_data(df)
print(f"df: {df.shape} \n df1: {df1.shape}")

In [None]:
# Expand both subsets into feature tables (degree terms, calendar dummies, lag
# and rolling statistics).  Both names are rebound to the expanded frames; the
# next cell compares their column sets.
df = create_mulitvariable_df(df)
df1 = create_mulitvariable_df(df1)
df.head()

In [None]:
# Columns present in `df` but absent from `df1`.  np.setdiff1d is one-directional:
# columns that exist only in `df1` are NOT reported here.
np.setdiff1d(df.columns,df1.columns)


In [None]:
def scale_keras(X, y, train_fraction=0.7):
    """Split features/target chronologically and min-max scale them to [0, 1].

    The leading ``train_fraction`` of the rows becomes the training set and
    the remaining rows become the test set; the two never overlap.  Both
    scalers are fitted on the TRAINING rows only and then applied to the test
    rows, so no information from the test period leaks into the scaling.
    Test values outside the training range therefore map outside [0, 1].

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Feature matrix, rows in time order.
    y : array-like of length n_samples
        Target values aligned with ``X``.
    train_fraction : float, default 0.7
        Share of the rows (taken from the start) used for training.

    Returns
    -------
    X_train, X_test : numpy.ndarray
        Scaled feature rows.
    y_train, y_test : numpy.ndarray of shape (n, 1)
        Scaled target rows.
    scaler_x, scaler_y : MinMaxScaler
        The fitted scalers; use ``scaler_y.inverse_transform`` to map
        predictions back to original units.

    Raises
    ------
    ValueError
        If the training portion would contain no rows.
    """
    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float).reshape((-1, 1))

    train_size = int(len(X) * train_fraction)
    if train_size < 1:
        raise ValueError(
            f"Not enough rows to build a training set: {len(X)} row(s) "
            f"with train_fraction={train_fraction}"
        )

    # Split at train_size on both sides so the test set is exactly the tail.
    X_train, X_test = X[:train_size], X[train_size:]
    y_train, y_test = y[:train_size], y[train_size:]

    scaler_x = MinMaxScaler(feature_range=(0, 1))
    scaler_y = MinMaxScaler(feature_range=(0, 1))
    X_train = scaler_x.fit_transform(X_train)
    y_train = scaler_y.fit_transform(y_train)

    # An empty test split (train_fraction == 1) is passed through unscaled;
    # scalers reject zero-row input.
    if len(X_test):
        X_test = scaler_x.transform(X_test)
        y_test = scaler_y.transform(y_test)

    return X_train, X_test, y_train, y_test, scaler_x, scaler_y

In [None]:
# Separate the target (the configured point) from the feature columns,
# once for each subset.
y, X = df[point_name], df.drop(columns=point_name)
y1, X1 = df1[point_name], df1.drop(columns=point_name)

In [None]:
# Scale and split each subset independently.  Every call creates and fits its
# own pair of scalers: scaler_x / scaler_y belong to `df`, scaler_x1 / scaler_y1
# to `df1`, and they are not interchangeable.
X_train, X_test, y_train, y_test, scaler_x, scaler_y = scale_keras(X, y)
X1_train, X1_test, y1_train, y1_test, scaler_x1, scaler_y1 = scale_keras(X1, y1)

In [None]:
# The LSTM input layout is [samples, time steps, features]; every row becomes a
# sequence with a single time step.
X_train = X_train.reshape((X_train.shape[0], 1, X_train.shape[1]))
X_test = X_test.reshape((X_test.shape[0], 1, X_test.shape[1]))

X1_train = X1_train.reshape((X1_train.shape[0], 1, X1_train.shape[1]))
X1_test = X1_test.reshape((X1_test.shape[0], 1, X1_test.shape[1]))

In [None]:
# Train the 2-layer LSTM and collect one loss curve per run.
# Curves are gathered as Series and assembled at the end, so runs that stop at
# different epochs (early stopping) are NaN-padded instead of failing on a
# length mismatch.
train_curves = {}
val_curves = {}
# NOTE(review): this seeds NumPy only; TensorFlow's own RNG is not seeded here,
# so weight initialisation may still differ between runs.
np.random.seed(42)
for i in range(1):
    model = Sequential()
    # Two stacked LSTM layers of 100 units; the first returns the full
    # sequence so the second receives 3-D input.
    model.add(LSTM(100, input_shape=(X_train.shape[1], X_train.shape[2]), return_sequences = True))
    model.add(LSTM(100))
    model.add(Dense(1))
    model.compile(optimizer = Adam(lr = 0.001), loss = 'mean_squared_error')
    # The last 30% of the training rows are held out for validation
    # (shuffle=False keeps time order).  callbacks_list carries the
    # EarlyStopping configured in the setup cell; without it all 400 epochs run.
    history = model.fit(X_train, y_train, epochs = 400, validation_split = 0.3, shuffle = False,
                        callbacks = callbacks_list)
    # Store this run's history.
    train_curves[str(i)] = pd.Series(history.history['loss'])
    val_curves[str(i)] = pd.Series(history.history['val_loss'])

train = DataFrame(train_curves)
val = DataFrame(val_curves)



In [None]:
from keras import backend as K
# Environment check only; nothing below depends on the result.
# NOTE(review): `_get_available_gpus` is a private helper (leading underscore) of
# the TensorFlow backend module and presumably lists visible GPUs -- it may not
# exist in other Keras versions.
K.tensorflow_backend._get_available_gpus()

In [None]:
from tensorflow.python.client import device_lib
# Print every device TensorFlow reports for this machine (environment check only).
print(device_lib.list_local_devices())

In [None]:
from matplotlib.pyplot import figure
# Training vs. validation loss per epoch for the run(s) stored in `train` / `val`.
figure(num=None, figsize=(15, 6), dpi=80, facecolor='w', edgecolor='k')
plt.plot(train, color='blue', label='train')
plt.plot(val, color='orange', label='validation')
plt.title('model train vs validation loss\n 2 Layers-100 Neurons')
plt.ylabel('loss (mse)')
plt.xlabel('epoch')
# The labels above are only rendered when a legend is drawn.
plt.legend()
#plt.savefig('new_300_epochs.png')
plt.show()

In [None]:
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
import math

# Scaled targets of both splits, overlaid on one axis (each starts at x=0).
plt.plot(y_train, linewidth=1, color='green')
plt.plot(y_test, color='lightgreen', linewidth=1)
plt.title("Training and Test Split")
plt.legend(['Training','Test'])

# Predict on both splits and map the predictions back to original units.
pred_train = history.model.predict(X_train)
pred_train_inv = scaler_y.inverse_transform(pred_train.reshape(-1,1))
pred_test = history.model.predict(X_test)
pred_test_inv = scaler_y.inverse_transform(pred_test.reshape(-1,1))

# Flatten actuals and predictions to 1-D ONCE.  Mixing (n, 1) and (n,) arrays
# in arithmetic broadcasts to an (n, n) matrix, which silently corrupts any
# element-wise error metric.
actual_train = scaler_y.inverse_transform(y_train).reshape((-1,))
actual_test = scaler_y.inverse_transform(y_test).reshape((-1,))
modeled_train = pred_train_inv.reshape((-1,))
modeled_test = pred_test_inv.reshape((-1,))

# DataFrame.plot opens its own figure, so no plt.figure() call is needed
# beforehand (it would only leave an empty figure behind).
training_comparison = pd.DataFrame({"Actual":actual_train,
                                   "Modeled":modeled_train}, index=range(len(y_train)))
training_comparison.sort_index().plot(figsize=(18,3),
                         title="Training Data vs Model Prediction",
                         linewidth=1,
                         color=['blue','red'])

test_comparison = pd.DataFrame({"Actual":actual_test,
                                   "Modeled":modeled_test}, index=range(len(y_test)))
test_comparison.sort_index().plot(figsize=(18,3),
                     title="Test Data vs Model Prediction",
                     linewidth=1,
                     colormap='winter')

# Goodness of fit, in original units.
r2_train = r2_score(actual_train, modeled_train)
print(f"R2 of train = {round(r2_train,3)}")
r2_test = r2_score(actual_test, modeled_test)
print(f"R2 of test = {round(r2_test,3)}")
print()
rmse_train = math.sqrt(mean_squared_error(actual_train, modeled_train))
print(f"RMSE of train = {round(rmse_train,3)}")
rmse_test = math.sqrt(mean_squared_error(actual_test, modeled_test))
print(f"RMSE of test = {round(rmse_test,3)}")
print()
# Mean absolute error: average magnitude of the element-wise errors.
# NOTE(review): the earlier code took np.median of signed differences; mean
# absolute error is used here to match the printed "MAE" label -- confirm.
mae_train = np.mean(np.abs(modeled_train - actual_train))
print(f"MAE of train = {round(mae_train,3)}")
mae_test = np.mean(np.abs(modeled_test - actual_test))
print(f"MAE of test = {round(mae_test,3)}")