# Import Libraries

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.metrics import mean_squared_error
from sklearn.utils import shuffle
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import plotly.graph_objects as go
import plotly.offline as py
import matplotlib.pyplot as plt
import math
import pickle
from keras.callbacks import EarlyStopping
from numpy import concatenate

# Define functions
## The `Helper` class provides two utility methods

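### 1. `mean_absolute_percentage_error`
Computes the mean absolute percentage error (MAPE), used to evaluate the model's predictions (the training loss itself is the mean squared error).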

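### 2. `generateDataSet`

Reframes the time series as a supervised-learning table: lagged input columns (t-n, ..., t-1) followed by forecast columns (t, ..., t+n).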

## `train_LSTM_model`

Splits the reframed data into training and test sets, then builds, trains and evaluates the LSTM network (this is also a method of `Helper`).


In [3]:
class Helper:
    """Utilities for reframing a time series and training an LSTM forecaster."""

    def mean_absolute_percentage_error(self, y_true, y_pred):
        """Return the mean absolute percentage error (MAPE), in percent.

        Both inputs are flattened to 1-D first, so a column vector such as a
        model prediction of shape (n, 1) cannot silently broadcast against a
        flat target of shape (n,) into an (n, n) matrix.

        Samples whose true value is exactly zero are left out of the average,
        because their percentage error is undefined (division by zero).

        Args:
            y_true: array-like of observed values.
            y_pred: array-like of predicted values (same number of elements,
                or a single value that is broadcast).

        Returns:
            The MAPE as a float, or NaN when no sample has a non-zero true
            value.

        Raises:
            ValueError: if the flattened inputs cannot be broadcast together.
        """
        y_true = np.asarray(y_true, dtype=float).ravel()
        y_pred = np.asarray(y_pred, dtype=float).ravel()
        y_true, y_pred = np.broadcast_arrays(y_true, y_pred)
        valid = y_true != 0
        if not valid.any():
            return float('nan')
        return np.mean(np.abs((y_true[valid] - y_pred[valid]) / y_true[valid])) * 100

    def generateDataSet(self, data, n_in=1, n_out=1, dropnan=True):
        """Reframe a time series as a supervised-learning table.

        Args:
            data: anything `pd.DataFrame` accepts -- a DataFrame, a 2-D array,
                a flat list, a list of rows, or a Series.
            n_in: number of lagged steps (t-n_in ... t-1) used as inputs.
            n_out: number of forecast steps (t ... t+n_out-1) used as outputs.
            dropnan: drop the rows that contain NaN introduced by shifting.

        Returns:
            A DataFrame whose columns are named 'varK(t-i)', 'varK(t)' and
            'varK(t+i)', ordered from the oldest lag to the furthest forecast.
        """
        df = pd.DataFrame(data)
        # Count the variables on the frame itself rather than on the input
        # type, so lists of rows, Series and 1-D arrays get matching names.
        n_vars = df.shape[1]
        cols, names = [], []
        # input sequence (t-n, ... t-1)
        for lag in range(n_in, 0, -1):
            cols.append(df.shift(lag))
            names += ['var%d(t-%d)' % (j + 1, lag) for j in range(n_vars)]
        # forecast sequence (t, t+1, ... t+n)
        for step in range(n_out):
            cols.append(df.shift(-step))
            if step == 0:
                names += ['var%d(t)' % (j + 1) for j in range(n_vars)]
            else:
                names += ['var%d(t+%d)' % (j + 1, step) for j in range(n_vars)]
        # put it all together
        agg = pd.concat(cols, axis=1)
        agg.columns = names
        # drop rows with NaN values
        if dropnan:
            agg = agg.dropna()
        return agg

    def train_LSTM_model(self, reframed, n_hours=9, n_features=5,
                         n_train_hours=24 * 150, n_val=1800,
                         epochs=1000, batch_size=24, patience=50):
        """Split the reframed data, then build, train and evaluate an LSTM.

        The first `n_hours * n_features` columns are the inputs. The target is
        the column `n_features` from the end, i.e. the first variable of the
        last forecast step.

        Args:
            reframed: DataFrame produced by `generateDataSet`.
            n_hours: number of lagged time steps per sample.
            n_features: number of variables per time step.
            n_train_hours: number of leading rows used for training
                (24 * 150 is 150 days of hourly rows; 24 * 60 would be 60).
            n_val: number of leading test rows used as validation data.
            epochs: maximum number of training epochs.
            batch_size: mini-batch size.
            patience: epochs without validation improvement before stopping.

        Returns:
            The trained Keras model.

        Raises:
            ValueError: if `reframed` has too few columns for the requested
                window, or no rows are left for the test split.

        NOTE(review): early stopping is driven by the first `n_val` test rows,
        and the RMSE/MAPE printed below are computed on the whole test split,
        which includes those rows -- the reported scores are therefore not
        from fully held-out data.
        NOTE(review): the values are fed to the network unscaled.
        """
        values = reframed.values
        n_obs = n_hours * n_features
        if values.shape[1] < n_obs + n_features:
            raise ValueError(
                'reframed has %d columns but n_hours=%d and n_features=%d need at least %d'
                % (values.shape[1], n_hours, n_features, n_obs + n_features))
        if values.shape[0] <= n_train_hours:
            raise ValueError(
                'reframed has %d rows; need more than n_train_hours=%d to leave a test split'
                % (values.shape[0], n_train_hours))

        train = values[:n_train_hours, :]
        test = values[n_train_hours:, :]
        # split into input and outputs
        train_X, train_y = train[:, :n_obs], train[:, -n_features]
        test_X, test_y = test[:, :n_obs], test[:, -n_features]
        print(train_X.shape, len(train_X), train_y.shape)
        # reshape input to be 3D [samples, timesteps, features]
        train_X = train_X.reshape((train_X.shape[0], n_hours, n_features))
        test_X = test_X.reshape((test_X.shape[0], n_hours, n_features))
        print(train_X.shape, train_y.shape, test_X.shape, test_y.shape)

        # design network
        model = keras.Sequential()
        model.add(layers.LSTM(50, activation='linear', input_shape=(train_X.shape[1], train_X.shape[2])))
        model.add(layers.Dense(50, activation='linear'))
        model.add(layers.Dense(1))
        # Take the callback from the same tensorflow.keras package the model is
        # built with, and restore the best weights so the returned model is not
        # the one from `patience` epochs after the best validation loss.
        es = keras.callbacks.EarlyStopping(monitor='val_loss', mode='min', verbose=1,
                                           patience=patience, restore_best_weights=True)
        model.compile(loss='mean_squared_error', optimizer='adam')
        # fit network
        model.fit(train_X, train_y, epochs=epochs, batch_size=batch_size,
                  validation_data=(test_X[:n_val], test_y[:n_val]),
                  verbose=2, shuffle=False, callbacks=[es])

        # No scaler is applied anywhere, so there is nothing to invert: compare
        # the flattened predictions with the targets directly.
        predicted = model.predict(test_X)[:, 0]
        rmse = math.sqrt(mean_squared_error(test_y, predicted))
        print('Test RMSE: %.3f' % rmse)

        pct = self.mean_absolute_percentage_error(test_y, predicted)
        print('MAPE : %.3f' % pct +'%' )

        return model

# Load the dataset and train the load-forecasting model

In [5]:
def main(csv_path='changyuanbuilding_interpolation_SGF_202007-202104.csv', model_path='model'):
    """Load the CSV, train the LSTM forecaster and save the trained model.

    Args:
        csv_path: CSV file to read; it must contain the columns selected below.
        model_path: location passed to `model.save`.
    """
    pw = pd.read_csv(csv_path)

    # 'p_sum' comes first on purpose: train_LSTM_model uses the first variable
    # of the forecast step as its target.
    # NOTE(review): an earlier revision selected a differently named
    # temperature column (its name is mis-encoded in the old comment,
    # presumably 'Temp(°C)'); rename it to 'Temperature' if the CSV uses that
    # header -- confirm against the data file.
    dataset = pw[['p_sum', 'ae_tot', 'weekend', 'Temperature', 'session']]
    print(dataset.head())

    helper = Helper()
    # 9 lagged steps as inputs, 1 forecast step as output
    reframed = helper.generateDataSet(dataset, 9, 1)
    model = helper.train_LSTM_model(reframed)
    model.save(model_path)

if __name__ == '__main__':
    main()

         p_sum        ae_tot  weekend  Temperature  session
0  9308.932296  27153.204735        0         27.2        1
1  9246.681200  27161.810227        0         29.1        0
2  9184.430104  27171.934375        0         28.9        0
3  9122.179008  27182.831723        0         28.6        0
4  9059.927912  27191.283333        0         28.5        0
(3600, 45) 3600 (3600,)
(3600, 9, 5) (3600,) (3639, 9, 5) (3639,)
Epoch 1/1000
150/150 - 2s - loss: 5561590.5000 - val_loss: 1370832.2500
Epoch 2/1000
150/150 - 1s - loss: 3183979.7500 - val_loss: 925804.8750
Epoch 3/1000
150/150 - 1s - loss: 2398315.2500 - val_loss: 532039.5000
Epoch 4/1000
150/150 - 1s - loss: 1804754.3750 - val_loss: 632010.1250
Epoch 5/1000
150/150 - 1s - loss: 1479772.2500 - val_loss: 680757.6250
Epoch 6/1000
150/150 - 1s - loss: 2772097.2500 - val_loss: 325416.9688
Epoch 7/1000
150/150 - 1s - loss: 2015535.8750 - val_loss: 345763.3438
Epoch 8/1000
150/150 - 1s - loss: 1275098.1250 - val_loss: 417093.6562
Epoch

Load model

In [None]:
# Restore the network that main() saved under the "model" path.
model = keras.models.load_model('model')

In [None]:
# Bare expression: the notebook displays the repr of the loaded model object.
model

<tensorflow.python.keras.engine.sequential.Sequential at 0x7f5994e9cb90>