In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from math import sqrt

Using TensorFlow backend.


In [2]:
# Read the raw readings; the 'Date' column is parsed to timestamps and
# becomes the frame's index.
data = pd.read_csv(
    'data.csv',
    index_col='Date',
    parse_dates=["Date"],
    infer_datetime_format=True,
)
# Select the four columns of interest, then discard 'Weekday', leaving
# Hour / Temp / Value in that order.
data = data[['Hour', 'Temp', 'Weekday', 'Value']].drop(['Weekday'], axis=1)
data.head()

Unnamed: 0_level_0,Hour,Temp,Value
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2018-07-21 15:20:00,15,88.97001,59.391994
2018-07-21 15:25:00,15,89.020004,59.647995
2018-07-21 15:30:00,15,89.189995,58.943996
2018-07-21 15:35:00,15,89.200005,74.43199
2018-07-21 15:40:00,15,88.48,61.439995


In [3]:
def moving_average(df, window, columns=('Value', 'Temp')):
    """Return the rolling mean of selected columns of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame that contains every name listed in ``columns``.
    window : int
        Window size, forwarded unchanged to ``DataFrame.rolling``.
    columns : iterable of str, optional
        Columns to smooth. Defaults to ``('Value', 'Temp')``, the pair that
        used to be hard-coded, so existing two-argument calls are unaffected.

    Returns
    -------
    pandas.DataFrame
        A new frame on ``df``'s index with one column per entry of
        ``columns`` (same order). Rows that do not yet have a full window
        are NaN and are NOT dropped here; the caller decides how to handle
        them.

    Raises
    ------
    KeyError
        If a requested column is missing from ``df``.
    """
    selected = list(columns)
    # One vectorized rolling pass over all requested columns.
    return df[selected].rolling(window=window).mean()

In [4]:
# Smooth Temp and Value with a 24-sample rolling mean, overwrite the raw
# columns with the smoothed ones, and drop the leading rows that have no
# complete window yet.
smooth = moving_average(data, window=24)
data = data.assign(Temp=smooth['Temp'], Value=smooth['Value']).dropna()
data.head()

Unnamed: 0_level_0,Hour,Temp,Value
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2018-07-21 17:15:00,17,86.39917,63.301328
2018-07-21 17:20:00,17,86.268336,62.727995
2018-07-21 17:25:00,17,86.131669,62.098661
2018-07-21 17:30:00,17,85.979585,62.810661
2018-07-21 17:35:00,17,85.820419,62.898661


In [5]:
def lagger(df, c, ft):
    """Expand ``df`` into ``c`` shifted copies named ``<ft>_lag_<k>``.

    Column ``k`` is ``df.shift(-k)``, i.e. the value found ``k`` rows *after*
    the current row, so each output row is a forward-looking window of ``c``
    consecutive samples (despite the "lag" in the column names).

    Rows that contain a NaN in any column are removed; this includes the
    trailing rows for which a full window does not exist.
    """
    names = [ft + "_lag_%d" % step for step in range(c)]
    windowed = pd.DataFrame(index=df.index, columns=names)
    for step, name in enumerate(names):
        # A negative shift pulls later samples up onto the current row.
        windowed[name] = df.shift(-step)
    return windowed.dropna()

In [6]:
lag = 24

# Start from an empty frame carrying the full index, then place the windowed
# expansion of every column side by side. Rows without a complete window in
# every column come out as NaN after the concat and are dropped.
pieces = [pd.DataFrame(index=data.index)]
pieces.extend(lagger(data[ft], lag, ft) for ft in data.columns)
data_lag = pd.concat(pieces, axis=1).dropna()
data_lag.head()

Unnamed: 0_level_0,Hour_lag_0,Hour_lag_1,Hour_lag_2,Hour_lag_3,Hour_lag_4,Hour_lag_5,Hour_lag_6,Hour_lag_7,Hour_lag_8,Hour_lag_9,...,Value_lag_14,Value_lag_15,Value_lag_16,Value_lag_17,Value_lag_18,Value_lag_19,Value_lag_20,Value_lag_21,Value_lag_22,Value_lag_23
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2018-07-21 17:15:00,17.0,17.0,17.0,17.0,17.0,17.0,17.0,17.0,17.0,18.0,...,64.97066,65.855993,66.80266,67.61066,68.573326,68.341326,68.850659,68.405326,68.949326,69.261326
2018-07-21 17:20:00,17.0,17.0,17.0,17.0,17.0,17.0,17.0,17.0,18.0,18.0,...,65.855993,66.80266,67.61066,68.573326,68.341326,68.850659,68.405326,68.949326,69.261326,70.839993
2018-07-21 17:25:00,17.0,17.0,17.0,17.0,17.0,17.0,17.0,18.0,18.0,18.0,...,66.80266,67.61066,68.573326,68.341326,68.850659,68.405326,68.949326,69.261326,70.839993,72.647993
2018-07-21 17:30:00,17.0,17.0,17.0,17.0,17.0,17.0,18.0,18.0,18.0,18.0,...,67.61066,68.573326,68.341326,68.850659,68.405326,68.949326,69.261326,70.839993,72.647993,73.690659
2018-07-21 17:35:00,17.0,17.0,17.0,17.0,17.0,18.0,18.0,18.0,18.0,18.0,...,68.573326,68.341326,68.850659,68.405326,68.949326,69.261326,70.839993,72.647993,73.690659,73.471993


In [7]:
# for j in ['Hour', 'Temp']:
#     for i in range(24,48):
#         data_lag = data_lag.drop([j+"_lag_"+str(i)], axis=1)
        
# data_lag.head()

In [8]:
# List the generated column names: `lag` (24) columns per source column,
# ordered Hour_lag_*, Temp_lag_*, Value_lag_*.
data_lag.columns

Index(['Hour_lag_0', 'Hour_lag_1', 'Hour_lag_2', 'Hour_lag_3', 'Hour_lag_4',
       'Hour_lag_5', 'Hour_lag_6', 'Hour_lag_7', 'Hour_lag_8', 'Hour_lag_9',
       'Hour_lag_10', 'Hour_lag_11', 'Hour_lag_12', 'Hour_lag_13',
       'Hour_lag_14', 'Hour_lag_15', 'Hour_lag_16', 'Hour_lag_17',
       'Hour_lag_18', 'Hour_lag_19', 'Hour_lag_20', 'Hour_lag_21',
       'Hour_lag_22', 'Hour_lag_23', 'Temp_lag_0', 'Temp_lag_1', 'Temp_lag_2',
       'Temp_lag_3', 'Temp_lag_4', 'Temp_lag_5', 'Temp_lag_6', 'Temp_lag_7',
       'Temp_lag_8', 'Temp_lag_9', 'Temp_lag_10', 'Temp_lag_11', 'Temp_lag_12',
       'Temp_lag_13', 'Temp_lag_14', 'Temp_lag_15', 'Temp_lag_16',
       'Temp_lag_17', 'Temp_lag_18', 'Temp_lag_19', 'Temp_lag_20',
       'Temp_lag_21', 'Temp_lag_22', 'Temp_lag_23', 'Value_lag_0',
       'Value_lag_1', 'Value_lag_2', 'Value_lag_3', 'Value_lag_4',
       'Value_lag_5', 'Value_lag_6', 'Value_lag_7', 'Value_lag_8',
       'Value_lag_9', 'Value_lag_10', 'Value_lag_11', 'Value_lag_12',
    

In [9]:
values = data_lag.values

# Scale every column of the windowed matrix into [0, 1].
# NOTE(review): the scaler is fitted on ALL rows, including the final 10 that
# are later held out as `test`, so test-set min/max leak into the scaling.
scaler = MinMaxScaler(feature_range=(0, 1))
scaled = scaler.fit(values).transform(values)

# A second, independent scaler fitted only on the last 12 columns (the ones
# later sliced off as the prediction target).
scaler_ = MinMaxScaler(feature_range=(0, 1))
scale_ = scaler_.fit(values[:, -12:]).transform(values[:, -12:])

In [10]:
# Hold out the final 10 rows as the test set; everything before is training.
train, test = scaled[:-10, :], scaled[-10:, :]

# Columns 0-23 are the Hour window, columns 24-47 the Temp window.
train_Xh, test_Xh = train[:, :24], test[:, :24]
train_Xt, test_Xt = train[:, 24:48], test[:, 24:48]

# Of the remaining Value window, the last 12 columns are the targets and the
# columns before them are the model inputs.
train_X, train_y = train[:, 48:-12], train[:, -12:]
test_X, test_y = test[:, 48:-12], test[:, -12:]

# Insert a length-1 middle axis: (samples, features) -> (samples, 1, features).
train_Xh = train_Xh[:, np.newaxis, :]
test_Xh = test_Xh[:, np.newaxis, :]

train_Xt = train_Xt[:, np.newaxis, :]
test_Xt = test_Xt[:, np.newaxis, :]

train_X = train_X[:, np.newaxis, :]
test_X = test_X[:, np.newaxis, :]

print('shape of trainh', train_Xh.shape)
print('shape of traint', train_Xt.shape)

print('shape of train', train_X.shape, train_y.shape)
print('shape of test', test_X.shape, test_y.shape)

shape of trainh (25795, 1, 24)
shape of traint (25795, 1, 24)
shape of train (25795, 1, 12) (25795, 12)
shape of test (10, 1, 12) (10, 12)


In [11]:
from keras.layers import Input, Dense
from keras.models import Model
from keras.layers import concatenate

# One named Input per feature group. Each takes its per-sample shape
# (everything after the leading sample axis) from the matching training array.
hour_in = Input(shape=train_Xh.shape[1:], name='hour_in')
temp_in = Input(shape=train_Xt.shape[1:], name='temp_in')
value_in = Input(shape=train_X.shape[1:], name='value_in')

In [12]:
# One 64-unit LSTM branch per input stream.
# The Input tensors already define each branch's shape, so the former
# `batch_input_shape=(1, ...)` arguments were redundant: the model summary
# reports a batch dimension of None, i.e. the fixed batch size of 1 was never
# applied. Newer Keras releases reject that keyword, so it is dropped here.
hour = LSTM(64, activation='relu')(hour_in)
temp = LSTM(64, activation='relu')(temp_in)
value = LSTM(64, activation='relu')(value_in)

In [13]:
# Join the three branch outputs and map them onto the 12 forecast steps.
merged = concatenate([hour, temp, value])
mid = Dense(12, activation='relu')(merged)

In [14]:
# Assemble the three-input, single-output functional model; the inputs must
# later be fed in this same order (hour, temp, value).
model = Model(inputs=[hour_in, temp_in, value_in], outputs=[mid])

In [15]:
# Print the layer-by-layer architecture and parameter counts.
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
hour_in (InputLayer)            (None, 1, 24)        0                                            
__________________________________________________________________________________________________
temp_in (InputLayer)            (None, 1, 24)        0                                            
__________________________________________________________________________________________________
value_in (InputLayer)           (None, 1, 12)        0                                            
__________________________________________________________________________________________________
lstm_1 (LSTM)                   (None, 64)           22784       hour_in[0][0]                    
__________________________________________________________________________________________________
lstm_2 (LS

In [16]:
# Train with Adam on mean absolute error.
# NOTE(review): batch_size=1 means one weight update per training sample for
# 50 epochs; the recorded run was interrupted (KeyboardInterrupt) during the
# second epoch. Consider a larger batch size if runtime matters.
model.compile(loss='mean_absolute_error', optimizer='adam')
model.fit(
    x=[train_Xh, train_Xt, train_X],
    y=train_y,
    batch_size=1,
    epochs=50,
)

Epoch 1/50
Epoch 2/50

KeyboardInterrupt: 

In [None]:
# Rebuild the Hour / Temp test inputs from `test` and give them the
# (samples, 1, features) layout the model expects.
test_Xh = test[:, :24][:, np.newaxis, :]
test_Xt = test[:, 24:48][:, np.newaxis, :]

In [None]:
# Forecast the 12 target steps for each held-out row, one sample per batch.
# The outputs are still in the scaled [0, 1] space.
pred = model.predict([test_Xh, test_Xt, test_X], batch_size=1)

In [None]:
# test_Xh = test[:,:24]
# test_Xt = test[:,24:48]

# test_Xh = np.reshape(test_Xh, (test_Xh.shape[0], 1, test_Xh.shape[1]))
# test_Xt = np.reshape(test_Xt, (test_Xt.shape[0], 1, test_Xt.shape[1]))

In [None]:
# Ground-truth targets for the held-out rows (scaled), under a shorter name.
act = test_y

In [None]:
import matplotlib.pyplot as plt


# Quick visual check of the first held-out window: forecast in red, actual in
# the default colour (both still scaled).
first_forecast, first_actual = pred[0], act[0]
plt.plot(first_forecast, 'r')
plt.plot(first_actual)

In [None]:
# Root-mean-square error of the first held-out window, in scaled units.
scaled_residual = pred[0] - act[0]
rsme = np.sqrt(np.mean(np.square(scaled_residual)))
rsme

In [None]:
# Fit a scaler on the 12 target columns only, so that forecasts and actuals
# can be mapped back from [0, 1] to the original units.
target_columns = values[:, -12:]
scal = MinMaxScaler(feature_range=(0, 1))
scald = scal.fit(target_columns).transform(target_columns)

pred_inv, act_inv = scal.inverse_transform(pred), scal.inverse_transform(act)

In [None]:
# Same RMSE for the first held-out window, now after inverse scaling.
unscaled_residual = pred_inv[0] - act_inv[0]
rsme = np.sqrt(np.mean(np.square(unscaled_residual)))
rsme

In [None]:


# Colour list kept for optional manual colouring; plot_forecasts itself relies
# on matplotlib's default colour cycle.
clr = ['r','b','c','g','y','m', 'k', 'r', 'b','g']
def plot_forecasts(series, forecasts, n_test):
    """Overlay multi-step forecasts on the actual series and save the figure.

    Parameters
    ----------
    series : pandas.Series
        Actual values, plotted against their positional sample number.
    forecasts : sequence of sequences
        One forecast per entry; forecast ``i`` is drawn as its own line
        labelled ``"test<i+1>"``.
    n_test : int
        Positions the forecasts: forecast ``i`` is anchored at sample
        ``len(series) - n_test + i - 1``, moved a further 9 samples left.

    Side effects
    ------------
    Switches the global matplotlib style, opens a new 20x10 figure and writes
    it to ``LSTM.png`` (200 dpi) in the current working directory.
    """
    # matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*'
    # and later removed the old names; use whichever this install provides and
    # fall back to the current style if neither exists.
    for style_name in ('seaborn-dark', 'seaborn-v0_8-dark'):
        if style_name in plt.style.available:
            plt.style.use(style_name)
            break

    # NOTE(review): hard-coded leftward shift of the forecast lines; it appears
    # tuned to the notebook's own call (25-sample series, n_test=12) - confirm
    # before reusing with other sizes.
    shift = 9

    plt.figure(figsize=(20,10))
    actual = series.values
    plt.plot(actual, linewidth=10, alpha=0.3, label='Actual')
    for i, forecast in enumerate(forecasts):
        off_s = len(series) - n_test + i - 1
        off_e = off_s + len(forecast) + 1
        xaxis = list(range(off_s - shift, off_e - shift))
        # Start each forecast line at the actual value of its anchor sample so
        # it visually connects to the series.
        yaxis = [actual[off_s - shift]] + list(forecast)
        plt.plot(xaxis, yaxis, label="test"+str(i+1))

    plt.legend(loc = 'upper left', fontsize=20)
    # Size the tick labels on the axes that already exist. The previous
    # plt.rc('xtick'/'ytick', ...) calls only changed the defaults for axes
    # created afterwards, so they had no effect on this figure.
    plt.tick_params(axis='both', labelsize=20)
    plt.xlabel('Sample No.', fontsize=20)
    plt.ylabel('Demand (kW)', fontsize=20)
    plt.title('LSTM prediction for an hour ahead', fontsize=20)
    plt.savefig('LSTM.png', dpi=200)

In [None]:
# Plot the last 25 smoothed 'Value' samples against the inverse-scaled
# forecasts; n_test is passed as 10+2.
plot_forecasts(data['Value'][-25:], pred_inv, 10+2)

In [None]:
# List the matplotlib style names available in this environment.
plt.style.available

In [None]:
# Re-display the most recently computed RMSE (the inverse-scaled one when the
# cells are run top to bottom).
rsme