In [1]:
from math import sqrt

import numpy as np
import pandas as pd
from keras import backend as K
from keras.layers import Dense
from keras.layers import LSTM
from keras.models import Sequential
from matplotlib import pyplot
from pandas import DataFrame
from pandas import Series
from pandas import concat
# NOTE(review): `pandas.datetime` is not used in the visible cells and is no
# longer exported by recent pandas releases - confirm before upgrading pandas.
from pandas import datetime
from pandas import read_csv
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
 
# frame a sequence as a supervised learning problem
def timeseries_to_supervised(data, lag=1):
    """Frame a sequence as a supervised-learning table.

    Parameters
    ----------
    data : sequence or array-like
        Observations accepted by the ``DataFrame`` constructor.
    lag : int, default 1
        Number of lagged copies to place in front of the original columns.

    Returns
    -------
    DataFrame
        Columns are ``data`` shifted by 1, 2, ..., ``lag`` rows followed by
        the unshifted ``data``. The gaps opened by shifting are filled with 0.
    """
    base = DataFrame(data)
    lagged = []
    for step in range(1, lag + 1):
        lagged.append(base.shift(step))
    # inputs (lagged copies) first, target (unshifted data) last
    framed = concat(lagged + [base], axis=1)
    return framed.fillna(0)
 
# create a differenced series
def difference(dataset, interval=1):
    """Return the differenced series ``dataset[i] - dataset[i - interval]``.

    Parameters
    ----------
    dataset : indexable sequence
        Observations; must support ``len`` and integer indexing.
    interval : int, default 1
        Distance between the two observations being subtracted.

    Returns
    -------
    Series
        One value per index from ``interval`` to ``len(dataset) - 1``, so the
        result is ``interval`` elements shorter than ``dataset``.
    """
    deltas = [
        dataset[pos] - dataset[pos - interval]
        for pos in range(interval, len(dataset))
    ]
    return Series(deltas)
 
# invert differenced value
def inverse_difference(history, yhat, interval=1):
    """Undo differencing for a single value.

    Adds ``yhat`` to the observation that sits ``interval`` positions from the
    end of ``history`` and returns the sum.
    """
    reference = history[-interval]
    return yhat + reference
 
# scale train and test data to [-1, 1]
def scale(train, test):
    """Scale train and test data to the range [-1, 1].

    The ``MinMaxScaler`` is fitted on ``train`` only and then applied to both
    arrays.

    Parameters
    ----------
    train, test : 2-D arrays
        Both are reshaped to ``(shape[0], shape[1])`` before transforming, so
        they must expose ``.shape`` and ``.reshape`` with at least two axes.

    Returns
    -------
    tuple
        ``(scaler, train_scaled, test_scaled)`` - the fitted scaler followed
        by the transformed train and test arrays.
    """
    scaler = MinMaxScaler(feature_range=(-1, 1)).fit(train)
    scaled = []
    for block in (train, test):
        n_rows, n_cols = block.shape[0], block.shape[1]
        scaled.append(scaler.transform(block.reshape(n_rows, n_cols)))
    return scaler, scaled[0], scaled[1]
 
# inverse scaling for a forecasted value
def invert_scale(scaler, X, value):
    """Map one scaled forecast back to the scaler's original units.

    The scaler works on whole rows, so ``value`` is appended to the input
    features ``X`` to rebuild a single row, the row is inverse-transformed,
    and only its last element is returned.

    Parameters
    ----------
    scaler : object with ``inverse_transform``
        Called with a ``(1, len(X) + 1)`` array.
    X : iterable
        Scaled input features of the row.
    value : scalar
        Scaled forecast to invert.
    """
    row = np.array(list(X) + [value])
    row = row.reshape(1, len(row))
    return scaler.inverse_transform(row)[0, -1]
 
# fit an LSTM network to training data
def fit_lstm(train, batch_size, nb_epoch, neurons):
    """Build and train a stateful single-layer LSTM regressor.

    Parameters
    ----------
    train : 2-D array
        The last column is used as the target; all preceding columns are the
        input features.
    batch_size : int
        Batch size, fixed into the model through ``batch_input_shape``.
    nb_epoch : int
        Number of passes over the training data.
    neurons : int
        Number of units in the LSTM layer.

    Returns
    -------
    The compiled and fitted ``Sequential`` model.
    """
    features = train[:, 0:-1]
    targets = train[:, -1]
    n_samples, n_features = features.shape[0], features.shape[1]
    # the LSTM receives (samples, timesteps, features) with a single timestep
    features = features.reshape(n_samples, 1, n_features)

    input_shape = (batch_size, features.shape[1], features.shape[2])
    model = Sequential()
    model.add(LSTM(neurons, batch_input_shape=input_shape, stateful=True))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam')

    # One fit() call per epoch with shuffling disabled, resetting the layer
    # state explicitly after every pass.
    for _ in range(nb_epoch):
        model.fit(features, targets, epochs=1, batch_size=batch_size, shuffle=False)
        model.reset_states()
    return model
 
# make a one-step forecast
def forecast_lstm(model, batch_size, X):
    """Make a one-step forecast from a single feature vector.

    ``X`` is reshaped to ``(1, 1, len(X))`` (one sample, one timestep), passed
    to ``model.predict`` with the given ``batch_size``, and element ``[0, 0]``
    of the prediction is returned.
    """
    sample = X.reshape((1, 1, len(X)))
    prediction = model.predict(sample, batch_size=batch_size)
    return prediction[0, 0]

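# line plot of observed vs predicted - only usable after the forecasting cell
# has defined raw_values, split_point and predictions; note that cell scores
# predictions against raw_values[-split_point:], not raw_values[split_point+1:]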
#pyplot.plot(raw_values[split_point+1:])
#pyplot.plot(predictions)
#pyplot.show()

Using TensorFlow backend.


In [3]:
# Walk-forward LSTM evaluation: one model is trained and scored per dataset
# file listed in dataset_file_names.csv.
df = pd.read_csv('dataset_file_names.csv')
names = df['filename'].values

mse_list = []         # test-set MSE of each dataset, in `names` order
prediction_list = []  # per-dataset list of forecasts on the original scale

for x, name in enumerate(names):

    # A fresh model is built for every dataset. Clear the backend state left
    # by the previous one first, so that graph/layer state does not pile up
    # over hundreds of iterations. Clearing at the start (rather than the end)
    # leaves the last model usable after the loop.
    K.clear_session()

    df_test = pd.read_csv('../../ResearchData/data2/2' + name)
    # NOTE(review): this is the electricity column, but the result cells write
    # to lstm_mse_gas.csv / lstm_predictions_gas.csv - confirm the target files.
    raw_values = df_test['Electricity:Facility [kW](Hourly)'].values

    print(x)

    # size of the hold-out set: the last 20% of the observations
    split_point = int(len(raw_values) * .2)

    # transform data to be stationary
    diff_values = difference(raw_values, 1)

    # transform data to be supervised learning
    supervised = timeseries_to_supervised(diff_values, 1)
    supervised_values = supervised.values

    train, test = supervised_values[0:-split_point], supervised_values[-split_point:]

    # transform the scale of the data
    scaler, train_scaled, test_scaled = scale(train, test)

    # fit the model (batch size 1, 10 epochs, 2 LSTM units)
    lstm_model = fit_lstm(train_scaled, 1, 10, 2)
    # forecast the entire training dataset to build up state for forecasting
    train_reshaped = train_scaled[:, 0].reshape(len(train_scaled), 1, 1)
    lstm_model.predict(train_reshaped, batch_size=1)

    # walk-forward validation on the test data
    predictions = list()
    for i in range(len(test_scaled)):
        # make one-step forecast
        X, y = test_scaled[i, 0:-1], test_scaled[i, -1]
        yhat = forecast_lstm(lstm_model, 1, X)
        # invert scaling
        yhat = invert_scale(scaler, X, yhat)
        # invert differencing: add back the raw observation that precedes
        # test step i (len(test_scaled)+1-i positions from the end)
        yhat = inverse_difference(raw_values, yhat, len(test_scaled)+1-i)
        # store forecast
        predictions.append(yhat)

    # report performance against the held-out tail of the raw series
    prediction_list.append(predictions)
    mse_list.append(mean_squared_error(raw_values[-split_point:], predictions))

0
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
2
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
3
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
4
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1

TypeError: Expected `context` argument in EagerTensor constructor to have a `_handle` attribute but it did not. Was eager Context initialized?

In [7]:
import pandas as pd

# Write a copy of every raw dataset that keeps only the electricity column.
# Each copy goes to ResearchData/data2/ with a '2' prefixed to the file name.
# NOTE(review): the forecasting cell reads these files through
# '../../ResearchData/data2/2' + name, a different relative path - confirm
# the working directory both cells are run from.
df = pd.read_csv('dataset_file_names.csv')
names = df['filename'].values

for name in names:
    df_test = pd.read_csv('ResearchData/data/' + name)
    # double brackets keep a one-column DataFrame, so the header is written out
    df_test = df_test[['Electricity:Facility [kW](Hourly)']]
    df_test.to_csv('ResearchData/data2/2' + name)



In [45]:
# Append this run's per-dataset MSEs to the persisted results table.
df_mse = pd.DataFrame(data=mse_list, columns=['LSTM'])
# NOTE(review): ids 805..len(names)-1 only match mse_list when the forecasting
# loop was started at dataset 805; the loop as written starts at 0, in which
# case this assignment fails on the length mismatch - confirm the start id.
df_mse['id'] = range(805, len(names))
df_mse = df_mse[['id', 'LSTM']].set_index('id')
df_mse_old = pd.read_csv('ResearchData/lstm_mse_gas.csv', index_col='id')
df_mse = pd.concat([df_mse_old, df_mse])
# The file is read, extended and written back in place, so re-running this
# cell used to append the same ids again. Keep only the latest row per id.
df_mse = df_mse[~df_mse.index.duplicated(keep='last')]
df_mse.to_csv('ResearchData/lstm_mse_gas.csv')

In [46]:
# Reduce every stored forecast to a single scalar, one list per dataset.
# np.ravel(...)[0] accepts both shapes the forecasting cell can produce:
# a plain scalar (1-D raw_values) or a 1-element array (2-D raw_values).
# Plain `forecast[0]` raises on the scalar case.
new_prediction_list = [
    [np.ravel(forecast)[0] for forecast in dataset_forecasts]
    for dataset_forecasts in prediction_list
]

In [47]:
# Append this run's forecasts (one list per dataset) to the persisted table.
df_predictions = pd.DataFrame()
df_predictions['LSTM'] = new_prediction_list
# NOTE(review): ids 805..len(names)-1 only match new_prediction_list when the
# forecasting loop was started at dataset 805 - confirm the start id.
df_predictions['id'] = range(805, len(names))
df_predictions = df_predictions[['id', 'LSTM']].set_index('id')
df_predictions_old = pd.read_csv('ResearchData/lstm_predictions_gas.csv', index_col='id')
# NOTE: `df` is rebound here; it previously held the dataset_file_names table.
df = pd.concat([df_predictions_old, df_predictions])
# The file is read, extended and written back in place, so re-running this
# cell used to append the same ids again. Keep only the latest row per id.
df = df[~df.index.duplicated(keep='last')]
df.to_csv('ResearchData/lstm_predictions_gas.csv')

In [4]:
import pandas as pd

# Load the list of dataset files and display the first raw dataset with all
# of its columns.
df = pd.read_csv('dataset_file_names.csv')
names = df['filename'].values

first_path = 'ResearchData/data/' + names[0]
df_test = pd.read_csv(first_path)
df_test

Unnamed: 0,Date/Time,Electricity:Facility [kW](Hourly),Gas:Facility [kW](Hourly),Heating:Electricity [kW](Hourly),Heating:Gas [kW](Hourly),Cooling:Electricity [kW](Hourly),HVACFan:Fans:Electricity [kW](Hourly),Electricity:HVAC [kW](Hourly),Fans:Electricity [kW](Hourly),General:InteriorLights:Electricity [kW](Hourly),General:ExteriorLights:Electricity [kW](Hourly),Appl:InteriorEquipment:Electricity [kW](Hourly),Misc:InteriorEquipment:Electricity [kW](Hourly),Water Heater:WaterSystems:Gas [kW](Hourly)
0,01/01 01:00:00,1.019364,11.296072,0,10.843981,0,0.298570,0.298570,0.298570,0.161613,0.035211,0.092943,0.413915,0.423582
1,01/01 02:00:00,0.887116,11.178904,0,10.819559,0,0.297898,0.297898,0.297898,0.094274,0.020540,0.076186,0.381106,0.333096
2,01/01 03:00:00,0.833463,11.059716,0,10.726174,0,0.295327,0.295327,0.295327,0.067339,0.014671,0.062326,0.376688,0.307596
3,01/01 04:00:00,0.818020,11.001560,0,10.661125,0,0.293536,0.293536,0.293536,0.067339,0.014671,0.053976,0.371385,0.314855
4,01/01 05:00:00,0.812916,10.968846,0,10.555038,0,0.290615,0.290615,0.290615,0.067339,0.014671,0.065823,0.357356,0.389193
5,01/01 06:00:00,0.865135,11.164022,0,10.429347,0,0.287154,0.287154,0.287154,0.067339,0.014671,0.080078,0.398781,0.707208
6,01/01 07:00:00,1.087779,11.538775,0,10.202510,0,0.280909,0.280909,0.280909,0.154879,0.033744,0.140384,0.460752,1.304529
7,01/01 08:00:00,1.429298,12.174233,0,10.663021,0,0.293588,0.293588,0.293588,0.316492,0.068955,0.228567,0.489584,1.477491
8,01/01 09:00:00,1.382221,11.680335,0,10.206584,0,0.281021,0.281021,0.281021,0.303024,0.066021,0.301128,0.413915,1.445242
9,01/01 10:00:00,1.220633,11.785785,0,10.444045,0,0.287559,0.287559,0.287559,0.154879,0.033744,0.375065,0.352275,1.317476


In [1]:
# NOTE(review): this cell raised NameError when it was run (see its output);
# mse_list only exists after the forecasting cell has executed on this kernel.
mse_list

NameError: name 'mse_list' is not defined

In [5]:
# Number of datasets scored so far (the forecasting cell appends one MSE each).
len(mse_list)

339

In [18]:
# First forecast of the first dataset. The third [0] assumes each stored
# forecast is itself indexable (e.g. a 1-element array).
# NOTE(review): with the 1-D raw_values used in the forecasting cell the
# forecasts look like plain scalars - confirm which variant produced this.
prediction_list[0][0][0]

4.89105178917848

In [49]:
# Column mean of the merged MSE table; df_mse at this point is the previously
# saved CSV rows concatenated with this run's rows.
df_mse.mean()

LSTM    0.209942
dtype: float64

In [48]:
# Number of datasets scored in the run that was saved above.
len(mse_list)

131