# Demand Forecasting using Long Short-Term Memory (LSTM)

##### based on https://github.com/IBM/forecast-demand-for-vending-machines 

In [13]:
# seed for reproducibility
import random

import numpy as np

SEED = 42  # single place to change the seed used by the notebook
random.seed(SEED)
np.random.seed(SEED)
# NOTE(review): the Keras backend (TensorFlow, per the import cell's output) has its
# own random generator; it presumably needs seeding as well for fully repeatable
# training -- confirm the right call for the installed TensorFlow version.

In [10]:
import numpy as np 
import pandas as pd 
from subprocess import check_output
from keras.layers.core import Dense, Activation, Dropout
from keras.layers.recurrent import LSTM
from keras.models import Sequential
from keras.optimizers import Adam
from sklearn.model_selection import  train_test_split
import math, time
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
from numpy import newaxis
from pandas import read_csv
from keras.callbacks import EarlyStopping
from keras.callbacks import ModelCheckpoint
from sklearn.preprocessing import StandardScaler
from keras.models import model_from_json
import h5py
from keras.models import load_model
%matplotlib inline

Using TensorFlow backend.


In [11]:
# Load the raw CSV and cast every column to float for modelling.
# NOTE(review): `body` is not assigned anywhere in this notebook (the saved output of
# this cell is a NameError); it presumably has to be a path or file-like object that
# is provided before this cell runs -- confirm.
df_data_1 = pd.read_csv(body)
series = df_data_1.astype(float)

NameError: name 'body' is not defined

In [12]:
# Draw the raw series as a line chart on a wide (20 x 6 inch) canvas.
fig, ax = plt.subplots(figsize=(20, 6))
ax.plot(series.values)
plt.show()

NameError: name 'series' is not defined

<Figure size 1440x432 with 0 Axes>

In [None]:
# Rescale the values into [0, 1]; afterwards `series` is the scaled NumPy array.
# NOTE(review): the scaler is fitted on the whole series here, i.e. before the
# train/test split done in the following cell -- confirm that this is intended.
scaler = MinMaxScaler(feature_range=(0, 1))
series = scaler.fit_transform(series.values)

In [None]:
# Chronological split: the first 80 % of the rows train the model, the rest test it.
train_size = int(len(series) * 0.80)
test_size = len(series) - train_size
train = series[:train_size, :]
test = series[train_size:, :]
print(len(train), len(test))

In [None]:
# helper function to create train test data sets
def create_dataset(dataset, look_back=1):
    """Cut the first column of `dataset` into sliding windows and next-step targets.

    Args:
        dataset: 2-D array-like; only column 0 is used.
        look_back: number of consecutive values in each input window.

    Returns:
        (dataX, dataY) where dataX[i] holds values i .. i+look_back-1 and dataY[i]
        is the value at position i+look_back. dataX has shape
        (n_samples, look_back) and dataY has shape (n_samples,), with
        n_samples = len(dataset) - look_back (0 when the data is too short).
    """
    values = np.asarray(dataset)[:, 0]
    # Every start index i with i + look_back <= len - 1 has a target, which gives
    # len - look_back samples; the previous extra "- 1" dropped the last one.
    n_samples = len(values) - look_back
    if n_samples <= 0:
        # Keep the 2-D / 1-D shapes so callers can still index shape[1].
        return (np.empty((0, look_back), dtype=values.dtype),
                np.empty((0,), dtype=values.dtype))
    dataX = np.array([values[i:i + look_back] for i in range(n_samples)])
    dataY = np.array(values[look_back:])
    return dataX, dataY

# Build the windowed inputs/targets; the same window length is used for both sets.
look_back = 20
trainX, trainY = create_dataset(dataset=train, look_back=look_back)
testX, testY = create_dataset(dataset=test, look_back=look_back)

# review the shapes of these 4

In [None]:
# The LSTM needs 3-D input, so give each window a trailing axis of size 1.
trainX = trainX.reshape((trainX.shape[0], trainX.shape[1], 1))
testX = testX.reshape((testX.shape[0], testX.shape[1], 1))

# review the shapes

In [None]:
# define LSTM model
print('Build Model...')
model = Sequential()
# Take the per-sample input shape from the prepared training windows instead of
# repeating the literal (20, 1), so the model cannot drift out of step with the
# window length used to build trainX.
model.add(LSTM(units=50, input_shape=trainX.shape[1:], kernel_initializer="uniform",
               return_sequences=True, stateful=False))
model.add(Dropout(0.2))
# Second recurrent layer returns only its final output, feeding the dense head.
model.add(LSTM(50, kernel_initializer="uniform", activation='relu', return_sequences=False))
model.add(Dropout(0.2))
model.add(Dense(32, kernel_initializer="uniform", activation='relu'))
# Single linear unit: one predicted value per input window.
model.add(Dense(1, activation='linear'))
model.compile(loss="mse", optimizer='adam')
model.summary()

# params per LSTM layer = 4 * ((size_of_input + 1) * size_of_output + size_of_output^2)

In [None]:
# Watch the validation loss ('val_loss') and stop training once it has failed to
# improve for five consecutive epochs. This also keeps computation time down.

early_stopping = EarlyStopping(
    monitor='val_loss',
    mode='auto',
    patience=5,
    verbose=1,
)

In [None]:
# fit model
start = time.time()
# shuffle=False keeps the windows in their original order; the last 10 % of the
# training windows are held out for validation and drive early stopping.
history = model.fit(trainX, trainY, batch_size=72, epochs=25, verbose=1, shuffle=False,
                    validation_split=0.10, callbacks=[early_stopping])
# The measured interval wraps model.fit, i.e. training, so label it as such
# (it was previously printed as "Compilation Time").
print("> Training Time : ", time.time() - start)

In [None]:
# Report the model's loss on the training and the test data

def model_score(model, trainX, trainY, testX, testY):
    """Evaluate `model` on both data sets, print the scores and return them.

    Each score is whatever `model.evaluate` returns for a batch size of 72; it is
    printed as MSE together with its square root (RMSE).

    Returns:
        Tuple (trainScore, testScore).
    """
    def _evaluate(inputs, targets):
        # Same evaluation settings for both data sets.
        return model.evaluate(inputs, targets, batch_size=72, verbose=0)

    trainScore = _evaluate(trainX, trainY)
    train_rmse = math.sqrt(trainScore)
    print('Train Score: %.5f MSE (%.2f RMSE)' % (trainScore, train_rmse))

    testScore = _evaluate(testX, testY)
    test_rmse = math.sqrt(testScore)
    print('Test Score: %.5f MSE (%.2f RMSE)' % (testScore, test_rmse))

    return trainScore, testScore

# Score the trained model; the returned (train, test) pair is the cell's output.
model_score(model=model, trainX=trainX, trainY=trainY, testX=testX, testY=testY)

In [None]:
# Review the learning of training & validation loss (error evaluation)

fig, ax = plt.subplots()
# Label both curves so they can be told apart in the rendered figure.
ax.plot(history.history['loss'], label='Training loss')
ax.plot(history.history['val_loss'], label='Validation loss')
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss (MSE)')
ax.legend()
plt.show()

In [None]:
# model configuration info
# get_config() is the cell's last expression, so the notebook displays its return value.
model.get_config()

In [None]:
# function to plot predicted vs actual values
def plot_the_results(predicted_data, true_data, prediction_len):
    """Plot the true series and overlay each predicted sequence at its offset.

    Args:
        predicted_data: iterable of predicted sequences; sequence i is drawn
            starting at x = i * prediction_len.
        true_data: the reference values, drawn as a single line.
        prediction_len: number of steps by which consecutive sequences are shifted.
    """
    fig = plt.figure(facecolor='white', figsize=(16,8))
    ax = fig.add_subplot(111)
    ax.plot(true_data, label='True Data')
    for i, data in enumerate(predicted_data):
        # Leading None entries shift the sequence right to where it belongs.
        padding = [None] * (i * prediction_len)
        # list(...) lets sequences other than plain lists (e.g. arrays) be padded.
        shifted = padding + list(data)
        ax.plot(shifted, label='_nolegend_')
        # Only the first marker series gets a legend entry; otherwise the legend
        # would repeat 'Prediction' once per sequence.
        ax.plot(shifted, 'b^', label='Prediction' if i == 0 else '_nolegend_')
    # The labels were previously set but never shown.
    ax.legend()
    plt.show()
    
# function to predict future values
def predict_the_sequences(model, data, window_size, prediction_len):
    """Roll the model forward `prediction_len` steps from every prediction_len-th window.

    For each starting window taken from `data`, the model predicts one value, the
    oldest entry of the window is dropped and the prediction is inserted at index
    window_size - 1, and the process repeats `prediction_len` times.

    Returns:
        A list with one list of `prediction_len` predicted values per starting window.
    """
    n_sequences = len(data) // prediction_len
    prediction_seqs = []
    for seq_idx in range(n_sequences):
        frame = data[seq_idx * prediction_len]
        sequence = []
        for _ in range(prediction_len):
            # Add a leading batch axis; take the single scalar out of the result.
            next_value = model.predict(frame[newaxis, :, :])[0, 0]
            sequence.append(next_value)
            # Slide the window: drop the first row, place the prediction last.
            frame = np.insert(frame[1:], [window_size - 1], next_value, axis=0)
        prediction_seqs.append(sequence)
    return prediction_seqs

# Predict future values & plot the results.
# In this case, we are predicting the current values.
# If we need to predict t+1 then the prediction_len parameter has to be changed to 2
# and if we need t+2 then prediction_len would be 3.

predictions = predict_the_sequences(model, testX, window_size=20, prediction_len=1)

plot_the_results(predictions, testY, prediction_len=1)

In [None]:
# Cross validation for testing accuracy
# apparently use GPU because expensive

from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import GridSearchCV
from keras.models import Sequential
from keras.layers import Dense
from sklearn.model_selection import cross_val_score

def build_regressor(optimizer='adam'):
    """Build and compile a fresh, untrained LSTM regressor.

    Intended as the `build_fn` of KerasRegressor: the wrapper calls it to obtain a
    compiled model and then performs the fitting itself, so no training happens here.

    Args:
        optimizer: optimizer name or instance passed to `model.compile`; the
            default keeps the previous behaviour, and having the argument lets a
            parameter grid vary it.

    Returns:
        The compiled Sequential model.
    """
    model = Sequential()
    model.add(LSTM(input_shape=(20,1), kernel_initializer="uniform", return_sequences=True, stateful=False, units=50))
    model.add(Dropout(0.2))
    model.add(LSTM(50, kernel_initializer="uniform", activation='relu',return_sequences=False))
    model.add(Dropout(0.2))
    model.add(Dense(32,kernel_initializer="uniform",activation='relu'))
    model.add(Dense(1, activation='linear'))
    model.compile(optimizer=optimizer, loss="mse")
    # Return the model that was just built. The previous `return regressor`
    # referred to a name that is not defined in this function, and the previous
    # in-function fit on the full training set ran before every fold.
    return model
# 10-fold cross-validation of the wrapped Keras model on the training windows.
regressor = KerasRegressor(build_fn=build_regressor, epochs=100, batch_size=72)
accuracies = cross_val_score(estimator=regressor, X=trainX, y=trainY, cv=10, n_jobs=-1)
mean = accuracies.mean()
# NOTE(review): despite its name, `variance` holds the standard deviation of the fold scores.
variance = accuracies.std()

In [None]:
# Hyper parameter tuning
# also apparently use GPU because expensive

from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import GridSearchCV
from keras.models import Sequential
from keras.layers import Dense

regressor = KerasRegressor(build_fn = build_regressor)
# NOTE(review): the 'optimizer' entry is forwarded to the build function, which
# presumably has to accept an `optimizer` argument for this grid to be valid -- confirm.
parameters = {'batch_size': [42, 62, 82],
              'epochs': [50, 75, 100],
              'optimizer': ['adam', 'rmsprop']}
# The targets are continuous, so a classification scorer such as 'accuracy' does
# not apply; rank the candidates by (negated) mean squared error instead.
grid_search = GridSearchCV(estimator = regressor,
                           param_grid = parameters,
                           scoring = 'neg_mean_squared_error',
                           cv = 10)
grid_search = grid_search.fit(trainX, trainY)
best_parameters = grid_search.best_params_
# Name kept for compatibility; with this scorer the value is a negated MSE
# (closer to zero is better).
best_accuracy = grid_search.best_score_

In [None]:
# Denormalize the predicted values and review:
# convert the predicted output to a dataframe & print the results.
# NOTE(review): `predictions` is overwritten with the denormalized values, so
# running this cell a second time applies inverse_transform twice.

predictions = scaler.inverse_transform(predictions)
results = pd.DataFrame(data=np.round(predictions[-10:]))
print(results)

In [None]:
# tuning related info (end of 11.5)


# Transfer learning: persist the trained model to 'my_model.h5' so it can be reloaded.
model.save(filepath='my_model.h5')
print('Model saved to current directory')

In [None]:
# load saved model
# The save cell writes 'my_model.h5' to the current directory, so load it from
# the same location (the previous 'filepath/my_model.h5' did not match).
model = load_model('my_model.h5')
print('Model loaded to the session')

#read rename convert
# NOTE(review): `df_data_2` is not assigned anywhere in this notebook; presumably
# the new data set has to be loaded into it before this cell runs -- confirm.
print(df_data_2.head())
print(df_data_2.shape)

new_series = df_data_2.astype(float)
# Last expression: displays, per column, whether any value is missing.
new_series.isnull().any()

In [None]:
# Draw the new series as a line chart on a wide (20 x 6 inch) canvas.

fig, ax = plt.subplots(figsize=(20, 6))
ax.plot(new_series.values)
plt.show()

In [None]:
# Scale the new series into [0, 1], window it and add the trailing feature axis.
# NOTE(review): this rebinds `scaler` to a fresh MinMaxScaler fitted on the new
# data, replacing the one fitted earlier in the notebook.

scaler = MinMaxScaler(feature_range=(0, 1))
new_data = scaler.fit_transform(new_series)
print(len(new_data))

look_back = 20
new_testX, new_testY = create_dataset(new_data, look_back=look_back)

new_testX = new_testX.reshape((new_testX.shape[0], new_testX.shape[1], 1))
new_testX.shape

In [None]:
# Early-stopping callback: stop after 10 epochs without val_loss improvement,
# then print the architecture of the current model.
early_stopping = EarlyStopping(monitor='val_loss', mode='auto', patience=10, verbose=1)
model.summary()

In [None]:
# Score a model on a single data set (three arguments)

def model_score_new(model, new_testX, new_testY):
    """Evaluate `model` on the given inputs/targets, print the score and return it.

    The score is whatever `model.evaluate` returns for a batch size of 72; it is
    printed as MSE together with its square root (RMSE).
    """
    score = model.evaluate(new_testX, new_testY, batch_size=72, verbose=0)
    rmse = math.sqrt(score)
    print('NewModel Score: %.5f MSE (%.2f RMSE)' % (score, rmse))
    return score

# Score the reloaded model on the new data; the returned score is the cell's output.
model_score_new(model=model, new_testX=new_testX, new_testY=new_testY)

In [None]:
# Predict on the new windows and compare the predictions with the actual values.

predictions = predict_the_sequences(model, new_testX, window_size=20, prediction_len=1)

plot_the_results(predictions, new_testY, prediction_len=1)

In [None]:
# Persist the model for reuse: architecture as JSON, weights as a separate HDF5 file.

model_json = model.to_json()
with open("filepath/model.json", mode="w") as json_file:
    json_file.write(model_json)

model.save_weights("filepath/model.h5")
print("Saved model to disk")

In [None]:
# model architecture can be loaded and rebuilt with different configuration

# Read the architecture inside a context manager so the file is closed even if
# reading raises.
with open('filepath/model.json', 'r') as json_file:
    loaded_model_json = json_file.read()
loaded_model = model_from_json(loaded_model_json)


loaded_model.load_weights("filepath/model.h5")
print("Loaded model from disk")

# compile, now that model has been loaded
loaded_model.compile(loss="mse", optimizer="adam")
# Callback for the upcoming fit: stop after 4 epochs without val_loss improvement.
early_stopping=EarlyStopping(monitor='val_loss', patience=4, verbose=1, mode='auto')

In [None]:
# fit the new model

start = time.time()
loaded_model.fit(new_testX, new_testY, batch_size=72, epochs=15, verbose=1, shuffle=False,
                 validation_split=0.05, callbacks=[early_stopping])
# The measured interval wraps the fit call, i.e. training, so label it as such
# (it was previously printed as "Compilation Time").
print("> Training Time : ", time.time() - start)

# Evaluate accuracy
# Score the model that was just fitted (`loaded_model`); scoring `model` here
# would only re-report the model that was not trained in this cell.

model_score_new(loaded_model, new_testX, new_testY)

In [None]:
# Predict future values & plot the results
# Use `loaded_model`, the model fitted in the previous cell, rather than `model`.

predictions = predict_the_sequences(loaded_model, new_testX, 20, 1)
plot_the_results(predictions, new_testY, 1)

In [None]:
# the end