In [12]:
# LSTM with regression framing
import numpy
import matplotlib.pyplot as plt
import pandas
import math
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error

# convert an array of values into a dataset matrix
def create_dataset(dataset, look_back=1, include_last=False):
	"""Build supervised-learning pairs from a series using a sliding window.

	For every start index ``i`` the input sample is the ``look_back``
	consecutive values ``dataset[i:i+look_back, 0]`` and the target is the
	value that immediately follows them, ``dataset[i+look_back, 0]``.

	Args:
		dataset: 2-D numpy array; only column 0 is read.
		look_back: number of consecutive values placed in each input sample.
		include_last: when False (the default, kept for backward
			compatibility) the final valid window is skipped, so
			``len(dataset) - look_back - 1`` pairs are produced. When True,
			all ``len(dataset) - look_back`` pairs are produced.

	Returns:
		Tuple ``(dataX, dataY)`` of numpy arrays. With at least one pair,
		``dataX`` has shape (n_samples, look_back) and ``dataY`` has shape
		(n_samples,). Both are empty when the series is too short.
	"""
	n_samples = len(dataset) - look_back
	if not include_last:
		# Historical off-by-one: the last usable window was never emitted.
		# It stays the default so array lengths seen by existing callers do
		# not change; pass include_last=True to use every available pair.
		n_samples -= 1
	# range() of a non-positive count is empty, so short inputs yield no pairs.
	dataX = [dataset[i:i + look_back, 0] for i in range(n_samples)]
	dataY = [dataset[i + look_back, 0] for i in range(n_samples)]
	return numpy.array(dataX), numpy.array(dataY)

# fix random seed for reproducibility
# NOTE(review): this seeds only NumPy's global random generator. Whether that
# is enough to make the Keras training run repeatable depends on the backend
# in use - confirm.
numpy.random.seed(7)

In [13]:
# Read the source CSV, keeping only the column at index 1 and ignoring the
# last 3 lines of the file (skipfooter is used with the python engine).
dataframe = pandas.read_csv(
    'data/NL Pb till Dec 2016 small.csv',
    usecols=[1],
    engine='python',
    skipfooter=3,
)
# Work on the raw values as 32-bit floats.
dataset = dataframe.values.astype('float32')

In [14]:
# Rescale every value into the [0, 1] range.
# NOTE(review): the scaler is fitted on the complete series, i.e. before the
# train/test split, so the test portion influences the scaling parameters.
scaler = MinMaxScaler(feature_range=(0, 1)).fit(dataset)
dataset = scaler.transform(dataset)

In [15]:
# Ordered split: the first 67% of the rows form the training set and the
# remaining rows form the test set (no shuffling).
train_size = int(0.67 * len(dataset))
test_size = len(dataset) - train_size
train = dataset[:train_size, :]
test = dataset[train_size:, :]

In [16]:
# Turn each split into supervised pairs: X holds the value(s) at t,
# Y holds the value at t+1.
look_back = 1
trainX, trainY = create_dataset(train, look_back=look_back)
testX, testY = create_dataset(test, look_back=look_back)

In [17]:
# Arrange the inputs as [samples, time steps, features]: every sample becomes
# a single time step whose features are the look_back window values.
trainX = trainX.reshape(trainX.shape[0], 1, trainX.shape[1])
testX = testX.reshape(testX.shape[0], 1, testX.shape[1])

In [18]:
# create and fit the LSTM network
# Architecture: one LSTM layer with 4 units feeding a single-output Dense
# layer, trained to minimise mean squared error with the Adam optimizer.
model = Sequential()
# NOTE(review): `input_dim` (here) and `nb_epoch` (in fit below) look like
# Keras 1.x argument names; later Keras releases spell these `input_shape` /
# `epochs` - confirm the installed version before changing either.
model.add(LSTM(4, input_dim=look_back))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
# 100 passes over the training pairs, updating after every single sample
# (batch_size=1); verbose=2 prints one summary line per epoch.
model.fit(trainX, trainY, nb_epoch=100, batch_size=1, verbose=2)



Epoch 1/100
1s - loss: 0.0128
Epoch 2/100
1s - loss: 0.0073
Epoch 3/100
1s - loss: 0.0070
Epoch 4/100
1s - loss: 0.0070
Epoch 5/100
1s - loss: 0.0069
Epoch 6/100
1s - loss: 0.0069
Epoch 7/100
1s - loss: 0.0069
Epoch 8/100
1s - loss: 0.0068
Epoch 9/100
1s - loss: 0.0069
Epoch 10/100
1s - loss: 0.0068
Epoch 11/100
1s - loss: 0.0068
Epoch 12/100
1s - loss: 0.0068
Epoch 13/100
1s - loss: 0.0067
Epoch 14/100
1s - loss: 0.0068
Epoch 15/100
1s - loss: 0.0067
Epoch 16/100
1s - loss: 0.0067
Epoch 17/100
1s - loss: 0.0067
Epoch 18/100
1s - loss: 0.0067
Epoch 19/100
1s - loss: 0.0067
Epoch 20/100
1s - loss: 0.0066
Epoch 21/100
1s - loss: 0.0067
Epoch 22/100
1s - loss: 0.0067
Epoch 23/100
1s - loss: 0.0067
Epoch 24/100
1s - loss: 0.0067
Epoch 25/100
1s - loss: 0.0067
Epoch 26/100
1s - loss: 0.0067
Epoch 27/100
1s - loss: 0.0067
Epoch 28/100
1s - loss: 0.0067
Epoch 29/100
1s - loss: 0.0066
Epoch 30/100
1s - loss: 0.0067
Epoch 31/100
1s - loss: 0.0067
Epoch 32/100
1s - loss: 0.0067
Epoch 33/100
1s -

<keras.callbacks.History at 0x7f07cd327a10>

In [13]:
# NOTE(review): this cell imports modules mid-notebook instead of in the import
# cell at the top, and its execution count (13) is out of sequence with the
# surrounding cells - re-check with a fresh kernel and Run All.
import sys
# Raise the interpreter's recursion limit; presumably so that pickling the
# model below does not exceed the default limit - confirm.
sys.setrecursionlimit(10000)
import pickle
# Save the model (currently disabled, so nothing is written to disk)
# pickle.dump( model, open( "saveX.p", "wb" ) )

In [19]:
# Run the fitted network over both splits; the outputs are still on the
# scaler's 0-1 scale at this point.
trainPredict, testPredict = model.predict(trainX), model.predict(testX)

In [20]:
# Map predictions and targets back to the original units of the data.
# The 1-D target vectors are handed to the scaler as a single row, so the
# un-scaled targets come back with shape (1, n_samples).
trainPredict = scaler.inverse_transform(trainPredict)
trainY = scaler.inverse_transform(trainY.reshape(1, -1))
testPredict = scaler.inverse_transform(testPredict)
testY = scaler.inverse_transform(testY.reshape(1, -1))

In [21]:
# Root mean squared error for each split, in the original units.
# Targets are stored as one row (index 0); predictions as one column (index 0).
train_mse = mean_squared_error(trainY[0], trainPredict[:,0])
trainScore = math.sqrt(train_mse)
print('Train Score: %.2f RMSE' % trainScore)

test_mse = mean_squared_error(testY[0], testPredict[:,0])
testScore = math.sqrt(test_mse)
print('Test Score: %.2f RMSE' % testScore)

Train Score: 513.34 RMSE
Test Score: 701.33 RMSE
