In [2]:
from pandas import DataFrame
from pandas import Series
from pandas import concat
from pandas import read_csv
from pandas import datetime
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from math import sqrt
from matplotlib import pyplot
from numpy import array
 
# date-time parsing function for loading the dataset
def parser(x):
	return datetime.strptime('190'+x, '%Y-%m')
 
# convert time series into supervised learning problem
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
	n_vars = 1 if type(data) is list else data.shape[1]
	df = DataFrame(data)
	cols, names = list(), list()
	# input sequence (t-n, ... t-1)
	for i in range(n_in, 0, -1):
		cols.append(df.shift(i))
		names += [('var%d(t-%d)' % (j+1, i)) for j in range(n_vars)]
	# forecast sequence (t, t+1, ... t+n)
	for i in range(0, n_out):
		cols.append(df.shift(-i))
		if i == 0:
			names += [('var%d(t)' % (j+1)) for j in range(n_vars)]
		else:
			names += [('var%d(t+%d)' % (j+1, i)) for j in range(n_vars)]
	# put it all together
	agg = concat(cols, axis=1)
	agg.columns = names
	# drop rows with NaN values
	if dropnan:
		agg.dropna(inplace=True)
	return agg
 
# create a differenced series
def difference(dataset, interval=1):
	diff = list()
	for i in range(interval, len(dataset)):
		value = dataset[i] - dataset[i - interval]
		diff.append(value)
	return Series(diff)
 
# transform series into train and test sets for supervised learning
def prepare_data(series, n_test, n_lag, n_seq):
	# extract raw values
	raw_values = series.values
	# transform data to be stationary
	diff_series = difference(raw_values, 1)
	diff_values = diff_series.values
	diff_values = diff_values.reshape(len(diff_values), 1)
	# rescale values to -1, 1
	scaler = MinMaxScaler(feature_range=(-1, 1))
	scaled_values = scaler.fit_transform(diff_values)
	scaled_values = scaled_values.reshape(len(scaled_values), 1)
	# transform into supervised learning problem X, y
	supervised = series_to_supervised(scaled_values, n_lag, n_seq)
	supervised_values = supervised.values
	# split into train and test sets
	train, test = supervised_values[0:-n_test], supervised_values[-n_test:]
	return scaler, train, test
 
# fit an LSTM network to training data
def fit_lstm(train, n_lag, n_seq, n_batch, nb_epoch, n_neurons):
	# reshape training into [samples, timesteps, features]
	X, y = train[:, 0:n_lag], train[:, n_lag:]
	X = X.reshape(X.shape[0], 1, X.shape[1])
	# design network
	model = Sequential()
	model.add(LSTM(n_neurons, batch_input_shape=(n_batch, X.shape[1], X.shape[2]), stateful=True))
	model.add(Dense(y.shape[1]))
	model.compile(loss='mean_squared_error', optimizer='adam')
	# fit network
	for i in range(nb_epoch):
		model.fit(X, y, epochs=1, batch_size=n_batch, verbose=0, shuffle=False)
		model.reset_states()
	return model
 
# make one forecast with an LSTM,
def forecast_lstm(model, X, n_batch):
	# reshape input pattern to [samples, timesteps, features]
	X = X.reshape(1, 1, len(X))
	# make forecast
	forecast = model.predict(X, batch_size=n_batch)
	# convert to array
	return [x for x in forecast[0, :]]
 
# evaluate the persistence model
def make_forecasts(model, n_batch, train, test, n_lag, n_seq):
	forecasts = list()
	for i in range(len(test)):
		X, y = test[i, 0:n_lag], test[i, n_lag:]
		# make forecast
		forecast = forecast_lstm(model, X, n_batch)
		# store the forecast
		forecasts.append(forecast)
	return forecasts
 
# invert differenced forecast
def inverse_difference(last_ob, forecast):
	# invert first forecast
	inverted = list()
	inverted.append(forecast[0] + last_ob)
	# propagate difference forecast using inverted first value
	for i in range(1, len(forecast)):
		inverted.append(forecast[i] + inverted[i-1])
	return inverted
 
# inverse data transform on forecasts
def inverse_transform(series, forecasts, scaler, n_test):
	inverted = list()
	for i in range(len(forecasts)):
		# create array from forecast
		forecast = array(forecasts[i])
		forecast = forecast.reshape(1, len(forecast))
		# invert scaling
		inv_scale = scaler.inverse_transform(forecast)
		inv_scale = inv_scale[0, :]
		# invert differencing
		index = len(series) - n_test + i - 1
		last_ob = series.values[index]
		inv_diff = inverse_difference(last_ob, inv_scale)
		# store
		inverted.append(inv_diff)
	return inverted
 
# evaluate the RMSE for each forecast time step
def evaluate_forecasts(test, forecasts, n_lag, n_seq):
	for i in range(n_seq):
		actual = [row[i] for row in test]
		predicted = [forecast[i] for forecast in forecasts]
		rmse = sqrt(mean_squared_error(actual, predicted))
		print('t+%d RMSE: %f' % ((i+1), rmse))
 
# plot the forecasts in the context of the original dataset
def plot_forecasts(series, forecasts, n_test):
	# plot the entire dataset in blue
	pyplot.plot(series.values)
	# plot the forecasts in red
	for i in range(len(forecasts)):
		off_s = len(series) - n_test + i - 1
		off_e = off_s + len(forecasts[i]) + 1
		xaxis = [x for x in range(off_s, off_e)]
		yaxis = [series.values[off_s]] + forecasts[i]
		pyplot.plot(xaxis, yaxis, color='red')
	# show the plot
	pyplot.show()
    


  """


In [None]:
# load dataset
series = read_csv('multistep_lstm.csv', header=0, parse_dates=[0], index_col=0, squeeze=True, date_parser=parser)
# configure
n_lag = 1
n_seq = 3
n_test = 10
n_epochs = 1500
n_batch = 1
n_neurons = 1
# prepare data
scaler, train, test = prepare_data(series, n_test, n_lag, n_seq)
# fit model
model = fit_lstm(train, n_lag, n_seq, n_batch, n_epochs, n_neurons)
# make forecasts
forecasts = make_forecasts(model, n_batch, train, test, n_lag, n_seq)
# inverse transform forecasts and test
forecasts = inverse_transform(series, forecasts, scaler, n_test+2)
actual = [row[n_lag:] for row in test]
actual = inverse_transform(series, actual, scaler, n_test+2)
# evaluate forecasts
evaluate_forecasts(actual, forecasts, n_lag, n_seq)
# plot forecasts
plot_forecasts(series, forecasts, n_test+2)

In [3]:
# load dataset
series = read_csv('multistep_lstm.csv', header=0, parse_dates=[0], index_col=0, squeeze=True, date_parser=parser)
# configure
n_lag = 1
n_seq = 3
n_test = 10
n_epochs = 1500
n_batch = 1
n_neurons = 1
# prepare data
scaler, train, test = prepare_data(series, n_test, n_lag, n_seq)

In [5]:
train

array([[-0.63999169,  0.01392642, -0.40594471,  0.11286635],
       [ 0.01392642, -0.40594471,  0.11286635, -0.18977344],
       [-0.40594471,  0.11286635, -0.18977344,  0.12242777],
       [ 0.11286635, -0.18977344,  0.12242777, -0.17106631],
       [-0.18977344,  0.12242777, -0.17106631, -0.27250052],
       [ 0.12242777, -0.17106631, -0.27250052, -0.43130326],
       [-0.17106631, -0.27250052, -0.43130326,  0.74724589],
       [-0.27250052, -0.43130326,  0.74724589, -0.76678445],
       [-0.43130326,  0.74724589, -0.76678445, -0.10579921],
       [ 0.74724589, -0.76678445, -0.10579921, -0.32695905],
       [-0.76678445, -0.10579921, -0.32695905,  0.11120349],
       [-0.10579921, -0.32695905,  0.11120349,  0.12201206],
       [-0.32695905,  0.11120349,  0.12201206, -0.48118894],
       [ 0.11120349,  0.12201206, -0.48118894,  0.25670339],
       [ 0.12201206, -0.48118894,  0.25670339, -0.39430472],
       [-0.48118894,  0.25670339, -0.39430472,  0.18187487],
       [ 0.25670339, -0.

In [6]:
raw_values = series.values
raw_values

array([266. , 145.9, 183.1, 119.3, 180.3, 168.5, 231.8, 224.5, 192.8,
       122.9, 336.5, 185.9, 194.3, 149.5, 210.1, 273.3, 191.4, 287. ,
       226. , 303.6, 289.9, 421.6, 264.5, 342.3, 339.7, 440.4, 315.9,
       439.3, 401.3, 437.4, 575.5, 407.6, 682. , 475.3, 581.3, 646.9])

In [9]:
diff_series = difference(raw_values,1)
diff_values = diff_series.values
diff_values = diff_values.reshape(len(diff_values),1)

In [10]:
diff_values.shape

(35, 1)

In [11]:
scaler = MinMaxScaler(feature_range=(-1,1))
scaled_values = scaler.fit_transform(diff_values)
scaled_values = scaled_values.reshape(len(scaled_values),1)

In [14]:
scaled_values.shape[1]

1

In [13]:
type(scaled_values)

numpy.ndarray

In [17]:
from pandas import DataFrame
from pandas import Series
from pandas import concat
from pandas import read_csv
from pandas import datetime
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from math import sqrt
from matplotlib import pyplot
from numpy import array

  """


In [51]:
df = DataFrame(scaled_values)
cols, names = list(), list()

In [52]:
for i in range(1,0,-1):
    cols.append(df.shift(i))
    names += ['(var%d(t-%d))' %(j+1, i) for j in range(1)]

In [54]:
cols, names

([           0
  0        NaN
  1  -0.639992
  2   0.013926
  3  -0.405945
  4   0.112866
  5  -0.189773
  6   0.122428
  7  -0.171066
  8  -0.272501
  9  -0.431303
  10  0.747246
  11 -0.766784
  12 -0.105799
  13 -0.326959
  14  0.111203
  15  0.122012
  16 -0.481189
  17  0.256703
  18 -0.394305
  19  0.181875
  20 -0.197672
  21  0.406776
  22 -0.793806
  23  0.182706
  24 -0.151528
  25  0.277905
  26 -0.658283
  27  0.372272
  28 -0.298691
  29  0.009354
  30  0.433382
  31 -0.838703
  32  1.000000
  33 -1.000000
  34  0.299938],
 ['(var1(t-1))'])

In [55]:
for i in range(0,n_seq):
    cols.append(df.shift(i))
    if i==0:
        names += ['(var%d(t))' %(j+1) for j in range(1)]
    else:
        names += ['(var%d(t+%d))' %(j+1, i) for j in range(1)]

In [57]:
names

['(var1(t-1))', '(var1(t))', '(var1(t+1))', '(var1(t+2))']

In [58]:
agg = concat(cols,axis = 1)

In [62]:
agg.columns = names

In [63]:
agg

Unnamed: 0,(var1(t-1)),(var1(t)),(var1(t+1)),(var1(t+2))
0,,-0.639992,,
1,-0.639992,0.013926,-0.639992,
2,0.013926,-0.405945,0.013926,-0.639992
3,-0.405945,0.112866,-0.405945,0.013926
4,0.112866,-0.189773,0.112866,-0.405945
5,-0.189773,0.122428,-0.189773,0.112866
6,0.122428,-0.171066,0.122428,-0.189773
7,-0.171066,-0.272501,-0.171066,0.122428
8,-0.272501,-0.431303,-0.272501,-0.171066
9,-0.431303,0.747246,-0.431303,-0.272501


In [64]:
agg.dropna(inplace=True)

In [67]:
d = agg
d

Unnamed: 0,(var1(t-1)),(var1(t)),(var1(t+1)),(var1(t+2))
2,0.013926,-0.405945,0.013926,-0.639992
3,-0.405945,0.112866,-0.405945,0.013926
4,0.112866,-0.189773,0.112866,-0.405945
5,-0.189773,0.122428,-0.189773,0.112866
6,0.122428,-0.171066,0.122428,-0.189773
7,-0.171066,-0.272501,-0.171066,0.122428
8,-0.272501,-0.431303,-0.272501,-0.171066
9,-0.431303,0.747246,-0.431303,-0.272501
10,0.747246,-0.766784,0.747246,-0.431303
11,-0.766784,-0.105799,-0.766784,0.747246


In [66]:
supervised = series_to_supervised(scaled_values,n_lag, n_seq)
supervised

Unnamed: 0,var1(t-1),var1(t),var1(t+1),var1(t+2)
1,-0.639992,0.013926,-0.405945,0.112866
2,0.013926,-0.405945,0.112866,-0.189773
3,-0.405945,0.112866,-0.189773,0.122428
4,0.112866,-0.189773,0.122428,-0.171066
5,-0.189773,0.122428,-0.171066,-0.272501
6,0.122428,-0.171066,-0.272501,-0.431303
7,-0.171066,-0.272501,-0.431303,0.747246
8,-0.272501,-0.431303,0.747246,-0.766784
9,-0.431303,0.747246,-0.766784,-0.105799
10,0.747246,-0.766784,-0.105799,-0.326959


In [68]:
supervised_values = supervised.values
supervised_values

array([[-0.63999169,  0.01392642, -0.40594471,  0.11286635],
       [ 0.01392642, -0.40594471,  0.11286635, -0.18977344],
       [-0.40594471,  0.11286635, -0.18977344,  0.12242777],
       [ 0.11286635, -0.18977344,  0.12242777, -0.17106631],
       [-0.18977344,  0.12242777, -0.17106631, -0.27250052],
       [ 0.12242777, -0.17106631, -0.27250052, -0.43130326],
       [-0.17106631, -0.27250052, -0.43130326,  0.74724589],
       [-0.27250052, -0.43130326,  0.74724589, -0.76678445],
       [-0.43130326,  0.74724589, -0.76678445, -0.10579921],
       [ 0.74724589, -0.76678445, -0.10579921, -0.32695905],
       [-0.76678445, -0.10579921, -0.32695905,  0.11120349],
       [-0.10579921, -0.32695905,  0.11120349,  0.12201206],
       [-0.32695905,  0.11120349,  0.12201206, -0.48118894],
       [ 0.11120349,  0.12201206, -0.48118894,  0.25670339],
       [ 0.12201206, -0.48118894,  0.25670339, -0.39430472],
       [-0.48118894,  0.25670339, -0.39430472,  0.18187487],
       [ 0.25670339, -0.

In [70]:
train, test = supervised_values[0:-n_test], supervised_values[-n_test,:]

In [71]:
test

array([ 0.1827063 , -0.15152775,  0.2779048 , -0.6582831 ])

In [72]:
train

array([[-0.63999169,  0.01392642, -0.40594471,  0.11286635],
       [ 0.01392642, -0.40594471,  0.11286635, -0.18977344],
       [-0.40594471,  0.11286635, -0.18977344,  0.12242777],
       [ 0.11286635, -0.18977344,  0.12242777, -0.17106631],
       [-0.18977344,  0.12242777, -0.17106631, -0.27250052],
       [ 0.12242777, -0.17106631, -0.27250052, -0.43130326],
       [-0.17106631, -0.27250052, -0.43130326,  0.74724589],
       [-0.27250052, -0.43130326,  0.74724589, -0.76678445],
       [-0.43130326,  0.74724589, -0.76678445, -0.10579921],
       [ 0.74724589, -0.76678445, -0.10579921, -0.32695905],
       [-0.76678445, -0.10579921, -0.32695905,  0.11120349],
       [-0.10579921, -0.32695905,  0.11120349,  0.12201206],
       [-0.32695905,  0.11120349,  0.12201206, -0.48118894],
       [ 0.11120349,  0.12201206, -0.48118894,  0.25670339],
       [ 0.12201206, -0.48118894,  0.25670339, -0.39430472],
       [-0.48118894,  0.25670339, -0.39430472,  0.18187487],
       [ 0.25670339, -0.