# LSTM Multi-Variate Multi-Step


This notebook was prepared for a Kaggle COVID-19 challenge.

It follows this blog post: https://machinelearningmastery.com/how-to-develop-lstm-models-for-time-series-forecasting/

In [1]:
# multivariate multi-step encoder-decoder lstm example
from numpy import array
from numpy import hstack
from keras.models import Sequential
from keras.layers import LSTM
from keras.layers import Dense
from keras.layers import RepeatVector
from keras.layers import TimeDistributed

Using TensorFlow backend.


In [2]:
# split a multivariate sequence into samples
def split_sequences(sequences, n_steps_in, n_steps_out):
	"""Split a multivariate sequence into sliding input/output windows.

	Each sample pairs ``n_steps_in`` consecutive rows (the input) with the
	``n_steps_out`` rows that immediately follow them (the target). Windows
	advance one row at a time and stop as soon as a target window would run
	past the end of the data.

	Args:
		sequences: 2-D array-like laid out as [rows, columns]. A NumPy array
			or a plain nested list is accepted.
		n_steps_in: number of rows in every input window.
		n_steps_out: number of rows in every output window.

	Returns:
		A tuple ``(X, y)`` of NumPy arrays. When at least one window fits,
		``X`` has shape (samples, n_steps_in, columns) and ``y`` has shape
		(samples, n_steps_out, columns). When no window fits, both are
		empty 1-D arrays.
	"""
	# Coerce once so that nested lists support the 2-D slicing used below.
	data = array(sequences)
	X, y = list(), list()
	for i in range(len(data)):
		# find the end of the input window and of the output window
		end_ix = i + n_steps_in
		out_end_ix = end_ix + n_steps_out
		# stop once the output window would extend beyond the dataset
		if out_end_ix > len(data):
			break
		# gather input and output parts of the pattern (all columns)
		X.append(data[i:end_ix, :])
		y.append(data[end_ix:out_end_ix, :])
	return array(X), array(y)

In [3]:
# define the two parallel input series
in_seq1 = array([10, 20, 30, 40, 50, 60, 70, 80, 90])
in_seq2 = array([15, 25, 35, 45, 55, 65, 75, 85, 95])
# the output series is the element-wise sum of the two inputs; NumPy adds
# the arrays directly, so no Python-level index loop is needed
out_seq = in_seq1 + in_seq2

In [4]:
# inspect the output series (element-wise sum of in_seq1 and in_seq2)
out_seq

array([ 25,  45,  65,  85, 105, 125, 145, 165, 185])

In [5]:
# give every series a [rows, columns] layout with exactly one column;
# -1 lets NumPy infer the row count from the series length
in_seq1, in_seq2, out_seq = (
    series.reshape(-1, 1) for series in (in_seq1, in_seq2, out_seq)
)

In [6]:
# show the first input column and the output column after reshaping
display(in_seq1, out_seq)

array([[10],
       [20],
       [30],
       [40],
       [50],
       [60],
       [70],
       [80],
       [90]])

array([[ 25],
       [ 45],
       [ 65],
       [ 85],
       [105],
       [125],
       [145],
       [165],
       [185]])

In [7]:
# place the three single-column arrays side by side: two inputs, then the output
dataset = hstack([in_seq1, in_seq2, out_seq])
display(dataset)

array([[ 10,  15,  25],
       [ 20,  25,  45],
       [ 30,  35,  65],
       [ 40,  45,  85],
       [ 50,  55, 105],
       [ 60,  65, 125],
       [ 70,  75, 145],
       [ 80,  85, 165],
       [ 90,  95, 185]])

In [22]:
# choose the window sizes: rows fed in per sample, and rows predicted per sample
n_steps_in = 3
n_steps_out = 2

In [23]:
# convert the stacked dataset into input/output samples
X, y = split_sequences(dataset, n_steps_in=n_steps_in, n_steps_out=n_steps_out)

display(X, y)

array([[[ 10,  15,  25],
        [ 20,  25,  45],
        [ 30,  35,  65]],

       [[ 20,  25,  45],
        [ 30,  35,  65],
        [ 40,  45,  85]],

       [[ 30,  35,  65],
        [ 40,  45,  85],
        [ 50,  55, 105]],

       [[ 40,  45,  85],
        [ 50,  55, 105],
        [ 60,  65, 125]],

       [[ 50,  55, 105],
        [ 60,  65, 125],
        [ 70,  75, 145]]])

array([[[ 40,  45,  85],
        [ 50,  55, 105]],

       [[ 50,  55, 105],
        [ 60,  65, 125]],

       [[ 60,  65, 125],
        [ 70,  75, 145]],

       [[ 70,  75, 145],
        [ 80,  85, 165]],

       [[ 80,  85, 165],
        [ 90,  95, 185]]])

In [24]:
# the number of features is read from the third axis of X (3 for this dataset)
n_features = X.shape[2]
display(n_features)

3

In [25]:
# define the encoder-decoder model as a single ordered list of layers
model = Sequential([
    # encoder: consumes a window of shape (n_steps_in, n_features)
    LSTM(200, activation='relu', input_shape=(n_steps_in, n_features)),
    # repeat the encoder output once per time step to be predicted
    RepeatVector(n_steps_out),
    # decoder: return_sequences=True keeps one output per repeated step
    LSTM(200, activation='relu', return_sequences=True),
    # apply the same Dense layer at every step, producing n_features values
    TimeDistributed(Dense(n_features)),
])
model.compile(optimizer='adam', loss='mse')

In [26]:
# fit model; keep the returned History object so the per-epoch loss can be
# inspected afterwards (history.history['loss']) instead of being discarded,
# and so the cell no longer echoes an object repr containing a memory address
history = model.fit(X, y, epochs=300, verbose=0)

<keras.callbacks.History at 0x193f67e4dd8>

In [27]:
# demonstrate prediction: build one hand-written window and give it a
# leading axis of length 1 so the model receives a batch of a single sample
x_input = array([[60, 65, 125], [70, 75, 145], [80, 85, 165]]).reshape(
    (1, n_steps_in, n_features)
)
yhat = model.predict(x_input, verbose=0)
print(yhat)

[[[ 92.3952    97.89315  190.13568 ]
  [103.751366 109.12877  212.95303 ]]]
