# Multi-Step MLP Models

References:

- How to Develop Multilayer Perceptron Models for Time Series Forecasting

  https://machinelearningmastery.com/how-to-develop-multilayer-perceptron-models-for-time-series-forecasting/
  


- Exploratory Configuration of a Multilayer Perceptron Network for Time Series Forecasting 

  https://machinelearningmastery.com/exploratory-configuration-multilayer-perceptron-network-time-series-forecasting/  


## Example case

In [None]:
from pandas import DataFrame
from pandas import Series
from pandas import concat
from pandas import read_csv
from pandas import datetime
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense
from math import sqrt
import matplotlib

##### transform the data

In [21]:
# univariate data preparation
from numpy import array

# split a univariate sequence into samples
def split_sequence(sequence, n_steps_in, n_steps_out):
	"""Cut a univariate sequence into sliding (input, output) windows.

	Each sample pairs `n_steps_in` consecutive values with the
	`n_steps_out` values that immediately follow them. Windows advance one
	position at a time and stop as soon as a full window no longer fits.

	Args:
		sequence: sliceable sequence of observations (e.g. a list).
		n_steps_in: number of values in each input window.
		n_steps_out: number of values in each output window.

	Returns:
		A tuple `(X, y)` of numpy arrays built from the input windows and
		the matching output windows.
	"""
	total = len(sequence)
	# A window starting at `start` needs n_steps_in + n_steps_out values, and
	# there are never more start positions than elements in the sequence.
	n_windows = min(total, total - n_steps_in - n_steps_out + 1)
	inputs, targets = [], []
	for start in range(n_windows):
		split_at = start + n_steps_in
		inputs.append(sequence[start:split_at])
		targets.append(sequence[split_at:split_at + n_steps_out])
	return array(inputs), array(targets)

# toy series used to demonstrate the windowing
raw_seq = [10, 20, 30, 40, 50, 60, 70, 80, 90]

# window sizes: 3 past values in, 2 future values out
n_steps_in = 3
n_steps_out = 2

# build the (input, output) samples
X, y = split_sequence(raw_seq, n_steps_in, n_steps_out)

# print every input window next to its target window
for i, sample_in in enumerate(X):
	print(sample_in, y[i])

[10 20 30] [40 50]
[20 30 40] [50 60]
[30 40 50] [60 70]
[40 50 60] [70 80]
[50 60 70] [80 90]


##### build model 

In [22]:
# define model: a feed-forward network with one hidden layer
model = Sequential()

# hidden layer, 100 neurons, relu activation function.
# The input width is the number of lag values per sample, i.e. n_steps_in
# (the name set in the data-preparation cell; `n_steps` is never defined).
model.add(Dense(100, activation='relu', input_dim=n_steps_in))

# output layer: one unit per forecast step
model.add(Dense(n_steps_out))

# loss function: mean squared error, optimised with Adam
model.compile(optimizer='adam', loss='mse')

##### fit model

In [23]:
# fit model on the windowed samples for 2000 epochs;
# verbose=0 suppresses the per-epoch training log
model.fit(X, y, epochs=2000, verbose=0)

<keras.callbacks.History at 0x7fbd00377128>

In [24]:
# demonstrate prediction on the window that follows the training data
x_input = array([70, 80, 90])

# the model expects a 2-D batch: one row holding n_steps_in values
# (n_steps_in is the name defined in the data-preparation cell)
x_input = x_input.reshape((1, n_steps_in))
yhat = model.predict(x_input, verbose=0)
print(yhat)

[[101.38835 113.17945]]


##### calculate MSE

In [27]:
# calculate the MSE

# the two values that continue the 10-step pattern after [70, 80, 90]
y_test = array([100, 110])
# y_test is wrapped in a list so it forms one sample with two outputs,
# the same layout as the printed yhat
mse = mean_squared_error([y_test], yhat)
mse

6.018214156065369

## Web traffic case

In [1]:
from pandas import DataFrame
from pandas import Series
from pandas import concat
from pandas import read_csv
from pandas import datetime
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense
from math import sqrt
import matplotlib
import matplotlib.pyplot as plt
import numpy as np

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


##### read dataset

In [2]:
# Location of the web-traffic training CSV. Kept in one named constant so it
# is easy to point at wherever the file lives on another machine.
TRAIN_CSV_PATH = '/home/irza/Downloads/train_2.csv'

# nrows=1 loads only the first data row (a single page's daily series)
wt_df = read_csv(TRAIN_CSV_PATH, nrows=1)
wt_df

Unnamed: 0,Page,2015-07-01,2015-07-02,2015-07-03,2015-07-04,2015-07-05,2015-07-06,2015-07-07,2015-07-08,2015-07-09,...,2017-09-01,2017-09-02,2017-09-03,2017-09-04,2017-09-05,2017-09-06,2017-09-07,2017-09-08,2017-09-09,2017-09-10
0,2NE1_zh.wikipedia.org_all-access_spider,18,11,5,13,14,9,9,22,26,...,19,33,33,18,16,27,29,23,54,38


##### select and transform dataframe row to sequence

In [3]:
# take only the values without the page name

def get_seq_by_row(input_df, row_num):
    """Return one row of `input_df` as an array, without its first column.

    Args:
        input_df: DataFrame whose first column is a label (the page name in
            the web-traffic data) followed by the observed values.
        row_num: positional index of the row to extract.

    Returns:
        A numpy array holding the row's values from the second column on.
    """
    row_values = input_df.iloc[row_num].values
    # element 0 is the leading label column; drop it and keep the observations
    return np.delete(row_values, 0)

# extract the daily values of the first (and only loaded) page
wt_seq = get_seq_by_row(wt_df,0)

In [4]:
# inspect the extracted sequence
wt_seq

array([18, 11, 5, 13, 14, 9, 9, 22, 26, 24, 19, 10, 14, 15, 8, 16, 8, 8,
       16, 7, 11, 10, 20, 18, 15, 14, 49, 10, 16, 18, 8, 5, 9, 7, 13, 9,
       7, 4, 11, 10, 5, 9, 9, 9, 9, 13, 4, 15, 25, 9, 5, 6, 20, 3, 14, 46,
       5, 5, 13, 4, 9, 10, 9, 11, 11, 11, 9, 15, 5, 10, 7, 4, 8, 9, 10, 6,
       13, 16, 6, 24, 9, 11, 12, 8, 14, 6, 6, 11, 14, 6, 10, 20, 7, 15, 8,
       15, 5, 8, 8, 5, 11, 165, 34, 6, 13, 8, 9, 11, 26, 18, 3, 5, 12, 6,
       16, 19, 9, 10, 11, 11, 7, 9, 10, 24, 6, 6, 8, 16, 13, 10, 10, 6, 5,
       20, 6, 47, 9, 9, 12, 11, 17, 15, 14, 11, 97, 11, 12, 11, 14, 15,
       12, 104, 5, 22, 45, 75, 29, 34, 20, 12, 25, 9, 62, 20, 19, 8, 23,
       13, 16, 34, 36, 11, 18, 12, 24, 30, 27, 44, 35, 53, 11, 26, 13, 18,
       9, 16, 6, 19, 20, 19, 22, 30, 14, 16, 22, 15, 15, 26, 16, 13, 27,
       18, 13, 32, 31, 16, 38, 18, 9, 14, 10, 24, 8, 15, 18, 10, 23, 17,
       11, 26, 14, 8, 12, 9, 11, 34, 17, 29, 11, 9, 14, 21, 12, 11, 13,
       11, 13, 16, 13, 19, 21, 14, 11, 35,

##### transform sequence to supervised format

In [5]:
# frame a sequence as a supervised learning problem

def timeseries_to_supervised(data, lag=1, stepahead=1):
    """Frame a sequence as a supervised-learning matrix of sliding windows.

    Every output row holds `lag + stepahead` consecutive observations,
    oldest first. Rows that would contain missing values introduced by the
    shifting (the first `lag + stepahead - 1` rows) are dropped.

    Args:
        data: 1-D sequence of observations.
        lag: number of past values per row.
        stepahead: number of future values per row.

    Returns:
        2-D numpy array with `lag + stepahead` columns.
    """
    frame = DataFrame(data)
    width = lag + stepahead
    # largest shift first, so the oldest observation ends up in column 0
    shifted = [frame.shift(offset) for offset in range(width - 1, 0, -1)]
    windowed = concat(shifted + [frame], axis=1)
    return windowed.values[width - 1:, :]

In [6]:
# number of past observations used as model inputs
time_step_lag = 2
# number of future observations to forecast
time_step_ahead = 3

In [7]:
# each row: time_step_lag input values followed by time_step_ahead target values
wt_supervised = timeseries_to_supervised(wt_seq, time_step_lag, time_step_ahead)
wt_supervised

array([[18, 11, 5, 13, 14],
       [11, 5, 13, 14, 9],
       [5, 13, 14, 9, 9],
       ...,
       [18, 16, 27, 29, 23],
       [16, 27, 29, 23, 54],
       [27, 29, 23, 54, 38]], dtype=object)

#### split train and test set

In [8]:
def split_train_and_test_set(dataset, num_of_test_set=12):
    """Split a dataset into a leading train part and a trailing test part.

    Args:
        dataset: sliceable sequence of samples (list, numpy array, ...).
        num_of_test_set: number of samples taken from the end for testing.
            If it exceeds the dataset length, every sample goes to the test
            part and the train part is empty.

    Returns:
        A tuple `(train, test)` of slices of `dataset`.

    Raises:
        ValueError: if `num_of_test_set` is negative.
    """
    if num_of_test_set < 0:
        raise ValueError('num_of_test_set must be non-negative')
    # Use an explicit split index rather than negative slicing: with a test
    # size of 0, dataset[0:-0] is empty and dataset[-0:] is the whole dataset,
    # which would silently move every sample into the test part.
    split_at = max(len(dataset) - num_of_test_set, 0)
    return dataset[:split_at], dataset[split_at:]

In [9]:
# hold out the last rows (default test size) of the supervised matrix
train, test = split_train_and_test_set(wt_supervised)

In [10]:
# inspect the held-out rows
test

array([[25, 47, 44, 26, 24],
       [47, 44, 26, 24, 21],
       [44, 26, 24, 21, 19],
       [26, 24, 21, 19, 33],
       [24, 21, 19, 33, 33],
       [21, 19, 33, 33, 18],
       [19, 33, 33, 18, 16],
       [33, 33, 18, 16, 27],
       [33, 18, 16, 27, 29],
       [18, 16, 27, 29, 23],
       [16, 27, 29, 23, 54],
       [27, 29, 23, 54, 38]], dtype=object)

#### scale sequence value

In [32]:
# scale train and test data to [-1, 1]

def scale(train, test):
    """Fit a MinMaxScaler on `train` and apply it to both splits.

    The scaler is fit on the training rows only, with a target feature range
    of (-1, 1), and then used to transform the train and the test matrices.

    Args:
        train: 2-D array of training rows.
        test: 2-D array of test rows with the same number of columns.

    Returns:
        A tuple `(scaler, train_scaled, test_scaled)`; the fitted scaler is
        returned so the caller can invert the scaling later.
    """
    # fit on the training data only
    scaler = MinMaxScaler(feature_range=(-1, 1)).fit(train)

    # transform the training rows
    train_rows, train_cols = train.shape[0], train.shape[1]
    train_scaled = scaler.transform(train.reshape(train_rows, train_cols))

    # transform the test rows with the statistics learned from train
    test_rows, test_cols = test.shape[0], test.shape[1]
    test_scaled = scaler.transform(test.reshape(test_rows, test_cols))

    return scaler, train_scaled, test_scaled

In [12]:
# keep the fitted scaler: it is needed later to map predictions back to raw units
scaler, train_scaled, test_scaled = scale(train, test)



In [13]:
# inspect the scaled test rows
test_scaled

array([[-0.90965092, -0.81930185, -0.83162218, -0.90554415, -0.9137577 ],
       [-0.81930185, -0.83162218, -0.90554415, -0.9137577 , -0.92607803],
       [-0.83162218, -0.90554415, -0.9137577 , -0.92607803, -0.93429158],
       [-0.90554415, -0.9137577 , -0.92607803, -0.93429158, -0.87679671],
       [-0.9137577 , -0.92607803, -0.93429158, -0.87679671, -0.87679671],
       [-0.92607803, -0.93429158, -0.87679671, -0.87679671, -0.93839836],
       [-0.93429158, -0.87679671, -0.87679671, -0.93839836, -0.94661191],
       [-0.87679671, -0.87679671, -0.93839836, -0.94661191, -0.90143737],
       [-0.87679671, -0.93839836, -0.94661191, -0.90143737, -0.89322382],
       [-0.93839836, -0.94661191, -0.90143737, -0.89322382, -0.91786448],
       [-0.94661191, -0.90143737, -0.89322382, -0.91786448, -0.79055441],
       [-0.90143737, -0.89322382, -0.91786448, -0.79055441, -0.85626283]])

In [14]:
# every column of the scaled test rows except the last one
test_scaled[:,0:-1]

array([[-0.90965092, -0.81930185, -0.83162218, -0.90554415],
       [-0.81930185, -0.83162218, -0.90554415, -0.9137577 ],
       [-0.83162218, -0.90554415, -0.9137577 , -0.92607803],
       [-0.90554415, -0.9137577 , -0.92607803, -0.93429158],
       [-0.9137577 , -0.92607803, -0.93429158, -0.87679671],
       [-0.92607803, -0.93429158, -0.87679671, -0.87679671],
       [-0.93429158, -0.87679671, -0.87679671, -0.93839836],
       [-0.87679671, -0.87679671, -0.93839836, -0.94661191],
       [-0.87679671, -0.93839836, -0.94661191, -0.90143737],
       [-0.93839836, -0.94661191, -0.90143737, -0.89322382],
       [-0.94661191, -0.90143737, -0.89322382, -0.91786448],
       [-0.90143737, -0.89322382, -0.91786448, -0.79055441]])

#### build model

In [16]:
# fit an MLP network to training data

def fit_model(train, batch_size, nb_epoch, neurons, time_step_ahead):
    """Build and train a single-hidden-layer MLP on a supervised matrix.

    The last `time_step_ahead` columns of `train` are used as targets and
    all preceding columns as inputs.

    Args:
        train: 2-D array of (scaled) training rows.
        batch_size: accepted for interface compatibility but currently NOT
            forwarded to `model.fit`, so the Keras default batch size is used.
        nb_epoch: number of training epochs.
        neurons: number of units in the hidden layer.
        time_step_ahead: number of trailing target columns / output units.

    Returns:
        The fitted Keras `Sequential` model.
    """
    features = train[:, :-time_step_ahead]
    targets = train[:, -time_step_ahead:]

    network = Sequential()
    # hidden layer sized by `neurons`; input width follows the feature matrix
    network.add(Dense(neurons, activation='relu', input_dim=features.shape[1]))
    # linear output layer, one unit per forecast step
    network.add(Dense(time_step_ahead))
    network.compile(loss='mean_squared_error', optimizer='adam')

    # shuffle=False keeps the samples in their original (temporal) order
    network.fit(features, targets, epochs=nb_epoch, verbose=0, shuffle=False)

    return network

#### fit model 

In [17]:
# training hyper-parameters passed to fit_model
batch_size = 4
# number of training epochs
epochs = 1000
# units in the hidden layer
neurons = 3

In [18]:
# train the MLP on the scaled training rows
model = fit_model(train_scaled, batch_size, epochs, neurons, time_step_ahead)

#### make forecasts

In [74]:
def evaluate(model, scaled_input, time_step_ahead):
    """Run the model on the input columns of a scaled supervised matrix.

    Args:
        model: object exposing `predict(inputs)`.
        scaled_input: 2-D array whose last `time_step_ahead` columns are
            targets; these are stripped before prediction.
        time_step_ahead: number of trailing target columns to drop.

    Returns:
        Whatever `model.predict` returns for the remaining input columns.
    """
    # drop the trailing target columns and predict from the inputs only
    return model.predict(scaled_input[:, :-time_step_ahead])

In [84]:
# scaled forecasts for the training rows
train_output = evaluate(model, train_scaled, time_step_ahead)
train_output

array([[-0.9208824 , -0.91849023, -0.91999376],
       [-0.9272164 , -0.92276025, -0.9262004 ],
       [-0.9273118 , -0.9228246 , -0.92629385],
       ...,
       [-0.89292073, -0.89963996, -0.89259446],
       [-0.9121134 , -0.9125787 , -0.91140115],
       [-0.91241574, -0.91278255, -0.91169745]], dtype=float32)

In [76]:
# scaled forecasts for the held-out test rows
test_output = evaluate(model, test_scaled, time_step_ahead)
test_output

array([[-0.90237355, -0.90601254, -0.90185714],
       [-0.8913369 , -0.8985723 , -0.8910426 ],
       [-0.9003129 , -0.9046234 , -0.899838  ],
       [-0.9111506 , -0.9119296 , -0.91045773],
       [-0.9134821 , -0.9135014 , -0.9127423 ],
       [-0.9159648 , -0.9151751 , -0.9151751 ],
       [-0.91139716, -0.9120958 , -0.91069925],
       [-0.90359914, -0.9068388 , -0.9030581 ],
       [-0.9096866 , -0.9109427 , -0.90902317],
       [-0.9188533 , -0.9171223 , -0.91800547],
       [-0.91550314, -0.9148638 , -0.9147227 ],
       [-0.90856445, -0.9101862 , -0.9079236 ]], dtype=float32)

#### invert the scaling of the predictions

In [82]:
# inverse scaling for a forecasted value

def invert_scale(scaler, X, yhat):
    """Map one scaled forecast back to the original units.

    The input values `X` and the forecast `yhat` are joined into a single
    row, the row is passed through `scaler.inverse_transform`, and only the
    trailing forecast part of the result is returned.

    Args:
        scaler: object exposing `inverse_transform(2-D array)`.
        X: scaled input values of the sample.
        yhat: scaled forecast values for the sample.

    Returns:
        1-D array with the unscaled values for the `len(yhat)` last columns.
    """
    n_out = len(yhat)
    # rebuild the full row (inputs followed by forecast) as a 1 x width matrix
    joined = np.array(list(X) + list(yhat))
    row = joined.reshape(1, len(joined))
    unscaled = scaler.inverse_transform(row)
    return unscaled[0, -n_out:]

def invert_scale_prediction(scaler, scaled_set, scaled_output):
    """Map a batch of scaled forecasts back to the original units.

    Each forecast row is re-joined with the input columns of the matching row
    in `scaled_set`, the joined rows are passed through
    `scaler.inverse_transform`, and only the forecast columns are kept.

    The forecast horizon is taken from the width of `scaled_output`, so the
    function does not depend on a notebook-level `time_step_ahead` variable.

    Args:
        scaler: object exposing `inverse_transform(2-D array)`; it is given
            rows as wide as those of `scaled_set`.
        scaled_set: 2-D array of scaled rows (inputs followed by targets).
        scaled_output: 2-D array of scaled forecasts, one row per sample, in
            the same order as the rows of `scaled_set`.

    Returns:
        A list with one 1-D array of unscaled forecast values per sample.
    """
    outputs = np.asarray(scaled_output)
    if outputs.size == 0:
        return []

    # the number of forecast columns equals the forecast horizon
    n_out = outputs.shape[1]
    inputs = np.asarray(scaled_set)[:len(outputs), :-n_out]

    # rebuild full-width rows so the scaler sees the layout it is given
    # elsewhere, then invert all rows in a single call
    inverted = scaler.inverse_transform(np.hstack([inputs, outputs]))

    return list(inverted[:, -n_out:])

In [85]:
# training forecasts converted back to the original units
train_unscaled_output = invert_scale_prediction(scaler, train_scaled, train_output)
train_unscaled_output

[array([22.26513466, 22.84762874, 22.48151988]),
 array([20.72280401, 21.80787957, 20.97020507]),
 array([20.69958204, 21.79220474, 20.94744754]),
 array([21.88343805, 22.59031481, 22.10750103]),
 array([21.52494889, 22.34863216, 21.75623971]),
 array([20.84680933, 21.89147866, 21.09171402]),
 array([22.13147771, 22.7575275 , 22.35054797]),
 array([24.28995979, 24.21264514, 24.46560496]),
 array([24.63483506, 24.4451406 , 24.80354267]),
 array([23.86946797, 23.92917746, 24.05358917]),
 array([22.30192697, 22.87243271, 22.51757199]),
 array([21.47653109, 22.31600529, 21.70877981]),
 array([22.11789286, 22.74836934, 22.33722436]),
 array([21.56178474, 22.37345064, 21.79230633]),
 array([21.40293196, 22.26636833, 21.63666108]),
 array([21.69740105, 22.46488714, 21.92520857]),
 array([20.61235452, 21.73342413, 20.86199069]),
 array([21.40293196, 22.26636833, 21.63666108]),
 array([21.59859157, 22.39826912, 21.82837296]),
 array([20.77319568, 21.8418417 , 21.0195953 ]),
 array([21.21690947,

In [83]:
# test forecasts converted back to the original units
test_unscaled_output = invert_scale_prediction(scaler, test_scaled, test_output)
test_unscaled_output

[array([26.77204001, 25.8859477 , 26.89778697]),
 array([29.4594605 , 27.69763869, 29.5311293 ]),
 array([27.27380872, 26.22420472, 27.38945413]),
 array([24.63483506, 24.4451406 , 24.80354267]),
 array([24.06710145, 24.06241351, 24.24724588]),
 array([23.46257553, 23.65486795, 23.65486795]),
 array([24.57479176, 24.40467632, 24.74473304]),
 array([26.47360867, 25.68475837, 26.60534981]),
 array([24.99130681, 24.68545896, 25.15285915]),
 array([22.75922561, 23.18071887, 22.96566892]),
 array([23.57498437, 23.73065865, 23.76502717]),
 array([25.26455683, 24.86966723, 25.42060846])]

#### calculate root mean squared error

In [70]:
def calculate_rmse(original, prediction, time_step_ahead):
    """Root mean squared error between true targets and forecasts.

    Args:
        original: 2-D array of unscaled rows; its last `time_step_ahead`
            columns are taken as the true target values.
        prediction: forecasts, one entry per row of `original`.
        time_step_ahead: number of trailing target columns in `original`.

    Returns:
        The square root of `mean_squared_error(targets, prediction)`.
    """
    actual = original[:, -time_step_ahead:]
    mse = mean_squared_error(actual, prediction)
    return sqrt(mse)

In [87]:
# error on the training rows, measured in the original units
train_rmse = calculate_rmse(train, train_unscaled_output, time_step_ahead)

print('Train RMSE: %.3f' % (train_rmse))

Train RMSE: 25.781


In [71]:
# error on the held-out test rows, measured in the original units
test_rmse = calculate_rmse(test, test_unscaled_output, time_step_ahead)

print('Test RMSE: %.3f' % (test_rmse))

Test RMSE: 9.829


#### vary the time step lag

In [126]:
# config

# candidate numbers of lagged inputs to compare (1 .. 10)
time_step_lag_array = np.arange(1,11)

# forecast horizon
time_step_ahead = 7

# number of trailing supervised rows held out for testing
num_of_test_set = 12

# training hyper-parameters
batch_size = 4
epochs = 1000
neurons = 3

# one RMSE per lag value, in the order of time_step_lag_array
train_rmse_array = []
test_rmse_array = []

for time_step_lag in time_step_lag_array:

    # transform data to NN input format
    wt_supervised = timeseries_to_supervised(wt_seq, time_step_lag, time_step_ahead)

    # split train and test set, using the configured test-set size
    # (it was configured above but previously never passed on)
    train, test = split_train_and_test_set(wt_supervised, num_of_test_set)

    # scale dataset
    scaler, train_scaled, test_scaled = scale(train, test)

    # fit model
    model = fit_model(train_scaled, batch_size, epochs, neurons, time_step_ahead)

    # evaluate train set
    train_output = evaluate(model, train_scaled, time_step_ahead)
    train_unscaled_output = invert_scale_prediction(scaler, train_scaled, train_output)
    train_rmse = calculate_rmse(train, train_unscaled_output, time_step_ahead)
    train_rmse_array.append(train_rmse)

    # evaluate test set
    test_output = evaluate(model, test_scaled, time_step_ahead)
    test_unscaled_output = invert_scale_prediction(scaler, test_scaled, test_output)
    test_rmse = calculate_rmse(test, test_unscaled_output, time_step_ahead)
    test_rmse_array.append(test_rmse)

    print('%d) TrainRMSE=%f, TestRMSE=%f' % (time_step_lag, train_rmse, test_rmse))



1) TrainRMSE=26.161804, TestRMSE=10.178293
2) TrainRMSE=26.132555, TestRMSE=9.496669
3) TrainRMSE=26.041093, TestRMSE=9.710305
4) TrainRMSE=26.237946, TestRMSE=9.480339
5) TrainRMSE=26.086891, TestRMSE=9.484171
6) TrainRMSE=26.185026, TestRMSE=9.433415
7) TrainRMSE=26.122662, TestRMSE=9.577219
8) TrainRMSE=26.318843, TestRMSE=10.099701
9) TrainRMSE=26.311023, TestRMSE=10.053955
10) TrainRMSE=26.234502, TestRMSE=9.435672


#### plot RMSE

In [127]:
%matplotlib notebook

# the lag values are plotted on the x axis and the RMSE on the y axis,
# so the axis labels must follow that order
plt.xlabel('time step lag')
plt.ylabel('RMSE')
plt.plot(time_step_lag_array, train_rmse_array, '-', linewidth=1, color='orange', label='train RMSE')
plt.plot(time_step_lag_array, test_rmse_array, '-', linewidth=1, color='blue', label='test RMSE')
plt.legend(loc='right')
plt.show()

<IPython.core.display.Javascript object>