# This project is an example of multi-step time-series forecasting using an LSTM

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout
from keras.callbacks import EarlyStopping

## Load and prepare data:

In [None]:
# Read the semicolon-separated file and give the columns short names.
df = pd.read_csv('data/data.csv', sep=';')
df.columns = ["date", "year", "month", "week", "day", "cnt", "average_sum"]

# "average_sum" holds strings with a decimal comma; convert them to floats.
df["average_sum"] = df["average_sum"].apply(
    lambda text: float(text.replace(",", "."))
)

# Index by date and order the rows by the calendar columns.
df = df.set_index("date").sort_values(by=["year", "month", "week", "day"])

## Data normalization:

In [None]:
# Take the 'cnt' column as a float32 column vector of shape (n, 1).
values = df['cnt'].values.reshape(-1, 1).astype('float32')

# Fit the scaler on the training portion only, so the min/max of the held-out
# data do not leak into training. The 0.8 fraction must match the one used
# for the train/test split of `scaled`.
n_fit = int(len(values) * 0.8)
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(values[:n_fit])

# Scale the whole series with the train-fitted parameters; held-out values
# may therefore fall slightly outside [0, 1].
scaled = scaler.transform(values)

## Split dataset into train and test sets:

In [None]:
# No shuffling: the leading 80% of rows form the training set and the
# remaining rows form the test set.
train_size = int(0.8 * len(scaled))
test_size = len(scaled) - train_size

train = scaled[:train_size, :]
test = scaled[train_size:, :]

## Convert timeseries into supervised form:

In [None]:
def create_dataset(dataset, look_back=1):
    """Turn a series into sliding-window inputs and next-step targets.

    Args:
        dataset: 2-D array; only column 0 is read.
        look_back: Number of consecutive rows that make up one input window.

    Returns:
        A tuple ``(data_x, data_y)`` of numpy arrays. Row ``i`` of ``data_x``
        holds ``dataset[i:i + look_back, 0]`` and ``data_y[i]`` holds the
        value right after that window, ``dataset[i + look_back, 0]``. There
        are ``len(dataset) - look_back`` samples; both arrays are empty when
        that count is not positive.
    """
    starts = range(len(dataset) - look_back)
    windows = [dataset[start:start + look_back, 0] for start in starts]
    targets = [dataset[start + look_back, 0] for start in starts]
    return np.array(windows), np.array(targets)

In [None]:
# Cut both splits into 15-step input windows with their next-step targets.
(train_x, train_y), (test_x, test_y) = (
    create_dataset(split, look_back=15) for split in (train, test)
)

In [None]:
# Insert a singleton middle axis so the inputs are 3-D, (samples, 1, look_back):
# each window is presented to the LSTM as one timestep with look_back features.
train_x = train_x[:, np.newaxis, :]
test_x = test_x[:, np.newaxis, :]

## Train LSTM with early stopping:

In [None]:
# Stop training once the training loss has not improved by more than 1e-12
# for 15 epochs in a row.
stop = EarlyStopping(monitor='loss', min_delta=1e-12, patience=15)

# One 128-unit LSTM layer feeding a single-output dense layer.
model = Sequential([
    LSTM(128, input_shape=train_x.shape[1:]),
    Dense(1),
])
model.compile(optimizer='adam', loss='mse', metrics=['mape'])

# The test split is passed as validation data, so 'val_loss' in the history
# is the loss on the test set. Batches are taken in order (no shuffling).
history = model.fit(
    x=train_x,
    y=train_y,
    validation_data=(test_x, test_y),
    epochs=1000,
    batch_size=100,
    shuffle=False,
    callbacks=[stop],
    verbose=1,
)

## Plot train and test loss:

In [None]:
# Per-epoch loss curves: training loss and loss on the validation (test) data.
for history_key, legend_label in (('loss', 'train'), ('val_loss', 'test')):
    plt.plot(history.history[history_key], label=legend_label)
plt.legend()

In [None]:
def mean_absolute_percentage_error(y_true, y_pred):
    """Return the mean absolute percentage error between two value sets.

    Computes ``mean(|y_true - y_pred| / |y_true|) * 100``.

    Args:
        y_true: Reference values; any array-like (list, tuple, numpy array)
            or a scalar.
        y_pred: Predicted values, broadcastable against ``y_true``.

    Returns:
        The error as a percentage (numpy floating scalar).

    Note:
        Entries of ``y_true`` equal to zero are not special-cased: the
        division produces ``inf``/``nan`` for them, which propagates into
        the result.
    """
    # Coerce to arrays so plain Python sequences are accepted too.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

## Convert data back to the original scale and calculate MAPE:

In [None]:
# Predict on the test windows and map the outputs back to the original scale.
preds = scaler.inverse_transform(model.predict(test_x))

# Actual targets on the original scale, as an (n, 1) column.
# NOTE: this rebinds `test`, which previously held the scaled test split;
# re-running earlier cells that read `test` after this one will see these
# values instead.
test = scaler.inverse_transform(test_y.reshape(-1, 1))

# MAPE averaged over the individual test steps.
print(mean_absolute_percentage_error(test, preds))

# MAPE of the totals over the whole test period. ndarray.sum() is used
# instead of the builtin sum(), which would iterate the array row by row
# in Python.
print(mean_absolute_percentage_error(test.sum(), preds.sum()))

## Forecast multiple steps ahead:

In [None]:
# Number of steps to forecast recursively.
horizon = 23

# Shape of a single model input, (1, timesteps, features), taken from the
# test inputs instead of being hard-coded, so it always matches the
# look_back the windows were built with.
window_shape = (1,) + test_x.shape[1:]

future = []
# Seed the recursion with the last test input window.
# NOTE(review): given how create_dataset builds windows, test_x[-1] ends one
# step before the final test observation, so the first forecast lines up
# with test_y[-1] rather than the first unseen step -- confirm this is the
# intended starting point.
data = test_x[-1].reshape(window_shape)

for _ in range(horizon):
    forecast = model.predict(data)
    future.append(forecast[0][0])
    # Slide the window: drop the oldest value and append the new prediction.
    data = np.append(data[0][0][1:], forecast).reshape(window_shape)

# Map the forecasts back to the original scale and flatten to a plain list.
future = scaler.inverse_transform(np.asarray(future).reshape(-1, 1))
future = [x[0] for x in future]