# RNN-LSTM

In [1]:
import warnings
warnings.simplefilter('ignore')

In [2]:
# libraries
from pathlib import Path

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from darts.timeseries import TimeSeries
from darts.utils.timeseries_generation import datetime_attribute_timeseries
from darts.dataprocessing.transformers import Scaler
from darts.models import RNNModel

In [3]:
# Load the historical frame and the future frame.
# Both files use their first column (dates, YYYY-MM-DD) as a parsed index.
csv_options = {"index_col": 0, "parse_dates": True}
df = pd.read_csv('../../_data/nyc_data.csv', **csv_options)
future_df = pd.read_csv('../../_data/future.csv', **csv_options)

In [4]:
# Peek at the first five rows
df.head(n=5)

Unnamed: 0_level_0,Demand,Easter,Thanksgiving,Christmas,Temperature,Marketing
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2015-01-01,720.000885,0,0,0,3.68,41.305
2015-01-02,581.276773,0,0,0,4.73,131.574
2015-01-03,754.117039,0,0,0,7.23,162.7
2015-01-04,622.252774,0,0,0,10.96,160.281
2015-01-05,785.373319,0,0,0,6.92,51.077


In [5]:
# Regressors: every column after the first one, selected by position in both frames
all_rows, after_first_column = slice(None), slice(1, None)
X_train = df.iloc[all_rows, after_first_column]
X_future = future_df.iloc[all_rows, after_first_column]

In [6]:
# Append the future regressors below the historical ones (row-wise)
X = pd.concat(objs=[X_train, X_future], axis=0)

In [7]:
# Rename the 'Demand' column to 'y' and preview the result
df = df.rename({'Demand': 'y'}, axis='columns')
df.head(n=5)

Unnamed: 0_level_0,y,Easter,Thanksgiving,Christmas,Temperature,Marketing
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2015-01-01,720.000885,0,0,0,3.68,41.305
2015-01-02,581.276773,0,0,0,4.73,131.574
2015-01-03,754.117039,0,0,0,7.23,162.7
2015-01-04,622.252774,0,0,0,10.96,160.281
2015-01-05,785.373319,0,0,0,6.92,51.077


## Prepare for LSTM

In [8]:
# Wrap the regressor frame and the target column as darts TimeSeries objects
covariates = TimeSeries.from_dataframe(X)
series = TimeSeries.from_series(df['y'])

In [9]:
# Time index starting at the first timestamp of the target series,
# with the series' frequency and one entry per row of X
calendar_index = pd.date_range(start=series.start_time(),
                               periods=len(X),
                               freq=series.freq_str)

# year (single component, not one-hot encoded)
year_series = datetime_attribute_timeseries(calendar_index,
                                            attribute="year",
                                            one_hot=False)

# month and weekday, both one-hot encoded on the time axis of year_series
month_series, weekday_series = (
    datetime_attribute_timeseries(year_series,
                                  attribute=calendar_part,
                                  one_hot=True)
    for calendar_part in ("month", "weekday")
)

In [10]:
# Two independent scalers: transformer1 is fitted on the target,
# transformer2 on the covariates
transformer1, transformer2 = Scaler(), Scaler()

In [11]:
# Fit the target scaler on the series, then apply it to that same series
y_transformed = transformer1.fit(series).transform(series)

In [12]:
# Scale the covariates
# Rebuild the raw covariates from X before stacking the year, so that
# re-running this cell cannot stack the year component a second time onto
# an already-extended series.
covariates = TimeSeries.from_dataframe(X).stack(year_series)

# Only the regressors and the year go through the scaler; the one-hot
# month and weekday components are appended afterwards, unscaled.
covariates_transformed = (
    transformer2.fit_transform(covariates)
    .stack(month_series)
    .stack(weekday_series)
)

## LSTM

In [13]:
# Read the tuned hyper-parameters; the first CSV column becomes the index
best_params_file = "../../02 Parameter Tuning/04 RNN-LSTM/out/best_params_lstm.csv"
parameters = pd.read_csv(best_params_file, index_col=0)
parameters

Unnamed: 0,14
dropout,0.2
hidden_dim,30.0
input_chunk_length,20.0
lr,0.003
n_epochs,20.0
n_rnn_layers,4.0
training_length,20.0
rmse,83.370327


In [14]:
# getting the parameters
# The values live in the first (only) data column, whose label is not fixed
# (here "14"), so it is selected by position with .iloc. The previous
# `parameters.loc[name][0]` relied on integer keys falling back to positional
# access on a label-indexed Series, which pandas 2.x deprecates.
best_params = parameters.iloc[:, 0]

# The column is read as floats; cast the integer-valued settings back to int
n_rnn_layers = int(best_params["n_rnn_layers"])
dropout = float(best_params["dropout"])
hidden_dim = int(best_params["hidden_dim"])
input_chunk_length = int(best_params["input_chunk_length"])
lr = float(best_params["lr"])
n_epochs = int(best_params["n_epochs"])
training_length = int(best_params["training_length"])

In [15]:
# model
# https://unit8co.github.io/darts/generated_api/darts.models.forecasting.rnn_model.html
# All tunable settings come from the best-parameter values unpacked earlier.
# No pl_trainer_kwargs are passed, so accelerator selection is left to the
# library defaults.
lstm_settings = dict(
    model="LSTM",
    input_chunk_length=input_chunk_length,
    training_length=training_length,
    hidden_dim=hidden_dim,
    n_rnn_layers=n_rnn_layers,
    dropout=dropout,
    n_epochs=n_epochs,
    optimizer_kwargs={"lr": lr},
    random_state=1502,
    save_checkpoints=True,
)
model = RNNModel(**lstm_settings)

In [16]:
# Train on the scaled target, passing the scaled covariates as future covariates
model.fit(series=y_transformed,
          future_covariates=covariates_transformed)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name          | Type             | Params
---------------------------------------------------
0 | criterion     | MSELoss          | 0     
1 | train_metrics | MetricCollection | 0     
2 | val_metrics   | MetricCollection | 0     
3 | rnn           | LSTM             | 29.3 K
4 | V             | Linear           | 31    
---------------------------------------------------
29.3 K    Trainable params
0         Non-trainable params
29.3 K    Total params
0.117     Total estimated model params size (MB)


Training: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=20` reached.


RNNModel(model=LSTM, hidden_dim=30, n_rnn_layers=4, dropout=0.2, training_length=20, n_epochs=20, optimizer_kwargs={'lr': 0.003}, random_state=1502, input_chunk_length=20, save_checkpoints=True)

## Predictions and exporting

In [17]:
# Forecast one step per row of future_df, undo the target scaling with the
# scaler fitted on y, and expose the result as a pandas Series named "lstm"
scaled_forecast = model.predict(n=future_df.shape[0],
                                future_covariates=covariates_transformed)
forecast = transformer1.inverse_transform(scaled_forecast)
predictions_lstm = forecast.pd_series().rename("lstm")
predictions_lstm

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: |          | 0/? [00:00<?, ?it/s]

Date
2021-01-01    733.688567
2021-01-02    897.281791
2021-01-03    776.013498
2021-01-04    938.918450
2021-01-05    784.304921
2021-01-06    963.589519
2021-01-07    813.523698
2021-01-08    735.777150
2021-01-09    898.408081
2021-01-10    783.278617
2021-01-11    965.408038
2021-01-12    795.603561
2021-01-13    953.314840
2021-01-14    838.851014
2021-01-15    745.372722
2021-01-16    911.881469
2021-01-17    786.136446
2021-01-18    958.720033
2021-01-19    793.456736
2021-01-20    993.105936
2021-01-21    846.086239
2021-01-22    751.486156
2021-01-23    929.068547
2021-01-24    804.286034
2021-01-25    977.522961
2021-01-26    809.304702
2021-01-27    979.126654
2021-01-28    848.986023
2021-01-29    758.329960
2021-01-30    927.579268
2021-01-31    800.680342
Freq: D, Name: lstm, dtype: float64

In [18]:
# exporting
# Create the output folder first: to_csv does not create missing directories
# and would otherwise fail on a fresh checkout without ./out
output_path = Path("./out/predictions_lstm.csv")
output_path.parent.mkdir(parents=True, exist_ok=True)
predictions_lstm.to_csv(output_path)