# Experimental Code with RNN (LSTM)

***

### Importing Modules and Data

In [1]:
from darts import TimeSeries
import pandas as pd
import numpy as np

from darts.models.forecasting.rnn_model import RNNModel
from sktime.performance_metrics.forecasting import mean_absolute_error

In [2]:
# import monthly micro time series
# ignore header and skip the first row to use integers as column names
full_data = pd.read_csv("../../../Data/Train/Clean/full_m3_monthly_micro_clean.csv", header=None, skiprows=1)

In [3]:
# convert to a list of series, potentially with different lengths
# drop NA values
full_data = [x.dropna() for _, x in full_data.iterrows()]

In [4]:
# forecast horizon
h = 1

In [21]:
# create train and test set using forecast horizon
Y = [x.iloc[:-h] for x in full_data]
Test = [x.iloc[-h:] for x in full_data]
Test = pd.DataFrame([x.reset_index(drop=True) for x in Test]).T

***

### Creating Subsets of Series with the same length and convertime to `TimeSeries` object.

In [6]:
# find all series lengths
ts_lengths = [len(y) for y in full_data]

In [7]:
# find the unique lengths
unique_lengths = np.unique(ts_lengths)

In [35]:
length_clusters = [[i for i in np.where(ts_lengths == unique_lengths[j])[0]] for j in range(len(unique_lengths))]

In [36]:
# define a cluster of series with the same length
cluster_1 = [full_data[i] for i in length_clusters[0]]

In [37]:
# conver to type np.float32 to speed up training
cluster_1 = [x.astype(np.float32) for x in cluster_1]

In [38]:
# concatenate series into dataframe
cluster_1 = pd.concat(cluster_1, axis=1)

In [39]:
# convert dataframe index to a RangeIndex
cluster_1.index = pd.RangeIndex(start=0, stop=cluster_1.shape[0])

In [40]:
# convert dataframe to TimeSeries object
ts_data = TimeSeries.from_dataframe(df=cluster_1)

***

### Model Setup

In [41]:
# RNN parameters
input_chunk_length = 12
model = "LSTM"

In [42]:
# instantiate the model
RNN = RNNModel(input_chunk_length=input_chunk_length,
               model=model)

### Model Training

In [43]:
# fit the model
RNN.fit(series=ts_data)

[2022-05-28 14:51:03,943] INFO | darts.models.forecasting.torch_forecasting_model | Train dataset contains 43 samples.
[2022-05-28 14:51:03,943] INFO | darts.models.forecasting.torch_forecasting_model | Train dataset contains 43 samples.
2022-05-28 14:51:03 darts.models.forecasting.torch_forecasting_model INFO: Train dataset contains 43 samples.
[2022-05-28 14:51:03,950] INFO | darts.models.forecasting.torch_forecasting_model | Time series values are 32-bits; casting model to float32.
[2022-05-28 14:51:03,950] INFO | darts.models.forecasting.torch_forecasting_model | Time series values are 32-bits; casting model to float32.
2022-05-28 14:51:03 darts.models.forecasting.torch_forecasting_model INFO: Time series values are 32-bits; casting model to float32.
2022-05-28 14:51:03 pytorch_lightning.utilities.rank_zero INFO: GPU available: False, used: False
2022-05-28 14:51:03 pytorch_lightning.utilities.rank_zero INFO: TPU available: False, using: 0 TPU cores
2022-05-28 14:51:03 pytorch_ligh

Training: 0it [00:00, ?it/s]

<darts.models.forecasting.rnn_model.RNNModel at 0x22179598280>

### Generate Forecasts

In [44]:
fcasts = RNN.predict(n=h)

Predicting: 2it [00:00, ?it/s]

In [45]:
fcasts = fcasts.pd_dataframe()

In [48]:
cluster_test = Test.iloc[:,length_clusters[0]]

### Assess Forecast Accuracy

In [49]:
mean_absolute_error(cluster_test, fcasts)

2843.4294