In [1]:
import pandas as pd
import numpy as np
# nice time series plots
from sktime.utils.plotting import plot_series

from sktime.transformations.series.detrend import STLTransformer
from sktime.forecasting.trend import STLForecaster

from sktime.performance_metrics.forecasting import mean_absolute_error

from data_processing_functions import *
from forecasting_functions import *

In [2]:
# Load the monthly micro time series.
# header=None combined with skiprows=1 drops the file's first row, so the
# columns end up labelled with integers.
full_data = pd.read_csv(
    "../../Data/Train/Clean/full_m3_monthly_micro_clean.csv",
    header=None,
    skiprows=1,
)

In [3]:
# Turn the frame into a list holding one Series per row; dropping the NA
# values means the series may end up with different lengths.
# NOTE: this rebinds full_data from a DataFrame to a list, so the cell cannot
# be re-run without first re-running the cell that loads the CSV.
full_data = [row.dropna() for _label, row in full_data.iterrows()]

In [4]:
# forecast horizon: the number of trailing observations of each series that
# are held out as the test set and forecast by the model
h = 1

In [5]:
# Split every series using the forecast horizon h:
#   Y    - all but the final h observations (training data)
#   Test - the final h observations, re-indexed from 0 and assembled into a
#          DataFrame that is transposed so each series becomes a column
Y = [series.iloc[:-h] for series in full_data]
Test = pd.DataFrame(
    [series.iloc[-h:].reset_index(drop=True) for series in full_data]
).T

In [6]:
# number of series in the training set
num_series = len(Y)

***

Pre-process the data: take the log and apply mean normalization.

In [7]:
# Pre-process the training series with the log and mean-normalization options
# switched on. Four values come back; only Y_processed is referenced by the
# later cells visible in this notebook.
(
    Y_processed,
    Y_last_window,
    Y_last_window_trend,
    pre_detrend,
) = pre_process(
    ts_data=Y,
    target_forecast_period=h,
    log=True,
    mean_normalize=True,
)

Decompose each series using STL with a seasonal period of 12, then forecast the trend and seasonal components.

In [None]:
# Per-series STL results, appended in the same order as Y_processed.
trend_fcasts = []
seasonal_fcasts = []
remainders = []

for series in Y_processed:
    # NOTE: despite its name, this object is an STLForecaster (seasonal
    # period 12), not a transformer
    transformer = STLForecaster(sp=12)
    transformer.fit(series)
    # h-step-ahead forecasts from the fitted trend and seasonal forecasters
    trend_fcasts.append(transformer.forecaster_trend_.predict(fh=h))
    seasonal_fcasts.append(transformer.forecaster_seasonal_.predict(fh=h))
    # kept so it can be forecast later
    # NOTE(review): this stores resid_ + trend_, not the residual alone -
    # confirm that this is the intended modelling target
    remainders.append(transformer.resid_ + transformer.trend_)

Forecast the remainder using the machine learning method.

In [8]:
# Produce h-step-ahead forecasts with the LGBM_forecast helper.
# NOTE(review): the input here is Y_processed, not the `remainders` list
# built from the STL fit - confirm which series the model is meant to see.
rem_fcasts = LGBM_forecast(
    ts_data=Y_processed,
    h=h,
    lags=28,
    max_samples_per_ts=None,
)

|   iter    |  target   | learni... | num_bo... |
-------------------------------------------------
|  2        | -0.1937   |  0.05783  |  404.2    |
|  6        | -0.1935   |  0.05969  |  211.8    |
|  8        | -0.1932   |  0.04071  |  330.6    |
|  12       | -0.1929   |  0.05634  |  212.1    |
Final Result:  {'target': -0.19285174568790603, 'params': {'learning_rate_': 0.05634224871572112, 'num_boost_rounds_': 212.1407063415852}}


Combine the forecasts for each component to get the final forecast (which still needs to be post-processed).

In [None]:
# Add up the trend, seasonal and ML forecasts series by series; the result
# still has to be post-processed.
fcasts = [
    trend_fcasts[idx] + seasonal_fcasts[idx] + rem_fcasts[idx]
    for idx in range(num_series)
]
# Alternative that leaves out the separate trend forecast:
# fcasts = [seasonal_fcasts[i] + rem_fcasts[i] for i in range(num_series)]

In [9]:
# Post-process the forecasts with the same log / mean_normalize flags that
# were passed to pre_process (presumably mapping them back to the original
# scale - confirm against post_process).
# NOTE(review): rem_fcasts is passed here, so a combined trend + seasonal +
# remainder `fcasts` list computed earlier would be overwritten without ever
# being used - confirm which input is intended.
fcasts = post_process(
    full_ts_data=Y,
    forecasts=rem_fcasts,
    target_forecast_period=h,
    log=True,
    mean_normalize=True,
)

In [10]:
# mean absolute error of the post-processed forecasts against the held-out values
mean_absolute_error(y_true=Test, y_pred=fcasts)

696.5942675429146