## Code for Implementing Univariate LGBM with Cross Validation

***

In [1]:
from sktime.forecasting.compose import make_reduction
from sktime.performance_metrics.forecasting import mean_absolute_error, MeanAbsoluteError
from sktime.forecasting.model_selection import ForecastingGridSearchCV, ExpandingWindowSplitter
from sktime.forecasting.compose import TransformedTargetForecaster

import lightgbm
import numpy as np
import pandas as pd

from data_protection_functions import *
from data_processing_functions import *
from forecasting_functions import *

***

### Import Data and Create Train/Test

In [2]:
# Load the cleaned M3 monthly micro series (see the file name below).
# header=None together with skiprows=1 discards the file's own header row,
# so pandas labels the columns with default integers.
full_data = pd.read_csv(
    "../../Data/Train/Clean/full_m3_monthly_micro_clean.csv",
    header=None,
    skiprows=1,
)

In [3]:
# Turn every row into its own Series and drop its missing values, so the
# resulting series may have different lengths.
# NOTE: this rebinds full_data from a DataFrame to a list, so the cell cannot be
# re-run without first re-running the load cell (a list has no .iterrows()).
full_data = [row.dropna() for _label, row in full_data.iterrows()]

In [4]:
# forecast horizon: the last h observations of every series are held out as the
# test set, and h is passed to train_and_forecast as horizon_length
h = 1

In [5]:
# Split each series: everything except the final h observations is training
# data (Y); the final h observations are kept aside for evaluation (Test).
Y = [series.iloc[:-h] for series in full_data]
Test = [series.iloc[-h:] for series in full_data]

***

### Apply Data Protection to Generate Protected Dataset

In [6]:
# Label for the protection scheme (passed on to train_and_forecast) and the
# protected copy of the training series.
# NOTE(review): "Top_10" presumably mirrors coding_type="Top" with
# coding_percentage=0.10 -- the two are set independently, keep them in sync.
protection_method = "Top_10"
Y_protected = apply_data_protection(
    Y,
    coding_type="Top",
    coding_percentage=0.10,
)

***

### Pre-Process the Data

In [7]:
# Pre-process the original and the protected training series with the same
# settings (log=True). pre_process returns three objects per dataset; the
# *_last_window_trend outputs are not used in the visible cells of this notebook.
(
    Y_processed,
    Y_last_window,
    Y_last_window_trend,
) = pre_process(ts_data=Y, log=True)

(
    Y_protected_processed,
    Y_protected_last_window,
    Y_protected_last_window_trend,
) = pre_process(ts_data=Y_protected, log=True)

***

### Train Models and Generate Forecasts

In [8]:
# hyperparameter grid handed to train_and_forecast through its param_grid argument
# NOTE(review): window_length is presumably the number of lagged observations used
# as features by the reduction forecaster -- confirm in forecasting_functions
param_grid = {"window_length": [16, 28, 40]}

In [9]:
# Train the Univariate_LGBM model and forecast h steps ahead, once on the
# pre-processed original series and once on the pre-processed protected series.
# Both calls share the same horizon, model name, and parameter grid.
# NOTE(review): protection_method is passed for the original-data run as well --
# confirm what train_and_forecast uses it for.
forecasts_original = train_and_forecast(
    ts_data=Y_processed,
    horizon_length=h,
    forecasting_model="Univariate_LGBM",
    protection_method=protection_method,
    last_window=Y_last_window,
    param_grid=param_grid,
)

forecasts_protected = train_and_forecast(
    ts_data=Y_protected_processed,
    horizon_length=h,
    forecasting_model="Univariate_LGBM",
    protection_method=protection_method,
    last_window=Y_protected_last_window,
    param_grid=param_grid,
)

***

### Post-Process the Forecasts

In [10]:
# Post-process both sets of forecasts (log=True, matching the pre-processing
# call), each against the training series it was produced from.
# NOTE: each call overwrites its own input (forecasts_original /
# forecasts_protected), so re-running this cell without re-running the training
# cell would post-process the forecasts a second time.
forecasts_original = post_process(
    full_ts_data=Y,
    forecasts=forecasts_original,
    log=True,
)

forecasts_protected = post_process(
    full_ts_data=Y_protected,
    forecasts=forecasts_protected,
    log=True,
)

***

### Assess Forecast Accuracy

In [11]:
# display the post-processed forecasts obtained from the original series
forecasts_original

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,464,465,466,467,468,469,470,471,472,473
0,1929.391112,1120.363435,5203.647953,5702.945278,8273.85006,2021.335154,3578.735699,2715.743664,2887.422083,4146.966092,...,2700.014411,5040.123499,5588.182442,7588.825751,4913.260231,8327.800188,5269.137583,4234.61247,8263.804043,2696.70368


In [12]:
# display the post-processed forecasts obtained from the protected series
forecasts_protected

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,464,465,466,467,468,469,470,471,472,473
0,1929.391112,1079.612389,5033.494035,5487.328837,8201.62018,1957.831879,3464.506134,2604.863038,2862.842187,4119.552128,...,2667.021008,4693.228933,5436.169227,7600.619858,4921.280058,8032.924633,5267.3539,4317.476917,8265.535047,2737.599791


In [13]:
# Collect the held-out segments into one DataFrame. Each segment is re-indexed
# from 0 first; after the transpose every column holds one series' held-out values.
# NOTE: this rebinds Test from a list of Series to a DataFrame, so the cell cannot
# be re-run without first re-creating the list in the train/test split cell.
Test = pd.DataFrame(
    [held_out.reset_index(drop=True) for held_out in Test]
).transpose()

In [14]:
# Compare the forecasts from the original and the protected data against the
# held-out test values; the returned dictionary of metrics is the cell output.
# NOTE(review): in the recorded output 'Global MdAE' equals 'Global MAE' (and the
# protected pair likewise) -- verify that forecast_results computes the median
# metric as intended.
forecast_results(
    test_data=Test,
    original_forecasts=forecasts_original,
    protected_forecasts=forecasts_protected,
)

{'Global MAPE': 0.2502,
 'Global Protected MAPE': 0.2513,
 'Global MAE': 705.2435,
 'Global Protected MAE': 717.667,
 'Global MdAE': 705.2435,
 'Global Protected MdAE': 717.667,
 'Original MAE Up': 476.7975,
 'Protected MAE Up': 460.9732,
 'Original MAE Down': 807.7581,
 'Protected MAE Down': 835.1603,
 'Original MdAE Up': 318.2963,
 'Protected MdAE Up': 312.2579,
 'Original MdAE Down': 477.8498,
 'Protected MdAE Down': 487.6453}

***

### All in One Function

In [2]:
# Load the cleaned M3 monthly micro series (see the file name below).
# header=None together with skiprows=1 discards the file's own header row,
# so pandas labels the columns with default integers.
full_data = pd.read_csv(
    "../../Data/Train/Clean/full_m3_monthly_micro_clean.csv",
    header=None,
    skiprows=1,
)

In [3]:
# Turn every row into its own Series and drop its missing values, so the
# resulting series may have different lengths.
# NOTE: this rebinds full_data from a DataFrame to a list, so the cell cannot be
# re-run without first re-running the load cell (a list has no .iterrows()).
full_data = [row.dropna() for _label, row in full_data.iterrows()]

In [4]:
# forecast horizon: the last h observations of every series are held out as the
# test set in the split cell that follows
h = 1

In [5]:
# Split each series: everything except the final h observations is training
# data (Y); the final h observations are kept aside for evaluation (Test).
Y = [series.iloc[:-h] for series in full_data]
Test = [series.iloc[-h:] for series in full_data]

In [6]:
# Label for the protection scheme (passed on to full_forecast_analysis) and the
# protected copy of the training series.
# NOTE(review): "Top_10" presumably mirrors coding_type="Top" with
# coding_percentage=0.10 -- the two are set independently, keep them in sync.
protection_method = "Top_10"
Y_protected = apply_data_protection(
    Y,
    coding_type="Top",
    coding_percentage=0.10,
)

In [7]:
# hyperparameter grid handed to full_forecast_analysis through its param_grid argument
# NOTE(review): window_length is presumably the number of lagged observations used
# as features by the reduction forecaster -- confirm in forecasting_functions
param_grid = {"window_length": [16, 28, 40]}

In [8]:
# Run the whole pipeline in one call on the training series (Y), their protected
# counterpart (Y_protected), and the held-out observations (Test). The returned
# dictionary of accuracy metrics is the cell output.
# The horizon is passed as the notebook-level variable h rather than a literal,
# so it always matches the number of observations held out when Y and Test were
# split; a hard-coded value would silently diverge if h were changed.
full_forecast_analysis(
    Y=Y,
    Y_protected=Y_protected,
    Test=Test,
    h=h,
    protection_method=protection_method,
    forecasting_model="Univariate_LGBM",
    log=True,
    param_grid=param_grid,
)

{'Global MAPE': 0.2502,
 'Global Protected MAPE': 0.2513,
 'Global MAE': 705.2435,
 'Global Protected MAE': 717.667,
 'Global MdAE': 705.2435,
 'Global Protected MdAE': 717.667,
 'Original MAE Up': 476.7975,
 'Protected MAE Up': 460.9732,
 'Original MAE Down': 807.7581,
 'Protected MAE Down': 835.1603,
 'Original MdAE Up': 318.2963,
 'Protected MdAE Up': 312.2579,
 'Original MdAE Down': 477.8498,
 'Protected MdAE Down': 487.6453}