# Examining the Effects of Additive Noise on LGBM Forecast Accuracy

***

In [1]:
# general modules
import pandas as pd
import numpy as np
import lightgbm as lgb

##### the `helper_functions.py` file contains many custom functions we wrote to aid in our analysis
##### `full_coding_analysis` combines all of the following - train-test split data,
##### data protection, train models, compare accuracies, return accuracy results
from helper_functions import *

In [2]:
# Load the cleaned weekly finance series: the CSV is parsed with NumPy
# (comma-delimited, first line skipped as the header) and wrapped directly in a
# DataFrame, so `Y` ends up with default integer row/column labels.
Y = pd.DataFrame(
    np.genfromtxt(
        "../../Data/Train/Clean/weekly_finance_clean.csv",
        delimiter=',',
        skip_header=1,
    )
)

***

In [3]:
# Settings shared by every run below.
# `window_length` is forwarded unchanged to `full_coding_analysis`.
window_length = 10

# LightGBM regressor with library-default hyperparameters; the same instance is
# passed to every `full_coding_analysis` call in this notebook.
# NOTE(review): no random seed is set in the visible cells -- if the protection
# step inside `full_coding_analysis` draws random noise, results may differ
# between runs. Confirm against helper_functions.py.
forecaster = lgb.LGBMRegressor()

In [4]:
# Parameter grid for the sweep: each value is passed as `num_stdev` /
# `forecast_horizon` to `full_coding_analysis`.
num_stdevs = [1, 2]
horizons = [1, 5, 15]

# Collects one result per (horizon, num_stdev) combination, keyed by a label.
results_dict_10 = dict()

In [5]:
# One-off run of the full pipeline (split, protect, train, compare accuracies)
# at forecast horizon 10 with num_stdev=1. The returned accuracy summary is left
# as the last expression so the notebook displays it.
# NOTE(review): horizon 10 is not one of the values in `horizons`; presumably
# this is a standalone check rather than part of the sweep -- confirm.
full_coding_analysis(
    time_series_data=Y,
    forecasting_model=forecaster,
    forecast_horizon=10,
    num_stdev=1,
    window_length=window_length,
)

{'% of forecasted points adjusted downward:': 55.400000000000006,
 '% of forecasted points adjusted upward:': 44.6,
 '% Series with improved accuracy:': array([22.6, 18.9]),
 '% Series with worsened accuracy:': array([77.4, 81.1]),
 '% Series with unchanged accuracy:': array([0., 0.]),
 '% Change mean global accuracy:': array([-187.7, -165.2]),
 '% Change median global accuracy:': array([-69.9, -74.4])}

In [6]:
# Sweep every (num_stdev, horizon) pair -- num_stdev in the outer loop, horizon
# in the inner loop -- and store each accuracy summary under a label of the
# form "h=<horizon>, <num_stdev> stan. devs".
for n in num_stdevs:
    for h in horizons:
        results_dict_10[f"h={h}, {n} stan. devs"] = full_coding_analysis(
            time_series_data=Y,
            forecasting_model=forecaster,
            forecast_horizon=h,
            num_stdev=n,
            window_length=window_length,
        )

In [7]:
# Display the collected results: one entry per (horizon, num_stdev) label.
results_dict_10

{'h=1, 1 stan. devs': {'% of forecasted points adjusted downward:': 52.400000000000006,
  '% of forecasted points adjusted upward:': 47.599999999999994,
  '% Series with improved accuracy:': array([20.7, 20.7]),
  '% Series with worsened accuracy:': array([79.3, 79.3]),
  '% Series with unchanged accuracy:': array([0., 0.]),
  '% Change mean global accuracy:': array([-301.6, -301.6]),
  '% Change median global accuracy:': array([-200.1, -200.1])},
 'h=5, 1 stan. devs': {'% of forecasted points adjusted downward:': 50.0,
  '% of forecasted points adjusted upward:': 50.0,
  '% Series with improved accuracy:': array([14. , 12.2]),
  '% Series with worsened accuracy:': array([86. , 87.8]),
  '% Series with unchanged accuracy:': array([0., 0.]),
  '% Change mean global accuracy:': array([-284. , -254.9]),
  '% Change median global accuracy:': array([-193.1, -166.5])},
 'h=15, 1 stan. devs': {'% of forecasted points adjusted downward:': 50.0,
  '% of forecasted points adjusted upward:': 50.0