# Examining the Effects of Additive Noise on LGBM Forecast Accuracy

***

In [1]:
# general modules
import pandas as pd
import numpy as np
import lightgbm as lgb

# `helper_functions.py` contains the custom functions we wrote to aid our analysis.
# `full_coding_analysis` runs the whole pipeline in one call: train-test split,
# data protection, model training, accuracy comparison, and returning the accuracy results.
from helper_functions import *

In [2]:
# Load the cleaned weekly finance series. The CSV is parsed as a plain numeric
# array (its first line is a header and is skipped), then wrapped in a DataFrame
# with default integer row/column labels.
Y = pd.DataFrame(
    np.genfromtxt(
        "../../Data/Train/Clean/weekly_finance_clean.csv",
        delimiter=",",
        skip_header=1,
    )
)

***

In [3]:
# Shared settings for every experiment below.
# `window_length` is forwarded unchanged to `full_coding_analysis`
# (presumably the number of lagged observations per training window — confirm
# against helper_functions.py).
window_length = 10

# LightGBM regressor constructed with the library's default hyperparameters.
forecaster = lgb.LGBMRegressor()

In [4]:
# Experiment grid for the sweep: every value of `num_stdevs` is paired with
# every value of `horizons`.
horizons = [1, 5, 15]  # forecast horizons passed as `forecast_horizon`
num_stdevs = [1, 2]  # values passed as `num_stdev`

# Collects one result dictionary per (horizon, num_stdev) combination.
results_dict_10 = dict()

In [5]:
# One-off run at forecast horizon 10 with num_stdev=1. The returned summary is
# displayed as the cell output rather than stored; horizon 10 is not part of
# the `horizons` grid used by the sweep.
full_coding_analysis(
    time_series_data=Y,
    forecasting_model=forecaster,
    forecast_horizon=10,
    num_stdev=1,
    window_length=window_length,
)

{'% of forecasted points adjusted downward:': 53.900000000000006,
 '% of forecasted points adjusted upward:': 46.1,
 '% Series with improved accuracy:': array([20.1, 18.9]),
 '% Series with worsened accuracy:': array([79.9, 81.1]),
 '% Series with unchanged accuracy:': array([0., 0.]),
 '% Change mean global accuracy:': array([-178.2, -158. ]),
 '% Change median global accuracy:': array([-52.9, -53. ])}

In [6]:
# Run the full analysis for every (num_stdev, horizon) combination and store
# each result under a human-readable key such as "h=5, 1 stan. devs".
for n in num_stdevs:
    for h in horizons:
        run_label = "h=" + str(h) + ", " + str(n) + " stan. devs"
        results_dict_10[run_label] = full_coding_analysis(
            time_series_data=Y,
            forecasting_model=forecaster,
            forecast_horizon=h,
            num_stdev=n,
            window_length=window_length,
        )

In [7]:
# Bare expression on the last line so the notebook renders the collected
# sweep results as this cell's output.
results_dict_10

{'h=1, 1 stan. devs': {'% of forecasted points adjusted downward:': 55.50000000000001,
  '% of forecasted points adjusted upward:': 44.5,
  '% Series with improved accuracy:': array([18.3, 18.3]),
  '% Series with worsened accuracy:': array([81.7, 81.7]),
  '% Series with unchanged accuracy:': array([0., 0.]),
  '% Change mean global accuracy:': array([-328.9, -328.9]),
  '% Change median global accuracy:': array([-214.8, -214.8])},
 'h=5, 1 stan. devs': {'% of forecasted points adjusted downward:': 54.800000000000004,
  '% of forecasted points adjusted upward:': 45.2,
  '% Series with improved accuracy:': array([18.3, 16.5]),
  '% Series with worsened accuracy:': array([81.7, 83.5]),
  '% Series with unchanged accuracy:': array([0., 0.]),
  '% Change mean global accuracy:': array([-273.8, -245.5]),
  '% Change median global accuracy:': array([-176.7, -156.2])},
 'h=15, 1 stan. devs': {'% of forecasted points adjusted downward:': 48.3,
  '% of forecasted points adjusted upward:': 51.7,