# Examining the Effects of Additive Noise on LGBM Forecast Accuracy

***

In [1]:
# general modules
import pandas as pd
import numpy as np
import lightgbm as lgb

##### the `helper_functions.py` file contains many custom functions we wrote to aid in our analysis
##### `full_coding_analysis` combines all of the following steps: train-test split of the data,
##### data protection, model training, accuracy comparison, and returning the accuracy results
from helper_functions import *

In [2]:
# Load the cleaned weekly finance time series.
# The first line of the CSV is skipped (skip_header=1) and the remaining
# comma-separated values are parsed by NumPy before being wrapped in a DataFrame.
Y = pd.DataFrame(
    np.genfromtxt(
        "../../Data/Train/Clean/weekly_finance_clean.csv",
        delimiter=',',
        skip_header=1,
    )
)

***

## Simple Model (window length = 10)

In [3]:
# Settings for the "simple" model section.
# window_length is passed straight to full_coding_analysis below
# (presumably the number of lagged observations per training window -- confirm in helper_functions).
window_length = 10
# LightGBM regressor with all-default hyperparameters.
forecaster = lgb.LGBMRegressor()

In [4]:
# Empty containers for the window-length-10 run. Each receives one of the four
# values returned by full_coding_analysis, stored under a shared
# "h=<horizon>, <n> stan. devs" label. Four separate dict objects are created.
# Note: `tests` carries no window-length suffix and is re-created by the later sections.
results_dict_10, fcasts_10, fcasts_protected_10, tests = {}, {}, {}, {}

# Experiment grid: values passed as `num_stdev` and `forecast_horizon`.
num_stdevs = [1, 2]
horizons = [1, 20]

In [5]:
# Run the analysis for every (num_stdev, horizon) combination with window length 10.
# full_coding_analysis (from helper_functions) returns four values; all four are
# filed under the same label so they can be looked up together afterwards.
for n in num_stdevs:
    for h in horizons:
        idx = f"h={h}, {n} stan. devs"
        (
            results_dict_10[idx],
            tests[idx],
            fcasts_10[idx],
            fcasts_protected_10[idx],
        ) = full_coding_analysis(
            time_series_data=Y,
            forecasting_model=forecaster,
            forecast_horizon=h,
            num_stdev=n,
            window_length=window_length,
        )

In [6]:
# Display the window-length-10 results, keyed by "h=<horizon>, <n> stan. devs".
results_dict_10

{'h=1, 1 stan. devs': {'Mean Accuracies': array([3.12, 3.12]),
  'Protected Mean Accuracies:': array([13.81, 13.81]),
  '% Change Mean accuracy:': array([-342.44, -342.44]),
  '% Change Median accuracy:': array([-255.15, -255.15]),
  '% Forecasted Points adjusted downward:': 53.66,
  '% Forecasted Points adjusted upward:': 46.339999999999996,
  '% Series with improved accuracy:': array([21.34, 21.34]),
  '% Series with reduced accuracy:': array([78.66, 78.66]),
  'Original Mean Absolute Error Upward Adjusted:': 4.42,
  'Original Mean Absolute Error Downward Adjusted:': 2.0,
  'Protected Mean Absolute Error Upward Adjusted:': 23.23,
  'Protected Mean Absolute Error Downward Adjusted:': 5.67},
 'h=20, 1 stan. devs': {'Mean Accuracies': array([5.1 , 6.08]),
  'Protected Mean Accuracies:': array([14.86, 15.72]),
  '% Change Mean accuracy:': array([-191.03, -158.34]),
  '% Change Median accuracy:': array([-53.85, -49.09]),
  '% Forecasted Points adjusted downward:': 54.48,
  '% Forecasted P

***
***

## 'Medium' Model (window length = 20)

In [7]:
# Settings for the "medium" model section.
# window_length is passed straight to full_coding_analysis below
# (presumably the number of lagged observations per training window -- confirm in helper_functions).
window_length = 20
# New LightGBM regressor instance with all-default hyperparameters.
forecaster = lgb.LGBMRegressor()

In [8]:
# Empty containers for the window-length-20 run. Each receives one of the four
# values returned by full_coding_analysis, stored under a shared
# "h=<horizon>, <n> stan. devs" label. Four separate dict objects are created.
# NOTE(review): `tests` has no window-length suffix, so rebinding it here drops
# the entries stored by the window-length-10 section -- confirm that is intended.
results_dict_20, fcasts_20, fcasts_protected_20, tests = {}, {}, {}, {}

# Experiment grid: values passed as `num_stdev` and `forecast_horizon`.
num_stdevs = [1, 2]
horizons = [1, 20]

In [9]:
# Run the analysis for every (num_stdev, horizon) combination with window length 20.
# full_coding_analysis (from helper_functions) returns four values; all four are
# filed under the same label so they can be looked up together afterwards.
for n in num_stdevs:
    for h in horizons:
        idx = f"h={h}, {n} stan. devs"
        (
            results_dict_20[idx],
            tests[idx],
            fcasts_20[idx],
            fcasts_protected_20[idx],
        ) = full_coding_analysis(
            time_series_data=Y,
            forecasting_model=forecaster,
            forecast_horizon=h,
            num_stdev=n,
            window_length=window_length,
        )

In [10]:
# Display the window-length-20 results, keyed by "h=<horizon>, <n> stan. devs".
results_dict_20

{'h=1, 1 stan. devs': {'Mean Accuracies': array([3.05, 3.05]),
  'Protected Mean Accuracies:': array([12.02, 12.02]),
  '% Change Mean accuracy:': array([-294., -294.]),
  '% Change Median accuracy:': array([-208.06, -208.06]),
  '% Forecasted Points adjusted downward:': 48.17,
  '% Forecasted Points adjusted upward:': 51.83,
  '% Series with improved accuracy:': array([21.95, 21.95]),
  '% Series with reduced accuracy:': array([78.05, 78.05]),
  'Original Mean Absolute Error Upward Adjusted:': 4.07,
  'Original Mean Absolute Error Downward Adjusted:': 1.96,
  'Protected Mean Absolute Error Upward Adjusted:': 18.66,
  'Protected Mean Absolute Error Downward Adjusted:': 4.88},
 'h=20, 1 stan. devs': {'Mean Accuracies': array([5.49, 6.49]),
  'Protected Mean Accuracies:': array([14.07, 14.92]),
  '% Change Mean accuracy:': array([-156.6 , -129.85]),
  '% Change Median accuracy:': array([-55.79, -53.  ]),
  '% Forecasted Points adjusted downward:': 48.78,
  '% Forecasted Points adjusted u

***
***

## Complex Model (window length = 40)

In [11]:
# Settings for the "complex" model section.
# window_length is passed straight to full_coding_analysis below
# (presumably the number of lagged observations per training window -- confirm in helper_functions).
window_length = 40
# New LightGBM regressor instance with all-default hyperparameters.
forecaster = lgb.LGBMRegressor()

In [12]:
# Empty containers for the window-length-40 run. Each receives one of the four
# values returned by full_coding_analysis, stored under a shared
# "h=<horizon>, <n> stan. devs" label. Four separate dict objects are created.
# NOTE(review): `tests` has no window-length suffix, so rebinding it here drops
# the entries stored by the window-length-20 section -- confirm that is intended.
results_dict_40, fcasts_40, fcasts_protected_40, tests = {}, {}, {}, {}

# Experiment grid: values passed as `num_stdev` and `forecast_horizon`.
num_stdevs = [1, 2]
horizons = [1, 20]

In [13]:
# Run the analysis for every (num_stdev, horizon) combination with window length 40.
# full_coding_analysis (from helper_functions) returns four values; all four are
# filed under the same label so they can be looked up together afterwards.
for n in num_stdevs:
    for h in horizons:
        idx = f"h={h}, {n} stan. devs"
        (
            results_dict_40[idx],
            tests[idx],
            fcasts_40[idx],
            fcasts_protected_40[idx],
        ) = full_coding_analysis(
            time_series_data=Y,
            forecasting_model=forecaster,
            forecast_horizon=h,
            num_stdev=n,
            window_length=window_length,
        )

In [14]:
# Display the window-length-40 results, keyed by "h=<horizon>, <n> stan. devs".
results_dict_40

{'h=1, 1 stan. devs': {'Mean Accuracies': array([2.79, 2.79]),
  'Protected Mean Accuracies:': array([12.13, 12.13]),
  '% Change Mean accuracy:': array([-334.95, -334.95]),
  '% Change Median accuracy:': array([-157.41, -157.41]),
  '% Forecasted Points adjusted downward:': 48.78,
  '% Forecasted Points adjusted upward:': 51.22,
  '% Series with improved accuracy:': array([21.34, 21.34]),
  '% Series with reduced accuracy:': array([78.66, 78.66]),
  'Original Mean Absolute Error Upward Adjusted:': 3.29,
  'Original Mean Absolute Error Downward Adjusted:': 2.26,
  'Protected Mean Absolute Error Upward Adjusted:': 17.76,
  'Protected Mean Absolute Error Downward Adjusted:': 6.21},
 'h=20, 1 stan. devs': {'Mean Accuracies': array([7.01, 8.01]),
  'Protected Mean Accuracies:': array([15.05, 15.94]),
  '% Change Mean accuracy:': array([-114.77,  -98.96]),
  '% Change Median accuracy:': array([-71.45, -54.3 ]),
  '% Forecasted Points adjusted downward:': 50.849999999999994,
  '% Forecasted 