# Examining the Effects of Additive Noise on LGBM Forecast Accuracy

***

In [1]:
# general modules
import pandas as pd
import numpy as np
import lightgbm as lgb

##### `helper_functions.py` contains the custom functions we wrote to support this analysis.
##### `full_coding_analysis` runs the whole pipeline in one call: it train-test splits the data,
##### applies data protection, trains the models, compares accuracies, and returns the accuracy results.
from helper_functions import *

In [2]:
# Load the weekly finance time series: parse the comma-separated file
# (skipping its header line) into a numeric array and wrap it in a DataFrame.
# NOTE(review): presumably each row is one series — confirm against the data file.
Y = pd.DataFrame(
    np.genfromtxt(
        "../../Data/Train/Clean/weekly_finance_clean.csv",
        delimiter=",",
        skip_header=1,
    )
)

In [3]:
# Optional preprocessing step, currently DISABLED: every line below is commented
# out, so Y is passed to the models exactly as loaded.
# If re-enabled it would fit-transform a Detrender on each row of Y, shift each
# detrended series upward by |min| + 1.0, and reassemble the series as rows of Y.
# NOTE(review): Detrender is not imported explicitly in this notebook — presumably
# it comes from the `helper_functions` star import; confirm before uncommenting.
# detrender = Detrender()
# detrended_series = [detrender.fit_transform(series) for _ , series in Y.iterrows()]
# detrended_series = [i+np.abs(np.min(i))+1.0 for i in detrended_series]
# Y = pd.concat(detrended_series, axis=1).T

***

## Simple Model (window length = 10)

In [4]:
# Configuration for the simple model: the window length handed to
# `full_coding_analysis` and a LightGBM regressor built with library defaults.
window_length = 10
forecaster = lgb.LGBMRegressor()

In [5]:
# Empty containers for the window-length-10 runs; each one is filled per
# experiment label ("h=<horizon>, <n> stan. devs") by the loop in the next cell.
results_dict_10, tests = {}, {}
fcasts_10, fcasts_protected_10 = {}, {}

# Experiment grid: every horizon is combined with every noise level.
horizons = [1, 20]    # values passed as `forecast_horizon`
num_stdevs = [1, 2]   # values passed as `num_stdev`

In [6]:
# Run the full analysis once per (noise level, horizon) pair and file the four
# returned objects under a shared label such as "h=1, 1 stan. devs".
# NOTE(review): judging by the target names, the four return values are the
# accuracy results, the test data, the original forecasts and the protected
# forecasts — confirm against `full_coding_analysis` in helper_functions.py.
for n in num_stdevs:
    for h in horizons:
        idx = f"h={h}, {n} stan. devs"
        (
            results_dict_10[idx],
            tests[idx],
            fcasts_10[idx],
            fcasts_protected_10[idx],
        ) = full_coding_analysis(
            time_series_data=Y,
            forecasting_model=forecaster,
            forecast_horizon=h,
            num_stdev=n,
            window_length=window_length,
        )

In [7]:
# Display the results collected for the window-length-10 runs,
# one entry per "h=<horizon>, <n> stan. devs" label.
results_dict_10

{'h=1, 1 stan. devs': {'Mean Accuracies': array([0.0312, 0.0312]),
  'Protected Mean Accuracies:': array([0.1432, 0.1432]),
  '% Change Mean accuracy:': array([-3.5877, -3.5877]),
  '% Change Median accuracy:': array([-1.9138, -1.9138]),
  '% Forecasted Points adjusted downward:': 0.4634,
  '% Forecasted Points adjusted upward:': 0.5366,
  '% Series with improved accuracy:': array([0.1707, 0.1707]),
  '% Series with reduced accuracy:': array([0.8293, 0.8293]),
  'Original Mean Absolute Error Upward Adjusted:': 0.0377,
  'Original Mean Absolute Error Downward Adjusted:': 0.0237,
  'Protected Mean Absolute Error Upward Adjusted:': 0.2241,
  'Protected Mean Absolute Error Downward Adjusted:': 0.0495},
 'h=20, 1 stan. devs': {'Mean Accuracies': array([0.051 , 0.0608]),
  'Protected Mean Accuracies:': array([0.1519, 0.1604]),
  '% Change Mean accuracy:': array([-1.9762, -1.6372]),
  '% Change Median accuracy:': array([-0.6108, -0.5434]),
  '% Forecasted Points adjusted downward:': 0.5366,
 

***
***

## 'Medium' Model (window length = 20)

In [8]:
# Configuration for the 'medium' model: the window length handed to
# `full_coding_analysis` and a fresh LightGBM regressor built with library defaults.
window_length = 20
forecaster = lgb.LGBMRegressor()

In [9]:
# Empty containers for the window-length-20 runs; each one is filled per
# experiment label ("h=<horizon>, <n> stan. devs") by the loop in the next cell.
# Note that `tests` is rebound to a new empty dict here.
results_dict_20, tests = {}, {}
fcasts_20, fcasts_protected_20 = {}, {}

# Experiment grid: every horizon is combined with every noise level.
horizons = [1, 20]    # values passed as `forecast_horizon`
num_stdevs = [1, 2]   # values passed as `num_stdev`

In [10]:
# Run the full analysis once per (noise level, horizon) pair and file the four
# returned objects under a shared label such as "h=1, 1 stan. devs".
# NOTE(review): judging by the target names, the four return values are the
# accuracy results, the test data, the original forecasts and the protected
# forecasts — confirm against `full_coding_analysis` in helper_functions.py.
for n in num_stdevs:
    for h in horizons:
        idx = f"h={h}, {n} stan. devs"
        (
            results_dict_20[idx],
            tests[idx],
            fcasts_20[idx],
            fcasts_protected_20[idx],
        ) = full_coding_analysis(
            time_series_data=Y,
            forecasting_model=forecaster,
            forecast_horizon=h,
            num_stdev=n,
            window_length=window_length,
        )

In [11]:
# Display the results collected for the window-length-20 runs,
# one entry per "h=<horizon>, <n> stan. devs" label.
results_dict_20

{'h=1, 1 stan. devs': {'Mean Accuracies': array([0.0305, 0.0305]),
  'Protected Mean Accuracies:': array([0.1374, 0.1374]),
  '% Change Mean accuracy:': array([-3.502, -3.502]),
  '% Change Median accuracy:': array([-2.1314, -2.1314]),
  '% Forecasted Points adjusted downward:': 0.5488,
  '% Forecasted Points adjusted upward:': 0.4512,
  '% Series with improved accuracy:': array([0.1829, 0.1829]),
  '% Series with reduced accuracy:': array([0.8171, 0.8171]),
  'Original Mean Absolute Error Upward Adjusted:': 0.0419,
  'Original Mean Absolute Error Downward Adjusted:': 0.0212,
  'Protected Mean Absolute Error Upward Adjusted:': 0.2362,
  'Protected Mean Absolute Error Downward Adjusted:': 0.0561},
 'h=20, 1 stan. devs': {'Mean Accuracies': array([0.0549, 0.0649]),
  'Protected Mean Accuracies:': array([0.1548, 0.1629]),
  '% Change Mean accuracy:': array([-1.8221, -1.5102]),
  '% Change Median accuracy:': array([-0.6846, -0.6642]),
  '% Forecasted Points adjusted downward:': 0.4796,
  '

***
***

## Complex Model (window length = 40)

In [12]:
# Configuration for the complex model: the window length handed to
# `full_coding_analysis` and a fresh LightGBM regressor built with library defaults.
window_length = 40
forecaster = lgb.LGBMRegressor()

In [13]:
# Empty containers for the window-length-40 runs; each one is filled per
# experiment label ("h=<horizon>, <n> stan. devs") by the loop in the next cell.
# Note that `tests` is rebound to a new empty dict here.
results_dict_40, tests = {}, {}
fcasts_40, fcasts_protected_40 = {}, {}

# Experiment grid: every horizon is combined with every noise level.
horizons = [1, 20]    # values passed as `forecast_horizon`
num_stdevs = [1, 2]   # values passed as `num_stdev`

In [14]:
# Run the full analysis once per (noise level, horizon) pair and file the four
# returned objects under a shared label such as "h=1, 1 stan. devs".
# NOTE(review): judging by the target names, the four return values are the
# accuracy results, the test data, the original forecasts and the protected
# forecasts — confirm against `full_coding_analysis` in helper_functions.py.
for n in num_stdevs:
    for h in horizons:
        idx = f"h={h}, {n} stan. devs"
        (
            results_dict_40[idx],
            tests[idx],
            fcasts_40[idx],
            fcasts_protected_40[idx],
        ) = full_coding_analysis(
            time_series_data=Y,
            forecasting_model=forecaster,
            forecast_horizon=h,
            num_stdev=n,
            window_length=window_length,
        )

In [15]:
# Display the results collected for the window-length-40 runs,
# one entry per "h=<horizon>, <n> stan. devs" label.
results_dict_40

{'h=1, 1 stan. devs': {'Mean Accuracies': array([0.0279, 0.0279]),
  'Protected Mean Accuracies:': array([0.1319, 0.1319]),
  '% Change Mean accuracy:': array([-3.7311, -3.7311]),
  '% Change Median accuracy:': array([-2.2785, -2.2785]),
  '% Forecasted Points adjusted downward:': 0.4939,
  '% Forecasted Points adjusted upward:': 0.5061,
  '% Series with improved accuracy:': array([0.2134, 0.2134]),
  '% Series with reduced accuracy:': array([0.7866, 0.7866]),
  'Original Mean Absolute Error Upward Adjusted:': 0.0353,
  'Original Mean Absolute Error Downward Adjusted:': 0.0203,
  'Protected Mean Absolute Error Upward Adjusted:': 0.2079,
  'Protected Mean Absolute Error Downward Adjusted:': 0.0541},
 'h=20, 1 stan. devs': {'Mean Accuracies': array([0.0701, 0.0801]),
  'Protected Mean Accuracies:': array([0.1527, 0.1616]),
  '% Change Mean accuracy:': array([-1.1799, -1.0175]),
  '% Change Median accuracy:': array([-0.527 , -0.5011]),
  '% Forecasted Points adjusted downward:': 0.5226,
 