## Examining the Effects of Additive Noise on Exponential Smoothing Forecast Accuracy

***

In [1]:
# general modules
import pandas as pd
import numpy as np
import sktime

# import exponential smoothing forecasting model
from sktime.forecasting.exp_smoothing import ExponentialSmoothing

# functions for transformation+forecasting pipeline
from sktime.forecasting.compose import TransformedTargetForecaster

# time series transformations
from sktime.transformations.series.detrend import ConditionalDeseasonalizer

##### the `helper_functions.py` file contains many custom functions we wrote to aid in our analysis
##### `full_coding_analysis` combines all of the following - train-test split data,
##### data protection, train models, compare accuracies, return accuracy results
from helper_functions import *

# suppress warnings from exponential smoothing model not converging
# NOTE: called without a category/module filter, so this silences *every* warning
# raised in the kernel for the rest of the session, not only convergence warnings
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Load the weekly finance time series: parse the CSV (skipping its single header
# row) into a numeric array and wrap that array in a DataFrame.
Y = pd.DataFrame(
    np.genfromtxt(
        "../../Data/Train/Clean/weekly_finance_clean.csv",
        delimiter=',',
        skip_header=1,
    )
)

***

## SES (Simple Exponential Smoothing)

In [3]:
# SES forecasting pipeline.
# Step 1: additive deseasonalization (period 52), applied conditionally on an
#         autocorrelation test for seasonality.
# Step 2: exponential smoothing with no trend/seasonal arguments and Box-Cox disabled.
forecaster = TransformedTargetForecaster(
    steps=[
        ("deseasonalize", ConditionalDeseasonalizer(sp=52, model="additive")),
        ("forecast", ExponentialSmoothing(use_boxcox=False)),
    ]
)

In [4]:
# Empty containers, keyed later by "h=<horizon>, <n> stan. devs":
# accuracy summaries, held-out test data, original forecasts, protected forecasts.
results_dict_ses, fcasts_ses, fcasts_protected_ses, tests = {}, {}, {}, {}

# Experiment grid: noise levels (in standard deviations) and forecast horizons.
num_stdevs, horizons = [1, 2], [1, 20]

In [5]:
# Run the full analysis for every (noise level, horizon) pair and file the four
# returned pieces under a shared descriptive key.
for n in num_stdevs:
    for h in horizons:
        idx = f"h={h}, {n} stan. devs"
        (
            results_dict_ses[idx],
            tests[idx],
            fcasts_ses[idx],
            fcasts_protected_ses[idx],
        ) = full_coding_analysis(
            time_series_data=Y,
            forecasting_model=forecaster,
            forecast_horizon=h,
            num_stdev=n,
        )

***
***

In [6]:
# For the h=20, 1-standard-deviation run, flag each forecasted point by whether the
# forecast from protected data is above (adjusted up) or below (adjusted down) the
# forecast from the original data. The rows of each boolean frame are chained
# end-to-end into one long Series so it can be used as a mask.
adjusted_up = pd.concat(
    [
        flags
        for _, flags in (
            fcasts_ses['h=20, 1 stan. devs'] < fcasts_protected_ses['h=20, 1 stan. devs']
        ).iterrows()
    ]
)

adjusted_down = pd.concat(
    [
        flags
        for _, flags in (
            fcasts_ses['h=20, 1 stan. devs'] > fcasts_protected_ses['h=20, 1 stan. devs']
        ).iterrows()
    ]
)

In [7]:
# Point-wise absolute error of the original forecasts (h=20, 1 standard deviation),
# with the rows chained end-to-end into one long Series.
absolute_error = pd.concat(
    [
        errors
        for _, errors in (
            tests['h=20, 1 stan. devs'] - fcasts_ses['h=20, 1 stan. devs']
        ).abs().iterrows()
    ]
)

In [8]:
# Point-wise absolute error of the forecasts made from protected data
# (h=20, 1 standard deviation), with the rows chained into one long Series.
protected_absolute_error = pd.concat(
    [
        errors
        for _, errors in (
            tests['h=20, 1 stan. devs'] - fcasts_protected_ses['h=20, 1 stan. devs']
        ).abs().iterrows()
    ]
)

In [9]:
# mean absolute error of the original forecasts at points that were adjusted upward
absolute_error[adjusted_up].mean()

0.058868766632536554

In [10]:
# mean absolute error of the original forecasts at points that were adjusted downward
absolute_error[adjusted_down].mean()

0.0336538758617732

In [11]:
# mean absolute error of the protected forecasts at points that were adjusted upward
protected_absolute_error[adjusted_up].mean()

0.2055825619956616

In [12]:
# mean absolute error of the protected forecasts at points that were adjusted downward
protected_absolute_error[adjusted_down].mean()

0.05334478674245851

***
***

In [13]:
# display the SES result summaries, one entry per "h=<horizon>, <n> stan. devs" key
results_dict_ses

{'h=1, 1 stan. devs': {'Mean Accuracies': array([2.18, 2.18]),
  'Protected Mean Accuracies:': array([13.1, 13.1]),
  '% Change Mean accuracy:': array([-500.5, -500.5]),
  '% Change Median accuracy:': array([-229.24, -229.24]),
  '% Forecasted Points adjusted downward:': 64.63,
  '% Forecasted Points adjusted upward:': 35.370000000000005,
  '% Series with improved accuracy:': array([21.34, 21.34]),
  '% Series with reduced accuracy:': array([78.66, 78.66]),
  'Original Mean Absolute Error Upward Adjusted:': 2.42,
  'Original Mean Absolute Error Downward Adjusted:': 2.0500000000000003,
  'Protected Mean Absolute Error Upward Adjusted:': 28.16,
  'Protected Mean Absolute Error Downward Adjusted:': 4.859999999999999},
 'h=20, 1 stan. devs': {'Mean Accuracies': array([4.4 , 5.37]),
  'Protected Mean Accuracies:': array([11.59, 12.35]),
  '% Change Mean accuracy:': array([-163.27, -129.96]),
  '% Change Median accuracy:': array([-50.52, -41.24]),
  '% Forecasted Points adjusted downward:': 

***
***

## DES (Double Exponential Smoothing)

In [14]:
# DES forecasting pipeline.
# Step 1: additive deseasonalization (period 52), applied conditionally on an
#         autocorrelation test for seasonality.
# Step 2: exponential smoothing with an additive trend and Box-Cox disabled.
forecaster = TransformedTargetForecaster(
    steps=[
        ("deseasonalize", ConditionalDeseasonalizer(sp=52, model="additive")),
        ("forecast", ExponentialSmoothing(trend="additive", use_boxcox=False)),
    ]
)

In [15]:
# Empty containers, keyed later by "h=<horizon>, <n> stan. devs":
# accuracy summaries, held-out test data, original forecasts, protected forecasts.
# Note that `tests` is rebound to a new empty dict here.
results_dict_des, fcasts_des, fcasts_protected_des, tests = {}, {}, {}, {}

# Experiment grid: noise levels (in standard deviations) and forecast horizons.
num_stdevs, horizons = [1, 2], [1, 20]

In [16]:
# Run the full analysis for every (noise level, horizon) pair and file the four
# returned pieces under a shared descriptive key.
for n in num_stdevs:
    for h in horizons:
        idx = f"h={h}, {n} stan. devs"
        (
            results_dict_des[idx],
            tests[idx],
            fcasts_des[idx],
            fcasts_protected_des[idx],
        ) = full_coding_analysis(
            time_series_data=Y,
            forecasting_model=forecaster,
            forecast_horizon=h,
            num_stdev=n,
        )

In [17]:
# display the DES result summaries, one entry per "h=<horizon>, <n> stan. devs" key
results_dict_des

{'h=1, 1 stan. devs': {'Mean Accuracies': array([2.1, 2.1]),
  'Protected Mean Accuracies:': array([11.49, 11.49]),
  '% Change Mean accuracy:': array([-445.94, -445.94]),
  '% Change Median accuracy:': array([-206.35, -206.35]),
  '% Forecasted Points adjusted downward:': 52.44,
  '% Forecasted Points adjusted upward:': 47.56,
  '% Series with improved accuracy:': array([20.12, 20.12]),
  '% Series with reduced accuracy:': array([79.88, 79.88]),
  'Original Mean Absolute Error Upward Adjusted:': 2.39,
  'Original Mean Absolute Error Downward Adjusted:': 1.8499999999999999,
  'Protected Mean Absolute Error Upward Adjusted:': 18.98,
  'Protected Mean Absolute Error Downward Adjusted:': 4.7},
 'h=20, 1 stan. devs': {'Mean Accuracies': array([4.24, 5.18]),
  'Protected Mean Accuracies:': array([12.64, 13.42]),
  '% Change Mean accuracy:': array([-198.48, -159.05]),
  '% Change Median accuracy:': array([-43.61, -37.95]),
  '% Forecasted Points adjusted downward:': 46.43,
  '% Forecasted Po

***
***

## TES (Triple Exponential Smoothing)

In [18]:
# TES forecasting pipeline.
# Unlike the SES/DES pipelines there is no separate deseasonalization step here:
# the single "forecast" step is an exponential smoothing model configured with an
# additive trend (not damped), an additive seasonal component with period 52,
# and Box-Cox disabled.
forecaster = TransformedTargetForecaster(
    steps=[
        (
            "forecast",
            ExponentialSmoothing(
                trend="additive",
                damped_trend=False,
                seasonal="additive",
                sp=52,
                use_boxcox=False,
            ),
        ),
    ]
)

In [19]:
# Empty containers, keyed later by "h=<horizon>, <n> stan. devs":
# accuracy summaries, held-out test data, original forecasts, protected forecasts.
# Note that `tests` is rebound to a new empty dict here.
results_dict_tes, fcasts_tes, fcasts_protected_tes, tests = {}, {}, {}, {}

# Experiment grid: noise levels (in standard deviations) and forecast horizons.
num_stdevs, horizons = [1, 2], [1, 20]

In [20]:
# Run the full analysis for every (noise level, horizon) pair and file the four
# returned pieces under a shared descriptive key.
for n in num_stdevs:
    for h in horizons:
        idx = f"h={h}, {n} stan. devs"
        (
            results_dict_tes[idx],
            tests[idx],
            fcasts_tes[idx],
            fcasts_protected_tes[idx],
        ) = full_coding_analysis(
            time_series_data=Y,
            forecasting_model=forecaster,
            forecast_horizon=h,
            num_stdev=n,
        )

In [21]:
# display the TES result summaries, one entry per "h=<horizon>, <n> stan. devs" key
results_dict_tes

{'h=1, 1 stan. devs': {'Mean Accuracies': array([2.44, 2.44]),
  'Protected Mean Accuracies:': array([14.74, 14.74]),
  '% Change Mean accuracy:': array([-503.7, -503.7]),
  '% Change Median accuracy:': array([-355.47, -355.47]),
  '% Forecasted Points adjusted downward:': 50.0,
  '% Forecasted Points adjusted upward:': 50.0,
  '% Series with improved accuracy:': array([13.41, 13.41]),
  '% Series with reduced accuracy:': array([86.59, 86.59]),
  'Original Mean Absolute Error Upward Adjusted:': 2.74,
  'Original Mean Absolute Error Downward Adjusted:': 2.1399999999999997,
  'Protected Mean Absolute Error Upward Adjusted:': 22.11,
  'Protected Mean Absolute Error Downward Adjusted:': 7.37},
 'h=20, 1 stan. devs': {'Mean Accuracies': array([4.75, 5.74]),
  'Protected Mean Accuracies:': array([15.13, 16.91]),
  '% Change Mean accuracy:': array([-218.65, -194.43]),
  '% Change Median accuracy:': array([-113.76, -111.3 ]),
  '% Forecasted Points adjusted downward:': 49.88,
  '% Forecasted P