## Examining the Effects of Additive Noise on Exponential Smoothing Forecast Accuracy

***

In [1]:
# general modules
import pandas as pd
import numpy as np
import sktime

# import exponential smoothing forecasting model
from sktime.forecasting.exp_smoothing import ExponentialSmoothing

# functions for transformation+forecasting pipeline
from sktime.forecasting.compose import TransformedTargetForecaster

# time series transformations
from sktime.transformations.series.detrend import ConditionalDeseasonalizer

##### the `helper_functions.py` file contains many custom functions we wrote to aid in our analysis
##### `full_coding_analysis` combines all of the following - train-test split data,
##### data protection, train models, compare accuracies, return accuracy results
from helper_functions import *

# suppress warnings from exponential smoothing model not converging
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Load the weekly finance series from CSV: the first line of the file is skipped
# and the remaining comma-separated values are parsed by numpy, then wrapped in
# a DataFrame.
# NOTE(review): the (disabled) detrending cell iterates over the rows of Y as
# series, so each row is presumably one time series -- confirm against the file.
Y = pd.DataFrame(
    np.genfromtxt(
        "../../Data/Train/Clean/weekly_finance_clean.csv",
        delimiter=',',
        skip_header=1,
    )
)

In [3]:
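# NOTE: optional detrending step, currently disabled. It detrends each row of Y,
# shifts every detrended series upward by |min| + 1.0, and rebuilds Y from the result.
# Re-enabling it requires importing `Detrender`, which is not imported in this
# notebook (it lives in sktime.transformations.series.detrend).
# detrender = Detrender()
# detrended_series = [detrender.fit_transform(series) for _ , series in Y.iterrows()]
# detrended_series = [i+np.abs(np.min(i))+1.0 for i in detrended_series]
# Y = pd.concat(detrended_series, axis=1).T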

***

## SES (Simple Exponential Smoothing)

In [4]:
# SES forecasting model: ExponentialSmoothing with only `use_boxcox=False` set,
# so no trend or seasonal arguments are passed and the library defaults apply.
# NOTE(review): the pipeline holds just the single "forecast" step; no
# deseasonalization transformer (e.g. ConditionalDeseasonalizer) is attached.
ses_model = ExponentialSmoothing(use_boxcox=False)
forecaster = TransformedTargetForecaster([("forecast", ses_model)])

In [5]:
# Empty containers for the SES runs (four separate dicts). The loop cell below
# fills them, keyed by a label of the form "h=<horizon>, <n> stan. devs".
results_dict_ses, fcasts_ses, fcasts_protected_ses, tests = {}, {}, {}, {}

# Experiment grid: the values handed to full_coding_analysis as `num_stdev`
# and `forecast_horizon` respectively.
num_stdevs, horizons = [1, 2], [1, 20]

In [6]:
# Run the full analysis once per (num_stdev, horizon) combination with the
# current `forecaster`. Each call returns four values, all stored under one
# shared label of the form "h=<horizon>, <n> stan. devs".
# NOTE(review): judging by the target names, the four values are the accuracy
# results, the test data, the original forecasts and the protected forecasts --
# confirm against full_coding_analysis in helper_functions.
for n in num_stdevs:
    for h in horizons:
        idx = f"h={h}, {n} stan. devs"
        (
            results_dict_ses[idx],
            tests[idx],
            fcasts_ses[idx],
            fcasts_protected_ses[idx],
        ) = full_coding_analysis(
            time_series_data=Y,
            forecasting_model=forecaster,
            forecast_horizon=h,
            num_stdev=n,
        )

***
***

In [7]:
# For the 'h=20, 1 stan. devs' run: flag every forecasted point where the value
# in fcasts_protected_ses is larger than the one in fcasts_ses, then flatten the
# boolean frame row by row into one long Series.
adjusted_up = pd.concat([
    row
    for _, row in (
        fcasts_protected_ses['h=20, 1 stan. devs'] > fcasts_ses['h=20, 1 stan. devs']
    ).iterrows()
])

# Same construction for points where the value in fcasts_protected_ses is smaller.
adjusted_down = pd.concat([
    row
    for _, row in (
        fcasts_protected_ses['h=20, 1 stan. devs'] < fcasts_ses['h=20, 1 stan. devs']
    ).iterrows()
])

In [8]:
# Element-wise absolute difference between `tests` and the SES forecasts for the
# 'h=20, 1 stan. devs' run, flattened row by row into one long Series.
absolute_error = pd.concat([
    row
    for _, row in np.abs(
        tests['h=20, 1 stan. devs'] - fcasts_ses['h=20, 1 stan. devs']
    ).iterrows()
])

In [9]:
# Element-wise absolute difference between `tests` and the forecasts stored in
# fcasts_protected_ses for the 'h=20, 1 stan. devs' run, flattened row by row
# into one long Series.
protected_absolute_error = pd.concat([
    row
    for _, row in np.abs(
        tests['h=20, 1 stan. devs'] - fcasts_protected_ses['h=20, 1 stan. devs']
    ).iterrows()
])

In [10]:
# mean of absolute_error over the points flagged in adjusted_up
absolute_error[adjusted_up].mean()

0.056041622409181965

In [11]:
# mean of absolute_error over the points flagged in adjusted_down
absolute_error[adjusted_down].mean()

0.03587464750502811

In [12]:
# mean of protected_absolute_error over the points flagged in adjusted_up
protected_absolute_error[adjusted_up].mean()

0.24837399193509896

In [13]:
# mean of protected_absolute_error over the points flagged in adjusted_down
protected_absolute_error[adjusted_down].mean()

0.06163279913763265

***
***

In [14]:
# display the summaries stored by the SES loop, one entry per
# "h=<horizon>, <n> stan. devs" label
results_dict_ses

{'h=1, 1 stan. devs': {'Mean Accuracies': array([0.0218, 0.0218]),
  'Protected Mean Accuracies:': array([0.1096, 0.1096]),
  '% Change Mean accuracy:': array([-4.0354, -4.0354]),
  '% Change Median accuracy:': array([-1.7981, -1.7981]),
  '% Forecasted Points adjusted downward:': 0.628,
  '% Forecasted Points adjusted upward:': 0.372,
  '% Series with improved accuracy:': array([0.2561, 0.2561]),
  '% Series with reduced accuracy:': array([0.7439, 0.7439]),
  'Original Mean Absolute Error Upward Adjusted:': 0.0271,
  'Original Mean Absolute Error Downward Adjusted:': 0.0186,
  'Protected Mean Absolute Error Upward Adjusted:': 0.2215,
  'Protected Mean Absolute Error Downward Adjusted:': 0.0433},
 'h=20, 1 stan. devs': {'Mean Accuracies': array([0.0445, 0.0543]),
  'Protected Mean Accuracies:': array([0.1413, 0.1484]),
  '% Change Mean accuracy:': array([-2.1774, -1.7335]),
  '% Change Median accuracy:': array([-0.7161, -0.6214]),
  '% Forecasted Points adjusted downward:': 0.5732,
  '

***
***

## DES (Double Exponential Smoothing: additive trend)

In [15]:
# DES forecasting model: ExponentialSmoothing with an additive trend and
# `use_boxcox=False`; no seasonal arguments are passed.
# NOTE(review): the pipeline holds just the single "forecast" step; no
# deseasonalization transformer (e.g. ConditionalDeseasonalizer) is attached.
des_model = ExponentialSmoothing(trend="additive", use_boxcox=False)
forecaster = TransformedTargetForecaster([("forecast", des_model)])

In [16]:
# Empty containers for the DES runs (four separate dicts). The loop cell below
# fills them, keyed by a label of the form "h=<horizon>, <n> stan. devs".
# NOTE(review): `tests` is rebound to a new empty dict here, so whatever an
# earlier section stored under that name is no longer reachable through it.
results_dict_des, fcasts_des, fcasts_protected_des, tests = {}, {}, {}, {}

# Experiment grid: the values handed to full_coding_analysis as `num_stdev`
# and `forecast_horizon` respectively.
num_stdevs, horizons = [1, 2], [1, 20]

In [17]:
# Run the full analysis once per (num_stdev, horizon) combination with the
# current `forecaster`. Each call returns four values, all stored under one
# shared label of the form "h=<horizon>, <n> stan. devs".
# NOTE(review): judging by the target names, the four values are the accuracy
# results, the test data, the original forecasts and the protected forecasts --
# confirm against full_coding_analysis in helper_functions.
for n in num_stdevs:
    for h in horizons:
        idx = f"h={h}, {n} stan. devs"
        (
            results_dict_des[idx],
            tests[idx],
            fcasts_des[idx],
            fcasts_protected_des[idx],
        ) = full_coding_analysis(
            time_series_data=Y,
            forecasting_model=forecaster,
            forecast_horizon=h,
            num_stdev=n,
        )

In [18]:
# display the summaries stored by the DES loop, one entry per
# "h=<horizon>, <n> stan. devs" label
results_dict_des

{'h=1, 1 stan. devs': {'Mean Accuracies': array([0.021, 0.021]),
  'Protected Mean Accuracies:': array([0.1181, 0.1181]),
  '% Change Mean accuracy:': array([-4.634, -4.634]),
  '% Change Median accuracy:': array([-1.5348, -1.5348]),
  '% Forecasted Points adjusted downward:': 0.5,
  '% Forecasted Points adjusted upward:': 0.5,
  '% Series with improved accuracy:': array([0.3049, 0.3049]),
  '% Series with reduced accuracy:': array([0.6951, 0.6951]),
  'Original Mean Absolute Error Upward Adjusted:': 0.0234,
  'Original Mean Absolute Error Downward Adjusted:': 0.0185,
  'Protected Mean Absolute Error Upward Adjusted:': 0.1882,
  'Protected Mean Absolute Error Downward Adjusted:': 0.048},
 'h=20, 1 stan. devs': {'Mean Accuracies': array([0.0428, 0.0524]),
  'Protected Mean Accuracies:': array([0.1408, 0.1479]),
  '% Change Mean accuracy:': array([-2.2858, -1.8241]),
  '% Change Median accuracy:': array([-0.4037, -0.338 ]),
  '% Forecasted Points adjusted downward:': 0.4595,
  '% Forecas

***
***

## TES (Triple Exponential Smoothing: additive trend and seasonality)

In [19]:
# TES forecasting model: ExponentialSmoothing with an additive, undamped trend,
# an additive seasonal component with seasonal period sp=52, and no Box-Cox
# transform.
# NOTE(review): sp=52 presumably corresponds to one year of weekly
# observations -- confirm. The pipeline holds just the single "forecast" step;
# no deseasonalization transformer (e.g. ConditionalDeseasonalizer) is attached.
tes_model = ExponentialSmoothing(
    trend="additive",
    seasonal="additive",
    sp=52,
    damped_trend=False,
    use_boxcox=False,
)
forecaster = TransformedTargetForecaster([("forecast", tes_model)])

In [20]:
# Empty containers for the TES runs (four separate dicts). The loop cell below
# fills them, keyed by a label of the form "h=<horizon>, <n> stan. devs".
# NOTE(review): `tests` is rebound to a new empty dict here, so whatever an
# earlier section stored under that name is no longer reachable through it.
results_dict_tes, fcasts_tes, fcasts_protected_tes, tests = {}, {}, {}, {}

# Experiment grid: the values handed to full_coding_analysis as `num_stdev`
# and `forecast_horizon` respectively.
num_stdevs, horizons = [1, 2], [1, 20]

In [21]:
# Run the full analysis once per (num_stdev, horizon) combination with the
# current `forecaster`. Each call returns four values, all stored under one
# shared label of the form "h=<horizon>, <n> stan. devs".
# NOTE(review): judging by the target names, the four values are the accuracy
# results, the test data, the original forecasts and the protected forecasts --
# confirm against full_coding_analysis in helper_functions.
for n in num_stdevs:
    for h in horizons:
        idx = f"h={h}, {n} stan. devs"
        (
            results_dict_tes[idx],
            tests[idx],
            fcasts_tes[idx],
            fcasts_protected_tes[idx],
        ) = full_coding_analysis(
            time_series_data=Y,
            forecasting_model=forecaster,
            forecast_horizon=h,
            num_stdev=n,
        )

In [22]:
# display the summaries stored by the TES loop, one entry per
# "h=<horizon>, <n> stan. devs" label
results_dict_tes

{'h=1, 1 stan. devs': {'Mean Accuracies': array([0.0244, 0.0244]),
  'Protected Mean Accuracies:': array([0.1622, 0.1622]),
  '% Change Mean accuracy:': array([-5.6439, -5.6439]),
  '% Change Median accuracy:': array([-4.6937, -4.6937]),
  '% Forecasted Points adjusted downward:': 0.5,
  '% Forecasted Points adjusted upward:': 0.5,
  '% Series with improved accuracy:': array([0.128, 0.128]),
  '% Series with reduced accuracy:': array([0.872, 0.872]),
  'Original Mean Absolute Error Upward Adjusted:': 0.0282,
  'Original Mean Absolute Error Downward Adjusted:': 0.0207,
  'Protected Mean Absolute Error Upward Adjusted:': 0.2329,
  'Protected Mean Absolute Error Downward Adjusted:': 0.0916},
 'h=20, 1 stan. devs': {'Mean Accuracies': array([0.0475, 0.0574]),
  'Protected Mean Accuracies:': array([0.1471, 0.1646]),
  '% Change Mean accuracy:': array([-2.0986, -1.8661]),
  '% Change Median accuracy:': array([-1.202 , -1.1612]),
  '% Forecasted Points adjusted downward:': 0.4945,
  '% Foreca