### Analyzing the Effects of Top/Bottom Coding on the Accuracy of Exponential Smoothing Forecasts

***

***

## Import Modules

Note the functions imported from `helper_functions`: these are custom functions written by the paper authors. See `helper_functions.py` for comments and function descriptions.

In [1]:
# general modules
import pandas as pd
import numpy as np
import sktime

# import exponential smoothing forecasting model
from sktime.forecasting.exp_smoothing import ExponentialSmoothing

# functions for transformation+forecasting pipeline
from sktime.forecasting.compose import TransformedTargetForecaster

# time series transformations
from sktime.transformations.series.detrend import ConditionalDeseasonalizer, Detrender

##### the `helper_functions.py` file contains many custom functions we wrote to aid in our analysis
##### `full_coding_analysis` combines all of the following - train-test split data,
##### data protection, train models, compare accuracies, return accuracy results
from helper_functions import full_coding_analysis

# suppress warnings from exponential smoothing model not converging
# NOTE: the filter below is given no category or module, so it silences *every*
# warning raised after this cell runs, not only the convergence warnings
import warnings
warnings.filterwarnings('ignore')

Import data.

In [2]:
# load the weekly finance time series: the first line of the CSV is skipped as a
# header and the remaining values are parsed into an array and wrapped in a DataFrame
Y = pd.DataFrame(
    np.genfromtxt(
        "../../Data/Train/Clean/weekly_finance_clean.csv",
        delimiter=",",
        skip_header=1,
    )
)

In [3]:
# detrend every row of Y (each row is handled as one series) with a default
# Detrender, then add |min| + 1 to each detrended series before stacking the
# results back into a DataFrame with one series per row
detrender = Detrender()
detrended_series = []
for _, row in Y.iterrows():
    residual = detrender.fit_transform(row)
    detrended_series.append(residual + np.abs(np.min(residual)) + 1.0)
Y = pd.concat(detrended_series, axis=1).T

## SES

In [4]:
# define forecasting model (SES): exponential smoothing with no trend or
# seasonal argument, applied after an additive deseasonalization step that is
# conditional on the autocorrelation test for seasonality (period of 52)
deseasonalizer = ConditionalDeseasonalizer(model="additive", sp=52)
ses_model = ExponentialSmoothing(use_boxcox=False)

forecaster = TransformedTargetForecaster(
    [("deseasonalize", deseasonalizer), ("forecast", ses_model)]
)

In [5]:
# fresh, independent containers for the SES run; each is filled by the scenario
# loop and keyed by a label such as "h=1, Top 0.1"
results_dict_ses, fcasts_ses, fcasts_protected_ses, tests = {}, {}, {}, {}

# scenario grid: coding type x coding percentage x forecast horizon
types = ["Top", "Bottom"]
percentages = [0.10, 0.20, 0.40]
horizons = [1, 20]

In [6]:
# run the full coding analysis with the SES forecaster for every combination of
# coding type, coding percentage and forecast horizon; the four returned pieces
# are stored under a label such as "h=1, Top 0.1"
for t in types:
    for p in percentages:
        for h in horizons:
            idx = f"h={h}, {t} {p}"
            analysis = full_coding_analysis(
                time_series_data=Y,
                forecasting_model=forecaster,
                forecast_horizon=h,
                coding_type=t,
                coding_percentage=p,
            )
            results_dict_ses[idx], tests[idx], fcasts_ses[idx], fcasts_protected_ses[idx] = analysis

In [7]:
# show the accuracy summary of every SES scenario (keys look like "h=1, Top 0.1")
results_dict_ses

{'h=1, Top 0.1': {'Mean Accuracies': array([36.81, 36.81]),
  'Protected Mean Accuracies:': array([37.94, 37.94]),
  '% Change Mean accuracy:': array([-3.07, -3.07]),
  '% Change Median accuracy:': array([-22.38, -22.38]),
  '% Forecasted Points adjusted downward:': 16.46,
  '% Forecasted Points adjusted upward:': 83.54,
  '% Series with improved accuracy:': array([43.9, 43.9]),
  '% Series with reduced accuracy:': array([56.1, 56.1]),
  'Original Mean Absolute Error Upward Adjusted:': 42.370000000000005,
  'Original Mean Absolute Error Downward Adjusted:': 8.61,
  'Protected Mean Absolute Error Upward Adjusted:': 42.120000000000005,
  'Protected Mean Absolute Error Downward Adjusted:': 16.72},
 'h=20, Top 0.1': {'Mean Accuracies': array([42.74, 53.44]),
  'Protected Mean Accuracies:': array([42.88, 53.54]),
  '% Change Mean accuracy:': array([-0.31, -0.18]),
  '% Change Median accuracy:': array([-4.22, -1.04]),
  '% Forecasted Points adjusted downward:': 19.57,
  '% Forecasted Points 

In [8]:
# pick one SES scenario for a closer look: its original forecasts, the forecasts
# from the protected data, and the matching test values
scenario_key = 'h=20, Bottom 0.1'
original_forecasts = fcasts_ses[scenario_key]
protected_forecasts = fcasts_protected_ses[scenario_key]
test = tests[scenario_key]

In [9]:
# element-wise flags: the protected forecast is higher (adjusted up) or lower
# (adjusted down) than the original forecast; equal forecasts are in neither group
up_mask = original_forecasts < protected_forecasts
down_mask = original_forecasts > protected_forecasts

# flatten each boolean frame row by row into one long Series
adjusted_up = pd.concat([row for _, row in up_mask.iterrows()])
adjusted_down = pd.concat([row for _, row in down_mask.iterrows()])

In [10]:
# absolute forecast errors against the test values, before and after protection
absolute_error_original = (test - original_forecasts).abs()
absolute_error_protected = (test - protected_forecasts).abs()

In [11]:
# a point counts as improved when the protected forecast has the smaller absolute
# error and as worsened when it has the larger one (ties fall in neither group)
improved_mask = absolute_error_original > absolute_error_protected
worsened_mask = absolute_error_original < absolute_error_protected

# flatten each boolean frame row by row into one long Series
improved = pd.concat([row for _, row in improved_mask.iterrows()])
worsened = pd.concat([row for _, row in worsened_mask.iterrows()])

In [12]:
# among the forecast points whose absolute error shrank after protection,
# the share whose protected forecast is lower than the original forecast
np.mean(adjusted_down[improved])

0.44002447980416154

In [13]:
# among the forecast points whose absolute error shrank after protection,
# the share whose protected forecast is higher than the original forecast
np.mean(adjusted_up[improved])

0.5599755201958384

***
***

## DES

In [14]:
# define forecasting model (DES): exponential smoothing with an additive trend,
# applied after an additive deseasonalization step that is conditional on the
# autocorrelation test for seasonality (period of 52)
deseasonalizer = ConditionalDeseasonalizer(model="additive", sp=52)
des_model = ExponentialSmoothing(trend="additive", use_boxcox=False)

forecaster = TransformedTargetForecaster(
    [("deseasonalize", deseasonalizer), ("forecast", des_model)]
)

In [15]:
# fresh, independent containers for the DES run; each is filled by the scenario
# loop and keyed by a label such as "h=1, Top 0.1" (note that `tests` is reset here)
results_dict_des, fcasts_des, fcasts_protected_des, tests = {}, {}, {}, {}

# scenario grid: coding type x coding percentage x forecast horizon
types = ["Top", "Bottom"]
percentages = [0.10, 0.20, 0.40]
horizons = [1, 20]

In [16]:
# run the full coding analysis with the DES forecaster for every combination of
# coding type, coding percentage and forecast horizon; the four returned pieces
# are stored under a label such as "h=1, Top 0.1"
for t in types:
    for p in percentages:
        for h in horizons:
            idx = f"h={h}, {t} {p}"
            analysis = full_coding_analysis(
                time_series_data=Y,
                forecasting_model=forecaster,
                forecast_horizon=h,
                coding_type=t,
                coding_percentage=p,
            )
            results_dict_des[idx], tests[idx], fcasts_des[idx], fcasts_protected_des[idx] = analysis

In [17]:
# show the accuracy summary of every DES scenario (keys look like "h=1, Top 0.1")
results_dict_des

{'h=1, Top 0.1': {'Mean Accuracies': array([36.74, 36.74]),
  'Protected Mean Accuracies:': array([37.96, 37.96]),
  '% Change Mean accuracy:': array([-3.33, -3.33]),
  '% Change Median accuracy:': array([-13.63, -13.63]),
  '% Forecasted Points adjusted downward:': 18.29,
  '% Forecasted Points adjusted upward:': 81.71000000000001,
  '% Series with improved accuracy:': array([39.02, 39.02]),
  '% Series with reduced accuracy:': array([60.98, 60.98]),
  'Original Mean Absolute Error Upward Adjusted:': 40.77,
  'Original Mean Absolute Error Downward Adjusted:': 18.740000000000002,
  'Protected Mean Absolute Error Upward Adjusted:': 40.64,
  'Protected Mean Absolute Error Downward Adjusted:': 25.990000000000002},
 'h=20, Top 0.1': {'Mean Accuracies': array([48.78, 59.91]),
  'Protected Mean Accuracies:': array([49.26, 60.27]),
  '% Change Mean accuracy:': array([-0.98, -0.61]),
  '% Change Median accuracy:': array([-3.49, -5.08]),
  '% Forecasted Points adjusted downward:': 22.4399999999

***
***

## TES

In [18]:
# define forecasting model (TES): exponential smoothing with an additive trend
# and an additive seasonal component of period 52
# NOTE: unlike the SES/DES pipelines there is no separate deseasonalization step
# here; the seasonal settings are passed to the smoothing model itself
tes_model = ExponentialSmoothing(
    trend="additive",
    seasonal="additive",
    sp=52,
    damped_trend=False,
    use_boxcox=False,
)

forecaster = TransformedTargetForecaster([("forecast", tes_model)])

In [19]:
# fresh, independent containers for the TES run; each is filled by the scenario
# loop and keyed by a label such as "h=1, Top 0.1" (note that `tests` is reset here)
results_dict_tes, fcasts_tes, fcasts_protected_tes, tests = {}, {}, {}, {}

# scenario grid: coding type x coding percentage x forecast horizon
types = ["Top", "Bottom"]
percentages = [0.10, 0.20, 0.40]
horizons = [1, 20]

In [20]:
# run the full coding analysis with the TES forecaster for every combination of
# coding type, coding percentage and forecast horizon; the four returned pieces
# are stored under a label such as "h=1, Top 0.1"
for t in types:
    for p in percentages:
        for h in horizons:
            idx = f"h={h}, {t} {p}"
            analysis = full_coding_analysis(
                time_series_data=Y,
                forecasting_model=forecaster,
                forecast_horizon=h,
                coding_type=t,
                coding_percentage=p,
            )
            results_dict_tes[idx], tests[idx], fcasts_tes[idx], fcasts_protected_tes[idx] = analysis

In [21]:
# show the accuracy summary of every TES scenario (keys look like "h=1, Top 0.1")
results_dict_tes

{'h=1, Top 0.1': {'Mean Accuracies': array([40.45, 40.45]),
  'Protected Mean Accuracies:': array([41.91, 41.91]),
  '% Change Mean accuracy:': array([-3.62, -3.62]),
  '% Change Median accuracy:': array([-13.54, -13.54]),
  '% Forecasted Points adjusted downward:': 26.83,
  '% Forecasted Points adjusted upward:': 73.17,
  '% Series with improved accuracy:': array([43.29, 43.29]),
  '% Series with reduced accuracy:': array([56.71, 56.71]),
  'Original Mean Absolute Error Upward Adjusted:': 39.739999999999995,
  'Original Mean Absolute Error Downward Adjusted:': 42.38,
  'Protected Mean Absolute Error Upward Adjusted:': 39.660000000000004,
  'Protected Mean Absolute Error Downward Adjusted:': 48.05},
 'h=20, Top 0.1': {'Mean Accuracies': array([56.88, 69.7 ]),
  'Protected Mean Accuracies:': array([56.77, 69.49]),
  '% Change Mean accuracy:': array([0.19, 0.31]),
  '% Change Median accuracy:': array([-1.14,  1.03]),
  '% Forecasted Points adjusted downward:': 34.150000000000006,
  '% Fo