### Analyzing the Effects of Top/Bottom Coding on the Accuracy of Exponential Smoothing Forecasts

***

#### Analysis Flow:

* Inputs:
    * Time series data
    * Forecasting Model
    * Protection Method

* Analysis:
    * Pre-process data
    * Fit forecasting models
    * Generate forecasts for a variety of horizons

* Outputs:
    * Forecasts for original data
    * Forecasts for confidential data
    * Accuracy for local forecasts
    * Global accuracy
    * % improvement/reduction in forecast accuracy across all series

***

## Import Modules

Note the functions imported from `helper_functions`: these are custom functions written by the paper authors. See `helper_functions.py` for comments and function descriptions.

In [None]:
# general modules
import pandas as pd
import numpy as np
import sktime

# import exponential smoothing forecasting model
from sktime.forecasting.exp_smoothing import ExponentialSmoothing

# functions for transformation+forecasting pipeline
from sktime.forecasting.compose import TransformedTargetForecaster
from sktime.forecasting.compose import EnsembleForecaster

# time series transformations
from sktime.transformations.series.detrend import ConditionalDeseasonalizer, Detrender

##### the `helper_functions.py` file contains many custom functions we wrote to aid in our analysis
##### `full_coding_analysis` combines all of the following - train-test split data,
##### data protection, train models, compare accuracies, return accuracy results
from helper_functions import full_coding_analysis

# suppress warnings from exponential smoothing model not converging
# NOTE: no category or message filter is given, so this silences *every*
# warning raised after this point, not only the convergence warnings
import warnings
warnings.filterwarnings('ignore')

Import data.

In [None]:
# Load the cleaned weekly finance time series: the first line of the CSV is
# skipped as a header and the remaining comma-separated values are wrapped in
# a DataFrame.
Y = pd.DataFrame(
    np.genfromtxt(
        "../../Data/Train/Clean/weekly_finance_clean.csv",
        delimiter=',',
        skip_header=1,
    )
)

In [None]:
# Detrend every series (one row of Y per series), then shift it upward by
# |min| + 1 so that all of its values are at least 1.
# NOTE(review): presumably the shift is there because the multiplicative
# seasonal components used below need strictly positive data - confirm.
detrender = Detrender()
detrended_series = []
for _, series in Y.iterrows():
    # the detrender is re-fit on each series before transforming it
    residual = detrender.fit_transform(series)
    detrended_series.append(residual + np.abs(np.min(residual)) + 1.0)

# stack the shifted series back into a frame with one series per row
Y = pd.concat(detrended_series, axis=1).T

## SES

In [None]:
# SES pipeline, built from two named steps:
#   1. multiplicative deseasonalization (seasonal period 52), applied
#      conditionally on the deseasonalizer's test for seasonality
#   2. exponential smoothing with no trend/seasonal arguments and no Box-Cox
deseasonalize_step = (
    "deseasonalize",
    ConditionalDeseasonalizer(model="multiplicative", sp=52),
)
forecast_step = ("forecast", ExponentialSmoothing(use_boxcox=False))

forecaster = TransformedTargetForecaster([deseasonalize_step, forecast_step])

We obtain results for a combination of forecast horizons, coding types (top and bottom), and coding percentages:

* Forecast Horizons: (1, 5, 15)
* Coding Types: (Top, Bottom)
* Coding Percentages: (0.10, 0.20, 0.40)

In [None]:
# experiment grid for the SES runs
horizons = [1, 5, 15]              # forecast horizons
percentages = [0.10, 0.20, 0.40]   # coding percentages
types = ["Top", "Bottom"]          # coding types

# SES results, one entry per grid combination (keys look like "h=1, Top 0.1")
results_dict_ses = {}

In [None]:
# run the full coding analysis for every (type, percentage, horizon)
# combination and store each result under a descriptive key
for t in types:
    for p in percentages:
        for h in horizons:
            key = f"h={h}, {t} {p}"
            results_dict_ses[key] = full_coding_analysis(
                Y,
                forecaster,
                forecast_horizon=h,
                coding_type=t,
                coding_percentage=p,
            )

In [None]:
# bare trailing expression: shows the SES results dictionary as the cell output
results_dict_ses

***
***

## DES

In [None]:
# DES pipeline, built from two named steps:
#   1. multiplicative deseasonalization (seasonal period 52), applied
#      conditionally on the deseasonalizer's test for seasonality
#   2. exponential smoothing with an additive trend and no Box-Cox
deseasonalize_step = (
    "deseasonalize",
    ConditionalDeseasonalizer(model="multiplicative", sp=52),
)
forecast_step = (
    "forecast",
    ExponentialSmoothing(trend="additive", use_boxcox=False),
)

forecaster = TransformedTargetForecaster([deseasonalize_step, forecast_step])

We obtain results for a combination of forecast horizons, coding types (top and bottom), and coding percentages:

* Forecast Horizons: (1, 5, 15)
* Coding Types: (Top, Bottom)
* Coding Percentages: (0.10, 0.20, 0.40)

In [None]:
# experiment grid for the DES runs
horizons = [1, 5, 15]              # forecast horizons
percentages = [0.10, 0.20, 0.40]   # coding percentages
types = ["Top", "Bottom"]          # coding types

# DES results, one entry per grid combination (keys look like "h=1, Top 0.1")
results_dict_des = {}

In [None]:
# run the full coding analysis for every (type, percentage, horizon)
# combination and store each result under a descriptive key
for t in types:
    for p in percentages:
        for h in horizons:
            key = f"h={h}, {t} {p}"
            results_dict_des[key] = full_coding_analysis(
                Y,
                forecaster,
                forecast_horizon=h,
                coding_type=t,
                coding_percentage=p,
            )

In [None]:
# bare trailing expression: shows the DES results dictionary as the cell output
results_dict_des

***
***

## DES With Damped Trend

In [None]:
# Damped-trend DES pipeline, built from two named steps:
#   1. multiplicative deseasonalization (seasonal period 52), applied
#      conditionally on the deseasonalizer's test for seasonality
#   2. exponential smoothing with a damped additive trend and no Box-Cox
deseasonalize_step = (
    "deseasonalize",
    ConditionalDeseasonalizer(model="multiplicative", sp=52),
)
forecast_step = (
    "forecast",
    ExponentialSmoothing(trend="additive", damped_trend=True, use_boxcox=False),
)

forecaster = TransformedTargetForecaster([deseasonalize_step, forecast_step])

We obtain results for a combination of forecast horizons, coding types (top and bottom), and coding percentages:

* Forecast Horizons: (1, 5, 15)
* Coding Types: (Top, Bottom)
* Coding Percentages: (0.10, 0.20, 0.40)

In [None]:
# experiment grid for the damped-trend DES runs
horizons = [1, 5, 15]              # forecast horizons
percentages = [0.10, 0.20, 0.40]   # coding percentages
types = ["Top", "Bottom"]          # coding types

# damped DES results, one entry per grid combination
# (keys look like "h=1, Top 0.1")
results_dict_ddes = {}

In [None]:
# run the full coding analysis for every (type, percentage, horizon)
# combination and store each result under a descriptive key
for t in types:
    for p in percentages:
        for h in horizons:
            key = f"h={h}, {t} {p}"
            results_dict_ddes[key] = full_coding_analysis(
                Y,
                forecaster,
                forecast_horizon=h,
                coding_type=t,
                coding_percentage=p,
            )

In [None]:
# bare trailing expression: shows the damped DES results dictionary as the cell output
results_dict_ddes

***
***

## TES

In [None]:
# TES pipeline: unlike the SES/DES pipelines there is no separate
# deseasonalization step; seasonality is handled inside the smoother itself
# (additive trend, multiplicative seasonal component with period 52,
# undamped trend, no Box-Cox transform).
tes_model = ExponentialSmoothing(
    trend="additive",
    seasonal="multiplicative",
    sp=52,
    damped_trend=False,
    use_boxcox=False,
)

forecaster = TransformedTargetForecaster([("forecast", tes_model)])

We obtain results for a combination of forecast horizons, coding types (top and bottom), and coding percentages:

* Forecast Horizons: (1, 5, 15)
* Coding Types: (Top, Bottom)
* Coding Percentages: (0.10, 0.20, 0.40)

In [None]:
# experiment grid for the TES runs
horizons = [1, 5, 15]              # forecast horizons
percentages = [0.10, 0.20, 0.40]   # coding percentages
types = ["Top", "Bottom"]          # coding types

# TES results, one entry per grid combination (keys look like "h=1, Top 0.1")
results_dict_tes = {}

In [None]:
# run the full coding analysis for every (type, percentage, horizon)
# combination and store each result under a descriptive key
for t in types:
    for p in percentages:
        for h in horizons:
            key = f"h={h}, {t} {p}"
            results_dict_tes[key] = full_coding_analysis(
                Y,
                forecaster,
                forecast_horizon=h,
                coding_type=t,
                coding_percentage=p,
            )

In [None]:
# bare trailing expression: shows the TES results dictionary as the cell output
results_dict_tes

***
***

## TES With Damped Trend

In [None]:
# Damped-trend TES pipeline: unlike the SES/DES pipelines there is no separate
# deseasonalization step; seasonality is handled inside the smoother itself
# (damped additive trend, multiplicative seasonal component with period 52,
# no Box-Cox transform).
damped_tes_model = ExponentialSmoothing(
    trend="additive",
    seasonal="multiplicative",
    sp=52,
    damped_trend=True,
    use_boxcox=False,
)

forecaster = TransformedTargetForecaster([("forecast", damped_tes_model)])

We obtain results for a combination of forecast horizons, coding types (top and bottom), and coding percentages:

* Forecast Horizons: (1, 5, 15)
* Coding Types: (Top, Bottom)
* Coding Percentages: (0.10, 0.20, 0.40)

In [None]:
# experiment grid for the damped-trend TES runs
horizons = [1, 5, 15]              # forecast horizons
percentages = [0.10, 0.20, 0.40]   # coding percentages
types = ["Top", "Bottom"]          # coding types

# damped TES results, one entry per grid combination
# (keys look like "h=1, Top 0.1")
results_dict_dtes = {}

In [None]:
# run the full coding analysis for every (type, percentage, horizon)
# combination and store each result under a descriptive key
for t in types:
    for p in percentages:
        for h in horizons:
            key = f"h={h}, {t} {p}"
            results_dict_dtes[key] = full_coding_analysis(
                Y,
                forecaster,
                forecast_horizon=h,
                coding_type=t,
                coding_percentage=p,
            )

In [None]:
# bare trailing expression: shows the damped TES results dictionary as the cell output
results_dict_dtes

***
***

## Ensemble Forecaster

In [None]:
# component smoothers combined by the ensemble: simple, additive-trend (Holt),
# and damped additive-trend exponential smoothing, none using Box-Cox
ses = ExponentialSmoothing(use_boxcox=False)
holt = ExponentialSmoothing(trend="additive", damped_trend=False, use_boxcox=False)
damped = ExponentialSmoothing(trend="additive", damped_trend=True, use_boxcox=False)

# Ensemble pipeline, built from two named steps:
#   1. multiplicative deseasonalization (seasonal period 52), applied
#      conditionally on the deseasonalizer's test for seasonality
#   2. an EnsembleForecaster over the three smoothers, left at its defaults
deseasonalize_step = (
    "deseasonalize",
    ConditionalDeseasonalizer(model="multiplicative", sp=52),
)
forecast_step = (
    "forecast",
    EnsembleForecaster([("ses", ses), ("holt", holt), ("damped", damped)]),
)

forecaster = TransformedTargetForecaster([deseasonalize_step, forecast_step])

We obtain results for a combination of forecast horizons, coding types (top and bottom), and coding percentages:

* Forecast Horizons: (1, 5, 15)
* Coding Types: (Top, Bottom)
* Coding Percentages: (0.10, 0.20, 0.40)

In [None]:
# experiment grid for the ensemble runs
horizons = [1, 5, 15]              # forecast horizons
percentages = [0.10, 0.20, 0.40]   # coding percentages
types = ["Top", "Bottom"]          # coding types

# ensemble results, one entry per grid combination
# (keys look like "h=1, Top 0.1")
results_dict_ensemble = {}

In [None]:
# run the full coding analysis for every (type, percentage, horizon)
# combination and store each result under a descriptive key
for t in types:
    for p in percentages:
        for h in horizons:
            key = f"h={h}, {t} {p}"
            results_dict_ensemble[key] = full_coding_analysis(
                Y,
                forecaster,
                forecast_horizon=h,
                coding_type=t,
                coding_percentage=p,
            )

In [None]:
# bare trailing expression: shows the ensemble results dictionary as the cell output
results_dict_ensemble