## Restructure Data Analysis Framework

***

In [1]:
import pandas as pd
import numpy as np

from data_protection_functions import *
from data_processing_functions import *
from forecasting_functions import *

# nice time series plots
from sktime.utils.plotting import plot_series

# import detrender (no deseasonalizer is imported in this cell)
from sktime.transformations.series.detrend import Detrender

# import exponential smoothing forecasting model
from sktime.forecasting.exp_smoothing import ExponentialSmoothing

import sktime
import lightgbm

from sktime.performance_metrics.forecasting import mean_absolute_percentage_error, median_absolute_percentage_error

### Step 1: Import Time Series Data

In [2]:
# Load the raw series table from CSV.
# NOTE(review): the path names the M3 "daily micro" file, whereas this cell was
# previously labelled "weekly finance" -- confirm which data set is intended.
# skiprows=1 discards the file's first row and header=None stops pandas from
# reading a header row, so the columns receive default integer labels.
full_data = pd.read_csv(
    "../../Data/Train/Clean/full_m3_daily_micro_clean.csv",
    skiprows=1,
    header=None,
)

In [3]:
# Turn the frame into a list holding one Series per row. Missing values are
# dropped from each row, so the resulting series may differ in length.
# NOTE: this rebinds `full_data` from a DataFrame to a list, so the cell cannot
# be re-run on its own (a list has no .iterrows()); re-run the load cell first.
full_data = list(
    map(lambda indexed_row: indexed_row[1].dropna(), full_data.iterrows())
)

In [4]:
# Y, Test = full_data.iloc[:,:-1], full_data.iloc[:,-1:]

***

### Step 2: Apply Data Protection to Generate Protected Series

At the end of this step, we will have two time series datasets:

* Confidential (original) dataset
* Protected dataset

In [5]:
# Y_protected = apply_data_protection(Y, coding_type="Top", coding_percentage=0.10)

***

### Step 3: Pre-process the Confidential and Protected Datasets

In [6]:
# # parameters for SES
# h = 1
# sp = 1
# # seasonality_type = "multiplicative"

In [7]:
# # parameters for LGBM
# h = 10
# window_length = 20

In [8]:
# # transform_dict = {"deseasonalize":{"sp":sp, "seasonality_type":seasonality_type}}
# transform_dict = {}

In [9]:
# Y_processed, Y_last_window, Y_last_window_trend = pre_process(Y, truncate=True, log=True, transform_dict=transform_dict)
# Y_protected_processed, Y_protected_last_window, Y_protected_last_window_trend = pre_process(Y_protected, truncate=True, log=True, transform_dict=transform_dict)

### Step 4: Train Models and Generate Forecasts

In [10]:
# forecasting_model = ExponentialSmoothing(use_boxcox=False)

In [11]:
# forecasts_original = train_and_forecast(ts_data=Y_processed, forecasting_model=forecasting_model, horizon_length=h, last_window=Y_last_window)
# forecasts_protected = train_and_forecast(ts_data=Y_protected_processed, forecasting_model=forecasting_model, horizon_length=h, last_window=Y_protected_last_window)

### Step 5: Post-process the Forecasts

In [12]:
# forecasts_original = post_process(full_ts_data=Y, 
#                                   forecasts=forecasts_original, 
#                                   last_window_with_trend=Y_last_window_trend,
#                                   truncate=True,
#                                   log=True,
#                                   bias_adjusted=False,
#                                   transform_dict=transform_dict)

In [13]:
# forecasts_protected = post_process(full_ts_data=Y_protected, 
#                                    forecasts=forecasts_protected, 
#                                    last_window_with_trend=Y_protected_last_window_trend,
#                                    truncate=True,
#                                    log=True,
#                                    bias_adjusted=False,
#                                    transform_dict=transform_dict)

### Step 6: Forecast Evaluation

In [14]:
# Things to calculate
# series-level MAPE, MdAPE
# the percentage of series that had improved/worsened accuracy

In [15]:
# forecast_results(test_data=Test.T, original_forecasts=forecasts_original, protected_forecasts=forecasts_protected)

In [20]:
# full_forecast_analysis(full_data=full_data,
#                        forecasting_model=ExponentialSmoothing(trend="additive", use_boxcox=False),
#                        h=1,
#                        truncate=True,
#                        log=True,
#                        num_stdev=1)

{'Global MAPE, MdAPE': array([0.2516, 0.2516]),
 'Global Protected MAPE, MdAPE': array([0.3421, 0.3421]),
 'Original MAPE Up, Down': array([0.1666, 0.2898]),
 'Protected MAPE Up, Down': array([0.2501, 0.3835]),
 'Original MdAPE Up, Down': array([0.1197, 0.1407]),
 'Protected MdAPE Up, Down': array([0.1381, 0.2949])}