## Restructure Data Analysis Framework

***

In [1]:
import pandas as pd
import numpy as np

from data_protection_functions import *
from data_processing_functions import *
from forecasting_functions import *

# nice time series plots
from sktime.utils.plotting import plot_series

# import detrender and deseasonalizer
from sktime.transformations.series.detrend import Detrender, Deseasonalizer

# import exponential smoothing forecasting model
from sktime.forecasting.exp_smoothing import ExponentialSmoothing

import sktime
import lightgbm

from sktime.performance_metrics.forecasting import mean_absolute_percentage_error, median_absolute_percentage_error

### Step 1: Import Time Series Data

In [2]:
# Load the weekly finance time series from CSV.
# header=None combined with skiprows=1 discards the file's first row and
# lets pandas assign default integer column labels.
full_data = pd.read_csv(
    "../../Data/Train/Clean/full_m4_weekly_finance_clean.csv",
    header=None,
    skiprows=1,
)

In [3]:
# Turn every row of the frame into its own Series and drop the NaN entries,
# so the resulting series may have different lengths.
# NOTE: this rebinds `full_data` from a DataFrame to a list, so this cell
# cannot be re-run on its own without re-running the load cell first.
full_data = [row.dropna() for _, row in full_data.iterrows()]

In [4]:
# forecast horizon: the number of trailing observations split off each
# series as test data, also passed to train_and_forecast as horizon_length
h = 1

In [5]:
# Split each series positionally: everything except the final h points is
# kept for training (Y); the final h points are held out (Test).
Y = [series.iloc[:-h] for series in full_data]
Test = [series.iloc[-h:] for series in full_data]

***

### Step 2: Apply Data Protection to Generate Protected Series

At the end of this step, we will have two time series datasets:

* Confidential (original) dataset
* Protected dataset

In [55]:
# Build the protected counterpart of the training series using "Top" coding
# with a coding percentage of 0.10.
Y_protected = apply_data_protection(
    Y,
    coding_type="Top",
    coding_percentage=0.10,
)

***

### Step 3: Pre-process the Confidential and Protected Datasets

In [84]:
# parameters for LGBM
# window_length is forwarded through transform_dict["windows"] in the next cell
window_length = 20

In [85]:
# Transformation settings handed to both pre_process and post_process:
#   "windows"       -> uses the window_length defined above
#   "deseasonalize" -> additive seasonality with sp=52
transform_dict = {
    "windows": {"window_length": window_length},
    "deseasonalize": {"sp": 52, "seasonality_type": "additive"},
}

In [86]:
# transform_dict = {}

In [87]:
# Reach the regressor through the `lightgbm` module imported in the first
# cell. This notebook's own imports never bind an `lgb` alias, so relying on
# it only works if a star import happens to leak that name.
forecasting_model = lightgbm.LGBMRegressor()

In [88]:
# forecasting_model = ExponentialSmoothing(trend="additive", seasonal="additive", sp=52, use_boxcox=False)

In [89]:
# Pre-process the original (confidential) training series ...
Y_processed, Y_last_window, Y_last_window_trend = pre_process(
    Y,
    truncate=True,
    mean_normalize=True,
    log=True,
    transform_dict=transform_dict,
)

# ... and the protected series with exactly the same settings, so both
# datasets go through an identical pipeline.
Y_protected_processed, Y_protected_last_window, Y_protected_last_window_trend = pre_process(
    Y_protected,
    truncate=True,
    mean_normalize=True,
    log=True,
    transform_dict=transform_dict,
)

### Step 4: Train Models and Generate Forecasts

In [90]:
# Train on the processed original series and forecast h steps ahead.
# NOTE(review): both calls below receive the same `forecasting_model` object;
# confirm train_and_forecast does not carry fitted state from one call to the next.
forecasts_original = train_and_forecast(
    ts_data=Y_processed,
    forecasting_model=forecasting_model,
    horizon_length=h,
    last_window=Y_last_window,
)

# Same procedure for the processed protected series.
forecasts_protected = train_and_forecast(
    ts_data=Y_protected_processed,
    forecasting_model=forecasting_model,
    horizon_length=h,
    last_window=Y_protected_last_window,
)

### Step 5: Post-process the Forecasts

In [91]:
# Post-process the forecasts for the original series, passing the same
# truncate / mean_normalize / log / transform_dict settings given to pre_process.
# NOTE: this rebinds `forecasts_original`; re-running only this cell would feed
# already post-processed forecasts back into post_process.
forecasts_original = post_process(
    full_ts_data=Y,
    forecasts=forecasts_original,
    last_window_with_trend=Y_last_window_trend,
    truncate=True,
    mean_normalize=True,
    log=True,
    bias_adjusted=True,
    transform_dict=transform_dict,
)

In [92]:
# Post-process the forecasts for the protected series, passing the same
# truncate / mean_normalize / log / transform_dict settings given to pre_process.
# NOTE: this rebinds `forecasts_protected`; re-running only this cell would feed
# already post-processed forecasts back into post_process.
forecasts_protected = post_process(
    full_ts_data=Y_protected,
    forecasts=forecasts_protected,
    last_window_with_trend=Y_protected_last_window_trend,
    truncate=True,
    mean_normalize=True,
    log=True,
    bias_adjusted=True,
    transform_dict=transform_dict,
)

### Step 6: Forecast Evaluation

In [93]:
# Things to calculate
# series-level MAPE, MdAPE
# the percentage of series that had improved/worsened accuracy

In [94]:
# Test = pd.DataFrame([x.reset_index(drop=True) for x in Test]).T

In [95]:
# Evaluate both forecast sets against the held-out test data; the returned
# summary is displayed as the cell output.
forecast_results(
    test_data=Test,
    original_forecasts=forecasts_original,
    protected_forecasts=forecasts_protected,
)

{'Global MAPE, MdAPE': array([0.032, 0.032]),
 'Global Protected MAPE, MdAPE': array([0.1251, 0.1251]),
 'Original MAPE Up, Down': array([0.0392, 0.0301]),
 'Protected MAPE Up, Down': array([0.0438, 0.1463]),
 'Original MdAPE Up, Down': array([0.0129, 0.0125]),
 'Protected MdAPE Up, Down': array([0.0189, 0.0927])}

In [19]:
# full_forecast_analysis(full_data=full_data,
#                        forecasting_model=lgb.LGBMRegressor(),
#                        h=1,
#                        truncate=True,
#                        log=True,
#                        window_length=20,
#                        coding_type="Top",
#                        coding_percentage=0.10)