# Examining the Effects of DP on LGBM Forecast Accuracy

***

In [1]:
# general modules
import pandas as pd
import numpy as np
import lightgbm as lgb

##### `helper_functions.py` contains the custom functions we wrote to aid our analysis.
##### `full_coding_analysis` combines all of the following steps: train-test split of the data,
##### data protection, model training, accuracy comparison, and returning the accuracy results.
from helper_functions import *

# nice time series plots
from sktime.utils.plotting import plot_series

In [2]:
# Load the cleaned weekly finance time series into a DataFrame.
# The first line of the CSV is skipped as a header; the resulting frame has
# default integer column labels.
# NOTE(review): presumably each row is one series (the commented-out
# detrending cell iterates with `Y.iterrows()`) -- confirm against the CSV.
weekly_finance_csv = "../../Data/Train/Clean/weekly_finance_clean.csv"
Y = pd.DataFrame(
    np.genfromtxt(weekly_finance_csv, delimiter=',', skip_header=1)
)

In [3]:
# detrender = Detrender()
# detrended_series = [detrender.fit_transform(series) for _ , series in Y.iterrows()]
# detrended_series = [i+np.abs(np.min(i))+1.0 for i in detrended_series]
# Y = pd.concat(detrended_series, axis=1).T

***

## Simple Model (window length = 10)

In [4]:
# LightGBM regressor constructed with its default hyperparameters; it is passed
# to `full_coding_analysis` as `forecasting_model` in the loop further down.
forecaster = lgb.LGBMRegressor()
# Window length for the "simple" model, forwarded as `window_length`.
# NOTE(review): presumably the number of lagged observations used as
# features -- confirm in helper_functions.py.
window_length = 10

In [5]:
# Experiment grid: every epsilon is paired with every forecast horizon.
# NOTE(review): epsilon is presumably the data-protection parameter -- confirm
# in helper_functions.py.
epsilons = [1, 10, 20]
horizons = [1, 20]

# Output containers for the window-length-10 runs, keyed by a label of the
# form "h=<horizon>, epsilon = <epsilon>".
results_dict_10 = {}        # accuracy summaries
fcasts_10 = {}              # third value returned by full_coding_analysis
fcasts_protected_10 = {}    # fourth value returned by full_coding_analysis

# NOTE(review): `tests` is re-created in each window-length section of this
# notebook, so entries stored here are discarded when a later section runs.
tests = {}

In [6]:
# Run the full analysis once per (epsilon, horizon) combination and file the
# four returned objects under a shared label such as "h=1, epsilon = 10".
for e in epsilons:
    for h in horizons:
        idx = f"h={h}, epsilon = {e}"
        outputs = full_coding_analysis(
            time_series_data=Y,
            forecasting_model=forecaster,
            forecast_horizon=h,
            epsilon=e,
            window_length=window_length,
        )
        # Unpack in the order the helper returns its values.
        (
            results_dict_10[idx],
            tests[idx],
            fcasts_10[idx],
            fcasts_protected_10[idx],
        ) = outputs

In [7]:
# Display the collected results for every "h=..., epsilon = ..." label
# (window length = 10).
results_dict_10

{'h=1, epsilon = 1': {'Mean Accuracies': array([67.4396, 67.4396]),
  'Protected Mean Accuracies:': array([549.639, 549.639]),
  '% Change Mean accuracy:': array([-7.1501, -7.1501]),
  '% Change Median accuracy:': array([-5.0574, -5.0574]),
  '% Forecasted Points adjusted downward:': 0.3171,
  '% Forecasted Points adjusted upward:': 0.6829,
  '% Series with improved accuracy:': array([0.122, 0.122]),
  '% Series with reduced accuracy:': array([0.878, 0.878]),
  'Original Mean Absolute Error Upward Adjusted:': 83.2448,
  'Original Mean Absolute Error Downward Adjusted:': 33.3976,
  'Protected Mean Absolute Error Upward Adjusted:': 738.9472,
  'Protected Mean Absolute Error Downward Adjusted:': 141.8982},
 'h=20, epsilon = 1': {'Mean Accuracies': array([115.1224, 138.0561]),
  'Protected Mean Accuracies:': array([516.7913, 532.1066]),
  '% Change Mean accuracy:': array([-3.4891, -2.8543]),
  '% Change Median accuracy:': array([-1.653 , -1.4481]),
  '% Forecasted Points adjusted downward:

***
***

## 'Medium' Model (window length = 20)

In [8]:
# New LightGBM regressor with default hyperparameters for the window-length-20
# runs; passed to `full_coding_analysis` as `forecasting_model`.
forecaster = lgb.LGBMRegressor()
# Window length for the "medium" model, forwarded as `window_length`.
# NOTE(review): presumably the number of lagged observations used as
# features -- confirm in helper_functions.py.
window_length = 20

In [9]:
# Experiment grid: every epsilon is paired with every forecast horizon.
# NOTE(review): epsilon is presumably the data-protection parameter -- confirm
# in helper_functions.py.
epsilons = [1, 10, 20]
horizons = [1, 20]

# Output containers for the window-length-20 runs, keyed by a label of the
# form "h=<horizon>, epsilon = <epsilon>".
results_dict_20 = {}        # accuracy summaries
fcasts_20 = {}              # third value returned by full_coding_analysis
fcasts_protected_20 = {}    # fourth value returned by full_coding_analysis

# NOTE(review): this rebinds `tests` to a new empty dict, discarding whatever
# an earlier section of the notebook stored under the same name.
tests = {}

In [10]:
# Run the full analysis once per (epsilon, horizon) combination and file the
# four returned objects under a shared label such as "h=20, epsilon = 1".
for e in epsilons:
    for h in horizons:
        idx = f"h={h}, epsilon = {e}"
        outputs = full_coding_analysis(
            time_series_data=Y,
            forecasting_model=forecaster,
            forecast_horizon=h,
            epsilon=e,
            window_length=window_length,
        )
        # Unpack in the order the helper returns its values.
        (
            results_dict_20[idx],
            tests[idx],
            fcasts_20[idx],
            fcasts_protected_20[idx],
        ) = outputs

In [11]:
# Display the collected results for every "h=..., epsilon = ..." label
# (window length = 20).
results_dict_20

{'h=1, epsilon = 1': {'Mean Accuracies': array([64.9424, 64.9424]),
  'Protected Mean Accuracies:': array([489.4416, 489.4416]),
  '% Change Mean accuracy:': array([-6.5366, -6.5366]),
  '% Change Median accuracy:': array([-5.6225, -5.6225]),
  '% Forecasted Points adjusted downward:': 0.6037,
  '% Forecasted Points adjusted upward:': 0.3963,
  '% Series with improved accuracy:': array([0.1098, 0.1098]),
  '% Series with reduced accuracy:': array([0.8902, 0.8902]),
  'Original Mean Absolute Error Upward Adjusted:': 37.9052,
  'Original Mean Absolute Error Downward Adjusted:': 82.694,
  'Protected Mean Absolute Error Upward Adjusted:': 145.3944,
  'Protected Mean Absolute Error Downward Adjusted:': 715.3312},
 'h=20, epsilon = 1': {'Mean Accuracies': array([125.6744, 149.4553]),
  'Protected Mean Accuracies:': array([356.5481, 373.3674]),
  '% Change Mean accuracy:': array([-1.8371, -1.4982]),
  '% Change Median accuracy:': array([-1.169 , -1.2053]),
  '% Forecasted Points adjusted down

***
***

## Complex Model (window length = 40)

In [12]:
# New LightGBM regressor with default hyperparameters for the window-length-40
# runs; passed to `full_coding_analysis` as `forecasting_model`.
forecaster = lgb.LGBMRegressor()
# Window length for the "complex" model, forwarded as `window_length`.
# NOTE(review): presumably the number of lagged observations used as
# features -- confirm in helper_functions.py.
window_length = 40

In [13]:
# Experiment grid: every epsilon is paired with every forecast horizon.
# NOTE(review): epsilon is presumably the data-protection parameter -- confirm
# in helper_functions.py.
epsilons = [1, 10, 20]
horizons = [1, 20]

# Output containers for the window-length-40 runs, keyed by a label of the
# form "h=<horizon>, epsilon = <epsilon>".
results_dict_40 = {}        # accuracy summaries
fcasts_40 = {}              # third value returned by full_coding_analysis
fcasts_protected_40 = {}    # fourth value returned by full_coding_analysis

# NOTE(review): this rebinds `tests` to a new empty dict, discarding whatever
# an earlier section of the notebook stored under the same name.
tests = {}

In [14]:
# Run the full analysis once per (epsilon, horizon) combination and file the
# four returned objects under a shared label such as "h=1, epsilon = 20".
for e in epsilons:
    for h in horizons:
        idx = f"h={h}, epsilon = {e}"
        outputs = full_coding_analysis(
            time_series_data=Y,
            forecasting_model=forecaster,
            forecast_horizon=h,
            epsilon=e,
            window_length=window_length,
        )
        # Unpack in the order the helper returns its values.
        (
            results_dict_40[idx],
            tests[idx],
            fcasts_40[idx],
            fcasts_protected_40[idx],
        ) = outputs

In [15]:
# Display the collected results for every "h=..., epsilon = ..." label
# (window length = 40).
results_dict_40

{'h=1, epsilon = 1': {'Mean Accuracies': array([67.7525, 67.7525]),
  'Protected Mean Accuracies:': array([327.4112, 327.4112]),
  '% Change Mean accuracy:': array([-3.8325, -3.8325]),
  '% Change Median accuracy:': array([-5.9228, -5.9228]),
  '% Forecasted Points adjusted downward:': 0.5671,
  '% Forecasted Points adjusted upward:': 0.4329,
  '% Series with improved accuracy:': array([0.1341, 0.1341]),
  '% Series with reduced accuracy:': array([0.8659, 0.8659]),
  'Original Mean Absolute Error Upward Adjusted:': 45.1255,
  'Original Mean Absolute Error Downward Adjusted:': 85.0268,
  'Protected Mean Absolute Error Upward Adjusted:': 235.9519,
  'Protected Mean Absolute Error Downward Adjusted:': 397.235},
 'h=20, epsilon = 1': {'Mean Accuracies': array([166.4712, 191.1059]),
  'Protected Mean Accuracies:': array([520.5619, 536.5541]),
  '% Change Mean accuracy:': array([-2.127 , -1.8076]),
  '% Change Median accuracy:': array([-1.4224, -1.2455]),
  '% Forecasted Points adjusted down