# Examining the Effects of Differential Privacy (DP) on LightGBM (LGBM) Forecast Accuracy

***

In [1]:
# general modules
import pandas as pd
import numpy as np
import lightgbm as lgb

##### `helper_functions.py` contains the custom functions we wrote to support this analysis.
##### `full_coding_analysis` runs the whole pipeline: it splits the data into train and test sets,
##### applies data protection, trains the models, compares accuracies, and returns the accuracy results.
from helper_functions import *

# nice time series plots
from sktime.utils.plotting import plot_series

In [2]:
# load the cleaned weekly finance series (first row is a header and is skipped)
# and wrap the resulting array in a DataFrame
Y = pd.DataFrame(
    np.genfromtxt(
        "../../Data/Train/Clean/weekly_finance_clean.csv",
        delimiter=',',
        skip_header=1,
    )
)

***

## Simple Model (window length = 10)

In [3]:
# window length handed to `full_coding_analysis` for this section
window_length = 10

# LightGBM regressor with its default settings
forecaster = lgb.LGBMRegressor()

In [4]:
# one independent, empty container per output of `full_coding_analysis`;
# each is filled with one entry per (horizon, epsilon) configuration
results_dict_10, fcasts_10, fcasts_protected_10, tests = {}, {}, {}, {}

# epsilon values and forecast horizons to sweep over
epsilons = [1, 10, 20]
horizons = [1, 20]

In [5]:
# run the full analysis once for every (epsilon, horizon) pair and file the
# four returned objects under a shared, human-readable label
for e in epsilons:
    for h in horizons:
        idx = f"h={h}, epsilon = {e}"
        (
            results_dict_10[idx],
            tests[idx],
            fcasts_10[idx],
            fcasts_protected_10[idx],
        ) = full_coding_analysis(
            time_series_data=Y,
            forecasting_model=forecaster,
            forecast_horizon=h,
            epsilon=e,
            window_length=window_length,
        )

In [6]:
# display the accuracy summaries collected for each (horizon, epsilon) label
results_dict_10

{'h=1, epsilon = 1': {'Mean Accuracies': array([3.12, 3.12]),
  'Protected Mean Accuracies:': array([130.78, 130.78]),
  '% Change Mean accuracy:': array([-4090.42, -4090.42]),
  '% Change Median accuracy:': array([-10166.71, -10166.71]),
  '% Forecasted Points adjusted downward:': 2.44,
  '% Forecasted Points adjusted upward:': 97.56,
  '% Series with improved accuracy:': array([1.22, 1.22]),
  '% Series with reduced accuracy:': array([98.78, 98.78]),
  'Original Mean Absolute Error Upward Adjusted:': 3.17,
  'Original Mean Absolute Error Downward Adjusted:': 1.09,
  'Protected Mean Absolute Error Upward Adjusted:': 133.79000000000002,
  'Protected Mean Absolute Error Downward Adjusted:': 10.489999999999998},
 'h=20, epsilon = 1': {'Mean Accuracies': array([5.1 , 6.08]),
  'Protected Mean Accuracies:': array([122.43, 122.53]),
  '% Change Mean accuracy:': array([-2298.31, -1914.07]),
  '% Change Median accuracy:': array([-3447.17, -2775.29]),
  '% Forecasted Points adjusted downward:'

***
***

## 'Medium' Model (window length = 20)

In [7]:
# window length handed to `full_coding_analysis` for this section
window_length = 20

# fresh LightGBM regressor with its default settings
forecaster = lgb.LGBMRegressor()

In [8]:
# one independent, empty container per output of `full_coding_analysis`;
# each is filled with one entry per (horizon, epsilon) configuration
# NOTE(review): `tests` is rebound to a fresh dict here, so anything stored
# under that name by an earlier cell is no longer reachable through it.
results_dict_20, fcasts_20, fcasts_protected_20, tests = {}, {}, {}, {}

# epsilon values and forecast horizons to sweep over
epsilons = [1, 10, 20]
horizons = [1, 20]

In [9]:
# run the full analysis once for every (epsilon, horizon) pair and file the
# four returned objects under a shared, human-readable label
for e in epsilons:
    for h in horizons:
        idx = f"h={h}, epsilon = {e}"
        (
            results_dict_20[idx],
            tests[idx],
            fcasts_20[idx],
            fcasts_protected_20[idx],
        ) = full_coding_analysis(
            time_series_data=Y,
            forecasting_model=forecaster,
            forecast_horizon=h,
            epsilon=e,
            window_length=window_length,
        )

In [10]:
# display the accuracy summaries collected for each (horizon, epsilon) label
results_dict_20

{'h=1, epsilon = 1': {'Mean Accuracies': array([3.05, 3.05]),
  'Protected Mean Accuracies:': array([126.98, 126.98]),
  '% Change Mean accuracy:': array([-4061.47, -4061.47]),
  '% Change Median accuracy:': array([-9342.92, -9342.92]),
  '% Forecasted Points adjusted downward:': 3.05,
  '% Forecasted Points adjusted upward:': 96.95,
  '% Series with improved accuracy:': array([0.61, 0.61]),
  '% Series with reduced accuracy:': array([99.39, 99.39]),
  'Original Mean Absolute Error Upward Adjusted:': 3.1300000000000003,
  'Original Mean Absolute Error Downward Adjusted:': 0.67,
  'Protected Mean Absolute Error Upward Adjusted:': 130.85999999999999,
  'Protected Mean Absolute Error Downward Adjusted:': 3.53},
 'h=20, epsilon = 1': {'Mean Accuracies': array([5.49, 6.49]),
  'Protected Mean Accuracies:': array([125.63, 125.73]),
  '% Change Mean accuracy:': array([-2190.38, -1837.56]),
  '% Change Median accuracy:': array([-3240.04, -2732.52]),
  '% Forecasted Points adjusted downward:': 

***
***

## Complex Model (window length = 40)

In [11]:
# window length handed to `full_coding_analysis` for this section
window_length = 40

# fresh LightGBM regressor with its default settings
forecaster = lgb.LGBMRegressor()

In [12]:
# one independent, empty container per output of `full_coding_analysis`;
# each is filled with one entry per (horizon, epsilon) configuration
# NOTE(review): `tests` is rebound to a fresh dict here, so anything stored
# under that name by an earlier cell is no longer reachable through it.
results_dict_40, fcasts_40, fcasts_protected_40, tests = {}, {}, {}, {}

# epsilon values and forecast horizons to sweep over
epsilons = [1, 10, 20]
horizons = [1, 20]

In [13]:
# run the full analysis once for every (epsilon, horizon) pair and file the
# four returned objects under a shared, human-readable label
for e in epsilons:
    for h in horizons:
        idx = f"h={h}, epsilon = {e}"
        (
            results_dict_40[idx],
            tests[idx],
            fcasts_40[idx],
            fcasts_protected_40[idx],
        ) = full_coding_analysis(
            time_series_data=Y,
            forecasting_model=forecaster,
            forecast_horizon=h,
            epsilon=e,
            window_length=window_length,
        )

In [14]:
# display the accuracy summaries collected for each (horizon, epsilon) label
results_dict_40

{'h=1, epsilon = 1': {'Mean Accuracies': array([2.79, 2.79]),
  'Protected Mean Accuracies:': array([128.53, 128.53]),
  '% Change Mean accuracy:': array([-4510.07, -4510.07]),
  '% Change Median accuracy:': array([-10615.27, -10615.27]),
  '% Forecasted Points adjusted downward:': 3.66,
  '% Forecasted Points adjusted upward:': 96.34,
  '% Series with improved accuracy:': array([1.22, 1.22]),
  '% Series with reduced accuracy:': array([98.78, 98.78]),
  'Original Mean Absolute Error Upward Adjusted:': 2.87,
  'Original Mean Absolute Error Downward Adjusted:': 0.74,
  'Protected Mean Absolute Error Upward Adjusted:': 133.16,
  'Protected Mean Absolute Error Downward Adjusted:': 6.68},
 'h=20, epsilon = 1': {'Mean Accuracies': array([7.01, 8.01]),
  'Protected Mean Accuracies:': array([127.57, 127.69]),
  '% Change Mean accuracy:': array([-1720.83, -1493.76]),
  '% Change Median accuracy:': array([-3225.62, -2610.6 ]),
  '% Forecasted Points adjusted downward:': 3.1399999999999997,
  '%