## Code for Implementing ARIMA Models

In [1]:
# load modules
import numpy as np
import pandas as pd
from sktime.performance_metrics.forecasting import mean_absolute_error, MeanAbsoluteError
from sktime.forecasting.arima import AutoARIMA

from data_protection_functions import *
from data_processing_functions import *
from forecasting_functions import *

# nice time series plots
from sktime.utils.plotting import plot_series
from sktime.forecasting.model_selection import ForecastingGridSearchCV, ExpandingWindowSplitter, SlidingWindowSplitter
from sktime.forecasting.compose import TransformedTargetForecaster

***

### Import Data and Create Train/Test

In [12]:
# import weekly finance time series
# ignore header and skip the first row to use integers as column names
full_data = pd.read_csv("../../Data/Train/Clean/full_m3_monthly_micro_clean.csv", header=None, skiprows=1).iloc[:5,:]

In [13]:
# convert to a list of series, potentially with different lengths
full_data = [x.dropna() for _, x in full_data.iterrows()]

In [14]:
# forecast horizon
h = 1

In [15]:
Y = [x.iloc[:-h] for x in full_data]
Test = [x.iloc[-h:] for x in full_data]

***

### Apply Data Protection to Generate Protected Dataset

In [37]:
protection_method = "Top_10"
Y_protected = apply_data_protection(Y, epsilon=1)

***

### Pre-Process the Data

In [38]:
Y_processed, Y_last_window, Y_last_window_trend, _, _ = pre_process(ts_data=Y,
                                                              target_forecast_period=h,
                                                              log=True)

Y_protected_processed, Y_protected_last_window, Y_protected_last_window_trend, _, _ = pre_process(ts_data=Y_protected,
                                                                                            target_forecast_period=h,
                                                                                            log=True)

***

In [39]:
forecaster = AutoARIMA(D=0, seasonal=True, sp=12, maxiter=10, suppress_warnings=True)

In [49]:
forecaster.fit(Y_processed[2])

In [50]:
forecaster.summary()

0,1,2,3
Dep. Variable:,y,No. Observations:,66.0
Model:,"SARIMAX(0, 1, 1)",Log Likelihood,-40.004
Date:,"Tue, 28 Jun 2022",AIC,86.007
Time:,16:29:58,BIC,92.531
Sample:,0,HQIC,88.581
,- 66,,
Covariance Type:,opg,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
intercept,0.0171,0.007,2.349,0.019,0.003,0.031
ma.L1,-0.9017,0.096,-9.377,0.000,-1.090,-0.713
sigma2,0.1884,0.026,7.268,0.000,0.138,0.239

0,1,2,3
Ljung-Box (L1) (Q):,0.06,Jarque-Bera (JB):,11.16
Prob(Q):,0.8,Prob(JB):,0.0
Heteroskedasticity (H):,0.5,Skew:,-0.06
Prob(H) (two-sided):,0.11,Kurtosis:,5.03


In [51]:
forecaster.get_fitted_params()

{'intercept': 0.017105942330779247,
 'ma.L1': -0.901723547324751,
 'sigma2': 0.1883777618293871,
 'order': (0, 1, 1),
 'seasonal_order': (0, 0, 0, 12),
 'aic': 86.00748349042009,
 'aicc': 86.4009261133709,
 'bic': 92.530645300107,
 'hqic': 88.58128900540382}

In [43]:
forecaster.fit(Y_protected_processed[0])

In [44]:
forecaster.summary()

0,1,2,3
Dep. Variable:,y,No. Observations:,66.0
Model:,SARIMAX,Log Likelihood,-191.891
Date:,"Tue, 28 Jun 2022",AIC,387.783
Time:,16:12:48,BIC,392.162
Sample:,0,HQIC,389.513
,- 66,,
Covariance Type:,opg,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
intercept,5.1028,0.687,7.433,0.000,3.757,6.448
sigma2,19.6292,15.611,1.257,0.209,-10.968,50.226

0,1,2,3
Ljung-Box (L1) (Q):,0.66,Jarque-Bera (JB):,10.01
Prob(Q):,0.42,Prob(JB):,0.01
Heteroskedasticity (H):,0.64,Skew:,-0.24
Prob(H) (two-sided):,0.3,Kurtosis:,1.15


In [45]:
forecaster.get_fitted_params()

{'intercept': 5.102819124768185,
 'sigma2': 19.629162186931264,
 'order': (0, 0, 0),
 'seasonal_order': (0, 0, 0, 12),
 'aic': 387.7829639742373,
 'aicc': 387.9734401647135,
 'bic': 392.16227345829014,
 'hqic': 389.513437294509}

### Train Models and Generate Forecasts

In [8]:
forecasts_original = train_and_forecast(ts_data=Y_processed,
                                        horizon_length=h,
                                        forecasting_model="ARIMA",
                                        protection_method=protection_method)

forecasts_protected = train_and_forecast(ts_data=Y_protected_processed,
                                         horizon_length=h,
                                         forecasting_model="ARIMA",
                                         protection_method=protection_method)

***

### Post Process the Forecasts

In [9]:
forecasts_original = post_process(full_ts_data=Y, 
                                  forecasts=forecasts_original,
                                  log=True)

forecasts_protected = post_process(full_ts_data=Y_protected, 
                                   forecasts=forecasts_protected,
                                   log=True)

***

### Assess Forecast Accuracy

In [10]:
forecasts_original

Unnamed: 0,0,1
0,1501.903547,1963.150103


In [11]:
forecasts_protected

Unnamed: 0,0,1
0,1326.605058,1966.651749


In [12]:
Test = pd.DataFrame([x.reset_index(drop=True) for x in Test]).T

In [13]:
forecast_results(test_data=Test, original_forecasts=forecasts_original, protected_forecasts=forecasts_protected)

{'Global MAPE': 0.312,
 'Global Protected MAPE': 0.3551,
 'Global MAE': 834.9732,
 'Global Protected MAE': 920.8716,
 'Global MdAE': 834.9732,
 'Global Protected MdAE': 920.8716,
 'Original MAE Up': 1164.8499,
 'Protected MAE Up': 1161.3483,
 'Original MAE Down': 505.0965,
 'Protected MAE Down': 680.3949,
 'Original MdAE Up': 1164.8499,
 'Protected MdAE Up': 1161.3483,
 'Original MdAE Down': 505.0965,
 'Protected MdAE Down': 680.3949}

***

### All in One Function

In [14]:
# import weekly finance time series
# ignore header and skip the first row to use integers as column names
full_data = pd.read_csv("../../Data/Train/Clean/full_m3_monthly_micro_clean.csv", header=None, skiprows=1).iloc[300:302,:]

In [15]:
# convert to a list of series, potentially with different lengths
full_data = [x.dropna() for _, x in full_data.iterrows()]

In [16]:
# forecast horizon
h = 1

In [17]:
Y = [x.iloc[:-h] for x in full_data]
Test = [x.iloc[-h:] for x in full_data]

In [18]:
protection_method = "Top_10"
Y_protected = apply_data_protection(Y, coding_type="Top", coding_percentage=0.10)

In [19]:
full_forecast_analysis(Y=Y,
                       Y_protected=Y_protected,
                       Test=Test,
                       h=1,
                       protection_method=protection_method,
                       forecasting_model="ARIMA",
                       log=True)

{'Global MAPE': 0.312,
 'Global Protected MAPE': 0.3551,
 'Global MAE': 834.9732,
 'Global Protected MAE': 920.8716,
 'Global MdAE': 834.9732,
 'Global Protected MdAE': 920.8716,
 'Original MAE Up': 1164.8499,
 'Protected MAE Up': 1161.3483,
 'Original MAE Down': 505.0965,
 'Protected MAE Down': 680.3949,
 'Original MdAE Up': 1164.8499,
 'Protected MdAE Up': 1161.3483,
 'Original MdAE Down': 505.0965,
 'Protected MdAE Down': 680.3949}