## Code for Implementing ARIMA Models

In [1]:
# load modules
import numpy as np
import pandas as pd
from sktime.performance_metrics.forecasting import mean_absolute_error, MeanAbsoluteError
from sktime.forecasting.arima import AutoARIMA

from data_protection_functions import *
from data_processing_functions import *
from forecasting_functions import *

# nice time series plots
from sktime.utils.plotting import plot_series
from sktime.forecasting.model_selection import ForecastingGridSearchCV, ExpandingWindowSplitter, SlidingWindowSplitter
from sktime.forecasting.compose import TransformedTargetForecaster

***

### Import Data and Create Train/Test

In [2]:
# load the cleaned M3 monthly micro series (per the file name)
# header=None together with skiprows=1 drops the file's header row so the columns get integer labels
# .iloc[300:302, :] keeps only the rows at positions 300 and 301, i.e. two series
full_data = (
    pd.read_csv(
        "../../Data/Train/Clean/full_m3_monthly_micro_clean.csv",
        header=None,
        skiprows=1,
    )
    .iloc[300:302, :]
)

In [3]:
# turn every row of the frame into its own Series and drop the missing values,
# so the series in the list may end up with different lengths
# NOTE: full_data is rebound from a DataFrame to a list here; re-run the loading
# cell before re-running this one
full_data = [row.dropna() for _, row in full_data.iterrows()]

In [4]:
# forecast horizon: the last h observations of each series are held out as test
# data, and h is passed to train_and_forecast as horizon_length
h = 1

In [5]:
# training part: every series without its final h observations
Y = [series.iloc[:-h] for series in full_data]
# test part: the final h observations of every series
Test = [series.iloc[-h:] for series in full_data]

***

### Apply Data Protection to Generate Protected Dataset

In [6]:
# label for this protection setup; it is handed to train_and_forecast further down
protection_method = "Top_10"
# protected copy of the training series (coding type "Top", coding percentage 0.10)
Y_protected = apply_data_protection(
    Y,
    coding_type="Top",
    coding_percentage=0.10,
)

***

### Pre-Process the Data

In [7]:
# pre-process the original and the protected training series with identical settings
# (log=True presumably requests a log transform -- confirm in data_processing_functions)
Y_processed, Y_last_window, Y_last_window_trend = pre_process(
    ts_data=Y,
    log=True,
)

Y_protected_processed, Y_protected_last_window, Y_protected_last_window_trend = pre_process(
    ts_data=Y_protected,
    log=True,
)

***

### Train Models and Generate Forecasts

In [8]:
# fit the "ARIMA" model option to the processed original series and forecast h steps
# NOTE(review): protection_method is also passed for the original data -- confirm
# that train_and_forecast expects it in this case
forecasts_original = train_and_forecast(
    ts_data=Y_processed,
    horizon_length=h,
    forecasting_model="ARIMA",
    protection_method=protection_method,
)

# same model and horizon, this time on the processed protected series
forecasts_protected = train_and_forecast(
    ts_data=Y_protected_processed,
    horizon_length=h,
    forecasting_model="ARIMA",
    protection_method=protection_method,
)

***

### Post-Process the Forecasts

In [9]:
# post-process both sets of forecasts, passing the unprocessed training series and
# the same log flag that was given to pre_process (presumably this undoes the
# pre-processing -- confirm in data_processing_functions)
# NOTE: each name is rebound from its own previous value, so re-run the forecasting
# cell before re-running this one
forecasts_original = post_process(
    full_ts_data=Y,
    forecasts=forecasts_original,
    log=True,
)

forecasts_protected = post_process(
    full_ts_data=Y_protected,
    forecasts=forecasts_protected,
    log=True,
)

***

### Assess Forecast Accuracy

In [10]:
# show the post-processed forecasts for the original (unprotected) series
forecasts_original

Unnamed: 0,0,1
0,1501.903547,1963.150103


In [11]:
# show the post-processed forecasts for the protected series
forecasts_protected

Unnamed: 0,0,1
0,1326.605058,1966.651749


In [12]:
# Build the test frame from full_data and h rather than from the Test list, so the
# cell can be re-run safely: rebinding Test from its own value would turn it into a
# DataFrame whose iteration yields column labels, and a second run would then raise.
# The held-out slices are the same ones created in the train/test split cell.
# reset_index(drop=True) gives every slice a fresh 0-based index so series of
# different lengths line up; after .T each column holds one series.
Test = pd.DataFrame([series.iloc[-h:].reset_index(drop=True) for series in full_data]).T

In [13]:
# compare both sets of forecasts with the held-out observations; the returned
# metrics are displayed as the cell output
forecast_results(
    test_data=Test,
    original_forecasts=forecasts_original,
    protected_forecasts=forecasts_protected,
)

{'Global MAPE': 0.312,
 'Global Protected MAPE': 0.3551,
 'Global MAE': 834.9732,
 'Global Protected MAE': 920.8716,
 'Global MdAE': 834.9732,
 'Global Protected MdAE': 920.8716,
 'Original MAE Up': 1164.8499,
 'Protected MAE Up': 1161.3483,
 'Original MAE Down': 505.0965,
 'Protected MAE Down': 680.3949,
 'Original MdAE Up': 1164.8499,
 'Protected MdAE Up': 1161.3483,
 'Original MdAE Down': 505.0965,
 'Protected MdAE Down': 680.3949}

***

### All-in-One Function

In [14]:
# reload the cleaned M3 monthly micro series (per the file name) for the all-in-one run
# header=None together with skiprows=1 drops the file's header row so the columns get integer labels
# .iloc[300:302, :] keeps only the rows at positions 300 and 301, i.e. two series
full_data = (
    pd.read_csv(
        "../../Data/Train/Clean/full_m3_monthly_micro_clean.csv",
        header=None,
        skiprows=1,
    )
    .iloc[300:302, :]
)

In [15]:
# one Series per row with missing values removed; lengths may differ between series
# NOTE: full_data changes from a DataFrame to a list here, so the loading cell has
# to be run again before this cell is repeated
full_data = [row.dropna() for _, row in full_data.iterrows()]

In [16]:
# forecast horizon: number of trailing observations split off each series as test
# data in the next cell
h = 1

In [17]:
# everything except the last h observations of each series is used for training
Y = [series.iloc[:-h] for series in full_data]
# the last h observations of each series are kept back for evaluation
Test = [series.iloc[-h:] for series in full_data]

In [18]:
# label for this protection setup; it is passed on to full_forecast_analysis
protection_method = "Top_10"
# protected version of the training series (coding type "Top", coding percentage 0.10)
Y_protected = apply_data_protection(
    Y,
    coding_type="Top",
    coding_percentage=0.10,
)

In [19]:
# Run the analysis in a single call; the returned metrics are displayed as the
# cell output (presumably this wraps the pre-process / forecast / post-process /
# evaluate steps -- confirm in forecasting_functions).
# h is passed as the variable used to build Y and Test rather than a literal, so
# the forecast horizon cannot drift away from the train/test split.
full_forecast_analysis(Y=Y,
                       Y_protected=Y_protected,
                       Test=Test,
                       h=h,
                       protection_method=protection_method,
                       forecasting_model="ARIMA",
                       log=True)

{'Global MAPE': 0.312,
 'Global Protected MAPE': 0.3551,
 'Global MAE': 834.9732,
 'Global Protected MAE': 920.8716,
 'Global MdAE': 834.9732,
 'Global Protected MdAE': 920.8716,
 'Original MAE Up': 1164.8499,
 'Protected MAE Up': 1161.3483,
 'Original MAE Down': 505.0965,
 'Protected MAE Down': 680.3949,
 'Original MdAE Up': 1164.8499,
 'Protected MdAE Up': 1161.3483,
 'Original MdAE Down': 505.0965,
 'Protected MdAE Down': 680.3949}