## Updated Code for Implementing VAR Model

In [86]:
# load modules
import numpy as np
import pandas as pd
import pmdarima as pm
import statsmodels.api as sm
from sktime.transformations.series.difference import Differencer
from sktime.performance_metrics.forecasting import mean_absolute_error, MeanAbsoluteError
from sktime.forecasting.var import VAR
from data_protection_functions import *
from data_processing_functions import *
from forecasting_functions import *
# nice time series plots
from sktime.utils.plotting import plot_series
from sktime.forecasting.model_selection import ForecastingGridSearchCV, ExpandingWindowSplitter, SlidingWindowSplitter
from sktime.forecasting.compose import TransformedTargetForecaster

In [87]:
# import the monthly micro time series (per the file name, the M3 monthly micro set)
# header=None plus skiprows=1 drops the file's first row so that pandas assigns
# integer column names instead of reading names from the file
full_data = pd.read_csv("../../Data/Train/Clean/full_m3_monthly_micro_clean.csv", header=None, skiprows=1)

In [88]:
# convert to a list of series, potentially with different lengths: every row of
# the frame becomes one Series and dropna() removes that row's missing entries.
# `full_data` is rebound from a DataFrame to a list here, so the guard keeps the
# cell safe to re-run (a list has no .iterrows() and would raise AttributeError).
if isinstance(full_data, pd.DataFrame):
    full_data = [row.dropna() for _, row in full_data.iterrows()]

***

In [4]:
# forecast horizon: the number of trailing observations of each series that are
# held out for evaluation (and therefore the number of steps to forecast)
h = 1

In [5]:
# training data: every series with its final h observations removed
Y = [series.iloc[:-h] for series in full_data]
# held-out data: the final h observations of every series
Test = [series.iloc[-h:] for series in full_data]

## Step 2: Apply Data Protection to Generate Protected Series

In [6]:
# label identifying the protection scheme; it is passed on to the forecasting helpers
protection_method = "Top_10"
# protect the training series using "Top" coding with a coding percentage of 0.10
Y_protected = apply_data_protection(
    Y,
    coding_type="Top",
    coding_percentage=0.10,
)

## Step 3: Pre-process the data.

In [7]:
# Pre-process the original and the protected series with identical options
# (log=True, make_stationary=True, sp=12). Each call returns three values,
# unpacked into the names on the left-hand side.
Y_processed, Y_last_window, Y_last_window_trend = pre_process(
    ts_data=Y,
    log=True,
    make_stationary=True,
    sp=12,
)
Y_protected_processed, Y_protected_last_window, Y_protected_last_window_trend = pre_process(
    ts_data=Y_protected,
    log=True,
    make_stationary=True,
    sp=12,
)

## Step 4: Train Models and Generate Forecasts

In [8]:
# Hyperparameter grid handed to the VAR training step.
# A wider grid is kept here for reference:
#   maxlags in [10, 15, 20, 25, 30, 35, 40], trend in ["c", "ct", "ctt", "n"]
param_grid = dict(maxlags=[10, 25, 40], trend=["c", "n"])

In [9]:
# forecasts from VAR models fit on the pre-processed original series
forecasts_original = train_and_forecast(
    ts_data=Y_processed,
    horizon_length=h,
    forecasting_model="VAR",
    param_grid=param_grid,
    protection_method=protection_method,
    last_window=Y_last_window,
)

# forecasts from VAR models fit on the pre-processed protected series,
# using the same horizon, grid and protection label
forecasts_protected = train_and_forecast(
    ts_data=Y_protected_processed,
    horizon_length=h,
    forecasting_model="VAR",
    param_grid=param_grid,
    protection_method=protection_method,
    last_window=Y_protected_last_window,
)

{'maxlags': 40, 'trend': 'n'}
{'maxlags': 25, 'trend': 'c'}
{'maxlags': 25, 'trend': 'c'}
{'maxlags': 40, 'trend': 'n'}
{'maxlags': 40, 'trend': 'c'}
{'maxlags': 40, 'trend': 'n'}
{'maxlags': 25, 'trend': 'c'}
{'maxlags': 25, 'trend': 'c'}
{'maxlags': 40, 'trend': 'n'}
{'maxlags': 40, 'trend': 'c'}


## Step 5: Post-Process the Forecasts

In [10]:
# Post-process both forecast sets with the same log / make_stationary / sp
# options that were given to pre_process.
# NOTE: each name is overwritten with its post-processed result, so re-running
# this cell alone would post-process the forecasts a second time.
forecasts_original = post_process(
    full_ts_data=Y,
    forecasts=forecasts_original,
    last_window_with_trend=Y_last_window_trend,
    log=True,
    make_stationary=True,
    sp=12,
)

forecasts_protected = post_process(
    full_ts_data=Y_protected,
    forecasts=forecasts_protected,
    last_window_with_trend=Y_protected_last_window_trend,
    log=True,
    make_stationary=True,
    sp=12,
)

In [11]:
# display the post-processed forecasts for the original series
forecasts_original

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,464,465,466,467,468,469,470,471,472,473
0,1412.002344,764.878538,4007.594888,5551.107254,5225.792988,2489.461665,4152.94807,1421.349483,4962.965708,4660.0698,...,2889.27726,4620.0,7000.0,8640.0,5196.370168,7420.0,5170.0,4820.0,8286.48946,7770.0


In [12]:
# display the post-processed forecasts for the protected series
forecasts_protected

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,464,465,466,467,468,469,470,471,472,473
0,1446.203762,747.665384,4104.666559,5409.085638,5352.371765,2385.575299,3721.847916,1259.46482,4871.802566,4579.534704,...,2695.120872,4620.0,7000.0,8296.0,4974.978245,7420.0,5170.0,4820.0,8245.323193,4135.0


In [13]:
# Reshape the held-out observations into a frame with one column per series
# and one row per forecast step; resetting each index to 0..h-1 lines the rows up.
# `Test` is rebound from a list to a DataFrame here, so the guard keeps the cell
# safe to re-run: iterating a DataFrame yields column labels, and calling
# .reset_index on those would raise AttributeError.
if not isinstance(Test, pd.DataFrame):
    Test = pd.DataFrame([x.reset_index(drop=True) for x in Test]).T

In [14]:
# accuracy metrics for the original vs. protected forecasts on the held-out data
forecast_results(
    test_data=Test,
    original_forecasts=forecasts_original,
    protected_forecasts=forecasts_protected,
)

{'Global MAPE': 0.4478,
 'Global Protected MAPE': 0.4377,
 'Global MAE': 1000.0351,
 'Global Protected MAE': 952.1946,
 'Global MdAE': 1000.0351,
 'Global Protected MdAE': 952.1946,
 'Original MAE Up': 809.9396,
 'Protected MAE Up': 798.518,
 'Original MAE Down': 1176.5175,
 'Protected MAE Down': 990.6627,
 'Original MdAE Up': 528.1477,
 'Protected MdAE Up': 435.2355,
 'Original MdAE Down': 730.0,
 'Protected MdAE Down': 690.5352}

***

In [20]:
# hyperparameter grid for the single-call analysis below (maxlags and trend options for VAR)
param_grid = dict(maxlags=[10, 25, 40], trend=["c", "n"])

In [21]:
# Single-call version of the analysis, using the same pre-processing options
# (make_stationary, sp, log), model and grid as the step-by-step cells.
# The horizon is passed as `h` rather than a literal so it always matches the
# horizon that was used to split the data into Y and Test.
full_forecast_analysis(Y=Y,
                       Y_protected=Y_protected,
                       Test=Test,
                       h=h,
                       protection_method=protection_method,
                       forecasting_model="VAR",
                       make_stationary=True,
                       sp=12,
                       log=True,
                       param_grid=param_grid)

{'maxlags': 40, 'trend': 'n'}
{'maxlags': 25, 'trend': 'c'}
{'maxlags': 25, 'trend': 'c'}
{'maxlags': 40, 'trend': 'n'}
{'maxlags': 40, 'trend': 'c'}
{'maxlags': 40, 'trend': 'n'}
{'maxlags': 25, 'trend': 'c'}
{'maxlags': 25, 'trend': 'c'}
{'maxlags': 40, 'trend': 'n'}
{'maxlags': 40, 'trend': 'c'}


{'Global MAPE': 0.4478,
 'Global Protected MAPE': 0.4377,
 'Global MAE': 1000.0351,
 'Global Protected MAE': 952.1946,
 'Global MdAE': 1000.0351,
 'Global Protected MdAE': 952.1946,
 'Original MAE Up': 809.9396,
 'Protected MAE Up': 798.518,
 'Original MAE Down': 1176.5175,
 'Protected MAE Down': 990.6627,
 'Original MdAE Up': 528.1477,
 'Protected MdAE Up': 435.2355,
 'Original MdAE Down': 730.0,
 'Protected MdAE Down': 690.5352}