## Updated Code for Implementing VAR Model

In [1]:
# load modules
import numpy as np
import pandas as pd
import pmdarima as pm
import statsmodels.api as sm
from sktime.transformations.series.difference import Differencer
from sktime.performance_metrics.forecasting import mean_absolute_error, MeanAbsoluteError
from sktime.forecasting.var import VAR
from data_protection_functions import *
from data_processing_functions import *
from forecasting_functions import *
# nice time series plots
from sktime.utils.plotting import plot_series
from sktime.forecasting.model_selection import ForecastingGridSearchCV, ExpandingWindowSplitter, SlidingWindowSplitter
from sktime.forecasting.compose import TransformedTargetForecaster

In [2]:
# import weekly finance time series
# ignore header and skip the first row to use integers as column names
full_data = pd.read_csv("../../Data/Train/Clean/full_m3_monthly_micro_clean.csv", header=None, skiprows=1)

In [3]:
# convert to a list of series, potentially with different lengths
full_data = [x.dropna() for _, x in full_data.iterrows()]

***

In [4]:
# forecast horizon
h = 1

In [5]:
Y = [x.iloc[:-h] for x in full_data]
Test = [x.iloc[-h:] for x in full_data]

## Step 2: Apply Data Protection to Generate Protected Series

In [6]:
temp = difference_to_stationarity(Y)

In [7]:
temp1 = reverse_difference_to_stationarity(temp, Y)

In [8]:
temp1

[1     2160.0
 2     4200.0
 3     3360.0
 4     2400.0
 5     3600.0
        ...  
 61    1680.0
 62    3720.0
 63    2160.0
 64     480.0
 65    2040.0
 Name: 0, Length: 65, dtype: float64,
 1      120.0
 2     1080.0
 3      840.0
 4     1440.0
 5      480.0
        ...  
 61     840.0
 62     600.0
 63    1320.0
 64    1320.0
 65    2280.0
 Name: 1, Length: 65, dtype: float64,
 1     4860.0
 2     1200.0
 3     3150.0
 4     2130.0
 5     1800.0
        ...  
 61    5190.0
 62    5910.0
 63    4800.0
 64    5640.0
 65    5790.0
 Name: 2, Length: 65, dtype: float64,
 1     2040.0
 2      800.0
 3     1000.0
 4      520.0
 5      500.0
        ...  
 61    6600.0
 62    8100.0
 63    7760.0
 64    6940.0
 65    8020.0
 Name: 3, Length: 65, dtype: float64,
 1      4450.0
 2      3050.0
 3      3050.0
 4      2250.0
 5      2200.0
        ...   
 61    10000.0
 62    10950.0
 63    10700.0
 64     8800.0
 65     7550.0
 Name: 4, Length: 65, dtype: float64,
 1     1350.0
 2     1500.0
 

In [9]:
Y

[0     2640.0
 1     2160.0
 2     4200.0
 3     3360.0
 4     2400.0
        ...  
 61    1680.0
 62    3720.0
 63    2160.0
 64     480.0
 65    2040.0
 Name: 0, Length: 66, dtype: float64,
 0     1920.0
 1      120.0
 2     1080.0
 3      840.0
 4     1440.0
        ...  
 61     840.0
 62     600.0
 63    1320.0
 64    1320.0
 65    2280.0
 Name: 1, Length: 66, dtype: float64,
 0      720.0
 1     4860.0
 2     1200.0
 3     3150.0
 4     2130.0
        ...  
 61    5190.0
 62    5910.0
 63    4800.0
 64    5640.0
 65    5790.0
 Name: 2, Length: 66, dtype: float64,
 0      940.0
 1     2040.0
 2      800.0
 3     1000.0
 4      520.0
        ...  
 61    6600.0
 62    8100.0
 63    7760.0
 64    6940.0
 65    8020.0
 Name: 3, Length: 66, dtype: float64,
 0      1550.0
 1      4450.0
 2      3050.0
 3      3050.0
 4      2250.0
        ...   
 61    10000.0
 62    10950.0
 63    10700.0
 64     8800.0
 65     7550.0
 Name: 4, Length: 66, dtype: float64,
 0     2850.0
 1     1350.0
 

In [6]:
Y_protected = apply_data_protection(Y, epsilon=1)

***

## Step 3: Pre-process the data.

In [7]:
Y_processed, Y_last_window, Y_last_window_trend, _, full_lags  = pre_process(ts_data=Y, 
                                                                  target_forecast_period=h,
                                                                  log=True, 
                                                                  make_stationary=True, 
                                                                  sp=12)

Y_protected_processed, Y_protected_last_window, Y_protected_last_window_trend, _, full_lags_protected = pre_process(ts_data=Y_protected, 
                                                                                               target_forecast_period=h,
                                                                                               log=True,  
                                                                                               make_stationary=True, 
                                                                                               sp=12)

***

In [8]:
# forecasts_original = post_process(full_ts_data=Y, 
#                                   forecasts=Y_processed, 
#                                   target_forecast_period=h,
#                                   full_lags=full_lags,
#                                   log=True,
#                                   make_stationary=True,
#                                   sp=12)

***

***

## Step 4: Train Models and Generate Forecasts

In [9]:
# param_grid = {"maxlags": [10, 15, 20, 25, 30, 35, 40], "trend": ["c", "ct", "ctt", "n"]}
# param_grid = {"maxlags": [10, 25, 40], "trend": ["c", "n"]}

In [10]:
forecasts_original = train_and_forecast(ts_data=Y_processed,
                                        horizon_length=h,
                                        forecasting_model="VAR")

forecasts_protected = train_and_forecast(ts_data=Y_protected_processed,
                                         horizon_length=h,
                                         forecasting_model="VAR")

***

## Post Process the Forecasts

In [11]:
forecasts_original = post_process(full_ts_data=Y, 
                                  forecasts=forecasts_original, 
                                  target_forecast_period=h,
                                  log=True,
                                  make_stationary=True,
                                  sp=12,
                                  full_lags=full_lags)

forecasts_protected = post_process(full_ts_data=Y_protected, 
                                   forecasts=forecasts_protected, 
                                   target_forecast_period=h,
                                   log=True,
                                   make_stationary=False,
                                   sp=12,
                                   full_lags=full_lags_protected)

In [12]:
forecasts_original

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,464,465,466,467,468,469,470,471,472,473
0,798.107469,3154.080207,4369.97244,6368.258517,8996.215254,328.838628,2804.273732,2649.00548,2617.154035,4247.259117,...,2403.334645,3822.946249,5070.454238,7166.416515,4839.675524,8002.019398,5174.22819,4054.280824,8233.050653,2685.604593


In [13]:
forecasts_protected

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,464,465,466,467,468,469,470,471,472,473
0,224.496251,69.291683,669.906393,144.206388,495.973634,491.635122,83.945809,2428.250298,52.314552,403.605092,...,301.449377,11232.002376,565.822681,9973.00331,3325.556729,3151.765838,5245.194967,1853.321809,8138.153155,296.740819


In [14]:
Test = pd.DataFrame([x.reset_index(drop=True) for x in Test]).T

In [15]:
mean_absolute_error(Test, forecasts_original)

753.1416838746653

In [16]:
mean_absolute_error(Test, forecasts_protected)

2839.318499290283

***