In [1]:
import pandas as pd
import numpy as np

from data_processing_functions import *

from sktime.performance_metrics.forecasting import mean_absolute_error

# nice time series plots
from sktime.utils.plotting import plot_series

# from sktime.forecasting.var import VAR

from statsmodels.tsa.vector_ar.var_model import VAR

In [2]:
# Training panels: the original series and their protected counterparts.
# NOTE(review): the two files are read with different header handling
# (header=None + skiprows=1 vs. the default header row) — confirm this is intended.
full_data = pd.read_csv("../../Data/Train/Clean/m3_monthly_micro_h1.csv", header=None, skiprows=1)
protected_full = pd.read_csv("../../Data/Train/Clean/protected_m3_monthly_micro_h1_AN_1.csv")

# Test values and the stored VAR forecasts for both versions of the data.
test = pd.read_csv("../../Outputs/Forecasts/Test_h1.csv")
orig_fcasts = pd.read_csv("../../Outputs/Forecasts/VAR_h1_original.csv")
protected_fcasts = pd.read_csv("../../Outputs/Forecasts/VAR_h1_AN_1.csv")

# Turn each panel into a list holding one Series per row; dropping the NaNs
# lets the series have different lengths.
full_data = [row.dropna() for _, row in full_data.iterrows()]
protected_full = [row.dropna() for _, row in protected_full.iterrows()]

In [3]:
# MAE of the forecasts built from the original data; "raw_values" keeps one
# error per output instead of averaging them.
orig_maes = mean_absolute_error(y_true=test, y_pred=orig_fcasts, multioutput="raw_values")

In [4]:
# MAE of the forecasts built from the protected data; "raw_values" keeps one
# error per output instead of averaging them.
protected_maes = mean_absolute_error(y_true=test, y_pred=protected_fcasts, multioutput="raw_values")

In [5]:
# Position of the largest entry of protected_maes: argsort orders ascending,
# so the last element is the index of the maximum error.
np.argsort(protected_maes)[-1]

288

In [6]:
# Spread (max minus min) of the protected-data MAEs.
np.ptp(protected_maes)

105972.76541849073

***

In [None]:
# Pre-process the series at positions 3-5; only the first of the four
# returned values is kept.
temp, _, _, _ = pre_process(
    ts_data=full_data[3:6],
    target_forecast_period=3,
    log=True,
    make_stationary=True,
)

In [None]:
# Dummy three-step forecast, indexed by the three labels that follow the
# last index label of full_data[3].
fcast1 = pd.Series(
    [0.1, 0.2, 0.1],
    index=np.arange(full_data[3].index[-1] + 1, full_data[3].index[-1] + 4),
)

In [None]:
# Dummy three-step forecast, indexed by the three labels that follow the
# last index label of full_data[4].
fcast2 = pd.Series(
    [-0.1, -0.2, 0.2],
    index=np.arange(full_data[4].index[-1] + 1, full_data[4].index[-1] + 4),
)

In [None]:
# Collect the two dummy forecasts in a list.
# NOTE(review): the post_process call in the following cell passes `temp`,
# not `fcasts`, and `fcasts` holds two series while full_data[3:6] holds three —
# confirm whether this list is still needed.
fcasts = [fcast1, fcast2]

In [None]:
# Reverse the pre-processing for the same three series (positions 3-5).
# NOTE(review): `forecasts=temp` feeds the pre-processed series themselves back in
# with is_fitted=True, which looks like a round-trip check of
# pre_process/post_process — confirm that this, rather than `fcasts`, is intended.
temp2 = post_process(full_ts_data=full_data[3:6],
                     forecasts=temp,
                     target_forecast_period=3,
                     log=True,
                     make_stationary=True,
                     is_fitted=True)

In [None]:
temp2

***

In [44]:
# Pre-process the protected versions of the series at positions 283-288,
# keeping the first and the fourth returned values.
temp, _, _, full_lags = pre_process(
    protected_full[283:289],
    target_forecast_period=1,
    log=True,
    make_stationary=True,
    sp=12,
)

In [8]:
# Disabled alternative: the same pre_process call on full_data instead of protected_full.
# Uncomment to run the cells below on the original series.
# temp, _, _, full_lags = pre_process(full_data[283:289], target_forecast_period=1, log=True, make_stationary=True, sp=12)

In [None]:
# Plot pre-processed series 0 against a 0-based position index.
# Work on a copy: `ps = temp[0]` would alias the list element, so assigning
# `ps.index` would silently re-index temp[0] and change every later cell that
# uses `temp` (e.g. the index of the concatenated `group` frame).
ps = temp[0].copy()
ps.index = np.arange(0, len(ps))
plot_series(ps)

In [None]:
# Plot pre-processed series 1 against a 0-based position index.
# Work on a copy: `ps = temp[1]` would alias the list element, so assigning
# `ps.index` would silently re-index temp[1] and change every later cell that
# uses `temp` (e.g. the index of the concatenated `group` frame).
ps = temp[1].copy()
ps.index = np.arange(0, len(ps))
plot_series(ps)

In [None]:
# Plot pre-processed series 2 against a 0-based position index.
# Work on a copy: `ps = temp[2]` would alias the list element, so assigning
# `ps.index` would silently re-index temp[2] and change every later cell that
# uses `temp` (e.g. the index of the concatenated `group` frame).
ps = temp[2].copy()
ps.index = np.arange(0, len(ps))
plot_series(ps)

In [None]:
# Plot pre-processed series 3 against a 0-based position index.
# Work on a copy: `ps = temp[3]` would alias the list element, so assigning
# `ps.index` would silently re-index temp[3] and change every later cell that
# uses `temp` (e.g. the index of the concatenated `group` frame).
ps = temp[3].copy()
ps.index = np.arange(0, len(ps))
plot_series(ps)

In [None]:
# Plot pre-processed series 4 against a 0-based position index.
# Work on a copy: `ps = temp[4]` would alias the list element, so assigning
# `ps.index` would silently re-index temp[4] and change every later cell that
# uses `temp` (e.g. the index of the concatenated `group` frame).
ps = temp[4].copy()
ps.index = np.arange(0, len(ps))
plot_series(ps)

In [None]:
# Plot pre-processed series 5 against a 0-based position index.
# Work on a copy: `ps = temp[5]` would alias the list element, so assigning
# `ps.index` would silently re-index temp[5] and change every later cell that
# uses `temp` (e.g. the index of the concatenated `group` frame).
ps = temp[5].copy()
ps.index = np.arange(0, len(ps))
plot_series(ps)

In [45]:
# Place the K pre-processed series side by side as the columns of a T x K frame;
# ignore_index relabels the columns 0..K-1.
group = pd.concat(temp, axis="columns", ignore_index=True)

In [46]:
full_data[283].shape

(124,)

In [47]:
temp[0].shape

(123,)

In [48]:
group.shape

(123, 6)

In [49]:
group.index

Int64Index([  1,   2,   3,   4,   5,   6,   7,   8,   9,  10,
            ...
            114, 115, 116, 117, 118, 119, 120, 121, 122, 123],
           dtype='int64', length=123)

In [50]:
# Fit a VAR on the grouped series; the lag order is selected by BIC and a
# constant term is included.
forecaster = VAR(group)
results = forecaster.fit(
    ic='bic',
    trend='c',
)

  self._init_dates(dates, freq)


In [51]:
results.summary()

  Summary of Regression Results   
Model:                         VAR
Method:                        OLS
Date:           Thu, 09, Feb, 2023
Time:                     16:14:21
--------------------------------------------------------------------
No. of Equations:         6.00000    BIC:                   0.496157
Nobs:                     122.000    HQIC:                -0.0770793
Log likelihood:          -968.044    FPE:                   0.626000
AIC:                    -0.469162    Det(Omega_mle):        0.447914
--------------------------------------------------------------------
Results for equation 0
           coefficient       std. error           t-stat            prob
------------------------------------------------------------------------
const         0.053730         0.099182            0.542           0.588
L1.0         -0.418612         0.071066           -5.890           0.000
L1.1         -0.068881         0.182168           -0.378           0.705
L1.2          0.216054 

In [52]:
# Number of lags in the fitted VAR model. Used below to pad the fitted values
# with the first `lag_order` observations and to slice the forecast input.
lag_order = results.k_ar

In [53]:
# In-sample fitted values, transposed so that each row is one series and each
# column one time step (the following cells rely on that orientation).
fvs = results.fittedvalues.T

In [54]:
# The model yields no fitted values for the first `lag_order` periods, so
# prepend the observed values for those periods; the result has as many
# columns as `group` has rows. Columns are relabelled 0..T-1.
fvs = pd.concat([group.head(lag_order).T, fvs], axis="columns", ignore_index=True)

In [55]:
# Split the frame into a list with one Series per row (i.e. per series).
# Rebinds `fvs`, so this cell can only be run once per fit.
fvs = [fvs.iloc[row] for row in range(len(fvs))]

In [56]:
# Split the T x K frame into a list with one Series per column (i.e. per series).
# Rebinds `group` from a DataFrame to a list, so this cell can only be run once
# and later cells see the list, not the frame.
group = [group.iloc[:, col] for col in range(group.shape[1])]

***

In [57]:
# Fitted values (temp1) and pre-processed observations (temp2) for the first
# series of the group, for a side-by-side comparison.
# Note: this rebinds `temp2`, which an earlier cell used for a post_process result.
temp1 = fvs[0]
temp2 = group[0]

In [64]:
temp1

0     -8.050717
1      3.886585
2     -3.162461
3     -0.352645
4      0.277939
         ...   
118    0.402346
119   -0.002654
120    0.172391
121    0.193549
122    0.222002
Name: 0, Length: 123, dtype: float64

In [65]:
temp2

1     -8.050717
2      7.727143
3      0.652138
4     -0.226004
5     -0.043878
         ...   
119   -0.185798
120   -0.042757
121   -0.173213
122    0.147827
123    0.233686
Name: 0, Length: 123, dtype: float64

In [66]:
# Undo the differencing of the fitted values from an arbitrary starting level of 100.
pd.Series(np.cumsum(np.concatenate(([100], temp1))))

0      100.000000
1       91.949283
2       95.835868
3       92.673407
4       92.320762
          ...    
119     98.834032
120     98.831379
121     99.003770
122     99.197319
123     99.419321
Length: 124, dtype: float64

In [67]:
# Undo the differencing of the observed values from the same starting level of 100.
pd.Series(np.cumsum(np.concatenate(([100], temp2))))

0      100.000000
1       91.949283
2       99.676426
3      100.328564
4      100.102560
          ...    
119     99.253779
120     99.211022
121     99.037809
122     99.185635
123     99.419321
Length: 124, dtype: float64

Steps for forecasting a differenced series and mapping the forecasts back to the original scale:

- difference the original series
- fit forecasting model, ignoring NaN values
- generate forecast
- add NaN value back in, append forecast to end, and reverse the differencing (make sure that we accommodate forecasts of any length)
- remove the forecasts from the end of the series and save separately

In [None]:
# Forecast horizon for the walkthrough: number of periods in the dummy forecast below.
h = 2

In [None]:
# First-difference the original series; the first element becomes NaN.
diffed_full = full_data[0].diff(periods=1)

In [None]:
diffed_full

In [None]:
# Keep only the non-missing differences (drops the NaN produced by diff()).
diffed_full = diffed_full[diffed_full.notna()]

In [None]:
diffed_full

In [None]:
# Index label of the first forecast period: one past the last observed index label.
# (Despite the name, this is not the last observed period itself.)
last_period = diffed_full.index[-1] + 1

In [None]:
# Placeholder forecast values standing in for a model's output (no model is
# fitted here), indexed by the h periods starting at last_period.
fcast = pd.Series([10, -10], index=np.arange(last_period, last_period + h))

In [None]:
fcast

In [None]:
# Anchor level for reversing the differencing of the forecasts: the LAST observed
# value of the series (not the initial one, despite the variable name), because
# the forecast differences are accumulated starting from the end of the series.
# first_val = full_data[0].iloc[0]
first_val = full_data[0].iloc[-1]

In [None]:
first_val

In [None]:
# Prepend the anchor level to the forecast differences and accumulate to get
# levels; element 0 is the anchor itself, the last h elements are the forecasts.
reverse_diffed = np.cumsum(np.concatenate(([first_val], fcast)))  # .astype(int)

In [None]:
reverse_diffed[-h:]

In [None]:
full_data[0]

***

In [None]:
group

In [None]:
# Map the padded fitted values (`fvs`, one Series per series in the group) back
# through post_process, using the protected series 283-288 as the reference data.
# is_fitted=True is passed because these are in-sample fitted values rather than
# out-of-sample forecasts — presumably that is what the flag signals; confirm
# against post_process.
temp_post = post_process(full_ts_data=protected_full[283:289],
             forecasts=fvs,
             target_forecast_period=1,
             make_stationary=True,
             log=True,
             is_fitted=True)

In [None]:
temp_post.shape

In [None]:
protected_full[283].shape

In [None]:
temp_post

In [None]:
pd.concat(protected_full[283:289], axis=1)

In [None]:
# Plot the sixth column of the post-processed result against a 0-based position index.
ps = temp_post.iloc[:, 5].set_axis(np.arange(0, len(temp_post)))
plot_series(ps)

In [None]:
results.coefs

In [None]:
# Coefficients on the deterministic terms of the fitted VAR.
# NOTE(review): with trend='c' this is presumably just the intercept of each
# equation — confirm against the statsmodels VARResults documentation.
intercepts = results.coefs_exog

In [None]:
lag_order

In [None]:
# One-step-ahead forecast from the last `lag_order` observations.
# `group` was rebound to a list of K per-series Series in an earlier cell, so
# `group[-lag_order:]` would pick the last *series* and give an array of shape
# (lag_order, T). forecast() needs the last time steps, shape (lag_order, K),
# so slice the (T, K) array the model was fitted on instead.
predictions = results.forecast(results.endog[-lag_order:], steps=1)

In [None]:
predictions.T

Results on the original (unprotected) versions of the same series (positions 283–288): repeat the pre-processing, VAR fit and one-step forecast.

In [None]:
# Pre-process the original versions of the series at positions 283-288,
# keeping the first and the fourth returned values.
temp, _, _, full_lags = pre_process(
    full_data[283:289],
    target_forecast_period=1,
    log=True,
    make_stationary=True,
    sp=12,
)

In [None]:
# Place the K pre-processed series side by side as the columns of a T x K frame;
# ignore_index relabels the columns 0..K-1.
group = pd.concat(temp, axis="columns", ignore_index=True)

In [None]:
group

In [None]:
# Fit a VAR on the grouped original series; the lag order is selected by BIC
# and a constant term is included.
forecaster = VAR(group)
results = forecaster.fit(
    ic='bic',
    trend='c',
)

In [None]:
results.summary()

In [None]:
results.coefs

In [None]:
# Coefficients on the deterministic terms of the fitted VAR.
# NOTE(review): with trend='c' this is presumably just the intercept of each
# equation — confirm against the statsmodels VARResults documentation.
intercepts = results.coefs_exog

In [None]:
# Number of lags in the VAR model fitted on the original series; used below to
# select the last `lag_order` rows of `group` as the forecast input.
lag_order = results.k_ar

In [None]:
lag_order

In [None]:
# One-step-ahead forecast from the last `lag_order` rows (time steps) of the
# T x K `group` frame.
predictions = results.forecast(group.iloc[-lag_order:].to_numpy(), steps=1)

In [None]:
predictions.T