In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

# Import Statsmodels
from statsmodels.tsa.api import VAR
from statsmodels.tsa.stattools import adfuller
from statsmodels.tools.eval_measures import rmse, aic

import sys
sys.path.append("/home/hugo/projetos-doutorado/mimo_emb_fts/src/")

from embfts.util.DataSetUtil import DataSetUtil
from embfts.util.StatisticsUtil import StatisticsUtil

In [3]:
data_set_util = DataSetUtil()
statistics = StatisticsUtil()

### Dataset

In [4]:
df = pd.read_csv('/home/hugo/projetos-doutorado/mimo_emb_fts/data/energydata_complete.csv', sep=',')
data = df.drop(labels=['date','rv1','rv2'], axis=1)
data = data_set_util.clean_dataset(data)
data = data_set_util.series_to_supervised_mimo(data, 1, 1)
data.head()

Unnamed: 0,Appliances(t-1),lights(t-1),T1(t-1),RH_1(t-1),T2(t-1),RH_2(t-1),T3(t-1),RH_3(t-1),T4(t-1),RH_4(t-1),...,T8(t),RH_8(t),T9(t),RH_9(t),T_out(t),Press_mm_hg(t),RH_out(t),Windspeed(t),Visibility(t),Tdewpoint(t)
1,60.0,30.0,19.89,47.596667,19.2,44.79,19.79,44.73,19.0,45.566667,...,18.2,48.863333,17.066667,45.56,6.483333,733.6,92.0,6.666667,59.166667,5.2
2,60.0,30.0,19.89,46.693333,19.2,44.7225,19.79,44.79,19.0,45.9925,...,18.2,48.73,17.0,45.5,6.366667,733.7,92.0,6.333333,55.333333,5.1
3,50.0,30.0,19.89,46.3,19.2,44.626667,19.79,44.933333,18.926667,45.89,...,18.1,48.59,17.0,45.4,6.25,733.8,92.0,6.0,51.5,5.0
4,50.0,40.0,19.89,46.066667,19.2,44.59,19.79,45.0,18.89,45.723333,...,18.1,48.59,17.0,45.4,6.133333,733.9,92.0,5.666667,47.666667,4.9
5,60.0,40.0,19.89,46.333333,19.2,44.53,19.79,45.0,18.89,45.53,...,18.1,48.59,17.0,45.29,6.016667,734.0,92.0,5.333333,43.833333,4.8


### How to Select the Order (P) of VAR model

In [29]:
model = VAR(df_differenced)
for i in [1,2,3,4,5,6,7,8,9]:
    result = model.fit(i)
    print('Lag Order =', i)
    print('AIC : ', result.aic)
    print('BIC : ', result.bic)
    print('FPE : ', result.fpe)
    print('HQIC: ', result.hqic, '\n')



Lag Order = 1
AIC :  -90.54855581320692
BIC :  -90.26786795817225
FPE :  4.734366614512718e-40
HQIC:  -90.45666372059412 

Lag Order = 2
AIC :  -91.42301944648885
BIC :  -90.87201519886452
FPE :  1.974637049705538e-40
HQIC:  -91.2426302462617 

Lag Order = 3
AIC :  -91.73912427829066
BIC :  -90.91777971069828
FPE :  1.439478008858676e-40
HQIC:  -91.47022970237467 

Lag Order = 4
AIC :  -91.98256555249752
BIC :  -90.89085673418452
FPE :  1.1284452268824464e-40
HQIC:  -91.62515733161979 

Lag Order = 5
AIC :  -92.17414846134011
BIC :  -90.81205145817908
FPE :  9.31704413879265e-41
HQIC:  -91.72821832502916 

Lag Order = 6
AIC :  -92.39882142362437
BIC :  -90.76631229811242
FPE :  7.442271607638727e-41
HQIC:  -91.86436110020982 

Lag Order = 7
AIC :  -93.68113685477809
BIC :  -91.7781916660362
FPE :  2.0644540234279319e-41
HQIC:  -93.05813807139049 

Lag Order = 8
AIC :  -93.72726078533651
BIC :  -91.55385558910886
FPE :  1.9714092548240662e-41
HQIC:  -93.01571526790707 

Lag Order = 9
AI

In [30]:
x = model.select_order(maxlags=12)
x.summary()

0,1,2,3,4
,AIC,BIC,FPE,HQIC
0.0,-75.48,-75.47,1.661e-33,-75.47
1.0,-90.56,-90.28,4.696e-40,-90.46
2.0,-91.43,-90.88,1.958e-40,-91.25
3.0,-91.75,-90.93,1.426e-40,-91.48
4.0,-91.99,-90.90,1.117e-40,-91.63
5.0,-92.18,-90.82,9.218e-41,-91.74
6.0,-92.41,-90.78,7.358e-41,-91.88
7.0,-93.69,-91.79*,2.041e-41,-93.07*
8.0,-93.74,-91.57,1.948e-41,-93.03


In [31]:
model_fitted = model.fit(4)
model_fitted.summary()

  Summary of Regression Results   
Model:                         VAR
Method:                        OLS
Date:           Mon, 22, Nov, 2021
Time:                     22:51:44
--------------------------------------------------------------------
No. of Equations:         26.0000    BIC:                   -90.8909
Nobs:                     19730.0    HQIC:                  -91.6252
Log likelihood:           182251.    FPE:                1.12845e-40
AIC:                     -91.9826    Det(Omega_mle):     9.82986e-41
--------------------------------------------------------------------
Results for equation Appliances
                    coefficient       std. error           t-stat            prob
---------------------------------------------------------------------------------
const                  0.096411         0.476491            0.202           0.840
L1.Appliances         -0.170025         0.007124          -23.865           0.000
L1.lights              0.412242         0.101098   

In [32]:
def adjust(val, length= 6): return str(val).ljust(length)

from statsmodels.stats.stattools import durbin_watson
out = durbin_watson(model_fitted.resid)

for col, val in zip(df.columns, out):
    print(adjust(col), ':', round(val, 2))

Appliances : 2.02
lights : 2.01
T1     : 2.04
RH_1   : 2.03
T2     : 2.01
RH_2   : 2.0
T3     : 2.02
RH_3   : 2.02
T4     : 2.01
RH_4   : 2.01
T5     : 2.01
RH_5   : 2.0
T6     : 2.0
RH_6   : 2.0
T7     : 2.01
RH_7   : 2.0
T8     : 2.01
RH_8   : 2.0
T9     : 2.01
RH_9   : 2.0
T_out  : 2.03
Press_mm_hg : 2.0
RH_out : 2.04
Windspeed : 2.06
Visibility : 2.06
Tdewpoint : 2.03


In [33]:
# Get the lag order
lag_order = model_fitted.k_ar
print(lag_order)  #> 4

# Input data for forecasting
forecast_input = df_differenced.values[-lag_order:]
forecast_input

4


array([[-1.00000000e+01,  0.00000000e+00, -6.66666667e-02,
        -6.00000000e-02, -1.36000000e-01,  5.42857143e-02,
        -6.66666667e-02,  6.00000000e-02,  0.00000000e+00,
         0.00000000e+00,  3.00000000e-02, -7.33333333e-02,
        -6.00000000e-01,  0.00000000e+00,  5.71428571e-02,
        -8.57142857e-02,  0.00000000e+00, -2.84000000e-01,
         0.00000000e+00,  0.00000000e+00, -1.33333333e-01,
         0.00000000e+00,  3.33333333e-01,  1.66666667e-01,
         8.33333333e-01, -3.33333333e-02],
       [ 1.80000000e+02,  1.00000000e+01,  0.00000000e+00,
         9.66666667e-02, -1.25428571e-01,  6.88571429e-01,
        -8.33333333e-02,  4.66666667e-01,  0.00000000e+00,
         1.40000000e-01,  0.00000000e+00, -6.00000000e-02,
        -5.70000000e-01,  0.00000000e+00, -1.71428571e-02,
        -1.42857143e-02,  0.00000000e+00, -1.30000000e-01,
         0.00000000e+00,  0.00000000e+00, -1.33333333e-01,
         0.00000000e+00,  3.33333333e-01,  1.66666667e-01,
         8.33

In [34]:
# Forecast
fc = model_fitted.forecast(y=forecast_input, steps=nobs)
df_forecast = pd.DataFrame(fc, index=df.index[-nobs:], columns=df.columns + '_2d')
df_forecast

Unnamed: 0_level_0,Appliances_2d,lights_2d,T1_2d,RH_1_2d,T2_2d,RH_2_2d,T3_2d,RH_3_2d,T4_2d,RH_4_2d,...,T8_2d,RH_8_2d,T9_2d,RH_9_2d,T_out_2d,Press_mm_hg_2d,RH_out_2d,Windspeed_2d,Visibility_2d,Tdewpoint_2d
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2016-05-27 17:30:00,-68.105759,-0.17484,-0.026187,0.064232,-0.181937,0.039327,-0.045167,0.04273,0.044545,0.034335,...,0.030103,0.112109,-0.021781,-0.02972,-0.200009,0.006787,0.534041,0.09208,0.574022,-0.063206
2016-05-27 17:40:00,-39.726854,0.091728,-0.003696,0.199808,-0.145375,0.24366,0.01118,0.159085,0.029515,-0.025895,...,0.032293,0.053112,-0.012016,-0.013117,-0.252501,0.010173,0.681725,0.027789,0.224065,-0.087988
2016-05-27 17:50:00,24.98123,-0.769005,0.001097,-0.095652,-0.137438,0.249088,0.008749,0.126954,0.009511,-0.057491,...,0.02191,0.050129,-0.016717,-0.022295,-0.309276,0.016609,0.914379,-0.024547,0.192748,-0.097421
2016-05-27 18:00:00,20.386578,-0.209759,-0.006492,-0.187673,-0.140404,0.166973,0.000306,0.032493,-0.008294,-0.07,...,0.025657,0.074622,-0.019522,-0.006974,-0.345682,0.021218,1.100091,-0.070211,0.045728,-0.09781


In [36]:
def invert_transformation(df_train, df_forecast, second_diff=False):
    """Revert back the differencing to get the forecast to original scale."""
    df_fc = df_forecast.copy()
    columns = df_train.columns
    for col in columns:        
        # Roll back 2nd Diff
        if second_diff:
            df_fc[str(col)+'_1d'] = (df_train[col].iloc[-1]-df_train[col].iloc[-2]) + df_fc[str(col)+'_2d'].cumsum()
        # Roll back 1st Diff
        df_fc[str(col)+'_forecast'] = df_train[col].iloc[-1] + df_fc[str(col)+'_1d'].cumsum()
    return df_fc
df_results = invert_transformation(df_train, df_forecast, second_diff=True)        
df_results.loc[:, ['rgnp_forecast', 'pgnp_forecast', 'ulc_forecast', 'gdfco_forecast',
                   'gdf_forecast', 'gdfim_forecast', 'gdfcf_forecast', 'gdfce_forecast']]

KeyError: "None of [Index(['rgnp_forecast', 'pgnp_forecast', 'ulc_forecast', 'gdfco_forecast',\n       'gdf_forecast', 'gdfim_forecast', 'gdfcf_forecast', 'gdfce_forecast'],\n      dtype='object')] are in the [columns]"