In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

# Import Statsmodels
from statsmodels.tsa.api import VAR
from statsmodels.tsa.stattools import adfuller
from statsmodels.tools.eval_measures import rmse, aic

import sys
sys.path.append("/home/hugo/projetos-doutorado/mimo_emb_fts/src/")

from embfts.util.DataSetUtil import DataSetUtil
from embfts.util.StatisticsUtil import StatisticsUtil

In [3]:
data_set_util = DataSetUtil()
statistics = StatisticsUtil()

### Dataset

In [4]:
df = pd.read_csv('/home/hugo/projetos-doutorado/mimo_emb_fts/data/energydata_complete.csv', sep=',')
data = df.drop(labels=['date','rv1','rv2'], axis=1)
data = data_set_util.clean_dataset(data)
data = data_set_util.series_to_supervised_mimo(data, 1, 1)
data.head()

Unnamed: 0,Appliances(t-1),lights(t-1),T1(t-1),RH_1(t-1),T2(t-1),RH_2(t-1),T3(t-1),RH_3(t-1),T4(t-1),RH_4(t-1),...,T8(t),RH_8(t),T9(t),RH_9(t),T_out(t),Press_mm_hg(t),RH_out(t),Windspeed(t),Visibility(t),Tdewpoint(t)
1,60.0,30.0,19.89,47.596667,19.2,44.79,19.79,44.73,19.0,45.566667,...,18.2,48.863333,17.066667,45.56,6.483333,733.6,92.0,6.666667,59.166667,5.2
2,60.0,30.0,19.89,46.693333,19.2,44.7225,19.79,44.79,19.0,45.9925,...,18.2,48.73,17.0,45.5,6.366667,733.7,92.0,6.333333,55.333333,5.1
3,50.0,30.0,19.89,46.3,19.2,44.626667,19.79,44.933333,18.926667,45.89,...,18.1,48.59,17.0,45.4,6.25,733.8,92.0,6.0,51.5,5.0
4,50.0,40.0,19.89,46.066667,19.2,44.59,19.79,45.0,18.89,45.723333,...,18.1,48.59,17.0,45.4,6.133333,733.9,92.0,5.666667,47.666667,4.9
5,60.0,40.0,19.89,46.333333,19.2,44.53,19.79,45.0,18.89,45.53,...,18.1,48.59,17.0,45.29,6.016667,734.0,92.0,5.333333,43.833333,4.8


In [5]:
data_train = data.loc[:,'Appliances(t-1)':'Tdewpoint(t-1)']
data_test = data.loc[:,'Appliances(t)':'Tdewpoint(t)']

train_df = data_set_util.sample_first_prows(data_train,0.75)
train_val_df = data_set_util.sample_first_prows(data_test,0.75)

test_df = data_train.iloc[max(train_df.index):]
test_val_df = data_test.iloc[max(train_df.index):]

### How to Select the Order (P) of VAR model

In [6]:
model = VAR(data_train)
for i in [1,2,3,4,5,6,7,8,9]:
    result = model.fit(i)
    print('Lag Order =', i)
    print('AIC : ', result.aic)
    print('BIC : ', result.bic)
    print('FPE : ', result.fpe)
    print('HQIC: ', result.hqic, '\n')



Lag Order = 1
AIC :  -77.69438387121748
BIC :  -77.41369601618281
FPE :  1.8103303365039633e-34
HQIC:  -77.60249177860469 

Lag Order = 2
AIC :  -91.66485297858115
BIC :  -91.11384873095682
FPE :  1.5504590944938538e-40
HQIC:  -91.484463778354 

Lag Order = 3
AIC :  -92.22505775608988
BIC :  -91.4037131884975
FPE :  8.854556357753402e-41
HQIC:  -91.95616318017389 

Lag Order = 4
AIC :  -92.42030059593421
BIC :  -91.32859177762121
FPE :  7.284077703034247e-41
HQIC:  -92.06289237505648 

Lag Order = 5
AIC :  -92.578370225721
BIC :  -91.21627322255998
FPE :  6.219090421194073e-41
HQIC:  -92.13244008941005 

Lag Order = 6
AIC :  -92.71394958893572
BIC :  -91.08144046342377
FPE :  5.430590960557664e-41
HQIC:  -92.17948926552117 

Lag Order = 7
AIC :  -92.89690336816909
BIC :  -90.9939581794272
FPE :  4.522655324952271e-41
HQIC:  -92.27390458478149 

Lag Order = 8
AIC :  -94.1927694236085
BIC :  -92.01936422738085
FPE :  1.237681667256996e-41
HQIC:  -93.48122390617905 

Lag Order = 9
AIC :  

In [7]:
x = model.select_order(maxlags=12)
x.summary()

0,1,2,3,4
,AIC,BIC,FPE,HQIC
0.0,41.57,41.58,1.130e+18,41.57
1.0,-77.71,-77.42,1.791e-34,-77.61
2.0,-91.67,-91.12,1.536e-40,-91.49
3.0,-92.23,-91.41,8.771e-41,-91.97
4.0,-92.43,-91.34,7.211e-41,-92.07
5.0,-92.59,-91.23,6.154e-41,-92.14
6.0,-92.72,-91.09,5.371e-41,-92.19
7.0,-92.91,-91.00,4.471e-41,-92.29
8.0,-94.20,-92.03*,1.224e-41,-93.49*


In [8]:
model_fitted = model.fit(4)
model_fitted.summary()

  Summary of Regression Results   
Model:                         VAR
Method:                        OLS
Date:           Wed, 05, Jan, 2022
Time:                     23:19:22
--------------------------------------------------------------------
No. of Equations:         26.0000    BIC:                   -91.3286
Nobs:                     19730.0    HQIC:                  -92.0629
Log likelihood:           186569.    FPE:                7.28408e-41
AIC:                     -92.4203    Det(Omega_mle):     6.34514e-41
--------------------------------------------------------------------
Results for equation Appliances(t-1)
                         coefficient       std. error           t-stat            prob
--------------------------------------------------------------------------------------
const                     118.184769        67.088430            1.762           0.078
L1.Appliances(t-1)          0.781559         0.007165          109.083           0.000
L1.lights(t-1)            

In [9]:
def adjust(val, length= 6): return str(val).ljust(length)

from statsmodels.stats.stattools import durbin_watson
out = durbin_watson(model_fitted.resid)

for col, val in zip(df.columns, out):
    print(adjust(col), ':', round(val, 2))

date   : 2.01
Appliances : 2.0
lights : 2.05
T1     : 2.0
RH_1   : 2.03
T2     : 2.0
RH_2   : 2.0
T3     : 2.0
RH_3   : 2.01
T4     : 2.0
RH_4   : 2.0
T5     : 2.0
RH_5   : 2.01
T6     : 2.0
RH_6   : 2.04
T7     : 2.0
RH_7   : 2.03
T8     : 2.0
RH_8   : 2.02
T9     : 2.0
RH_9   : 2.02
T_out  : 2.0
Press_mm_hg : 2.02
RH_out : 2.04
Windspeed : 2.02
Visibility : 2.02


In [10]:
# Get the lag order
lag_order = model_fitted.k_ar
print(lag_order)  #> 4

# Input data for forecasting
forecast_input = data_test.values[-lag_order:]
forecast_input

4


array([[ 90.        ,   0.        ,  25.5       ,  46.5       ,
         25.754     ,  42.08      ,  27.13333333,  41.22333333,
         24.7       ,  45.59      ,  23.23      ,  52.32666667,
         24.19666667,   1.        ,  24.55714286,  44.41428571,
         24.7       ,  49.79      ,  23.2       ,  46.79      ,
         22.6       , 755.2       ,  56.        ,   3.5       ,
         24.5       ,  13.3       ],
       [270.        ,  10.        ,  25.5       ,  46.59666667,
         25.62857143,  42.76857143,  27.05      ,  41.69      ,
         24.7       ,  45.73      ,  23.23      ,  52.26666667,
         23.62666667,   1.        ,  24.54      ,  44.4       ,
         24.7       ,  49.66      ,  23.2       ,  46.79      ,
         22.46666667, 755.2       ,  56.33333333,   3.66666667,
         25.33333333,  13.26666667],
       [420.        ,  10.        ,  25.5       ,  46.99      ,
         25.414     ,  43.036     ,  26.89      ,  41.29      ,
         24.7       ,  45.79  

In [15]:
nobs = 1
# Forecast
fc = model_fitted.forecast(y=forecast_input, steps=nobs)
df_forecast = pd.DataFrame(fc, index=data_test.index[-nobs:], columns=data_test.columns)
df_forecast

Unnamed: 0,Appliances(t),lights(t),T1(t),RH_1(t),T2(t),RH_2(t),T3(t),RH_3(t),T4(t),RH_4(t),...,T8(t),RH_8(t),T9(t),RH_9(t),T_out(t),Press_mm_hg(t),RH_out(t),Windspeed(t),Visibility(t),Tdewpoint(t)
19734,323.334761,9.522226,25.491596,46.698298,25.097597,43.065459,26.802397,41.270409,24.757447,46.032604,...,24.774365,49.813361,23.183846,46.800595,21.997074,755.206224,57.555298,4.117658,27.821565,13.14028


In [36]:
def invert_transformation(df_train, df_forecast, second_diff=False):
    """Revert back the differencing to get the forecast to original scale."""
    df_fc = df_forecast.copy()
    columns = df_train.columns
    for col in columns:        
        # Roll back 2nd Diff
        if second_diff:
            df_fc[str(col)+'_1d'] = (df_train[col].iloc[-1]-df_train[col].iloc[-2]) + df_fc[str(col)+'_2d'].cumsum()
        # Roll back 1st Diff
        df_fc[str(col)+'_forecast'] = df_train[col].iloc[-1] + df_fc[str(col)+'_1d'].cumsum()
    return df_fc
df_results = invert_transformation(df_train, df_forecast, second_diff=True)        
df_results.loc[:, ['rgnp_forecast', 'pgnp_forecast', 'ulc_forecast', 'gdfco_forecast',
                   'gdf_forecast', 'gdfim_forecast', 'gdfcf_forecast', 'gdfce_forecast']]

KeyError: "None of [Index(['rgnp_forecast', 'pgnp_forecast', 'ulc_forecast', 'gdfco_forecast',\n       'gdf_forecast', 'gdfim_forecast', 'gdfcf_forecast', 'gdfce_forecast'],\n      dtype='object')] are in the [columns]"