# ARIMA Forecasting to predict $ET_0$

Libs required

In [1]:
%matplotlib inline

import pandas as pd
import numpy as np
import warnings
from sklearn import metrics as mt
from pyramid.arima import auto_arima

from matplotlib import pyplot as plt
import plotly
import plotly.plotly as py
import plotly.graph_objs as go

warnings.filterwarnings("ignore")

In [2]:
et0_treino_ = pd.read_csv('./data/label_train_.csv') 
et0_test = pd.read_csv('./data/label_test.csv')

In [3]:
et0_completed = pd.concat([et0_treino_, et0_test], axis=0)

## Hyper-parameters optimization

In [4]:
sarima_model = auto_arima(et0_completed, start_p=1, start_q=1, max_p=5, max_q=5, m=12,
                          start_P=0, seasonal=True, d=1, D=1, trace=True,
                          error_action='ignore',  # don't want to know if an order does not work
                          suppress_warnings=True,  # don't want convergence warnings
                          stepwise=True)  # set to stepwise

Fit ARIMA: order=(1, 1, 1) seasonal_order=(0, 1, 1, 12); AIC=4339.978, BIC=4365.172, Fit time=7.719 seconds
Fit ARIMA: order=(0, 1, 0) seasonal_order=(0, 1, 0, 12); AIC=5342.046, BIC=5352.124, Fit time=0.075 seconds
Fit ARIMA: order=(1, 1, 0) seasonal_order=(1, 1, 0, 12); AIC=4826.634, BIC=4846.789, Fit time=0.821 seconds
Fit ARIMA: order=(0, 1, 1) seasonal_order=(0, 1, 1, 12); AIC=4401.206, BIC=4421.361, Fit time=1.942 seconds
Fit ARIMA: order=(1, 1, 1) seasonal_order=(1, 1, 1, 12); AIC=4344.463, BIC=4374.696, Fit time=5.737 seconds
Fit ARIMA: order=(1, 1, 1) seasonal_order=(0, 1, 0, 12); AIC=5022.425, BIC=5042.580, Fit time=1.783 seconds
Fit ARIMA: order=(1, 1, 1) seasonal_order=(0, 1, 2, 12); AIC=4341.965, BIC=4372.198, Fit time=21.345 seconds
Fit ARIMA: order=(1, 1, 1) seasonal_order=(1, 1, 2, 12); AIC=4343.498, BIC=4378.770, Fit time=8.305 seconds
Fit ARIMA: order=(2, 1, 1) seasonal_order=(0, 1, 1, 12); AIC=nan, BIC=nan, Fit time=nan seconds
Fit ARIMA: order=(1, 1, 0) seasonal_ord

In [5]:
sarima_model.summary()

0,1,2,3
Dep. Variable:,y,No. Observations:,1153.0
Model:,"SARIMAX(2, 1, 2)x(0, 1, 1, 12)",Log Likelihood,-2161.402
Date:,"Sat, 01 Jun 2019",AIC,4336.805
Time:,16:27:45,BIC,4372.076
Sample:,0,HQIC,4350.125
,- 1153,,
Covariance Type:,opg,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
intercept,-0.0003,0.000,-0.611,0.541,-0.001,0.001
ar.L1,-0.3523,0.167,-2.106,0.035,-0.680,-0.024
ar.L2,0.3417,0.058,5.894,0.000,0.228,0.455
ma.L1,-0.1483,0.171,-0.869,0.385,-0.483,0.186
ma.L2,-0.6402,0.143,-4.491,0.000,-0.920,-0.361
ma.S.L12,-0.9767,0.014,-71.481,0.000,-1.004,-0.950
sigma2,2.5113,0.095,26.362,0.000,2.325,2.698

0,1,2,3
Ljung-Box (Q):,60.38,Jarque-Bera (JB):,18.34
Prob(Q):,0.02,Prob(JB):,0.0
Heteroskedasticity (H):,0.97,Skew:,-0.18
Prob(H) (two-sided):,0.75,Kurtosis:,3.5


## Time Series Forecasting

In [6]:
sarima_model.fit(et0_treino_)

ARIMA(callback=None, disp=0, maxiter=50, method=None, order=(2, 1, 2),
   out_of_sample_size=0, scoring='mse', scoring_args={},
   seasonal_order=(0, 1, 1, 12), solver='lbfgs', start_params=None,

In [7]:
et0_pred = sarima_model.predict(n_periods=len(et0_test))

print('RMSE = {}\nMAE  = {}\n'.format(
    np.sqrt(mt.mean_squared_error(et0_test, et0_pred)), 
    mt.mean_absolute_error(et0_test, et0_pred)))

RMSE = 4.270343321966753
MAE  = 3.7076500114890467

