<a href="https://colab.research.google.com/github/tanaymukherjee/Time-Series-Modeling/blob/master/10_The_Auto_ARIMA_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Time Series Analysis

### Definition:
A sequence of information which attaches a time period to each value.

### Common Objective:
1. Determining the stability of financial markets and the efficiency portfolios.
2. Weather forcasting based on past records.

#### Import Libraries

In [2]:
!pip install yfinance
import numpy as np
import pandas as pd
import scipy
import statsmodels.api as sm
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn
from statsmodels.tsa.arima_model import ARIMA
from arch import arch_model
import seaborn as sns
import yfinance
import warnings
warnings.filterwarnings("ignore")
sns.set()



#### Importing the Data and Pre-processing

In [3]:
raw_data = yfinance.download (tickers = "^GSPC ^FTSE ^N225 ^GDAXI", start = "1994-01-07", end = "2018-01-29", 
                              interval = "1d", group_by = 'ticker', auto_adjust = True, treads = True)

[*********************100%***********************]  4 of 4 completed


In [4]:
df_comp = raw_data.copy()

In [5]:
df_comp['spx'] = df_comp['^GSPC'].Close[:]
df_comp['dax'] = df_comp['^GDAXI'].Close[:]
df_comp['ftse'] = df_comp['^FTSE'].Close[:]
df_comp['nikkei'] = df_comp['^N225'].Close[:]

In [6]:
df_comp = df_comp.iloc[1:]
del df_comp['^N225']
del df_comp['^GSPC']
del df_comp['^GDAXI']
del df_comp['^FTSE']
df_comp=df_comp.asfreq('b')
df_comp=df_comp.fillna(method='ffill')

In [7]:
import warnings
warnings.filterwarnings("ignore")

### Creating Returns

In [11]:
df_comp['ret_spx'] = df_comp.spx.pct_change(1)*100
df_comp['ret_ftse'] = df_comp.ftse.pct_change(1)*100
df_comp['ret_dax'] = df_comp.dax.pct_change(1)*100
df_comp['ret_nikkei'] = df_comp.nikkei.pct_change(1)*100

### Splitting the Data

In [13]:
size = int(len(df_comp)*0.8)
df, df_test = df_comp.iloc[:size], df_comp.iloc[size:]

### Fitting a Model

In [9]:
!pip install pmdarima
from pmdarima.arima import auto_arima



In [14]:
model_auto = auto_arima(df.ret_ftse[1:])

In [15]:
model_auto

ARIMA(maxiter=50, method='lbfgs', order=(4, 0, 5), out_of_sample_size=0,
      scoring='mse', scoring_args=None, seasonal_order=(0, 0, 0, 0),
      with_intercept=True)

In [16]:
model_auto.summary()

0,1,2,3
Dep. Variable:,y,No. Observations:,5019.0
Model:,"SARIMAX(4, 0, 5)",Log Likelihood,-7882.658
Date:,"Thu, 18 Jun 2020",AIC,15787.316
Time:,03:17:27,BIC,15859.047
Sample:,0,HQIC,15812.452
,- 5019,,
Covariance Type:,opg,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
intercept,0.0309,0.025,1.246,0.213,-0.018,0.080
ar.L1,0.0135,0.082,0.165,0.869,-0.147,0.174
ar.L2,-0.6690,0.077,-8.645,0.000,-0.821,-0.517
ar.L3,-0.1616,0.072,-2.260,0.024,-0.302,-0.021
ar.L4,0.1898,0.074,2.553,0.011,0.044,0.335
ma.L1,-0.0384,0.081,-0.471,0.637,-0.198,0.121
ma.L2,0.6205,0.078,7.933,0.000,0.467,0.774
ma.L3,0.0592,0.069,0.858,0.391,-0.076,0.194
ma.L4,-0.1836,0.073,-2.510,0.012,-0.327,-0.040

0,1,2,3
Ljung-Box (Q):,67.77,Jarque-Bera (JB):,6360.08
Prob(Q):,0.0,Prob(JB):,0.0
Heteroskedasticity (H):,2.0,Skew:,-0.19
Prob(H) (two-sided):,0.0,Kurtosis:,8.5


### Important Arguments

In [17]:
model_auto = auto_arima(df_comp.ret_ftse[1:], exogenous = df_comp[['ret_spx', 'ret_dax', 'ret_nikkei']][1:], m = 5,
                       max_order = None, max_p = 7, max_q = 7, max_d = 2, max_P = 4, max_Q = 4, max_D = 2,
                       maxiter = 50, alpha = 0.05, n_jobs = -1, trend = 'ct', information_criterion = 'oob',
                       out_of_sample = int(len(df_comp)*0.2))


# !!! Important Note: In pdmarima v1.5.2, out_of_sample_size is replaced with out_of_sample, so make sure to use the latter!


# exogenous -> outside factors (e.g other time series)
# m -> seasonal cycle length
# max_order -> maximum amount of variables to be used in the regression (p + q)
# max_p -> maximum AR components
# max_q -> maximum MA components
# max_d -> maximum Integrations
# maxiter -> maximum iterations we're giving the model to converge the coefficients (becomes harder as the order increases)
# alpha -> level of significance, default is 5%, which we should be using most of the time
# n_jobs -> how many models to fit at a time (-1 indicates "as many as possible")
# trend -> "ct" usually
# information_criterion -> 'aic', 'aicc', 'bic', 'hqic', 'oob' 
#        (Akaike Information Criterion, Corrected Akaike Information Criterion,
#        Bayesian Information Criterion, Hannan-Quinn Information Criterion, or
#        "out of bag"--for validation scoring--respectively)
# out_of_smaple -> validates the model selection (pass the entire dataset, and set 20% to be the out_of_sample_size)

In [18]:
model_auto.summary()

0,1,2,3
Dep. Variable:,y,No. Observations:,6275.0
Model:,"SARIMAX(0, 0, 1)x(0, 0, [1, 2, 3, 4], 5)",Log Likelihood,-6333.37
Date:,"Thu, 18 Jun 2020",AIC,12688.74
Time:,03:25:02,BIC,12762.928
Sample:,01-11-1994,HQIC,12714.445
,- 01-29-2018,,
Covariance Type:,opg,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
intercept,-0.0027,0.014,-0.197,0.844,-0.029,0.024
drift,-1.636e-06,4.19e-06,-0.391,0.696,-9.84e-06,6.57e-06
ret_spx,0.0959,0.006,17.063,0.000,0.085,0.107
ret_dax,0.5581,0.005,114.065,0.000,0.549,0.568
ret_nikkei,0.0703,0.004,16.714,0.000,0.062,0.079
ma.L1,-0.1061,0.007,-14.611,0.000,-0.120,-0.092
ma.S.L5,-0.0283,0.008,-3.357,0.001,-0.045,-0.012
ma.S.L10,-0.0497,0.009,-5.440,0.000,-0.068,-0.032
ma.S.L15,-0.0237,0.009,-2.715,0.007,-0.041,-0.007

0,1,2,3
Ljung-Box (Q):,75.64,Jarque-Bera (JB):,14890.4
Prob(Q):,0.0,Prob(JB):,0.0
Heteroskedasticity (H):,0.54,Skew:,0.24
Prob(H) (two-sided):,0.0,Kurtosis:,10.53
