### Importing Relevant Packages

In [2]:
#Import relevant libraries
import numpy as np
import pandas as pd
import scipy
import statsmodels.api as sm
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn
from statsmodels.tsa.arima_model import ARIMA
import statsmodels.graphics.tsaplots as sgt
import statsmodels.tsa.stattools as sts
from statsmodels.tsa.statespace.sarimax import SARIMAX
from pmdarima.arima import auto_arima
from arch import arch_model
import seaborn as sns
import yfinance
import warnings
from pmdarima.arima import OCSBTest
warnings.filterwarnings("ignore")
sns.set()

### Loading Data and Preprocessing

In [3]:
raw_data = yfinance.download(tickers = "VOW3.DE, PAH3.DE, BMW.DE", start = '1994-01-07', end = '2019-09-01', 
                              interval = '1d', group_by = 'ticker', auto_adjust = True, treads = True)

[*********************100%***********************]  3 of 3 completed


In [4]:
df_comp = raw_data.copy()

In [5]:
df_comp

Unnamed: 0_level_0,BMW.DE,BMW.DE,BMW.DE,BMW.DE,BMW.DE,PAH3.DE,PAH3.DE,PAH3.DE,PAH3.DE,PAH3.DE,VOW3.DE,VOW3.DE,VOW3.DE,VOW3.DE,VOW3.DE
Unnamed: 0_level_1,Open,High,Low,Close,Volume,Open,High,Low,Close,Volume,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2
1996-11-08,10.539517,10.539517,10.516945,10.516945,767000.0,,,,,,,,,,
1996-11-11,10.527945,10.533733,10.465437,10.488587,260000.0,,,,,,,,,,
1996-11-12,10.511159,10.607814,10.471223,10.567880,1066000.0,,,,,,,,,,
1996-11-13,10.617074,10.617074,10.527943,10.551093,793000.0,,,,,,,,,,
1996-11-14,10.536624,10.585241,10.494374,10.585241,351000.0,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2019-08-26,55.687123,56.671896,55.403051,56.406765,1613088.0,49.444336,50.067959,49.177070,49.479973,410859.0,135.935498,137.613957,135.163792,137.150925,646712.0
2019-08-27,56.510926,57.183223,56.122698,56.331017,1777471.0,49.782872,50.335223,49.266155,49.889778,489691.0,137.285985,139.871183,136.552858,138.424240,873148.0
2019-08-28,56.331015,56.567740,55.630308,56.548801,1292671.0,49.907597,50.335223,49.373061,49.907597,327032.0,138.424236,138.867971,137.093043,138.462814,396548.0
2019-08-29,56.416236,57.135878,56.198446,56.974903,986990.0,49.515607,50.762849,49.319610,50.228317,313636.0,138.385649,141.028723,137.729694,140.141266,821156.0


In [None]:
df_comp['spx'] = df_comp['^GSPC'].Close[:]
df_comp['dax'] = df_comp['^GDAXI'].Close[:]
df_comp['ftse'] = df_comp['^FTSE'].Close[:]
df_comp['nikkei'] = df_comp['^N225'].Close[:]

In [None]:
df_comp = df_comp.iloc[1:]
del df_comp['^N225']
del df_comp['^GSPC']
del df_comp['^GDAXI']
del df_comp['^FTSE']
df_comp=df_comp.asfreq('b')
df_comp=df_comp.fillna(method='ffill')

In [None]:
#Creating returns
df_comp['ret_spx'] = df_comp.spx.pct_change(1)*100
df_comp['ret_ftse'] = df_comp.ftse.pct_change(1)*100
df_comp['ret_dax'] = df_comp.dax.pct_change(1)*100
df_comp['ret_nikkei'] = df_comp.nikkei.pct_change(1)*100

In [None]:
#Splitting Data
size = int(len(df_comp)*0.8)
df, df_test = df_comp.iloc[:size], df_comp.iloc[size:]

### Fitting

In [None]:
model_auto = auto_arima(df_comp.ret_ftse[1:])

In [None]:
model_auto

In [None]:
model_auto.summary()

In [None]:
#Non Seasonal
model_auto = auto_arima(df_comp.ret_spx[1:], exogenous=df_comp[['ret_ftse','ret_dax','ret_nikkei']][1:], m = 5, max_order = None, 
                        max_p = 7, max_d = 2, max_P = 4, max_Q = 4, max_D = 2, maxiter = 50, alpha = 0.05, n_jobs = -1,
                       trend = "ct", information_criterion = "oob", out_of_sample_size = int(len(df_comp) * 0.2))
# exogenous -> outside factors (e.g other time series)
# m -> seasonal cycle length
# max_order -> maximum amount of variables to be used in the regression (p + q)
# max_p -> maximum AR components
# max_q -> maximum MA components
# max_d -> maximum Integrations
# maxiter -> maximum iterations we're giving the model to converge the coefficients (becomes harder as the order increases)
# return_valid_fits -> whether or not the method should validate the results 
# alpha -> level of significance, default is 5%, which we should be using most of the time
# n_jobs -> how many models to fit at a time (-1 indicates "as many as possible")
# trend -> "ct" usually
# information_criterion -> 'aic', 'aicc', 'bic', 'hqic', 'oob' 
#        (Akaike Information Criterion, Corrected Akaike Information Criterion,
#        Bayesian Information Criterion, Hannan-Quinn Information Criterion, or
#        "out of bag"--for validation scoring--respectively)
# out_of_smaple_size -> validates the model selection (pass the entire dataset, and set 20% to be the out_of_sample_size)

In [None]:
#Season
model_auto_seasonal = auto_arima(df_comp.ret_ftse[1:], exogenous=df_comp[['ret_spx','ret_dax','ret_nikkei']][1:], m = 5,
                                 seasonal=True, max_order = None, max_p = 7, max_d = 2, max_P = 4, max_Q = 4, max_D = 2, 
                                 maxiter = 50, alpha = 0.05, n_jobs = -1,trend = "ct", information_criterion = "oob", 
                                 out_of_sample_size = int(len(df_comp) * 0.2))