### Packages

In [1]:
import numpy as np
import pandas as pd
import scipy
import statsmodels.api as sm
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn
from statsmodels.tsa.arima.model import ARIMA
from arch import arch_model
import seaborn as sns
import yfinance
import warnings
warnings.filterwarnings("ignore")
sns.set()

### Loading the data

In [6]:
# Fetch daily price history for the four index tickers in a single request.
# group_by='ticker' yields two-level columns of the form (ticker, field);
# auto_adjust=True asks yfinance to return adjusted prices.
raw_data = yfinance.download(
    tickers="^GSPC ^FTSE ^N225 ^GDAXI",
    start="1994-01-07",
    end="2023-01-29",
    interval="1d",
    group_by='ticker',
    auto_adjust=True,
)

[*********************100%***********************]  4 of 4 completed


In [8]:
# Work on a copy so the column edits made in later cells do not alter raw_data.
df_comp = raw_data.copy()

In [11]:
# Inspect index range, column layout and per-column non-null counts
# (the counts differ between tickers, i.e. the series contain gaps).
df_comp.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 7561 entries, 1994-01-07 to 2023-01-27
Data columns (total 20 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   (^GDAXI, Open)    7362 non-null   float64
 1   (^GDAXI, High)    7362 non-null   float64
 2   (^GDAXI, Low)     7362 non-null   float64
 3   (^GDAXI, Close)   7362 non-null   float64
 4   (^GDAXI, Volume)  7362 non-null   float64
 5   (^FTSE, Open)     7340 non-null   float64
 6   (^FTSE, High)     7340 non-null   float64
 7   (^FTSE, Low)      7340 non-null   float64
 8   (^FTSE, Close)    7340 non-null   float64
 9   (^FTSE, Volume)   7340 non-null   float64
 10  (^GSPC, Open)     7316 non-null   float64
 11  (^GSPC, High)     7316 non-null   float64
 12  (^GSPC, Low)      7316 non-null   float64
 13  (^GSPC, Close)    7316 non-null   float64
 14  (^GSPC, Volume)   7316 non-null   float64
 15  (^N225, Open)     7130 non-null   float64
 16  (^N225, High)     7130 n

In [12]:
# Expose each ticker's Close series under a short top-level column name.
# Insertion order of the mapping fixes the resulting column order.
close_aliases = {
    'spx': '^GSPC',
    'dax': '^GDAXI',
    'ftse': '^FTSE',
    'nikkei': '^N225',
}
for alias, ticker in close_aliases.items():
    df_comp[alias] = df_comp[ticker]['Close'][:]

In [13]:
# Preview the first rows to confirm the new spx/dax/ftse/nikkei columns
# mirror the corresponding per-ticker Close values.
df_comp.head()

Unnamed: 0_level_0,^GDAXI,^GDAXI,^GDAXI,^GDAXI,^GDAXI,^FTSE,^FTSE,^FTSE,^FTSE,^FTSE,...,^GSPC,^N225,^N225,^N225,^N225,^N225,spx,dax,ftse,nikkei
Unnamed: 0_level_1,Open,High,Low,Close,Volume,Open,High,Low,Close,Volume,...,Volume,Open,High,Low,Close,Volume,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
1994-01-07,2218.959961,2227.639893,2201.820068,2224.949951,0.0,3401.399902,3446.800049,3398.699951,3446.0,0.0,...,324920000.0,17842.980469,18131.410156,17787.480469,18124.009766,0.0,469.899994,2224.949951,3446.0,18124.009766
1994-01-10,2231.840088,2238.01001,2222.0,2225.0,0.0,3465.699951,3468.100098,3430.0,3440.600098,0.0,...,319490000.0,18186.519531,18567.060547,18186.519531,18443.439453,0.0,475.269989,2225.0,3440.600098,18443.439453
1994-01-11,2225.429932,2235.610107,2225.179932,2228.100098,0.0,3442.5,3442.5,3413.5,3413.800049,0.0,...,305490000.0,18481.849609,18671.669922,18373.039062,18485.25,0.0,474.130005,2228.100098,3413.800049,18485.25
1994-01-12,2227.120117,2227.790039,2182.060059,2182.060059,0.0,3394.800049,3402.399902,3372.0,3372.0,0.0,...,310690000.0,18447.339844,18807.080078,18301.929688,18793.880859,0.0,474.170013,2182.060059,3372.0,18793.880859
1994-01-13,2171.5,2183.709961,2134.100098,2142.370117,0.0,3380.699951,3383.300049,3356.899902,3360.0,0.0,...,277970000.0,18770.380859,18823.380859,18548.75,18577.259766,0.0,472.470001,2142.370117,3360.0,18577.259766


In [14]:
# Reduce the frame to a gap-free business-day series of the four closing prices.
# Built as one chain that returns a new frame instead of a sequence of in-place
# `del` statements. `.ffill()` replaces `fillna(method='ffill')`, whose `method`
# argument is deprecated in recent pandas releases (the deprecation warning is
# hidden here by the global warnings filter).
df_comp = (
    df_comp.iloc[1:]                                               # skip the first row (1994-01-07)
    .drop(columns=['^N225', '^GSPC', '^GDAXI', '^FTSE'], level=0)  # remove the raw per-ticker column groups
    .asfreq('b')                                                   # re-index onto a business-day calendar
    .ffill()                                                       # carry the last known value across missing days
)

### Creating Returns

In [15]:
# Simple one-period percentage returns (in percent) for every index.
# Keys are the new return columns, values the price columns they derive from;
# the mapping order determines the order in which columns are appended.
return_sources = {
    'ret_spx': 'spx',
    'ret_ftse': 'ftse',
    'ret_dax': 'dax',
    'ret_nikkei': 'nikkei',
}
for return_col, price_col in return_sources.items():
    df_comp[return_col] = df_comp[price_col].pct_change(1) * 100

In [16]:
# Preview prices alongside the new return columns; the first row's returns are
# NaN because pct_change has no preceding observation to compare against.
df_comp.head()

Unnamed: 0_level_0,spx,dax,ftse,nikkei,ret_spx,ret_ftse,ret_dax,ret_nikkei
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1994-01-10,475.269989,2225.0,3440.600098,18443.439453,,,,
1994-01-11,474.130005,2228.100098,3413.800049,18485.25,-0.23986,-0.778935,0.13933,0.226696
1994-01-12,474.170013,2182.060059,3372.0,18793.880859,0.008438,-1.224443,-2.066336,1.669606
1994-01-13,472.470001,2142.370117,3360.0,18577.259766,-0.358524,-0.355872,-1.818921,-1.152615
1994-01-14,474.910004,2151.050049,3400.600098,18973.699219,0.516435,1.208336,0.405156,2.134004


### Splitting the Data

In [17]:
# Chronological 80/20 split: the first `size` rows form the training set (df),
# the remaining rows the test set (df_test). No shuffling, so time order is kept.
size = int(0.8 * len(df_comp))
df = df_comp.iloc[:size]
df_test = df_comp.iloc[size:]

### Fitting a Model

In [18]:
from pmdarima.arima import auto_arima

In [19]:
# Default auto_arima search on the training-set FTSE returns. The first
# observation is skipped positionally because it is the NaN left by pct_change.
model_auto = auto_arima(df['ret_ftse'].iloc[1:])

In [20]:
# Echo the model object returned by auto_arima.
model_auto



In [21]:
# Show the fit summary (selected order, coefficients, information criteria,
# residual diagnostics) of the model chosen by the default search.
model_auto.summary()

0,1,2,3
Dep. Variable:,y,No. Observations:,6063.0
Model:,"SARIMAX(4, 0, 5)",Log Likelihood,-9309.789
Date:,"Wed, 05 Apr 2023",AIC,18641.578
Time:,16:38:04,BIC,18715.388
Sample:,01-11-1994,HQIC,18667.197
,- 04-06-2017,,
Covariance Type:,opg,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
intercept,0.0283,0.019,1.475,0.140,-0.009,0.066
ar.L1,0.0556,0.079,0.701,0.483,-0.100,0.211
ar.L2,-0.5897,0.079,-7.447,0.000,-0.745,-0.435
ar.L3,-0.1438,0.072,-1.991,0.046,-0.285,-0.002
ar.L4,0.2797,0.076,3.686,0.000,0.131,0.428
ma.L1,-0.0760,0.079,-0.960,0.337,-0.231,0.079
ma.L2,0.5430,0.080,6.754,0.000,0.385,0.701
ma.L3,0.0624,0.071,0.883,0.377,-0.076,0.201
ma.L4,-0.2800,0.075,-3.715,0.000,-0.428,-0.132

0,1,2,3
Ljung-Box (L1) (Q):,0.0,Jarque-Bera (JB):,7902.02
Prob(Q):,0.99,Prob(JB):,0.0
Heteroskedasticity (H):,0.95,Skew:,-0.19
Prob(H) (two-sided):,0.25,Kurtosis:,8.58


### Important Arguments

In [None]:
# Tuned auto_arima search on FTSE returns, using the other indices' returns as
# exogenous regressors. The entire dataset is passed in and the last 20% of it
# is held back for "out of bag" validation scoring. [1:] skips the leading NaN
# row that pct_change leaves in every return column.
model_auto = auto_arima(df_comp.ret_ftse[1:], X = df_comp[['ret_spx', 'ret_dax', 'ret_nikkei']][1:], m = 5,
                       max_order = None, max_p = 7, max_q = 7, max_d = 2, max_P = 4, max_Q = 4, max_D = 2,
                       maxiter = 50, alpha = 0.05, n_jobs = -1, trend = 'ct', information_criterion = 'oob',
                       out_of_sample_size = int(len(df_comp)*0.2))


# !!! Important Note: the hold-out argument documented for pmdarima's auto_arima is `out_of_sample_size`.
# `out_of_sample` is not a named parameter, so passing it does not configure a hold-out set for 'oob' scoring.
# NOTE(review): `X` is the current name of the former `exogenous` argument; older pmdarima releases only
# accept `exogenous` -- confirm against the installed version.


# X -> outside factors (e.g. other time series); formerly called `exogenous`
# m -> seasonal cycle length
# max_order -> maximum total order of the model (p + q + P + Q); None means no limit
# max_p -> maximum AR components
# max_q -> maximum MA components
# max_d -> maximum Integrations
# max_P, max_Q, max_D -> seasonal counterparts of max_p, max_q, max_d
# maxiter -> maximum iterations we're giving the model to converge the coefficients (becomes harder as the order increases)
# alpha -> level of significance, default is 5%, which we should be using most of the time
# n_jobs -> how many models to fit at a time (-1 indicates "as many as possible")
#        NOTE(review): documented as applying to the grid search (stepwise=False), which is not requested here -- confirm
# trend -> "ct" usually (constant plus linear time trend)
# information_criterion -> 'aic', 'aicc', 'bic', 'hqic', 'oob'
#        (Akaike Information Criterion, Corrected Akaike Information Criterion,
#        Bayesian Information Criterion, Hannan-Quinn Information Criterion, or
#        "out of bag"--for validation scoring--respectively)
# out_of_sample_size -> validates the model selection (pass the entire dataset, and set 20% to be the out_of_sample_size)

In [None]:
# Show the fit summary of the model selected by the tuned search above.
model_auto.summary()