## Importing the relevant packages

In [2]:
!pip install arch

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting arch
  Downloading arch-5.3.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (903 kB)
[K     |████████████████████████████████| 903 kB 9.3 MB/s 
Collecting property-cached>=1.6.4
  Downloading property_cached-1.6.4-py2.py3-none-any.whl (7.8 kB)
Installing collected packages: property-cached, arch
Successfully installed arch-5.3.1 property-cached-1.6.4


In [3]:
!pip install yfinance

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting yfinance
  Downloading yfinance-0.1.86-py2.py3-none-any.whl (29 kB)
Collecting requests>=2.26
  Downloading requests-2.28.1-py3-none-any.whl (62 kB)
[K     |████████████████████████████████| 62 kB 1.2 MB/s 
Installing collected packages: requests, yfinance
  Attempting uninstall: requests
    Found existing installation: requests 2.23.0
    Uninstalling requests-2.23.0:
      Successfully uninstalled requests-2.23.0
Successfully installed requests-2.28.1 yfinance-0.1.86


In [6]:
!pip install pmdarima

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pmdarima
  Downloading pmdarima-2.0.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl (1.8 MB)
[K     |████████████████████████████████| 1.8 MB 5.0 MB/s 
Collecting statsmodels>=0.13.2
  Downloading statsmodels-0.13.5-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (9.9 MB)
[K     |████████████████████████████████| 9.9 MB 7.8 MB/s 
Installing collected packages: statsmodels, pmdarima
  Attempting uninstall: statsmodels
    Found existing installation: statsmodels 0.12.2
    Uninstalling statsmodels-0.12.2:
      Successfully uninstalled statsmodels-0.12.2
Successfully installed pmdarima-2.0.1 statsmodels-0.13.5


In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.graphics.tsaplots as sgt
import statsmodels.tsa.stattools as sts
from statsmodels.tsa.arima_model import ARIMA
from scipy.stats.distributions import chi2
from arch import arch_model
import seaborn as sns
import yfinance
import warnings
warnings.filterwarnings("ignore")
sns.set()

## Loading the data

In [2]:
raw_data = yfinance.download(tickers = "^GSPC ^FTSE ^N225 ^GDAXI", start = "1994-01-07", end = "2018-01-29", interval = "1d", group_by= 'ticker', auto_adjust = True, treads=True)

[*********************100%***********************]  4 of 4 completed


In [3]:
df_comp = raw_data.copy()

In [4]:
df_comp["spx"] = df_comp["^GSPC"].Close[:]
df_comp["dax"] = df_comp["^GDAXI"].Close[:]
df_comp["ftse"] = df_comp["^FTSE"].Close[:]
df_comp["nikkei"] = df_comp["^N225"].Close[:]

In [5]:
df_comp = df_comp.iloc[1:]
del df_comp["^GSPC"]
del df_comp["^GDAXI"]
del df_comp["^FTSE"]
del df_comp["^N225"]
df_comp = df_comp.asfreq('b')
df_comp = df_comp.fillna(method='ffill')

## Creating Returns

In [6]:
df_comp["ret_spx"] = df_comp.spx.pct_change(1)*100
df_comp["ret_dax"] = df_comp.dax.pct_change(1)*100
df_comp["ret_ftse"] = df_comp.ftse.pct_change(1)*100
df_comp["ret_nikkei"] = df_comp.nikkei.pct_change(1)*100

## Splitting the Data

In [7]:
size = int(len(df_comp)*0.80)
df, df_test = df_comp.iloc[:size],df_comp.iloc[size:] 

## Fitting a Model

### Default Best Fit Model

In [8]:
from pmdarima.arima import auto_arima

In [9]:
model_auto = auto_arima(df.ret_ftse[1:])

In [10]:
model_auto

      with_intercept=False)

In [12]:
print(model_auto.summary())

                               SARIMAX Results                                
Dep. Variable:                      y   No. Observations:                 5019
Model:               SARIMAX(4, 0, 5)   Log Likelihood               -7882.776
Date:                Wed, 16 Nov 2022   AIC                          15785.552
Time:                        10:45:51   BIC                          15850.762
Sample:                    01-11-1994   HQIC                         15808.403
                         - 04-05-2013                                         
Covariance Type:                  opg                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1          0.0121      0.082      0.148      0.882      -0.148       0.172
ar.L2         -0.6541      0.077     -8.456      0.000      -0.806      -0.503
ar.L3         -0.1627      0.071     -2.289      0.0

## Important Arguments

In [15]:
# exogenous -> outside factors (e.g other time series)",
# m -> seasonal cycle length",
# max_order -> maximum amount of variables to be used in the regression (p + q)",
# max_p -> maximum AR components",
# max_q -> maximum MA components",
# max_d -> maximum Integrations",
# maxiter -> maximum iterations we're giving the model to converge the coefficients (becomes harder as the order increases)",
# alpha -> level of significance, default is 5%, which we should be using most of the time",
# n_jobs -> how many models to fit at a time (-1 indicates "as many as possible")",
# trend -> "ct" usually", ct-> constant trend 
# information_criterion -> 'aic', 'aicc', 'bic', 'hqic', 'oob' ",
#        (Akaike Information Criterion, Corrected Akaike Information Criterion,",
#        Bayesian Information Criterion, Hannan-Quinn Information Criterion, or",
#        "out of bag"--for validation scoring--respectively)",
# out_of_sample_size -> validates the model selection (pass the entire dataset, and set 20% to be the out_of_sample_size)"

model_auto = auto_arima(df.ret_ftse[1:], exogenous=df[['ret_spx', 'ret_dax','ret_nikkei']][1:], m=5, 
                        max_order=None, max_p=7, max_q=7, max_d=2, max_P=4, max_Q=4, max_D=2,
                        maxiter=50, alpha=0.05, n_jobs=-1, trend="ct", information_criterion="oob",
                        out_of_sample_size=int(len(df_comp)*0.2))

In [18]:
print(model_auto.summary())

                               SARIMAX Results                                
Dep. Variable:                      y   No. Observations:                 5019
Model:                        SARIMAX   Log Likelihood               -7985.012
Date:                Wed, 16 Nov 2022   AIC                          15976.023
Time:                        11:17:46   BIC                          15995.586
Sample:                             0   HQIC                         15982.879
                               - 5019                                         
Covariance Type:                  opg                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
intercept      0.0320      0.033      0.976      0.329      -0.032       0.096
drift      -6.644e-06   1.05e-05     -0.630      0.528   -2.73e-05     1.4e-05
sigma2         1.1278      0.009    124.271      0.0