## Importing the relevant packages

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.graphics.tsaplots as sgt
import statsmodels.tsa.stattools as sts
from statsmodels.tsa.arima_model import ARIMA
from scipy.stats.distributions import chi2
!pip install arch
from arch import arch_model
from math import sqrt
import seaborn as sns
sns.set()

Collecting arch
  Downloading arch-6.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (916 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m916.4/916.4 kB[0m [31m6.4 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: arch
Successfully installed arch-6.1.0


## Importing the Data and Pre-processing

In [2]:
# Load the raw index data and work on a copy so the original frame stays untouched.
raw_csv_data = pd.read_csv("INDEX2023.csv")
df_comp = raw_csv_data.copy()
# Parse the `date` column day-first and use it as the index.
df_comp["date"] = pd.to_datetime(df_comp["date"], dayfirst=True)
df_comp = df_comp.set_index("date")
# Conform the series to a business-day calendar; dates missing from the file
# become NaN rows, which are then filled with the last observed value.
# `.ffill()` replaces the deprecated `fillna(method='ffill')`, and "B" is the
# canonical (upper-case) business-day alias.
df_comp = df_comp.asfreq("B")
df_comp = df_comp.ffill()

In [3]:
# Use the FTSE column as the market value analysed in the rest of the notebook.
df_comp["market_value"] = df_comp["ftse"]

In [4]:
# Drop the raw index columns now that `market_value` holds the series of interest.
df_comp = df_comp.drop(columns=["spx", "dax", "ftse", "nikkei"])

# Chronological split: the first 80% of rows form the training set, the rest the test set.
TRAIN_FRACTION = 0.8
size = int(len(df_comp) * TRAIN_FRACTION)
# Take explicit copies: columns are added to `df` later, and assigning into a
# slice of `df_comp` would otherwise be a chained assignment
# (SettingWithCopyWarning).
df, df_test = df_comp.iloc[:size].copy(), df_comp.iloc[size:].copy()

In [5]:
import warnings

# Ignore every warning category from this point on to keep cell outputs short.
# NOTE(review): this is a blanket filter, so it also hides warnings raised by
# pandas or the model-fitting libraries; consider narrowing it.
warnings.simplefilter("ignore")

## The LLR Test

In [6]:
def LLR_test(mod_1, mod_2, DF = 1):
    """Log-likelihood ratio test between two fitted models.

    Parameters
    ----------
    mod_1 : fitted result
        The simpler model. Must expose its log-likelihood as ``llf``
        (statsmodels results) or ``loglikelihood`` (``arch`` results).
    mod_2 : fitted result
        The more complex model, with the same attribute requirement.
    DF : int, default 1
        Degrees of freedom of the chi-squared reference distribution.

    Returns
    -------
    float
        Chi-squared survival-function p-value of ``2 * (L2 - L1)``, rounded
        to three decimals.

    Raises
    ------
    AttributeError
        If either model exposes neither ``llf`` nor ``loglikelihood``.
    """
    def _log_likelihood(fitted):
        # Accept both naming conventions so the test also works on the GARCH
        # results produced by `arch_model(...).fit()`.
        for attr in ("llf", "loglikelihood"):
            if hasattr(fitted, attr):
                return getattr(fitted, attr)
        raise AttributeError(
            f"{type(fitted).__name__!s} has neither 'llf' nor 'loglikelihood'"
        )

    L1 = _log_likelihood(mod_1)
    L2 = _log_likelihood(mod_2)
    LR = 2 * (L2 - L1)
    return chi2.sf(LR, DF).round(3)

## Creating Returns

In [8]:
# Assuming there is no autocorrelation.
# Percentage returns: one-period relative change of the market value, times 100.
df["returns"] = df["market_value"].pct_change(1).mul(100)

In [9]:
# Inspect the training frame now that the `returns` column has been added.
df

Unnamed: 0_level_0,market_value,returns
date,Unnamed: 1_level_1,Unnamed: 2_level_1
1994-01-10,3440.600098,
1994-01-11,3413.800049,-0.778935
1994-01-12,3372.000000,-1.224443
1994-01-13,3360.000000,-0.355872
1994-01-14,3400.600098,1.208336
...,...,...
2017-08-24,7407.100098,0.330501
2017-08-25,7401.500000,-0.075604
2017-08-28,7401.500000,0.000000
2017-08-29,7337.399902,-0.866042


## The Simple GARCH Model

In [11]:
# GARCH(1,1): include one past residual (p) and one past variance (q).
model_garch_1_1 = arch_model(
    df["returns"].iloc[1:],  # skip the first row, whose return is NaN
    mean="Constant",
    vol="GARCH",
    p=1,
    q=1,
)
results_garch_1_1 = model_garch_1_1.fit(update_freq=5)
results_garch_1_1.summary()

Iteration:      5,   Func. Count:     35,   Neg. LLF: 8341.36952024206
Iteration:     10,   Func. Count:     65,   Neg. LLF: 8333.064383147337
Optimization terminated successfully    (Exit mode 0)
            Current function value: 8333.064383147337
            Iterations: 11
            Function evaluations: 69
            Gradient evaluations: 11


0,1,2,3
Dep. Variable:,returns,R-squared:,0.0
Mean Model:,Constant Mean,Adj. R-squared:,0.0
Vol Model:,GARCH,Log-Likelihood:,-8333.06
Distribution:,Normal,AIC:,16674.1
Method:,Maximum Likelihood,BIC:,16701.0
,,No. Observations:,6167.0
Date:,"Sat, Sep 02 2023",Df Residuals:,6166.0
Time:,16:29:34,Df Model:,1.0

0,1,2,3,4,5
,coef,std err,t,P>|t|,95.0% Conf. Int.
mu,0.0427,1.053e-02,4.052,5.068e-05,"[2.203e-02,6.329e-02]"

0,1,2,3,4,5
,coef,std err,t,P>|t|,95.0% Conf. Int.
omega,0.0135,3.482e-03,3.874,1.069e-04,"[6.665e-03,2.031e-02]"
alpha[1],0.0891,1.150e-02,7.751,9.086e-15,"[6.660e-02, 0.112]"
beta[1],0.8998,1.300e-02,69.226,0.000,"[ 0.874, 0.925]"


Todos los coeficientes son significativos, la verosimilitud aumenta y el AIC baja; por lo tanto, incluir valores pasados de la varianza proporciona una precisión mucho mayor, es decir, mejora la capacidad predictiva del modelo. Este modelo se convierte en el favorito para medir la volatilidad.

AUMENTANDO LOS ÓRDENES EN EL MODELO...

## Higher-Lag GARCH Models

In [12]:
# GARCH(1,2) model: one past residual (p) and two past variances (q).
model_garch_1_2 = arch_model(
    df["returns"].iloc[1:],  # skip the first row, whose return is NaN
    mean="Constant",
    vol="GARCH",
    p=1,
    q=2,
)
results_garch_1_2 = model_garch_1_2.fit(update_freq=5)
results_garch_1_2.summary()

Iteration:      5,   Func. Count:     40,   Neg. LLF: 8345.055585036644
Iteration:     10,   Func. Count:     73,   Neg. LLF: 8333.066952165578
Optimization terminated successfully    (Exit mode 0)
            Current function value: 8333.064383425113
            Iterations: 12
            Function evaluations: 85
            Gradient evaluations: 12


0,1,2,3
Dep. Variable:,returns,R-squared:,0.0
Mean Model:,Constant Mean,Adj. R-squared:,0.0
Vol Model:,GARCH,Log-Likelihood:,-8333.06
Distribution:,Normal,AIC:,16676.1
Method:,Maximum Likelihood,BIC:,16709.8
,,No. Observations:,6167.0
Date:,"Sat, Sep 02 2023",Df Residuals:,6166.0
Time:,16:29:43,Df Model:,1.0

0,1,2,3,4,5
,coef,std err,t,P>|t|,95.0% Conf. Int.
mu,0.0427,1.052e-02,4.056,5.002e-05,"[2.204e-02,6.328e-02]"

0,1,2,3,4,5
,coef,std err,t,P>|t|,95.0% Conf. Int.
omega,0.0135,3.342e-03,4.036,5.431e-05,"[6.939e-03,2.004e-02]"
alpha[1],0.0891,1.370e-02,6.506,7.708e-11,"[6.229e-02, 0.116]"
beta[1],0.8998,0.208,4.328,1.501e-05,"[ 0.492, 1.307]"
beta[2],0.0000,0.198,0.000,1.000,"[ -0.389, 0.389]"


Existe un coeficiente no significativo (beta[2], con p-valor igual a 1); esto significa que tenemos un caso de multicolinealidad, debido a la relación entre las varianzas condicionales.

In [13]:
# GARCH(1,3) model: one past residual (p) and three past variances (q).
model_garch_1_3 = arch_model(
    df["returns"].iloc[1:],  # skip the first row, whose return is NaN
    mean="Constant",
    vol="GARCH",
    p=1,
    q=3,
)
results_garch_1_3 = model_garch_1_3.fit(update_freq=5)
results_garch_1_3.summary()

Iteration:      5,   Func. Count:     47,   Neg. LLF: 8486.86014652485
Iteration:     10,   Func. Count:     88,   Neg. LLF: 8333.67159674283
Iteration:     15,   Func. Count:    123,   Neg. LLF: 8333.064386802807
Optimization terminated successfully    (Exit mode 0)
            Current function value: 8333.064383210181
            Iterations: 17
            Function evaluations: 136
            Gradient evaluations: 17


0,1,2,3
Dep. Variable:,returns,R-squared:,0.0
Mean Model:,Constant Mean,Adj. R-squared:,0.0
Vol Model:,GARCH,Log-Likelihood:,-8333.06
Distribution:,Normal,AIC:,16678.1
Method:,Maximum Likelihood,BIC:,16718.5
,,No. Observations:,6167.0
Date:,"Sat, Sep 02 2023",Df Residuals:,6166.0
Time:,16:30:19,Df Model:,1.0

0,1,2,3,4,5
,coef,std err,t,P>|t|,95.0% Conf. Int.
mu,0.0427,2.562e-02,1.665,9.591e-02,"[-7.556e-03,9.287e-02]"

0,1,2,3,4,5
,coef,std err,t,P>|t|,95.0% Conf. Int.
omega,0.0135,8.910e-02,0.151,0.880,"[ -0.161, 0.188]"
alpha[1],0.0891,0.606,0.147,0.883,"[ -1.099, 1.277]"
beta[1],0.8998,17.813,5.051e-02,0.960,"[-34.014, 35.813]"
beta[2],1.4442e-10,27.113,5.327e-12,1.000,"[-53.141, 53.141]"
beta[3],0.0000,9.976,0.000,1.000,"[-19.552, 19.552]"


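El modelo empeora: ningún coeficiente de la ecuación de volatilidad es significativo y el AIC sube (16678.1 frente a 16674.1 del GARCH(1,1)).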

In [14]:
# What happens when one more past residual is added: GARCH(2,1).
model_garch_2_1 = arch_model(
    df["returns"].iloc[1:],  # skip the first row, whose return is NaN
    mean="Constant",
    vol="GARCH",
    p=2,
    q=1,
)
results_garch_2_1 = model_garch_2_1.fit(update_freq=5)
results_garch_2_1.summary()

Iteration:      5,   Func. Count:     41,   Neg. LLF: 8348.606031783644
Iteration:     10,   Func. Count:     76,   Neg. LLF: 8331.822462526336
Optimization terminated successfully    (Exit mode 0)
            Current function value: 8331.808278578366
            Iterations: 14
            Function evaluations: 99
            Gradient evaluations: 14


0,1,2,3
Dep. Variable:,returns,R-squared:,0.0
Mean Model:,Constant Mean,Adj. R-squared:,0.0
Vol Model:,GARCH,Log-Likelihood:,-8331.81
Distribution:,Normal,AIC:,16673.6
Method:,Maximum Likelihood,BIC:,16707.3
,,No. Observations:,6167.0
Date:,"Sat, Sep 02 2023",Df Residuals:,6166.0
Time:,16:30:43,Df Model:,1.0

0,1,2,3,4,5
,coef,std err,t,P>|t|,95.0% Conf. Int.
mu,0.0424,1.057e-02,4.016,5.928e-05,"[2.172e-02,6.314e-02]"

0,1,2,3,4,5
,coef,std err,t,P>|t|,95.0% Conf. Int.
omega,0.0155,4.796e-03,3.225,1.262e-03,"[6.065e-03,2.486e-02]"
alpha[1],0.0702,1.721e-02,4.079,4.517e-05,"[3.648e-02, 0.104]"
alpha[2],0.0274,2.461e-02,1.115,0.265,"[-2.080e-02,7.568e-02]"
beta[1],0.8897,1.946e-02,45.716,0.000,"[ 0.852, 0.928]"


Vemos un coeficiente no significativo: alpha[2] (p-valor = 0.265).

Al añadir otro error más pasa lo mismo; veamos:

In [15]:
# GARCH(3,1): three past residuals (p) and one past variance (q).
model_garch_3_1 = arch_model(
    df["returns"].iloc[1:],  # skip the first row, whose return is NaN
    mean="Constant",
    vol="GARCH",
    p=3,
    q=1,
)
results_garch_3_1 = model_garch_3_1.fit(update_freq=5)
results_garch_3_1.summary()

Iteration:      5,   Func. Count:     45,   Neg. LLF: 16351.727855238629
Iteration:     10,   Func. Count:     85,   Neg. LLF: 8331.819632529354
Optimization terminated successfully    (Exit mode 0)
            Current function value: 8331.808278866853
            Iterations: 13
            Function evaluations: 106
            Gradient evaluations: 13


0,1,2,3
Dep. Variable:,returns,R-squared:,0.0
Mean Model:,Constant Mean,Adj. R-squared:,0.0
Vol Model:,GARCH,Log-Likelihood:,-8331.81
Distribution:,Normal,AIC:,16675.6
Method:,Maximum Likelihood,BIC:,16716.0
,,No. Observations:,6167.0
Date:,"Sat, Sep 02 2023",Df Residuals:,6166.0
Time:,16:31:11,Df Model:,1.0

0,1,2,3,4,5
,coef,std err,t,P>|t|,95.0% Conf. Int.
mu,0.0424,1.061e-02,4.000,6.323e-05,"[2.164e-02,6.322e-02]"

0,1,2,3,4,5
,coef,std err,t,P>|t|,95.0% Conf. Int.
omega,0.0155,6.085e-03,2.541,1.105e-02,"[3.536e-03,2.739e-02]"
alpha[1],0.0702,1.721e-02,4.080,4.507e-05,"[3.648e-02, 0.104]"
alpha[2],0.0274,2.473e-02,1.110,0.267,"[-2.102e-02,7.591e-02]"
alpha[3],8.1967e-13,2.661e-02,3.080e-11,1.000,"[-5.216e-02,5.216e-02]"
beta[1],0.8897,2.726e-02,32.637,1.237e-233,"[ 0.836, 0.943]"


El GARCH(1,1) suele ser el mejor modelo para medir la volatilidad de los retornos; aquí ningún modelo de orden superior aporta coeficientes adicionales significativos, así que no hay necesidad de probar modelos demasiado complicados.