# Modelado Estocástico
## Clase 9 - Modelos VAR (Vector Autoregressions)

In [1]:
# Optional setup: uncomment the line below to install `arch`, the package that
# provides the DFGLS unit-root test imported in the next cell.
# (Inside Jupyter, `%pip install arch` targets the running kernel's environment.)
# !pip install arch

In [2]:
import pandas as pd
import numpy as np
from statsmodels.tsa.api import VAR
from arch.unitroot import DFGLS

Pandas nos permite abrir archivos de tipo `.dta`. Para eso podemos utilizar la función `read_stata`. En este caso vamos a utilizar el archivo llamado `Precios_y_Dinero_data.dta`.

In [3]:
# Read the Stata dataset from the working directory and show which columns it has.
ruta_datos = "Precios_y_Dinero_data.dta"
df = pd.read_stata(ruta_datos)
print(df.columns.tolist())

['mmyy', 'ipc', 'm', 'm_en_ars']


Además, vamos a generar un índice asociado al período mensual correspondiente; para eso construimos la columna `yearmm` y la usamos como índice del DataFrame.

In [4]:
# Turn the first `mmyy` label (month abbreviation + two-digit year) into a
# "YYYY-MM" string that marks the start of the sample.
inicio = pd.to_datetime(df["mmyy"].iloc[0], format="%b-%y").strftime("%Y-%m")

# The monthly index is generated from that start alone, one period per row.
# Only the first `mmyy` value is parsed, so this relies on the rows being
# consecutive months in chronological order.
indice_mensual = pd.period_range(start=inicio, periods=len(df), freq="M")

# Attach the index and make sure both series are floats before computing ratios.
df = (
    df.assign(yearmm=indice_mensual)
    .set_index("yearmm")
    .astype({"ipc": float, "m": float})
)
df.head()

Unnamed: 0_level_0,mmyy,ipc,m,m_en_ars
yearmm,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2003-01,Jan-03,100.0,100.0,29609.92
2003-02,Feb-03,100.57,99.36,29420.37
2003-03,Mar-03,101.16,102.63,30388.08
2003-04,Apr-03,101.21,104.26,30870.99
2003-05,May-03,100.82,111.8,33105.17


Generamos las variables Inflación y Tasa de Crecimiento de la Base Monetaria

In [5]:
# Month-over-month growth rates: x_t / x_{t-1} - 1 for the price index and the
# monetary base. The first row has no previous month, so both values are NaN there.
# Note: `df` is rebound to the two derived columns only, so re-running this cell
# requires re-running the cells above first (`ipc` and `m` are no longer in `df`).
df = df.assign(
    infl=df["ipc"] / df["ipc"].shift(1) - 1,
    crec_m=df["m"] / df["m"].shift(1) - 1,
).loc[:, ["infl", "crec_m"]]

## Chequeo de estacionariedad utilizando la prueba de Dickey-Fuller GLS

Debemos corroborar que ambas series sean estacionarias. Para eso tenemos diferentes alternativas; acorde con lo visto en clase, vamos a utilizar una biblioteca (`arch`) que implementa la prueba de Dickey-Fuller GLS (Elliott-Rothenberg-Stock), en línea con el uso del comando `dfgls` de Stata.

In [6]:
# DF-GLS unit-root test on inflation with a constant as the only deterministic
# term (trend="c"). No lag length is passed, so the library selects it.
# Missing values (the first observation of the growth rate) are dropped first.
serie_infl = df["infl"].dropna()
test = DFGLS(serie_infl, trend="c")
print("Inflación", test.summary(), sep="\n")

Inflación
      Dickey-Fuller GLS Results      
Test Statistic                 -3.295
P-value                         0.001
Lags                                1
-------------------------------------

Trend: Constant
Critical Values: -2.68 (1%), -2.06 (5%), -1.74 (10%)
Null Hypothesis: The process contains a unit root.
Alternative Hypothesis: The process is weakly stationary.


In [7]:
# DF-GLS unit-root test on monetary-base growth, same specification as for
# inflation: constant only (trend="c"), lag length chosen by the library.
# NOTE(review): in the recorded output the statistic (-1.174, p = 0.227) does not
# reject the unit-root null, yet the series enters the VAR below in levels;
# confirm this is intended.
serie_crec_m = df["crec_m"].dropna()
test = DFGLS(serie_crec_m, trend="c")
print("Tasa de Crecimiento de la Base Monetaria", test.summary(), sep="\n")

Tasa de Crecimiento de la Base Monetaria
      Dickey-Fuller GLS Results      
Test Statistic                 -1.174
P-value                         0.227
Lags                               14
-------------------------------------

Trend: Constant
Critical Values: -2.68 (1%), -2.06 (5%), -1.75 (10%)
Null Hypothesis: The process contains a unit root.
Alternative Hypothesis: The process is weakly stationary.


## Corremos el VAR(2) bivariado

In [8]:
# Estimation sample: both series with incomplete rows removed
# (the first month has no growth rate).
datos_var = df.loc[:, ["infl", "crec_m"]].dropna()

# `model` is reused by later cells (lag selection, VAR(8)); `result_var2`
# is reused by the diagnostic cells.
model = VAR(datos_var)
result_var2 = model.fit(maxlags=2)
print(result_var2.summary())

  Summary of Regression Results   
Model:                         VAR
Method:                        OLS
Date:           Wed, 20, Aug, 2025
Time:                     09:45:49
--------------------------------------------------------------------
No. of Equations:         2.00000    BIC:                   -16.6022
Nobs:                     182.000    HQIC:                  -16.7069
Log likelihood:           1020.33    FPE:                5.16789e-08
AIC:                     -16.7782    Det(Omega_mle):     4.89523e-08
--------------------------------------------------------------------
Results for equation infl
               coefficient       std. error           t-stat            prob
----------------------------------------------------------------------------
const             0.004224         0.001340            3.151           0.002
L1.infl           0.517731         0.075813            6.829           0.000
L1.crec_m         0.020715         0.020682            1.002           0.317


### Selection Order Criteria

In [9]:
# Tabulate AIC, BIC, FPE and HQIC for each candidate lag order. No `maxlags`
# is passed, so the largest order considered is chosen by the library.
seleccion_rezagos = model.select_order()
print(seleccion_rezagos.summary())

 VAR Order Selection (* highlights the minimums)  
       AIC         BIC         FPE         HQIC   
--------------------------------------------------
0       -16.27      -16.24   8.561e-08      -16.26
1       -16.73     -16.62*   5.441e-08      -16.68
2       -16.79      -16.60   5.127e-08      -16.71
3       -16.83      -16.57   4.923e-08     -16.72*
4       -16.82      -16.49   4.935e-08      -16.69
5       -16.81      -16.40   5.025e-08      -16.64
6       -16.82      -16.34   4.960e-08      -16.63
7       -16.84      -16.29   4.863e-08      -16.62
8       -16.83      -16.20   4.918e-08      -16.57
9       -16.81      -16.11   5.025e-08      -16.52
10      -16.87      -16.10   4.714e-08      -16.56
11      -16.86      -16.01   4.764e-08      -16.52
12      -16.98      -16.06   4.232e-08      -16.61
13     -16.99*      -15.99  4.225e-08*      -16.58
14      -16.95      -15.88   4.377e-08      -16.52
--------------------------------------------------


### VAR(8) bivariado

In [10]:
# Re-estimate the same bivariate system with 8 lags, for comparison with the VAR(2).
# NOTE(review): in the recorded selection table no criterion picks 8 lags
# (BIC: 1, HQIC: 3, AIC/FPE: 13); confirm the reason for this order.
result_var8 = model.fit(maxlags=8)
print(result_var8.summary())

  Summary of Regression Results   
Model:                         VAR
Method:                        OLS
Date:           Wed, 20, Aug, 2025
Time:                     09:45:49
--------------------------------------------------------------------
No. of Equations:         2.00000    BIC:                   -16.2262
Nobs:                     176.000    HQIC:                  -16.5903
Log likelihood:           1016.34    FPE:                4.87039e-08
AIC:                     -16.8387    Det(Omega_mle):     4.05018e-08
--------------------------------------------------------------------
Results for equation infl
               coefficient       std. error           t-stat            prob
----------------------------------------------------------------------------
const             0.002599         0.001841            1.411           0.158
L1.infl           0.498804         0.080740            6.178           0.000
L1.crec_m         0.020150         0.023985            0.840           0.401


### Estabilidad del VAR (chequeo que no haya raíces unitarias)

In [11]:
# Stability check for the VAR(2).
# statsmodels returns the roots z of the characteristic polynomial
# det(I - A1 z - ... - Ap z^p) = 0, so the process is stable when EVERY
# modulus is strictly greater than 1 (roots outside the unit circle).
# The companion-matrix eigenvalues (the convention used by Stata's `varstable`)
# are the reciprocals of these roots and must therefore lie strictly inside it.
roots = result_var2.roots
modulos = np.abs(roots)  # computed once and reused below

print("Raíces del VAR(2):")
display(roots)
print(modulos)

# Reciprocal moduli, directly comparable with eigenvalue-based output (< 1 means stable).
print("Módulos de los autovalores de la matriz compañera (1/|raíz|):")
print(1 / modulos)

# Explicit verdict, so the reader does not have to infer it from the numbers.
print("¿El VAR(2) es estable?", bool(np.all(modulos > 1)))

Raíces del VAR(2):


array([-4.10053618-0.j        ,  0.30599245+2.11843778j,
        0.30599245-2.11843778j,  1.27533308-0.j        ])

[4.10053618 2.14042286 2.14042286 1.27533308]


### Testeo si los residuos están autocorrelacionados

Este test (el test de Portmanteau que implementa `test_whiteness` en statsmodels) no es exactamente el de Stata, pero es similar.

In [12]:
# Portmanteau (whiteness) test on the VAR(2) residuals.
# H0: residual autocorrelation up to the chosen lag is zero.
# NOTE(review): `adjusted` is left at its default here, while the p-value loop
# in the following cell passes adjusted=True; confirm which variant is intended.
rezagos_portmanteau = 12
wb = result_var2.test_whiteness(nlags=rezagos_portmanteau)
print(wb)

<statsmodels.tsa.vector_ar.hypothesis_test_results.WhitenessTestResults object. H_0: residual autocorrelation up to lag 12 is zero: reject at 5% significance level. Test statistic: 89.230, critical value: 55.758>, p-value: 0.000>


Para visualizar los p-values

In [13]:
# p-values of the adjusted Portmanteau test for horizons 3 through 12.
# The horizon starts at 3 because the test needs more lags than the VAR order (2).
for horizonte in range(3, 13):
    pvalor = result_var2.test_whiteness(nlags=horizonte, adjusted=True).pvalue
    print(horizonte, pvalor)

3 0.04920091272004848
4 0.09035470520232061
5 0.11196457499752265
6 0.017345722056598997
7 0.025617532171963914
8 0.037633996159916284
9 0.006060635887250112
10 0.002069402489489914
11 0.0010173444004396087
12 3.4536430816230336e-06


### Testeo por Causalidad en Sentido de Granger

In [14]:
# Granger-causality F-tests in both directions on the VAR(2).
# Each pair is (caused variable, causing variable);
# H0: the causing variable does not Granger-cause the caused one.
direcciones = (
    ("infl", "crec_m"),
    ("crec_m", "infl"),
)
for causada, causante in direcciones:
    print(result_var2.test_causality(causada, [causante], kind="f"))

<statsmodels.tsa.vector_ar.hypothesis_test_results.CausalityTestResults object. H_0: crec_m does not Granger-cause infl: fail to reject at 5% significance level. Test statistic: 0.532, critical value: 3.021>, p-value: 0.588>
<statsmodels.tsa.vector_ar.hypothesis_test_results.CausalityTestResults object. H_0: infl does not Granger-cause crec_m: reject at 5% significance level. Test statistic: 4.556, critical value: 3.021>, p-value: 0.011>
