In [1]:
import pandas as pd
from statsmodels.formula.api import ols
import statsmodels.api as sm
from statsmodels.tsa.api import ARDL

from statsmodels.tsa.stattools import adfuller, kpss

In [2]:
df = pd.read_excel("stock_ardl2.xlsx")
df

Unnamed: 0,date,ibov_returns,cpi_inflation,brl_dollar,interest_rates
0,1998-01-01,-0.046778,0.007097,0.005657,0.4290
1,1998-02-01,0.087551,0.004603,0.006608,0.4200
2,1998-03-01,0.130167,0.003397,0.005411,0.3854
3,1998-04-01,-0.022600,0.002397,0.006617,0.3655
4,1998-05-01,-0.156718,0.005001,0.005785,0.3330
...,...,...,...,...,...
296,2022-09-01,0.004693,-0.002900,0.020076,0.1375
297,2022-10-01,0.054527,0.005901,0.001621,0.1375
298,2022-11-01,-0.030602,0.004100,0.003027,0.1375
299,2022-12-01,-0.021825,0.006200,-0.004290,0.1375


In [8]:
# Setting the exogenous variables
exog=df.loc[:, ~df.columns.isin(["ibov_returns", "date"])]

# Define the range of lag values to test
max_lags = 3
lags_range = range(1, max_lags+1)

# Initialize the list of AIC values for each lag combination
aic_list = []

# Loop over the range of lags and fit the ARDL model for each combination
for p in lags_range:
    for q in lags_range:
        model = ARDL(df.ibov_returns, p, exog, q)
        results = model.fit()
        aic = results.aic
        aic_list.append((p, q, aic))

# Find the combination of lags that returns the lowest AIC value
best_lags = min(aic_list, key=lambda x: x[2])
print("Best combination of lags: ({}, {}) with AIC = {}".format(best_lags[0], best_lags[1], best_lags[2]))

Best combination of lags: (1, 2) with AIC = -693.7357196478278


In [9]:
model = ARDL(df.ibov_returns, 1, exog, 2).fit()
print(model.summary())

                              ARDL Model Results                              
Dep. Variable:           ibov_returns   No. Observations:                  301
Model:               ARDL(1, 2, 2, 2)   Log Likelihood                 358.868
Method:               Conditional MLE   S.D. of innovations              0.073
Date:                Sat, 01 Apr 2023   AIC                           -693.736
Time:                        11:22:42   BIC                           -649.290
Sample:                             2   HQIC                          -675.949
                                  301                                         
                        coef    std err          z      P>|z|      [0.025      0.975]
-------------------------------------------------------------------------------------
const                 0.0041      0.011      0.381      0.704      -0.017       0.025
ibov_returns.L1      -0.0379      0.063     -0.600      0.549      -0.162       0.086
cpi_inflation.L0     -0.

## Augmented Ducky Fuller (ADF) TEST

A null hypothesis is given as:

* 𝐻0
  : Time series is not stationary
* 𝐻1
  : Time series is stationary


In [40]:
def adf_test(timeseries):
    print("Results of Dickey-Fuller Test:")
    dftest = adfuller(timeseries, autolag="AIC")
    dfoutput = pd.Series(
        dftest[0:4],
        index=[
            "Test Statistic",
            "p-value",
            "#Lags Used",
            "Number of Observations Used",
        ],
    )
    for key, value in dftest[4].items():
        dfoutput["Critical Value (%s)" % key] = value
    print(dfoutput.apply(lambda x: round(x, 4)))

def kpss_test(timeseries):
    print("Results of KPSS Test:")
    kpsstest = kpss(timeseries, regression="c", nlags="auto")
    kpss_output = pd.Series(
        kpsstest[0:3], index=["Test Statistic", "p-value", "Lags Used"]
    )
    for key, value in kpsstest[3].items():
        kpss_output["Critical Value (%s)" % key] = value
    print(kpss_output)

In [32]:
df.columns

Index(['date', 'ibov_returns', 'cpi_inflation', 'brl_dollar',
       'interest_rates'],
      dtype='object')

In [41]:
adf_test(df.ibov_returns)

Results of Dickey-Fuller Test:
Test Statistic                 -16.3567
p-value                          0.0000
#Lags Used                       0.0000
Number of Observations Used    300.0000
Critical Value (1%)             -3.4523
Critical Value (5%)             -2.8712
Critical Value (10%)            -2.5719
dtype: float64


In [42]:
adf_test(df.cpi_inflation)

Results of Dickey-Fuller Test:
Test Statistic                  -8.3120
p-value                          0.0000
#Lags Used                       0.0000
Number of Observations Used    300.0000
Critical Value (1%)             -3.4523
Critical Value (5%)             -2.8712
Critical Value (10%)            -2.5719
dtype: float64


In [43]:
adf_test(df.interest_rates)

Results of Dickey-Fuller Test:
Test Statistic                  -3.3273
p-value                          0.0137
#Lags Used                       3.0000
Number of Observations Used    297.0000
Critical Value (1%)             -3.4526
Critical Value (5%)             -2.8713
Critical Value (10%)            -2.5720
dtype: float64


In [44]:
adf_test(df.brl_dollar)

Results of Dickey-Fuller Test:
Test Statistic                 -11.5323
p-value                          0.0000
#Lags Used                       0.0000
Number of Observations Used    300.0000
Critical Value (1%)             -3.4523
Critical Value (5%)             -2.8712
Critical Value (10%)            -2.5719
dtype: float64
