In [82]:
%load_ext autoreload
%autoreload 2

import pandas as pd
from datetime import datetime
import os
import torch
from tqdm import tqdm
import matplotlib.pyplot as plt
import numpy as np
import pickle
import importlib

from MODELS import LSTM_BEKK_MODEL, BEKK_GARCH_MODEL, DCC_GARCH_MODEL, SVR_MODEL
from functions import *
from evaluate import *

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


Data cleaning:

Some close price is inappropriate, which lead to over the price fluctuation limit set by the exchange, fixed this by removing the observations that have day-to-day change over the exchange limit.

In [29]:
# ticker_list = ['REE', 'SAM', 'HAP', 'GMD', 'GIL', 'TMS', 'SAV', 'DHA', 'MHC', 'HAS'] # 10 stocks with the most observations
ticker_list = ['REE', 'SAM', 'HAP'] # 3 stocks with the most observations
limits = {
    'hose':0.07,
    'hnx':0.1,
    'upcom':0.15
}

# Holding period
horizon = 1

train_pct = 0.92


In [None]:
# Read and merge into 1 dataset

if "stock_data.csv" in os.listdir("data"):
    merged_df = pd.read_csv(
        os.path.join("data", "stock_data.csv"),
        index_col=None
    ).assign(
        date = lambda df : pd.to_datetime(df["date"])
    )
else:
    # Read and merge data
    hnx = pd.read_csv(os.path.join("data", "CafeF.HNX.Upto31.07.2025.csv")).assign(
        floor = "hnx"
    )
    hsx = pd.read_csv(os.path.join("data", "CafeF.HSX.Upto31.07.2025.csv")).assign(
        floor = "hose"
    )
    upcom = pd.read_csv(os.path.join("data", "CafeF.UPCOM.Upto31.07.2025.csv")).assign(
        floor = "upcom"
    )
    indexes = pd.read_csv(os.path.join("data", "CafeF.INDEX.Upto06.08.2025.csv")).assign(
        floor = "index"
    )

    # Rename columns
    hnx, hsx, upcom, indexes = [
        df.rename(columns={
            "<Ticker>":"ticker",
            "<DTYYYYMMDD>":"date",
            "<Open>":"open",
            "<High>":"high",
            "<Low>":"low",
            "<Close>":"close",
            "<Volume>":"volume"
        }) for df in [hnx, hsx, upcom, indexes]
    ]
        
    # Merge and clean data
    # UPCOM has missing tickers for some reason
    merged_df = pd.concat(
        [hnx, hsx, upcom, indexes],
        axis=0
    ).reset_index(drop=True).dropna(subset="ticker")\
    .assign(
        date=lambda df : df["date"].astype(str).apply(lambda x: datetime.strptime(x, "%Y%m%d").date())
    )
    merged_df.to_csv(
        os.path.join("data", "stock_data.csv"),
        index=False
    ) # Save merged data to save time in future runs


# Data cleaning and merging

data = merged_df.sort_values(["ticker", "date"]).assign(
    returns = lambda df : df.groupby("ticker")["close"].pct_change(),
    log_returns_pct = lambda df : np.log(df["close"] / df.groupby("ticker")["close"].shift(1))*100
)

data = data.loc[data["ticker"].str.len()==3] # Eliminate ETF, and indeces

data["limit"] = data["floor"].map(limits)
outliers = data.loc[data["returns"].abs() > data["limit"]]
clean_df = data.drop(outliers.index) # Remove outliers
print(f"% of observations removed: {round((len(outliers)/len(data))*100, 2)}%")

# NOTE: try out different samples of stocks
pivoted_df = clean_df.pivot_table(values="returns", index="date", columns="ticker") # Pivot data for better usability
pivoted_df = pivoted_df[ticker_list].dropna()

display(pivoted_df.describe())
train_df, test_df = split_train_test(pivoted_df, train_ratio = train_pct)

# Demean returns
train_mean = train_df.mean()
dm_train_df = train_df - train_mean
dm_test_df = test_df - train_mean

% of observations removed: 1.05%


ticker,REE,SAM,HAP
count,5951.0,5951.0,5951.0
mean,0.001091,0.000699,0.00077
std,0.021411,0.023733,0.024894
min,-0.069971,-0.069999,-0.069963
25%,-0.009689,-0.01162,-0.012434
50%,0.0,0.0,0.0
75%,0.011761,0.012037,0.012855
max,0.069962,0.069919,0.069927


In [15]:
len(test_df)

478

In [16]:
# Calcualte test set daily covariance
actual_covs = []
test_returns = np.array(dm_test_df)
for start in range(test_returns.shape[0]):
    returns_t = test_returns[start, :]
    act_cov = np.outer(returns_t, returns_t)
    
    actual_covs.append(act_cov)

Why demean the returns?

Volatility models like BEKK aim to model the covariance structure, not the mean.
By removing the mean from the return, it tells the model to focus on modeling volatility clustering and correlations, as well as preventing the mean return from contaminating the volatility dynamics

The mean from the training set will be used to demean the test set to simulate real world situation.

What's the differences between using static mean and moving average?

### ARCH
An ARCH model is used to predict volatility at a future time step, with the parameter $q$ as the number of lag squared residual error to include in the model 
ARCH uses returns or residuals as volatility shocks.

In [16]:
from arch import arch_model

garch = arch_model(pivoted_df["REE"]*100, vol="ARCH")
garch.fit()

Iteration:      1,   Func. Count:      5,   Neg. LLF: 129357.59275774604
Iteration:      2,   Func. Count:     12,   Neg. LLF: 15382.891572734194
Iteration:      3,   Func. Count:     19,   Neg. LLF: 13170.123600844268
Iteration:      4,   Func. Count:     24,   Neg. LLF: 12577.94370714599
Iteration:      5,   Func. Count:     28,   Neg. LLF: 12577.939139618782
Iteration:      6,   Func. Count:     32,   Neg. LLF: 12577.938832634314
Iteration:      7,   Func. Count:     36,   Neg. LLF: 12577.938830220966
Iteration:      8,   Func. Count:     39,   Neg. LLF: 12577.938830221014
Optimization terminated successfully    (Exit mode 0)
            Current function value: 12577.938830220966
            Iterations: 8
            Function evaluations: 39
            Gradient evaluations: 8


                      Constant Mean - ARCH Model Results                      
Dep. Variable:                    REE   R-squared:                       0.000
Mean Model:             Constant Mean   Adj. R-squared:                  0.000
Vol Model:                       ARCH   Log-Likelihood:               -12577.9
Distribution:                  Normal   AIC:                           25161.9
Method:            Maximum Likelihood   BIC:                           25182.0
                                        No. Observations:                 5951
Date:                Wed, Aug 27 2025   Df Residuals:                     5950
Time:                        10:10:31   Df Model:                            1
                                Mean Model                                
                 coef    std err          t      P>|t|    95.0% Conf. Int.
--------------------------------------------------------------------------
mu             0.0930  2.486e-02      3.740  1.837e-04 [4.426e-0

### GARCH

In [17]:
from arch import arch_model

garch = arch_model(pivoted_df["REE"]*100, vol="GARCH")
garch.fit()

Iteration:      1,   Func. Count:      6,   Neg. LLF: 6841595578418.32
Iteration:      2,   Func. Count:     15,   Neg. LLF: 6411162190.4786215
Iteration:      3,   Func. Count:     23,   Neg. LLF: 14540.272691777205
Iteration:      4,   Func. Count:     30,   Neg. LLF: 12675.511071518553
Iteration:      5,   Func. Count:     38,   Neg. LLF: 12163.288926570278
Iteration:      6,   Func. Count:     44,   Neg. LLF: 12162.193236000765
Iteration:      7,   Func. Count:     49,   Neg. LLF: 12162.192674535536
Iteration:      8,   Func. Count:     54,   Neg. LLF: 12162.192667947853
Iteration:      9,   Func. Count:     59,   Neg. LLF: 12162.192667437183
Optimization terminated successfully    (Exit mode 0)
            Current function value: 12162.192667437183
            Iterations: 9
            Function evaluations: 59
            Gradient evaluations: 9


                     Constant Mean - GARCH Model Results                      
Dep. Variable:                    REE   R-squared:                       0.000
Mean Model:             Constant Mean   Adj. R-squared:                  0.000
Vol Model:                      GARCH   Log-Likelihood:               -12162.2
Distribution:                  Normal   AIC:                           24332.4
Method:            Maximum Likelihood   BIC:                           24359.2
                                        No. Observations:                 5951
Date:                Wed, Aug 27 2025   Df Residuals:                     5950
Time:                        10:10:31   Df Model:                            1
                                Mean Model                                
                 coef    std err          t      P>|t|    95.0% Conf. Int.
--------------------------------------------------------------------------
mu             0.0747  2.140e-02      3.493  4.770e-04 [3.281e-0

# BEKK-GARCH

$$H_t=C'C+A'\epsilon_{t-1}\epsilon_{t-1}'+B'H_{t-1}B$$

- $H_t$: n_assets x n_assets conditional covariance matrix
- $C$: Lower triangular matrix to ensure positive definteness
- $A$: Captures the effect of past shocks (ARCH)
- $B$: Captures the persistence (GARCH)
- $\epsilon_{t-1}$: Vector of past residuals (demeaned returns)

### Usage
- Used for small dimensions of 2-3 assets.
- Contagion studies, volatility spillovers.

### Advantages
- Guarantees positive definite covariance matrices
- Captures spillover effects across assets
- More flexible in modeling asymmetric dependencies

### Disadvantages
- Computationally heavy, parameter explosion: for $N$ assets, get ~$N^2$ parameters
- Harder to estimate high-dimensional data
- May overfit with limited data

Since the model suffers from the curse of dimensionality, so I can only sample 3 stocks, at around 1000 days windows. This should be tuned later

In [17]:
# Run model
if "bekk_results.pkl" in os.listdir("bekk_params"):
    with open(os.path.join("bekk_params", "bekk_results.pkl"), "rb") as f:
        bekk = pickle.load(f)
    with open(os.path.join("bekk_params", "bekk_A.pkl"), "rb") as f:
        A = pickle.load(f)
    with open(os.path.join("bekk_params", "bekk_B.pkl"), "rb") as f:
        B = pickle.load(f)
    with open(os.path.join("bekk_params", "bekk_C.pkl"), "rb") as f:
        C = pickle.load(f) 
else:
    # Fit BEKK-GARCH model, dimensions is greately reduced as this model suffers from curse of dimensionality
    C, A, B, bekk = BEKK_GARCH_MODEL.fit_bekk(dm_train_df.tail(1000).values) 
    
    # Save covariance matrix and model parameters
    with open(os.path.join("bekk_params", "bekk_results.pkl"), "wb") as f:
        pickle.dump(bekk, f)
    with open(os.path.join("bekk_params", "bekk_C.pkl"), "wb") as f:
        pickle.dump(C, f)
    with open(os.path.join("bekk_params", "bekk_A.pkl"), "wb") as f:
        pickle.dump(A, f)
    with open(os.path.join("bekk_params", "bekk_B.pkl"), "wb") as f:
        pickle.dump(B, f)

# Get BEKK-GARCH fitted covariance        
cov_matrix = BEKK_GARCH_MODEL.bekk_fitted_covariances(bekk.x, returns=dm_train_df.values)

In [19]:
# Evaluate
last_return = dm_train_df.iloc[-1].to_numpy()
last_cov_matrix = cov_matrix[-1]
dates_test = dm_test_df.index
bekk_pred_covs = []

train_data = dm_train_df.copy()

for start in range(0, dm_test_df.shape[0]):
    # Forecast x step ahead
    cov_list = BEKK_GARCH_MODEL.bekk_forecast(
        C, A, B, 
        train_data.values, 
        horizon=horizon
    )

    bekk_pred_covs.append(cov_list)
    
    # Adjust for next iteration
    train_data = pd.concat([dm_train_df, dm_test_df.iloc[:start]])
    last_return = dm_test_df.values[start]
    last_cov_matrix = np.array(actual_covs[start])


# DCC-GARCH
Is a two-step model:
1. Estimate univariate GARCH for each asset's variance

$$h_{i,t}=\omega_i + \alpha_i \epsilon_{i,t-1}^2+\beta_i h_{i,t-1}$$

2. Model the correlations dynamically:

$$H_t=D_tR_tD_t$$

With:
- $D_t=diag(\sqrt{h_{1,t}},...,\sqrt{h_{N,t}})$: matrix of standard deviations
- $R_t$: dynamic correlation matrix, updated via:
$$Q_t=(1-\alpha-\beta)\overline{Q}+\alpha \epsilon_{t-1} \epsilon_{t-1}' + \beta Q_{t-1}$$
$$R_t=diag(Q_t)^{-\frac{1}{2}} Q_t diag(Q_t)^{-\frac{1}{2}}$$
- $\overline{Q}$: Unconditional correlation matrix

### Usage
- Designed for high-dimensional portfolios (10-100 assets)
- Used for correlation dynamics, portfolio optimization, hedging

### Advantages
- Computationally efficient, espically in large dimensions
- Decouples volatility (diagonal part) and correlation (off-diagonal part)
- Easier to interpret dynamic correlation structure
- Widely used in epirical finance

### Disadvantages
- Less flexible than BEKK
- Correlations may be biased if univariage GARCH models are misspecified
- Does not directly capture cross-variance spillovers


In [23]:
dm_train_df = dm_train_df.astype(float)
h_mat, eps_mat, garch_params = DCC_GARCH_MODEL.fit_univariate_garch(dm_train_df) # Fit univariate GARCH for each stock
dcc = DCC_GARCH_MODEL.fit_dcc(eps_mat) # Fit DCC(1,1) on standardized residuals
cov_matrix = DCC_GARCH_MODEL.build_covmatrix(h_mat, dcc["Rt"]) # Get full list of conditinoal covariance

In [24]:
# Evaluate

last_return = dm_train_df.iloc[-1].to_numpy()
last_cov_matrix = cov_matrix[-1]

dates_test = dm_test_df.index
dcc_pred_covs = []
train_data = dm_train_df.copy()

for start in range(0, dm_test_df.shape[0]):


    # Forecast 1 step ahead
    cov_list, _, _, _ = DCC_GARCH_MODEL.forecast_dcc_multi_step(
        h_last=h_mat[-1],
        r_last=last_return,
        garch_params=garch_params,
        eps_last=eps_mat[-1],
        Q_last=dcc["Qt"][-1],
        dcc_params=dcc,
        S=dcc["S"]
    )

    dcc_pred_covs.append(cov_list)
    
    # Adjust for next iteration
    train_data = pd.concat([dm_train_df, dm_test_df.iloc[:start]])
    last_return = dm_test_df.values[start]
    last_cov_matrix = np.array(actual_covs[start])

### Supported Vector Regression - SVR


In [30]:
# Prepare data for SVR model, which requires High and Low price

pivoted_data_svr = clean_df.pivot_table(
    columns="ticker", 
    values=["open", "high", "low", "close", "returns"], 
    index="date"
)
pivoted_data_svr.columns = pivoted_data_svr.columns.swaplevel(0, 1)
pivoted_data_svr = pivoted_data_svr.sort_index(axis=1, level=0)
pivoted_data_svr = pivoted_data_svr.loc[:, pivoted_data_svr.columns.get_level_values(0).isin(ticker_list)]
pivoted_data_svr = pivoted_data_svr.dropna() # Drop NA
train_svr, test_svr = split_train_test(pivoted_data_svr, train_ratio=train_pct)

test_svr_returns = test_svr.loc[:, test_svr.columns.get_level_values(1) == "returns"]
test_svr_hl = test_svr.loc[:, test_svr.columns.get_level_values(1).isin(["high", 'low'])]
train_svr_hl = train_svr.loc[:, train_svr.columns.get_level_values(1).isin(["high", 'low'])]
train_svr_returns = train_svr.loc[:, test_svr.columns.get_level_values(1) == "returns"]
train_svr_returns.columns = train_svr_returns.columns.get_level_values(0)


In [73]:
# Predict covariance matrix

if "svr_preds_covs_3A.pkl" not in os.listdir("svr_results"):
    svr_pred_covs = []
    lags = 30
    horizon=1
    train_data = train_svr_hl.copy()

    for start in tqdm(range(0, test_svr_returns.shape[0])):
        
        cov_list = SVR_MODEL.svr_model_forecast(
            train_data, horizon, lags
        )

        # Prepare next iteration
        train_data = pd.concat([
            train_svr_hl, test_svr_hl.iloc[:start]
        ]) # Extend training data
        
        svr_pred_covs.append(cov_list)  

    with open(os.path.join('svr_results', "svr_preds_covs_3A.pkl"), "wb") as f:
        pickle.dump(svr_pred_covs, f)
else:
    with open(os.path.join('svr_results', "svr_preds_covs_3A.pkl"), "rb") as r:
        svr_pred_covs = pickle.load(r)

100%|██████████| 478/478 [43:23<00:00,  5.45s/it]


In [83]:
svr_frob = np.mean([frobenius_loss(true, pred) for true, pred in zip(np.array(actual_covs), np.array(svr_pred_covs).squeeze())])
svr_stein = np.mean([stein_loss(true, pred) for true, pred in zip(np.array(actual_covs), np.array(svr_pred_covs).squeeze())])

In [84]:
svr_stein

np.float64(95026.13209682766)

# LSTM-BEKK

$$H_t = C'C + C_t'C_t + a r_{t-1} r_{t-1}' + b H_{t-1}$$

$C_t$ is dynamically updated through an LSTM network $\overline{C_t}=LSTM(h_{t-1}, r_{t-1})$, with $C_t=LowerTriangular(\overline{C_t})$

In [66]:
# LSTM training parameters
# Fit model

save_path = os.path.join("lstm_model", "lstm_bekk_model_state_dict.pt")

if "lstm_bekk_model_state_dict.pt" in os.listdir("lstm_model"):
    # Load model weights from file
    lstm_bekk_model = LSTM_BEKK_MODEL.load_model(
        path=save_path,
        n_assets=dm_train_df.shape[1],
        config=LSTM_BEKK_MODEL.LSTM_BEKK_config(
            hidden_size=3, # Same as number of assets
            num_layers=1,
            dropout=0.1,
            lr=0.001,
            epochs=600
        )
    )
    lstm_bekk_model.load_state_dict(
        torch.load(save_path)
    )
    lstm_bekk_model.eval()
else:
    lstm_bekk_model = LSTM_BEKK_MODEL.fit_lstm_bekk(
        returns_df=dm_train_df,
        hidden_size=3, # Same as number of assets
        num_layers=1,
        dropout=0.1, 
        lr=0.001,
        epochs=600
    )

    # Save model weights using torch.save
    torch.save(lstm_bekk_model.state_dict(), save_path)

[0000] train NLL : -635.929 | val NLL : -103.055
[0010] train NLL : -1917.824 | val NLL : -220.491
[0020] train NLL : -2448.222 | val NLL : -276.758
[0030] train NLL : -2828.336 | val NLL : -317.902
[0040] train NLL : -3141.146 | val NLL : -352.048
[0050] train NLL : -3415.490 | val NLL : -382.126
[0060] train NLL : -3664.714 | val NLL : -409.521
[0070] train NLL : -3896.128 | val NLL : -435.001
[0080] train NLL : -4114.127 | val NLL : -459.031
[0090] train NLL : -4321.491 | val NLL : -481.906
[0100] train NLL : -4520.044 | val NLL : -503.818
[0110] train NLL : -4711.006 | val NLL : -524.901
[0120] train NLL : -4895.347 | val NLL : -545.258
[0130] train NLL : -5073.753 | val NLL : -564.963
[0140] train NLL : -5246.850 | val NLL : -584.086
[0150] train NLL : -5415.193 | val NLL : -602.686
[0160] train NLL : -5579.291 | val NLL : -620.821
[0170] train NLL : -5739.512 | val NLL : -638.525
[0180] train NLL : -5896.647 | val NLL : -655.905
[0190] train NLL : -6050.794 | val NLL : -672.951
[

In [67]:
# Evaluate model
cov_matrix = lstm_bekk_model.covariance(dm_train_df)
last_return = dm_train_df.iloc[-1].to_numpy()
last_cov_matrix = cov_matrix[-1]

lstm_pred_covs = []

for start in range(dm_test_df.shape[0]):
    
    # Forecast x step ahead
    cov_list = lstm_bekk_model.forecast_multi_step(
        last_returns=last_return,
        last_cov=last_cov_matrix,
        steps=horizon,
        method="zero"
    )
    horizon_return = test_df[start:start+horizon]

    lstm_pred_covs.append(cov_list)

    # Adjust for next iteration
    train_data = pd.concat([dm_train_df, dm_test_df.iloc[:start]])
    last_return = dm_test_df.values[start]
    last_cov_matrix = np.array(actual_covs[start])

In [69]:
lstm_pred_covs

[array([[[0.20417829, 0.13980266, 0.11656322],
         [0.13980266, 0.32857725, 0.10203945],
         [0.11656322, 0.10203945, 0.31013437]]]),
 array([[[0.15609416, 0.10814392, 0.08938675],
         [0.10814392, 0.26023737, 0.07793745],
         [0.08938675, 0.07793745, 0.24614339]]]),
 array([[[0.15574989, 0.10816429, 0.08917254],
         [0.10816429, 0.26031639, 0.07793615],
         [0.08917254, 0.07793615, 0.24601254]]]),
 array([[[0.15574055, 0.10822133, 0.08914788],
         [0.10822133, 0.260444  , 0.0778701 ],
         [0.08914788, 0.0778701 , 0.24604605]]]),
 array([[[0.15574989, 0.10818025, 0.08915066],
         [0.10818025, 0.26025203, 0.07798312],
         [0.08915066, 0.07798312, 0.24600478]]]),
 array([[[0.15588199, 0.10832986, 0.08924622],
         [0.10832986, 0.2603523 , 0.07805121],
         [0.08924622, 0.07805121, 0.24605062]]]),
 array([[[0.15574618, 0.10818287, 0.08916212],
         [0.10818287, 0.26025247, 0.07796676],
         [0.08916212, 0.07796676, 0.245999

In [68]:

with open(os.path.join('dcc_results', "dcc_preds_covs_3A.pkl"), "wb") as f:
    pickle.dump(dcc_pred_covs, f)
with open(os.path.join('bekk_results', "bekk_preds_covs_3A`.pkl"), "wb") as f:
    pickle.dump(bekk_pred_covs, f)
with open(os.path.join('lstm_results', "lstm_preds_covs_3A.pkl"), "wb") as f:
    pickle.dump(lstm_pred_covs, f)