In [1]:
import torch.nn as nn
import torch
from torch.optim import RMSprop
import math
from dataclasses import dataclass
from typing import Optional, Tuple, List, Dict
import numpy as np
from scipy.optimize import minimize
from tqdm import tqdm
import pandas as pd
import os
from datetime import datetime
import functions
from functions import *
from evaluate import *

In [2]:
# ticker_list = ['REE', 'SAM', 'HAP', 'GMD', 'GIL', 'TMS', 'SAV', 'DHA', 'MHC', 'HAS'] # 10 stocks with the most observations
ticker_list = ['REE', 'SAM', 'HAP'] # 3 stocks with the most observations
limits = {
    'hose':0.07,
    'hnx':0.1,
    'upcom':0.15
}
# Read and merge into 1 dataset

if "stock_data.csv" in os.listdir("data"):
    merged_df = pd.read_csv(
        os.path.join("data", "stock_data.csv"),
        index_col=None
    ).assign(
        date = lambda df : pd.to_datetime(df["date"])
    )
else:
    # Read and merge data
    hnx = pd.read_csv(os.path.join("data", "CafeF.HNX.Upto31.07.2025.csv")).assign(
        floor = "hnx"
    )
    hsx = pd.read_csv(os.path.join("data", "CafeF.HSX.Upto31.07.2025.csv")).assign(
        floor = "hose"
    )
    upcom = pd.read_csv(os.path.join("data", "CafeF.UPCOM.Upto31.07.2025.csv")).assign(
        floor = "upcom"
    )
    indexes = pd.read_csv(os.path.join("data", "CafeF.INDEX.Upto06.08.2025.csv")).assign(
        floor = "index"
    )

    # Rename columns
    hnx, hsx, upcom, indexes = [
        df.rename(columns={
            "<Ticker>":"ticker",
            "<DTYYYYMMDD>":"date",
            "<Open>":"open",
            "<High>":"high",
            "<Low>":"low",
            "<Close>":"close",
            "<Volume>":"volume"
        }) for df in [hnx, hsx, upcom, indexes]
    ]
        
    # Merge and clean data
    # UPCOM has missing tickers for some reason
    merged_df = pd.concat(
        [hnx, hsx, upcom, indexes],
        axis=0
    ).reset_index(drop=True).dropna(subset="ticker")\
    .assign(
        date=lambda df : df["date"].astype(str).apply(lambda x: datetime.strptime(x, "%Y%m%d").date())
    )
    merged_df.to_csv(
        os.path.join("data", "stock_data.csv"),
        index=False
    ) # Save merged data to save time in future runs


# Data cleaning and merging

data = merged_df[["date", "ticker", "floor", "close"]].sort_values(["ticker", "date"]).assign(
    returns = lambda df : df.groupby("ticker")["close"].pct_change(),
    log_returns_pct = lambda df : np.log(df["close"] / df.groupby("ticker")["close"].shift(1))*100
)

data = data.loc[data["ticker"].str.len()==3] # Eliminate ETF, and indeces

data["limit"] = data["floor"].map(limits)
outliers = data.loc[data["returns"].abs() > data["limit"]]
clean_df = data.drop(outliers.index) # Remove outliers
print(f"% of observations removed: {round((len(outliers)/len(data))*100, 2)}%")

# NOTE: try out different samples of stocks
pivoted_df = clean_df.pivot_table(values="returns", index="date", columns="ticker") # Pivot data for better usability
pivoted_df = pivoted_df[ticker_list].dropna()

display(pivoted_df.describe())
train_df, test_df = split_train_test(pivoted_df)

% of observations removed: 1.05%


ticker,REE,SAM,HAP
count,5951.0,5951.0,5951.0
mean,0.001091,0.000699,0.00077
std,0.021411,0.023733,0.024894
min,-0.069971,-0.069999,-0.069963
25%,-0.009689,-0.01162,-0.012434
50%,0.0,0.0,0.0
75%,0.011761,0.012037,0.012855
max,0.069962,0.069919,0.069927


In [3]:
mean_train = train_df.mean()
dm_train_df, dm_test_df = train_df - mean_train, test_df - mean_train 

In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [None]:
# Define LSTM netwrodk to generate dynamic Ct
class LSTM_Components(nn.Module):
    def __init__(self, input_size: int, hidden_size: int, num_layers:int):
        """
        Initialize LSTM network
        Args: 
            input_size: number of assets
            hidden_size: number of features in the hidden state
            num_layers: number of recurrent layers
        """
        super(LSTM_Components, self).__init__()
        
        self.lstm = nn.LSTM(
            input_size, hidden_size, num_layers, batch_first=True
        )

        # The output size is n*(n+1)/2, which is the number of elements in the lower triangular matrix

        num_output_elememts = int(input_size * (input_size + 1) / 2)

        self.linear = nn.Linear(hidden_size, num_output_elememts)
        # Learnable parameter for the Swish activation function
        self.beta = nn.Parameter(torch.tensor(1.0))

    def forward(self, x: torch.Tensor, h_prev: torch.Tensor, c_prev: torch.Tensor):
        """
        Defines the forward pass of LSTM component
        Args:
            x: Input tensor of asset returns (demeaned)
            h_prev: Previous hidden state
            c_prev: previous cell state
        Returns:
            C_t: Lower triangular matrix C_t
            h_new: New hidden state
            c_new: New cell state
        """
        # Ensure input is 3D for LSTM, shape (batch_size, seq_length, input_size)
        if x.dim() == 2:
            x = x.unsqueeze(1)

        output, (h_new, c_new) = self.lstm(x, (h_prev, c_prev))

        # Pass LSTM output through linear layer
        c_t_flat = self.linear(output.squeeze(1))

        # Reshape flat vector to a lower triangular matrix C_t
        C_t = torch.zeros(
            x.shape[2],
            x.shape[2],
            device=x.device
        ) # Initialize matrix n_assets x n_assets
        tril_indices = torch.tril_indices(
            row=x.shape[2],
            col=x.shape[2],
            offset=0
        ) # Get lower-triangular indices
        C_t[tril_indices[0], tril_indices[1]] = c_t_flat.squeeze(0)

        # Apply Swish activation to diagonal elements for regularization
        diag_indices = range(x.shape[2])
        diag_elements = C_t.diag()
        swish_activation = diag_elements * torch.sigmoid(self.beta * diag_elements)
        C_t[diag_indices, diag_indices] = swish_activation

        return C_t, h_new, c_new
class LSTM_BEKK(nn.Module):
    """
    defines the full LSTM-BEKK model
    """
    def __init__(self, num_assets:int, hidden_size:int, num_layers:int):
        """
        Initialize LSTM-BEKK model
        Args: 
            num_assets: Number of assets in portfolio
            hidden_size: The hidden size for LSTM component
            num_layers: Number of layers for LSTM component
        """
        super(LSTM_BEKK, self).__init__()
        self.num_assets = num_assets
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # Initialize LSTM component
        self.lstm_component = LSTM_Components(num_assets, hidden_size, num_layers)

        # Initialize BEKK parameters
        # Static C (lower triangular)
        c_init = torch.randn(self.num_assets, self.num_assets)
        self.C = nn.Parameter(torch.tril(c_init))

        # Scalar parameters a and b
        self.a = nn.Parameter(torch.rand(1))
        self.b = nn.Parameter(torch.rand(1))

    def forward(self, returns: torch.Tensor):
        """
        Defines the forward pass to compute the sequence of covariance matrices
        Args:
            returns: Tensor of demeaned returns n_periods x n_assets
        Returns:
            Tensor of conditional covariance matrices of shape (n_periods, n_assets, n_assets)
            The final hidden state
            The final cell state
        """
        n_periods, n_assets = returns.shape
        covariance_list = [] # Container

        # Initialize hidden states and cell states
        hidden_t = torch.zeros(
            self.num_layers, 1, self.hidden_size, device=returns.device
        )
        cell_t = torch.zeros(
            self.num_layers, 1, self.hidden_size, device=returns.device
        )

        # Initialize H_0 as the unconditional covariance of returns
        cov_matrix = torch.cov(returns.T)

        # Static covariance component C'C
        static_C = self.C @ self.C.T

        for t in range(n_periods):
            if t > 0:
                prev_returns = returns[t-1, :].unsqueeze(0)
            else:
                prev_returns = torch.zeros_like(returns[0, :]).unsqueeze(0)
            
            # Get dynamic component C_t from LSTM
            C_t, hidden_t, cell_t = self.lstm_component(prev_returns, hidden_t, cell_t)
            dynamic_C = C_t @ C_t.T

            # Previous shock component
            prev_shock = prev_returns.view(-1, 1) @ prev_returns.view(-1, 1).T

            # Apply constraints: a, b >=0 and a + b < 1
            a_constrained = torch.clamp(self.a, min=0)
            b_constrained = torch.clamp(self.b, min=0)
            if a_constrained + b_constrained >= 1.0:
                total = a_constrained + b_constrained
                a_constrained = a_constrained / (total+1e-6)
                b_constrained = b_constrained / (total+1e-6)

            # Calculate covariance matrix using LSTM-BEKK
            cov_matrix = static_C + dynamic_C + a_constrained * prev_shock + b_constrained * cov_matrix
            
            # Ensure the covariance matrix is positive semi-definite NOTE:????
            cov_matrix = (cov_matrix + cov_matrix.T) / 2 

            covariance_list.append(cov_matrix)
        
        return torch.stack(covariance_list), hidden_t, cell_t
    
def negative_log_likelihood(returns: torch.Tensor, covariance_list: torch.Tensor):
    """
    Calculates the negative log-likelihood for LSTM-BEKK model
    Args: 
        returns: The tensor of demeaned returns
        covariance_list: List of conditional covariance matrices
    Returns:
        Total negative log-likelihood
    """
    n_periods, n_assets = returns.shape
    
    log_likelihood = 0.0

    nll = torch.zeros((), device=returns.device, dtype=returns.dtype)
    for t in range(n_periods):
        cov_matrix = covariance_list[t]
        returns_t = returns[t, :].unsqueeze(1)

        cov_matrix = cov_matrix + 1e-6 * torch.eye(n_assets, device=returns.device, dtype=returns.dtype)
        # Cholesky factorization
        L = torch.linalg.cholesky(cov_matrix)

        # Log determinant
        logdet_cov = 2.0 *  torch.log(torch.diag(L)).sum()

        # Quadratic form
        r = returns[t].view(-1, 1)
        y = torch.cholesky_solve(r, L)
        quad = (r.T @ y).squeeze()

        nll = nll + 0.5 * (n_assets * torch.log(torch.tensor(2.0 * torch.pi, device=returns.device, dtype=returns.dtype)) + logdet_cov + quad)


        # # Add a small value to the diagnoal for stability
        # cov_matrix = cov_matrix + torch.eye(n_assets, device=cov_matrix.device) * 1e-6

        # logdet_H = torch.logdet(cov_matrix)
        # term_1 = n_assets * np.log(2 * np.pi)
        # term_2 = logdet_H
        # term_3 = returns_t.T @ torch.inverse(cov_matrix) @ returns_t
        # log_likelihood += term_1 + term_2 + term_3

    # return 0.5 * log_likelihood
    return nll

def train_model(
    model: nn.Module,
    train_data: torch.Tensor,
    epochs:int =300,
    learning_rate: float =0.001
):
    """
    Train LSTM-BEKK Model
    Args:
        model: LSTM-BEKK model instance
        train_data: Training data deameaned returns
        epochs: Number of training epochs
        learning_rate: learning rate for the optimizer
    """
    # Use RMSprop optimizer
    optimizer = torch.optim.RMSprop(model.parameters(), lr=learning_rate)
    for epoch in range(epochs):
        model.train()

        optimizer.zero_grad()

        # Forward pass to get covariance matrices
        covariance_list, _, _ = model(train_data)

        # Calculate loss
        loss = negative_log_likelihood(train_data, covariance_list)

        # Backward pass and optimization
        loss.backward()

        # Gradient clipping to prevent exploding gradients
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)

        optimizer.step()

        if (epoch + 1) % 10 == 0:
            print(f"Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}")

def forecast(
    model: nn.Module,
    last_return: torch.Tensor,
    last_cov: torch.Tensor,
    final_h: torch.Tensor,
    final_c: torch.Tensor,
    n_steps: int
):
    """
    Generates n-steps ahead forecasts for the covariance matrix
    Args:
        model: Trained LSTM-BEKK Model
        last_return: Last observed return vector
        last_cov: Last estimated covariance matrix
        final_h: The final hidden state from training
        final_c: The final cell state from training 
        n_step: Number of steps to forecast ahead
    Returns:
        A list of forecasted covariance matrices
    """
    model.eval()
    cov_matrix_fc = []

    # Initialize hidden states and cell states from last training step
    hidden_t = final_h
    cell_t = final_c
    current_r = last_return
    current_cov_matrix = last_cov

    with torch.no_grad():
        for step in range(n_steps):
            # Get dynamic component
            C_t, hidden_t, cell_t = model.lstm_component(
                current_r.unsqueeze(0), hidden_t, cell_t
            )
            dynamic_C = C_t @ C_t.T

            # Shock component
            shock = current_r.view(-1, 1) @ current_r.view(-1, 1).T

            # Static C
            static_C = model.C @ model.C.T

            # Constraints
            a_constrained = torch.clamp(model.a, min=0)
            b_constrained = torch.clamp(model.b, min=0)
            if a_constrained + b_constrained >= 1.0:
                total = a_constrained + b_constrained
                a_constrained = a_constrained / (total + 1e-6)
                b_constrained = b_constrained / (total + 1e-6)

            # Forecast next covariance matrix
            next_cov_matrix = static_C + dynamic_C + a_constrained * shock + b_constrained * current_cov_matrix
            cov_matrix_fc.append(next_cov_matrix)

            # Update for next iteration
            current_cov_matrix = next_cov_matrix
            current_r = torch.zeros_like(last_return) # Assume expected return = 0 for forecast
        
        return cov_matrix_fc

In [7]:
# Convert train data to torch.Tensor
train_tensor = torch.tensor(dm_train_df.values, dtype=torch.float32)
test_tensor = torch.tensor(dm_test_df.values, dtype=torch.float32)

In [8]:
train_tensor.shape

torch.Size([4760, 3])

In [9]:

# Initialize model
n_assets = dm_train_df.shape[1]
lstm_bekk = LSTM_BEKK(
    num_assets=n_assets,
    hidden_size=32,
    num_layers=1
)

# Train model

print("Starting model training...")
train_model(
    model=lstm_bekk,
    train_data=train_tensor,
    epochs=300,
    learning_rate=0.001
)
print("Training finished")

# Get in-sample covariance matrices
lstm_bekk.eval()

with torch.no_grad():
    in_sample_cov_matrices, hidden_final, cell_final = lstm_bekk(train_tensor)

# Convert to a list of numpy arrays for easier inspection
in_sample_cov_list = [H.detach().numpy() for H in in_sample_cov_matrices]


Starting model training...
Epoch [10/300], Loss: 7367.4438
Epoch [20/300], Loss: 6279.7266
Epoch [30/300], Loss: 5221.8701
Epoch [40/300], Loss: 5895.5859
Epoch [50/300], Loss: 4882.2236
Epoch [60/300], Loss: 4139.1196
Epoch [70/300], Loss: 3476.7473
Epoch [80/300], Loss: 2941.0015
Epoch [90/300], Loss: 2413.7588
Epoch [100/300], Loss: 1787.1427
Epoch [110/300], Loss: 1036.6809
Epoch [120/300], Loss: 171.8401
Epoch [130/300], Loss: -827.8760
Epoch [140/300], Loss: -2024.1625
Epoch [150/300], Loss: -3994.2605
Epoch [160/300], Loss: -6410.9673
Epoch [170/300], Loss: -10144.2207
Epoch [180/300], Loss: -13527.7695
Epoch [190/300], Loss: -13927.4668
Epoch [200/300], Loss: -14391.7510
Epoch [210/300], Loss: -14515.2861
Epoch [220/300], Loss: -13862.6191
Epoch [230/300], Loss: -13737.2500
Epoch [240/300], Loss: -14157.9160
Epoch [250/300], Loss: -14466.9424
Epoch [260/300], Loss: -14553.6523
Epoch [270/300], Loss: -14566.1670
Epoch [280/300], Loss: -14621.8945
Epoch [290/300], Loss: -14277.99

In [None]:
horizon = 20
# Evaluate

dates_test = dm_test_df.index

lstm_port_returns, lstm_port_vars, lstm_pred_covs, lstm_act_covs = [], [], [], [] # Containers

# Loop over test period in 20-days non-overlapping horizons
for start in range(0, len(test_tensor) - horizon + 1, horizon):
    train_data = torch.cat([train_tensor, test_tensor[:start]])
    
    last_returns = train_tensor[-1, :]
    last_cov_matrix = in_sample_cov_matrices[-1]
    
    # Forecast x step ahead
    cov_list_tensor_fc = forecast(
        lstm_bekk, 
        last_return=last_returns, 
        last_cov=last_cov_matrix, 
        final_h=hidden_final,
        final_c=cell_final,
        n_steps=horizon
    )
    
    cov_list_fc = [H.detach().numpy() for H in cov_list_tensor_fc]
    
    # Aggregate to 20-days covariance forecast
    agg_covariance = sum(cov_list_fc)

    # Get MVP weights
    mvp_weights, weights_dict = minimum_variance_portfolio(agg_covariance , train_df)

    # Realized returns from next 20-days
    horizon_return = test_df[start:start+horizon]

    # Cummulative return
    port_return = np.array(horizon_return) @ mvp_weights
    lstm_port_returns.append(port_return.sum())

    # Actual covariance
    act_covariance =  horizon_return.T @ horizon_return
    act_var = mvp_weights.T @ act_covariance @ mvp_weights
    lstm_port_vars.append(act_var)
    lstm_act_covs.append(act_covariance)
    lstm_pred_covs.append(agg_covariance)

    # Adjust for next iteration
    last_return = np.array(horizon_return.iloc[[-1]])[0]
    last_cov_matrix = np.array(lstm_act_covs[-1])

lstm_results =  pd.DataFrame({
    "date":dates_test[horizon-1::horizon][:len(lstm_port_returns)], # End of each horizon
    "realized_return":lstm_port_returns,
    "realized_variance":lstm_port_vars 
})

lstm_act_covs = np.array(lstm_act_covs)
lstm_pred_covs = np.array(lstm_pred_covs)

def frobenius(H_pred, H_act):
    errors = np.linalg.norm(
        H_pred - H_act, axis=(1,2)
    ) ** 2
    return errors.mean()

lstm_sr = lstm_results["realized_return"].mean()/lstm_results["realized_return"].std()
lstm_frob = frobenius(H_pred=lstm_pred_covs, H_act=lstm_act_covs)
# lstm_stein = stein_loss(H_pred=lstm_pred_covs, H_true=lstm_act_covs)
# lstm_corr_loss = correlation_loss(H_pred=lstm_pred_covs, H_true=lstm_act_covs)
# lstm_port_aligned = portfolio_aligned_loss(lstm_pred_covs, lstm_act_covs, mvp_weights)

print(f"""
BEKK-GARCH MODEL
      
- Sharpe Ratio = {lstm_sr}
- Frobenius norm = {lstm_frob}
- Stein loss = {lstm_stein}
- Correlation loss = {lstm_corr_loss}
- Portfolio aligned loss = {lstm_port_aligned}
""")



BEKK-GARCH MODEL
      
- Sharpe Ratio = 0.254978288084018
- Frobenius norm = 40970.06935437776
# - Stein loss = 71657.04748348505
# - Correlation loss = 4.924416243762376
# - Portfolio aligned loss = 0.004361537064666071



In [37]:
def qlike_loss(H_forecasts, H_realized):
    """
    Compute QLIKE loss for covariance forecasts.
    H_forecasts: (T, N, N) predicted cov matrices
    H_realized:  (T, N, N) realized cov matrices
    """
    T = H_forecasts.shape[0]
    losses = []
    for t in range(T):
        Hf = H_forecasts[t]
        Hr = H_realized[t]
        try:
            inv_Hf = np.linalg.inv(Hf)
            term1 = np.trace(inv_Hf @ Hr)
            term2 = np.log(np.linalg.det(Hf))
            losses.append(term1 + term2)
        except np.linalg.LinAlgError:
            continue
    return np.mean(losses)

In [39]:
in_sample_cov_list

[array([[2.9203752e-03, 6.6386685e-02, 2.9732024e-02],
        [6.6386685e-02, 3.2982743e+00, 1.4445205e+00],
        [2.9732024e-02, 1.4445205e+00, 6.3480085e-01]], dtype=float32),
 array([[3.7294677e-03, 1.0620831e-01, 4.7140915e-02],
        [1.0620831e-01, 5.3156581e+00, 2.3268247e+00],
        [4.7140915e-02, 2.3268247e+00, 1.0205045e+00]], dtype=float32),
 array([[3.9058886e-03, 1.3016286e-01, 5.7460517e-02],
        [1.3016286e-01, 6.5498304e+00, 2.8659797e+00],
        [5.7460517e-02, 2.8659797e+00, 1.2555202e+00]], dtype=float32),
 array([[3.8855476e-03, 1.4458360e-01, 6.3619956e-02],
        [1.4458360e-01, 7.3048601e+00, 3.1955242e+00],
        [6.3619956e-02, 3.1955242e+00, 1.3989125e+00]], dtype=float32),
 array([[3.8210438e-03, 1.5328044e-01, 6.7316622e-02],
        [1.5328044e-01, 7.7667570e+00, 3.3969879e+00],
        [6.7316622e-02, 3.3969879e+00, 1.4864770e+00]], dtype=float32),
 array([[3.7566756e-03, 1.5853256e-01, 6.9541812e-02],
        [1.5853256e-01, 8.0493221e+

In [11]:

# # Save results
# with open(os.path.join("lstm_model", "bekk_portfolio_return.pkl"), "wb") as f:
#     pickle.dump(bekk_port_returns, f)
# with open(os.path.join("lstm_model", "bekk_portfolio_variance.pkl"), "wb") as f:
#     pickle.dump(bekk_port_vars, f)
# with open(os.path.join("lstm_model", "bekk_actual_covariance.pkl"), "wb") as f:
#     pickle.dump(bekk_act_covs, f)
# with open(os.path.join("lstm_model", "bekk_forecast_covariance.pkl"), "wb") as f:
#     pickle.dump(agg_covariance, f)
# with open(os.path.join("lstm_model", "bekk_weights.pkl"), "wb") as f:
#     pickle.dump(weights_dict, f)

In [17]:
np.array(act_covariance)

array([[ 2.25157808e-03,  9.36740241e-04, -2.99645220e-04],
       [ 9.36740241e-04,  2.83389614e-03, -3.71736016e-05],
       [-2.99645220e-04, -3.71736016e-05,  8.57368688e-03]])

In [15]:
agg_covariance

array([[6.8293512e-02, 3.3129392e+00, 1.4498159e+00],
       [3.3129392e+00, 1.6988930e+02, 7.4276817e+01],
       [1.4498159e+00, 7.4276817e+01, 3.2478916e+01]], dtype=float32)