In [None]:
import random
import numpy as np
import pandas as pd
from pandas.core.missing import validate_limit_direction
from sklearn.mixture import GaussianMixture
from sklearn.mixture import BayesianGaussianMixture
from sklearn.neighbors import KernelDensity
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from scipy.stats import multivariate_normal
from scipy.stats import truncnorm
from tqdm import tqdm
import cvxpy as cp
import gurobipy as gp
from gurobipy import GRB
from joblib import Parallel, delayed
import torch
import normflows as nf
import matplotlib.pyplot as plt
from itertools import product
from torch.utils.data import TensorDataset, DataLoader, random_split
import os
import pickle
from pathlib import Path
np.set_printoptions(threshold=np.inf)
np.set_printoptions(linewidth=200)



In [2]:
# ===================== Deterministic Setup =====================
def set_all_seeds(seed: int = 42):
    os.environ["PYTHONHASHSEED"] = str(seed)
    os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":16:8"  
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    try:
        torch.use_deterministic_algorithms(True)
    except Exception:
        pass

os.environ["OMP_NUM_THREADS"]="1"
os.environ["MKL_NUM_THREADS"]="1"
os.environ["OPENBLAS_NUM_THREADS"]="1"
os.environ["NUMEXPR_NUM_THREADS"]="1"
os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":16:8"

In [3]:
# -- Implementation 3 ---
######################### 2-Wass GMM DRO function ###################################
def Portfolio_2_Wass(xi, eps):
    N, d = xi.shape
    lda = cp.Variable(nonneg=True)
    s = cp.Variable(N)
    x = cp.Variable(d, nonneg=True)

    constraints = []
    for i in range(N):
        constraints.append(
            s[i] >= cp.quad_over_lin(x, 4 * lda) - xi[i] @ x
        )
    constraints.append(cp.sum(x) == 1)

    obj = cp.Minimize(lda * (eps ** 2) + (1 / N) * cp.sum(s))
    prob = cp.Problem(obj, constraints)
    prob.solve(solver=cp.MOSEK, verbose=False)
    return x.value

def Portfolio_2_Wass_MCVaR(xi, eps, tau, eta):
    N, d = xi.shape
    lda = cp.Variable(nonneg=True)
    s = cp.Variable(N)
    x = cp.Variable(d, nonneg=True)
    beta = cp.Variable()

    const = []
    for i in range(N):
        xi_norm2 = float(np.sum(xi[i]**2))
        const.append(cp.norm2(cp.hstack([2 * lda * xi[i] - ((1 / tau) + eta) * x, lda * xi_norm2 + s[i] + (beta/tau) - beta- lda]))
                     <= lda * xi_norm2 + s[i] + (beta/tau) - beta + lda)
        const.append(cp.norm2(cp.hstack([2 * lda * xi[i] - eta * x, lda * xi_norm2 + s[i] -  beta - lda]))
                     <= lda * xi_norm2 + s[i] - beta + lda)
        const.append(lda * xi_norm2 + s[i] >= -(beta/tau) + beta)
        const.append(lda * xi_norm2 + s[i] >= beta)
    const.append(cp.sum(x) == 1)

    obj = cp.Minimize(lda * (eps**2) + (1 / N) * cp.sum(s))
    prob = cp.Problem(obj, const)
    prob.solve(solver=cp.MOSEK)
    return x.value

def transforming_conditional(s, num_components, mu_k, sig_k, p_k, dim_s):
    mu_cond, cov_cond, weights = [], [], []
    for k in range(num_components):
        mu = mu_k[k]
        sigma = sig_k[k]
        mu_s = mu[:dim_s]
        mu_xi = mu[dim_s:]
        sigma_ss = sigma[:dim_s, :dim_s]
        sigma_sx = sigma[:dim_s, dim_s:]
        sigma_xs = sigma[dim_s:, :dim_s]
        sigma_xx = sigma[dim_s:, dim_s:]
        sigma_ss_inv = np.linalg.inv(sigma_ss)
        cond_mu = mu_xi + sigma_xs @ sigma_ss_inv @ (s - mu_s)
        cond_cov = sigma_xx - sigma_xs @ sigma_ss_inv @ sigma_sx
        weight = p_k[k] * multivariate_normal.pdf(s, mean=mu_s, cov=sigma_ss)
        mu_cond.append(cond_mu)
        cov_cond.append(cond_cov)
        weights.append(weight)
    weights = np.array(weights)
    if np.any(np.isnan(weights)) or weights.sum() <= 1e-12:
        weights = np.ones_like(weights) / len(weights)
    else:
        weights /= weights.sum()
    return np.array(mu_cond), np.array(cov_cond), weights

def MC_sampling(K, N, mu_list, cov_list, p_list):
    d = mu_list.shape[1]
    samples = np.zeros((N, d))
    for i in range(N):
        k = np.random.choice(K, p=p_list)
        samples[i] = np.random.multivariate_normal(mu_list[k], cov_list[k])
    return samples

def oos_loss_portfolio(x, xi, tau, eta):
    x = x.reshape(-1)
    xi = xi.reshape(-1)
    beta = cp.Variable()
    yTx = xi @ x
    term1 = -eta * yTx + beta
    term2 = -(eta + 1 / tau) * yTx + (1 - 1 / tau) * beta
    loss_expr = cp.maximum(term1, term2)
    prob = cp.Problem(cp.Minimize(loss_expr))
    prob.solve(solver=cp.MOSEK, verbose=False)
    return prob.value

def oos_loss_valid(x, xi, tau, eta):
    x = x.reshape(-1)
    xi = xi.reshape(-1)
    beta = cp.Variable()
    yTx = xi @ x
    term1 = -eta * yTx + beta
    term2 = -(eta + 1 / tau) * yTx + (1 - 1 / tau) * beta
    loss_expr = cp.maximum(term1, term2)
    prob = cp.Problem(cp.Minimize(loss_expr))
    prob.solve(solver=cp.MOSEK, verbose=False)
    return prob.value

def oos_mean_portfolio(x, xi):
    x = np.asarray(x).reshape(-1)
    xi = np.asarray(xi).reshape(-1)
    return float(x @ xi)

def oos_CVaR_portfolio(x, xi, tau):
    portfolio_returns = xi @ x
    var_level = np.quantile(portfolio_returns, tau)
    cvar = portfolio_returns[portfolio_returns <= var_level].mean()
    return -cvar

def oos_std_portfolio(x, xi):
    portfolio_returns = xi @ x
    return np.std(portfolio_returns)

def oos_sharpe_portfolio(x, xi):
    returns = xi @ x
    mean_ret = np.mean(returns)
    std_ret = np.std(returns)
    return np.sqrt(252) * (mean_ret / std_ret)

def select_K_by_AIC(z_np, max_K):
    aic_scores = []
    models = []
    for k in range(1, max_K + 1):
        gmm = GaussianMixture(n_components=k)
        gmm.fit(z_np)
        aic = gmm.aic(z_np)
        aic_scores.append(aic)
        models.append(gmm)
    best_index = np.argmin(aic_scores)
    best_K = best_index + 1
    return best_K


def _cv_gmm_worker(
    j, asset_idx,
    tau, eta,
    data_cv_train, data_cv_test,
    eps_list, max_K, hidden_node, hidden_layer, block_size, num_bins, total_epoch,
    device
):
    base = 5000
    random.seed(base + j)
    np.random.seed(base + j)
    torch.manual_seed(base + j)
    torch.cuda.manual_seed_all(base + j)  # if using CUDA

    # ====== 아래는 기존 로직 유지 ======
    dim_s, dim_xi = 5, 399

    data_val = data_cv_test.iloc[j]
    time_val = data_val['time']
    start_time = time_val - pd.DateOffset(years=2)

    mask_2year = (data_cv_train['time'] >= start_time) & (data_cv_train['time'] < time_val)
    data_subtrain_all = data_cv_train[mask_2year]
    s_subtrain = data_subtrain_all.iloc[:, 1:6].values 
    xi_subtrain = data_subtrain_all.iloc[:, 6 + asset_idx].to_numpy()

    s_val = data_val.iloc[1:6].values.reshape(1, -1)
    mask_future = (data_cv_train["time"] >= time_val)
    future_row = data_cv_train[mask_future]
    xi_val_day = future_row.iloc[0, 6 + asset_idx].values.reshape(1, -1)

    scaler_s = StandardScaler()
    scaler_xi = StandardScaler()
    s_subtrain_std = scaler_s.fit_transform(s_subtrain)
    xi_subtrain_std = scaler_xi.fit_transform(xi_subtrain)
    data_subtrain_std = np.hstack([s_subtrain_std, xi_subtrain_std])
    data_subtrain_tensor = torch.tensor(data_subtrain_std, dtype=torch.float32, device=device)

    best_K = select_K_by_AIC(data_subtrain_std, max_K=max_K)
    latent_size = dim_s + dim_xi

    nfm, _ = train_nf_model(
        latent_size, best_K, hidden_node, hidden_layer,
        num_bins, block_size, total_epoch,
        data_subtrain_tensor, device
    )

    gmm_x = GaussianMixture(
        n_components=best_K, covariance_type='diag',
        reg_covar=1e-2, random_state = base + j
    ).fit(data_subtrain_std)
    mu_x, diag_sig_x, p_x = gmm_x.means_, gmm_x.covariances_, gmm_x.weights_
    sig_x = np.array([np.diag(diag_sig_x[k]) for k in range(best_K)])

    s_val_std = scaler_s.transform(s_val)
    s_vec = s_val_std.ravel()

    mu_cond_x, cov_cond_x, w_x = transforming_conditional(
        s=s_vec, num_components=best_K, mu_k=mu_x, sig_k=sig_x, p_k=p_x, dim_s=dim_s
    )
    xi_hat_std = (w_x[:, None] * mu_cond_x).sum(axis=0, keepdims=True)

    s_aug_std = np.hstack([s_val_std, xi_hat_std])
    s_tensor = torch.tensor(s_aug_std, dtype=torch.float32, device=device)

    z_s = inverse(nfm, s_tensor)[:, :dim_s][0]
    z_train = inverse(nfm, data_subtrain_tensor)

    gmm_z = GaussianMixture(
        n_components=best_K, covariance_type='diag',
        reg_covar=1e-2, random_state = base + j
    ).fit(z_train)
    mu_k, diag_sig_k, p_k = gmm_z.means_, gmm_z.covariances_, gmm_z.weights_
    sig_k = np.array([np.diag(diag_sig_k[k]) for k in range(best_K)])

    mu_cond, cov_cond, p_cond = transforming_conditional(
        z_s, best_K, mu_k, sig_k, p_k, dim_s
    )

    z_xi_sample = MC_sampling(best_K, 1000, mu_cond, cov_cond, p_cond)
    z_full = np.hstack([np.repeat(z_s.reshape(1, -1), len(z_xi_sample), axis=0), z_xi_sample])

    z_tensor = torch.tensor(z_full, dtype=torch.float32, device=device)
    x_gen_std = forward(nfm, z_tensor)
    xi_MC = scaler_xi.inverse_transform(x_gen_std[:, dim_s:])

    eps_losses = {}
    for eps in eps_list:
        x_cv_gmm = Portfolio_2_Wass_MCVaR(xi_MC, eps, tau, eta)
        losses = oos_loss_valid(x_cv_gmm, xi_val_day, tau, eta) * 100
        eps_losses[eps] = float(np.mean(losses))
        print(f"[GMM-CV] j={j} eps={eps:.4f}, loss={eps_losses[eps]:.4f}")

    return eps_losses, best_K

def cv_GMM(
    tau, eta, data_cv_train, data_cv_test, val_indices, val_asset_indices,
    eps_list, max_K, hidden_node, hidden_layer, block_size, num_bins, total_epoch,
    device, n_jobs=-1
):
    dim_s, dim_xi = 5, 399
    results = Parallel(n_jobs=n_jobs)(
        delayed(_cv_gmm_worker)(
            int(j),               
            asset_idx,            
            tau, eta,
            data_cv_train, data_cv_test,
            eps_list, max_K, hidden_node, hidden_layer, block_size, num_bins, total_epoch,
            device
        )
        for j, asset_idx in zip(val_indices, val_asset_indices)
    )

    eps_loss_dict = {eps: 0.0 for eps in eps_list}
    best_K_list = []
    for eps_losses, best_K in results:
        best_K_list.append(best_K)
        for eps in eps_list:
            eps_loss_dict[eps] += eps_losses[eps]

    for eps in eps_list:
        print(f"[GMM-CV] eps={eps:.4f}, total_loss={eps_loss_dict[eps]:.4f}")

    best_eps = min(eps_loss_dict.items(), key=lambda x: x[1])[0]
    K_mean = float(np.mean(best_K_list))
    return best_eps, K_mean


In [4]:
def train_nf_model(latent_size, best_K, hidden_node, hidden_layer, num_bins, block_size, total_epoch, x, device, batch_size=64, lr=1e-3):
    patience = 30
    val_split = 0.2

    x_np = x.cpu().numpy()
    gmm = GaussianMixture(n_components=best_K,covariance_type='diag', reg_covar=1e-3).fit(x_np)

    means = torch.tensor(gmm.means_, dtype=torch.float32, device=device)
    stds = torch.tensor(np.sqrt(gmm.covariances_), dtype=torch.float32, device=device)
    weights = torch.tensor(gmm.weights_, dtype=torch.float32, device=device)

    flows = [nf.flows.AutoregressiveRationalQuadraticSpline(latent_size, hidden_layer, hidden_node, num_bins=num_bins) for _ in range(block_size)]

    q0 = nf.distributions.GaussianMixture(n_modes=best_K, dim=latent_size, loc=means, scale=stds, weights=weights, trainable=False)
    nfm = nf.NormalizingFlow(q0=q0, flows=flows).to(device)
    optimizer = torch.optim.Adam(nfm.parameters(), lr=lr)
    loss_hist = []

    N = x.size(0)
    val_size = int(N * val_split)
    train_size = N - val_size
    train_dataset, val_dataset = random_split(TensorDataset(x), [train_size, val_size])
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=val_size, shuffle=False)

    best_val_loss = float('inf')
    best_model_state = None
    patience_counter = 0

    for epoch in range(total_epoch):
        nfm.train()
        train_loss_epoch = 0.0
        for batch in train_loader:
            x_batch = batch[0].to(device)
            optimizer.zero_grad()
            loss = nfm.forward_kld(x_batch)
            if not torch.isnan(loss):
                loss.backward()
                optimizer.step()
                train_loss_epoch += loss.item()
        loss_hist.append(train_loss_epoch)

        nfm.eval()
        with torch.no_grad():
            for val_batch in val_loader:
                x_val = val_batch[0].to(device)
                val_loss = nfm.forward_kld(x_val).item()

        if val_loss < best_val_loss - 1e-4:
            best_val_loss = val_loss
            best_model_state = nfm.state_dict()
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print(f"⏹️ Early stopping at epoch {epoch+1}, best val loss: {best_val_loss:.4f}")
                break

    if best_model_state is not None:
        nfm.load_state_dict(best_model_state)

    return nfm, loss_hist

def inverse(nfm, x):
    with torch.no_grad():
        z_np = nfm.inverse(x).cpu().numpy()
    return z_np

def forward(nfm, z):
    with torch.no_grad():
        x = nfm.forward(z).cpu().numpy()
    return x

In [5]:
def equal_weight_kernel(X_mat: np.array,Y_mat: np.array,X0: np.array) -> np.array:
    Y_mat = np.asarray(Y_mat, dtype=np.float64)
    num_assets = Y_mat.shape[1]
    return np.ones(num_assets) / num_assets

def mean_CVaR_kernel(X_mat:np.array, Y_mat:np.array, X0:np.array, reg_params:float, tau:float,)->np.array:
    Y_mat = np.asarray(Y_mat, dtype=np.float64)

    num_sample = Y_mat.shape[0]
    dim_beta = Y_mat.shape[1]
    alpha = cp.Variable(shape = (1,), name = 'alpha')
    beta = cp.Variable(shape = (dim_beta,), name = 'beta', nonneg=True)
    lambda_ = cp.Variable(shape = (num_sample,), name = 'lambda')
    constraints = [
        cp.sum(beta) == 1,
        lambda_ >= -reg_params*(Y_mat@beta) + alpha,
        lambda_ >= -(reg_params+1/tau)*(Y_mat@beta) + (1-1/tau)*alpha,
    ]
    problem = cp.Problem(cp.Minimize(cp.sum(lambda_)), constraints)
    problem.solve()
    if problem.status != 'optimal':
        raise ValueError('problem is not optimal')
    return beta.value

def DR_mean_CVaR_kernel(X_mat: np.array, Y_mat: np.array, X0: np.array, reg_params: float, tau: float, rho: float):
    Y_mat = np.asarray(Y_mat, dtype=np.float64)

    num_sample = Y_mat.shape[0]
    dim_beta = Y_mat.shape[1]
    alpha = cp.Variable(shape = (1,), name = 'alpha')
    beta = cp.Variable(shape = (dim_beta,), name = 'beta', nonneg=True)
    lambda_ = cp.Variable(shape = (1,), name = 'lambda', nonneg=True)
    inside_exp = cp.Variable(shape = (num_sample,), name = 'inside_exp')
    constraints = [
        cp.sum(beta) == 1,
        inside_exp >= -reg_params*(Y_mat@beta) + alpha + cp.quad_over_lin(reg_params*beta,4*lambda_),
        inside_exp >= (-(reg_params+1/tau)*(Y_mat@beta) +
                       (1-1/tau)*alpha + cp.quad_over_lin((reg_params+1/tau)*beta,4*lambda_)),
    ]
    problem = cp.Problem(cp.Minimize(lambda_*rho + cp.sum(inside_exp)/num_sample), constraints)
    problem.solve()
    if problem.status != 'optimal':
        raise ValueError('problem is not optimal')
    return beta.value

def cond_mean_CVaR_kernel(X_mat: np.array, Y_mat: np.array, X0: np.array, reg_params: float, tau: float, neighbor_quantile: float):
    X_mat = np.asarray(X_mat, dtype=np.float64)
    X0 = np.asarray(X0, dtype=np.float64).reshape(1, -1)
    Y_mat = np.asarray(Y_mat, dtype=np.float64)

    X_dist = np.linalg.norm(X_mat-X0, axis = 1)
    idx = (X_dist <= np.quantile(X_dist, neighbor_quantile))
    dim_beta = Y_mat.shape[1]
    dim_data = np.sum(idx)
    alpha = cp.Variable(shape = (1,), name = 'alpha')
    beta = cp.Variable(shape = (dim_beta,), name = 'beta', nonneg=True)
    lambda_ = cp.Variable(shape = (dim_data,), name = 'lambda')
    constraints = [
        cp.sum(beta) == 1,
        lambda_ >= -reg_params*(Y_mat[idx,:]@beta) + alpha,
        lambda_ >= -(reg_params+1/tau)*(Y_mat[idx,:]@beta) + (1-1/tau)*alpha,
    ]
    problem = cp.Problem(cp.Minimize(cp.sum(lambda_)), constraints)
    problem.solve()
    if problem.status != 'optimal':
        raise ValueError('problem is not optimal')
    return beta.value

def DR_Winf_conditional_mean_CVaR_kernel(X_mat: np.array, Y_mat: np.array, X0: np.array, reg_params: float, tau: float, gamma_quantile: float, rho_quantile: float):
    X_mat = np.asarray(X_mat, dtype=np.float64)
    X0 = np.asarray(X0, dtype=np.float64).reshape(1, -1)
    Y_mat = np.asarray(Y_mat, dtype=np.float64)

    eta = reg_params
    tau_inv = 1 / tau
    X_dist = np.linalg.norm(X_mat - X0, axis=1)
    X_dist[np.isnan(X_dist)] = 1e8
    gamma = np.quantile(X_dist, gamma_quantile)
    rho = np.quantile(X_dist, rho_quantile)
    try:
        idx_I = (X_dist <= gamma + rho)
        idx_I1 = (X_dist + rho <= gamma)
        idx_I2 = idx_I & (~idx_I1)
    except RuntimeWarning:
        print(X_dist)
        print(gamma)
        print(rho)
    norm_x_minus_xp_in_I = X_dist[idx_I] - gamma
    norm_x_minus_xp_in_I[norm_x_minus_xp_in_I < 0] = 0
    y_I = Y_mat[idx_I]

    stock_num = Y_mat.shape[1]
    beta = cp.Variable(stock_num, nonneg=True)
    alpha = cp.Variable(1)
    lambda_ = cp.Variable(shape=(1,))
    u = cp.Variable(shape=(len(y_I),), name='u')
    v_term_1 = alpha - eta * (Y_mat[idx_I] @ beta) + eta * cp.norm(beta) * (rho - norm_x_minus_xp_in_I)
    v_term_2 = ((1 - tau_inv) * alpha
                - (eta + tau_inv) * (Y_mat[idx_I] @ beta)
                + (eta + tau_inv) * cp.norm(beta) * (rho - norm_x_minus_xp_in_I))
    constraints = [
        u[idx_I2[idx_I]] >= 0,
        cp.sum(u) <= 0,
        cp.sum(beta) == 1,
        lambda_ + u >= v_term_1,
        lambda_ + u >= v_term_2
    ]
    problem = cp.Problem(cp.Minimize(lambda_), constraints)
    problem.solve()
    if problem.status != 'optimal':
        raise ValueError('problem is not optimal')
    return beta.value

def DR_W2_conditional_mean_CVaR_kernel(X_mat: np.array, Y_mat: np.array, X0: np.array, reg_params: float, tau: float, epsilon: float, rho_div_rho_min: float,):
    X_mat = np.asarray(X_mat, dtype=np.float64)
    X0 = np.asarray(X0, dtype=np.float64).reshape(1, -1)
    Y_mat = np.asarray(Y_mat, dtype=np.float64)

    def compute_rho_min(X_mat, X0, epsilon):
        X_dist = np.linalg.norm(X_mat - X0, axis=1)
        X_dist[np.isnan(X_dist)] = 1e8
        X_cut = np.quantile(X_dist, q=epsilon, method='higher')
        return (X_dist[X_dist <= X_cut]**2).mean() * epsilon

    rho = rho_div_rho_min * compute_rho_min(X_mat, X0, epsilon)
    X_dist = np.linalg.norm(X_mat - X0, axis=1)
    eta = reg_params
    epsilon_inv = 1 / epsilon
    tau_inv = 1 / tau

    N, stock_num = Y_mat.shape
    beta = cp.Variable(stock_num, nonneg=True)
    alpha = cp.Variable(1)
    lambda1 = cp.Variable(1, nonneg=True)
    lambda2 = cp.Variable(1)
    theta = cp.Variable(N, nonneg=True)
    z = cp.Variable(N, nonneg=True)
    z_tilde = cp.Variable(N, nonneg=True)

    obj = cp.Minimize(lambda1 * rho + lambda2 * epsilon + cp.sum(theta) / N)
    linear_constraints = [
        cp.sum(beta) == 1,
        z == theta + lambda1 * X_dist ** 2 + lambda2 + epsilon_inv * eta * (Y_mat @ beta - alpha),
        z_tilde == (theta + lambda1 * X_dist ** 2 + lambda2
                    + epsilon_inv * (eta + tau_inv) * (Y_mat @ beta)
                    - epsilon_inv * (1 - tau_inv) * alpha)
    ]
    quad_over_lin_constraints = [
        z >= cp.quad_over_lin(epsilon_inv * eta * beta, 4 * lambda1),
        z_tilde >= cp.quad_over_lin(epsilon_inv * (eta + tau_inv) * beta, 4 * lambda1),
    ]
    problem = cp.Problem(obj, linear_constraints + quad_over_lin_constraints)
    problem.solve()
    if problem.status != 'optimal':
        raise ValueError('problem is not optimal')
    return beta.value

In [6]:
def cv_DR_mean_CVaR_kernel(tau, eta, data_cv_train, data_cv_test, val_indices, val_asset_indices, eps_list):
    best_eps, best_loss = None, float('inf')
    for eps in eps_list:
        total_loss = 0
        for j, asset_idx in zip(val_indices, val_asset_indices):
            data_val = data_cv_test.iloc[j]
            time_val = data_val['time']
            start_time = time_val - pd.DateOffset(years=2)

            mask_2year = (data_cv_train['time'] >= start_time) & (data_cv_train['time'] < time_val)
            data_subtrain_all = data_cv_train[mask_2year]
            s_subtrain = data_subtrain_all.iloc[:, 1:6].values
            xi_subtrain = data_subtrain_all.iloc[:, 6:].values
            xi_sub = xi_subtrain[:, asset_idx]

            s_val = data_val.iloc[1:6].values.reshape(1, -1)

            future_rows = data_cv_train[data_cv_train["time"] > time_val]
            xi_val_day = future_rows.iloc[0, 6 + asset_idx].values.reshape(1, -1)

            x = DR_mean_CVaR_kernel(s_subtrain, xi_sub, s_val, eta, tau, rho=eps)
            losses = oos_loss_valid(x, xi_val_day, tau, eta) * 100
            avg_loss = np.mean(losses)
            total_loss += avg_loss

        print(f"[DRMC] eps={eps:.4f}, total_loss={total_loss:.4f}")
        if total_loss < best_loss:
            best_loss = total_loss
            best_eps = eps

    return best_eps

def cv_cond_mean_CVaR_kernel(tau, eta, data_cv_train, data_cv_test, val_indices, val_asset_indices, quantile_list):
    best_q, best_loss = None, float('inf')
    for q in quantile_list:
        total_loss = 0
        for j, asset_idx in zip(val_indices, val_asset_indices):
            data_val = data_cv_test.iloc[j]
            time_val = data_val['time']
            start_time = time_val - pd.DateOffset(years=2)

            mask_2year = (data_cv_train['time'] >= start_time) & (data_cv_train['time'] < time_val)
            data_subtrain_all = data_cv_train[mask_2year]
            s_subtrain = data_subtrain_all.iloc[:, 1:6].values
            xi_subtrain = data_subtrain_all.iloc[:, 6:].values
            xi_sub = xi_subtrain[:, asset_idx]

            s_val = data_val.iloc[1:6].values.reshape(1, -1)

            future_rows = data_cv_train[data_cv_train["time"] > time_val]
            xi_val_day = future_rows.iloc[0, 6 + asset_idx].values.reshape(1, -1)

            x_CMC = cond_mean_CVaR_kernel(s_subtrain, xi_sub, s_val, eta, tau, neighbor_quantile=q)
            losses = oos_loss_valid(x_CMC, xi_val_day, tau, eta) * 100
            avg_loss = np.mean(losses)
            total_loss += avg_loss

        print(f"[CMC] quantile={q:.2f}, loss={total_loss:.4f}")
        if total_loss < best_loss:
            best_loss, best_q = total_loss, q

    return best_q

def cv_DR_Winf_conditional_mean_CVaR_kernel(tau, eta, data_cv_train, data_cv_test, val_indices, val_asset_indices, gamma_quantile_list, rho_quantile_list):
    best_loss, best_gamma_q, best_rho_q = float('inf'), None, None
    for gamma_q in gamma_quantile_list:
        for rho_q in rho_quantile_list:
            total_loss = 0

            for j, asset_idx in zip(val_indices, val_asset_indices):
                data_val = data_cv_test.iloc[j]
                time_val = data_val['time']
                start_time = time_val - pd.DateOffset(years=2)

                mask_2year = (data_cv_train['time'] >= start_time) & (data_cv_train['time'] < time_val)
                data_subtrain_all = data_cv_train[mask_2year]
                s_subtrain = data_subtrain_all.iloc[:, 1:6].values
                xi_subtrain = data_subtrain_all.iloc[:, 6:].values
                xi_sub = xi_subtrain[:, asset_idx]

                s_val = data_val.iloc[1:6].values.reshape(1, -1)
                future_rows = data_cv_train[data_cv_train['time'] > time_val]
                xi_val_day = future_rows.iloc[0, 6 + asset_idx].values.reshape(1, -1)

                x_DRCMC = DR_Winf_conditional_mean_CVaR_kernel(s_subtrain, xi_sub, s_val, eta, tau, gamma_q, rho_q)
                losses = oos_loss_valid(x_DRCMC, xi_val_day, tau, eta) * 100
                avg_loss = np.mean(losses)
                total_loss += avg_loss

            print(f"[DRCMC] gamma={gamma_q:.2f}, rho={rho_q:.2f}, loss={total_loss:.4f}")

            if total_loss < best_loss:
                best_loss = total_loss
                best_gamma_q = gamma_q
                best_rho_q = rho_q

    return best_gamma_q, best_rho_q

def cv_DR_W2_conditional_mean_kernel(tau, eta, data_cv_train, data_cv_test, val_indices, val_asset_indices, quantile_level_list, rho_div_rho_min_list):
    best_loss, best_quantile_level, best_rho_div = float('inf'), None, None
    for q in quantile_level_list:
        for rho_div in rho_div_rho_min_list:
            total_loss = 0
            for j, asset_idx in zip(val_indices, val_asset_indices):
                data_val = data_cv_test.iloc[j]
                time_val = data_val['time']
                start_time = time_val - pd.DateOffset(years=2)

                mask_2year = (data_cv_train['time'] >= start_time) & (data_cv_train['time'] < time_val)
                data_subtrain_all = data_cv_train[mask_2year]
                s_subtrain = data_subtrain_all.iloc[:, 1:6].values
                xi_subtrain = data_subtrain_all.iloc[:, 6:].values
                xi_sub = xi_subtrain[:, asset_idx]

                s_val = data_val.iloc[1:6].values.reshape(1, -1)
                future_rows = data_cv_train[data_cv_train['time'] > time_val]
                xi_val_day = future_rows.iloc[0, 6 + asset_idx].values.reshape(1, -1)

                x_OCTMC = DR_W2_conditional_mean_CVaR_kernel(s_subtrain, xi_sub, s_val, eta, tau, q, rho_div)
                losses = oos_loss_valid(x_OCTMC, xi_val_day, tau, eta) * 100
                avg_loss = np.mean(losses)
                total_loss += avg_loss
            print(f"[OTCMC] quantile={q:.4f}, rho/rho_min={rho_div:.2f}, loss={total_loss:.4f}")

            if total_loss < best_loss:
                best_loss = total_loss
                best_quantile_level = q
                best_rho_div = rho_div

    return best_quantile_level, best_rho_div

In [7]:
def compute_avg_return(xi_mat):
    return np.mean(xi_mat, axis=1)

def NW_weights(x, X_train, h):
    dists = np.linalg.norm(X_train - x, axis=1)
    weights = np.exp(-0.5 * (dists / h)**2)
    return weights / np.sum(weights)

def LSCV_bandwidth(x_train, y_train, bandwidths):
    n = len(x_train)
    errors = []
    for h in bandwidths:
        total_error = 0
        for i in range(n):
            x_i = x_train[i]
            y_i = y_train[i]
            X_rest = np.delete(x_train, i, axis=0)
            y_rest = np.delete(y_train, i)
            w = NW_weights(x_i, X_rest, h)
            y_hat = np.sum(w * y_rest)
            total_error += (y_i - y_hat) ** 2
        errors.append(total_error)
    best_h = bandwidths[np.argmin(errors)]
    return best_h

def preprocess_side_info(s, xi, bandwidth_candidates=None):
    s= np.asarray(s)
    if bandwidth_candidates is None:
        bandwidth_candidates = np.logspace(-2, 1, 20)

    y = compute_avg_return(xi)  # (T,) shape
    s_scaled = s.copy()
    h_list = []

    for j in range(s.shape[1]):
        x_j = s[:, j].reshape(-1, 1)
        h_j = LSCV_bandwidth(x_j, y, bandwidth_candidates)
        h_list.append(h_j)
        s_scaled[:, j] = s[:, j] / h_j
        print(f"Side info {j}: selected bandwidth h = {h_j:.4f}")

    return s_scaled, h_list

In [8]:
def run_trial(j, tau, eta, data_oos_train_set, data_oos_test,
              best_eps_DRMC, best_quantile_CMC, best_gamma_q_DRCMC, best_eps_DRCMC,
              best_quantile_OTCMC, best_eps_OTCMC, best_eps_GMM, max_K,
              hidden_node, hidden_layer, block_size, bins, total_epoch, device):
    base = 1000
    random.seed(base + j)
    np.random.seed(base + j)
    torch.manual_seed(base + j)
    torch.cuda.manual_seed_all(base + j)  # if using CUDA
    dim_s, dim_xi = 5, 399
    method_names = ["GMM", "EW", "MC", "DRMC", "CMC", "DRCMC", "OTCMC"]
    metrics = ["loss", "mean"]
    results = {metric: {method: None for method in method_names} for metric in metrics}
    results["Trial"] = j
    results["K_GMM"] = None
    

    # === Extract test sample ===
    data_val = data_oos_test.iloc[j]
    time_val = data_val["time"]
    s_val = data_val.iloc[1:6].values.reshape(1, -1)

    # === Select asset indices ===
    asset_idx = np.random.choice(399, size=399, replace=False)

    # === Get xi_{t+1} (당일 평가용) ===
    future_rows = data_oos_train_set[data_oos_train_set["time"] > time_val]
    xi_val_day = future_rows.iloc[0, 6 + asset_idx].values.reshape(1, -1)

    # === Training data from [t-2y, t) ===
    start_time = time_val - pd.DateOffset(years=2)
    mask_train = (data_oos_train_set["time"] >= start_time) & (data_oos_train_set["time"] < time_val)
    data_train = data_oos_train_set[mask_train]
    s_train = data_train.iloc[:, 1:6].values
    xi_train = data_train.iloc[:, 6 + asset_idx].values

    # === Classical models ===
    classical_models = {
        "EW":  lambda: equal_weight_kernel(s_train, xi_train, s_val),
        "MC":  lambda: mean_CVaR_kernel(s_train, xi_train, s_val, eta, tau),
        "DRMC": lambda: DR_mean_CVaR_kernel(s_train, xi_train, s_val, eta, tau, best_eps_DRMC),
        "CMC": lambda: cond_mean_CVaR_kernel(s_train, xi_train, s_val, eta, tau, best_quantile_CMC),
        "DRCMC": lambda: DR_Winf_conditional_mean_CVaR_kernel(s_train, xi_train, s_val, eta, tau, best_gamma_q_DRCMC, best_eps_DRCMC),
        "OTCMC": lambda: DR_W2_conditional_mean_CVaR_kernel(s_train, xi_train, s_val, eta, tau, best_quantile_OTCMC, best_eps_OTCMC),
    }

    for name, model_func in classical_models.items():
        try:
            x = model_func()
            losses = oos_loss_portfolio(x, xi_val_day, tau, eta)
            means = oos_mean_portfolio(x, xi_val_day)
            results["loss"][name] = np.mean(losses) * 100
            results["mean"][name] = np.mean(means) * 100
        except Exception as e:
            print(f"[Trial {j}] Error in {name}: {e}")

    # === GMM model ===
    try:
        scaler_s = StandardScaler()
        scaler_xi = StandardScaler()
        s_train_std = scaler_s.fit_transform(s_train)
        xi_train_std = scaler_xi.fit_transform(xi_train)
        data_train_std = np.concatenate([s_train_std, xi_train_std], axis=1)
        data_train_tensor = torch.tensor(data_train_std, dtype=torch.float32, device=device)

        best_K_GMM = select_K_by_AIC(data_train_std, max_K=max_K)
        results["K_GMM"] = best_K_GMM

        nfm, _ = train_nf_model(5 + 399, best_K_GMM, hidden_node, hidden_layer, bins, block_size, total_epoch, data_train_tensor, device)

        gmm_x = GaussianMixture(n_components=best_K_GMM, covariance_type='diag', reg_covar=1e-2, random_state = base + j).fit(data_train_std)
        mu_x, diag_sig_x, p_x = gmm_x.means_, gmm_x.covariances_, gmm_x.weights_
        sig_x = np.array([np.diag(diag_sig_x[k]) for k in range(best_K_GMM)])

        s_val = s_val.reshape(1, -1)
        s_val_std = scaler_s.transform(s_val) 
        s_vec = s_val_std.ravel()

        mu_cond_x, cov_cond_x, p_cond_x = transforming_conditional(s=s_vec, num_components=best_K_GMM, mu_k=mu_x, sig_k=sig_x, p_k=p_x, dim_s=dim_s)
        xi_hat_std = (p_cond_x[:, None] * mu_cond_x).sum(axis=0, keepdims=True)

        gamma_std = np.hstack([s_val_std, xi_hat_std])                      # (1, dim_s + dim_xi)
        gamma_tensor = torch.tensor(gamma_std, dtype=torch.float32, device=device)
        z_s = inverse(nfm, gamma_tensor)[:, :dim_s][0]                      # (dim_s,)

        z_train = inverse(nfm, data_train_tensor)
        gmm_z = GaussianMixture(n_components=best_K_GMM, covariance_type='diag', reg_covar=1e-2, random_state = base + j).fit(z_train)
        mu_z, diag_sig_z, p_z = gmm_z.means_, gmm_z.covariances_, gmm_z.weights_
        sig_z = np.array([np.diag(diag_sig_z[k]) for k in range(best_K_GMM)])

        mu_cond_z, cov_cond_z, p_cond_z = transforming_conditional(s=z_s, num_components=best_K_GMM, mu_k=mu_z, sig_k=sig_z, p_k=p_z, dim_s=dim_s)
        z_xi_sample = MC_sampling(best_K_GMM, 1000, mu_cond_z, cov_cond_z, p_cond_z)
        z_full = np.hstack([np.repeat(z_s.reshape(1, -1), len(z_xi_sample), axis=0), z_xi_sample])

        z_tensor = torch.tensor(z_full, dtype=torch.float32, device=device)
        x_gen_std = forward(nfm, z_tensor)
        xi_MC = scaler_xi.inverse_transform(x_gen_std[:, dim_s:])  # (1000, dim_xi)                 

        x_GMM = Portfolio_2_Wass_MCVaR(xi_MC, best_eps_GMM, tau, eta)
        results["loss"]["GMM"] = np.mean(oos_loss_portfolio(x_GMM, xi_val_day, tau, eta)) * 100
        results["mean"]["GMM"] = np.mean(oos_mean_portfolio(x_GMM, xi_val_day)) * 100
    except Exception as e:
        print(f"[Trial {j}] Error in GMM: {e}")

    # === Flatten results ===
    flattened = {"Trial": results["Trial"], "K_GMM": results["K_GMM"]}
    for metric in metrics:
        for method in method_names:
            flattened[f"{metric}_{method}"] = results[metric][method]
    return flattened

In [None]:
# ============ EXPERIMENT PARAMETERS ============
eps_list = [0.01, 0.05, 0.1, 0.5, 1]
max_K = 3
tau = 0.1
eta_list = [7]
hidden_node = 64
hidden_layer = 2
block_size =  2 
bins = 8
total_epoch = 500
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
rho_list_DRMC = [0.05,0.1,0.25]
neighbor_quantile_list_CMC = [0.05,0.1,0.25]
gamma_quantile_list_DRCMC = [0.05,0.1,0.25]
rho_quantile_list_DRCMC = [0.05,0.1,0.25]
quantile_level_list_OTCMC = [0.05, 0.1,0.15]
rho_div_rho_min_list_OTCMC = [1.05,1.1,1.15]

file_path = "data/Portfolio_data.csv"

base_seed = 0
rng_main = np.random.default_rng(base_seed)


for eta in eta_list:
    # === Load and parse dataset ===
    data = pd.read_csv(file_path)
    dim_s, dim_xi, N = 5, 399, data.shape[0]

    data["time"] = pd.to_datetime(data["time"])
    data.iloc[:, 1:] = data.iloc[:, 1:].astype(np.float64)

    # === Preprocess for bandwidth estimation (2017–2020만 사용) ===
    mask_scale = (data["time"] >= "2017-01-01") & (data["time"] <= "2020-12-31")
    data_pre = data[mask_scale]
    s = data_pre.iloc[:, 1:6]
    xi = data_pre.iloc[:, 6:]
    _, h_list = preprocess_side_info(s, xi)

    # === Scale side info and returns ===
    scaled_s = s.values / np.array(h_list)
    data_scaled = data.copy()
    data_scaled.loc[data_pre.index, data_scaled.columns[1:6]] = scaled_s.astype(float)
    data_scaled.iloc[:, 6:] = data_scaled.iloc[:, 6:] / 100

    # === Define CV splits ===
    mask_cv_train = (data_scaled["time"] >= "2017-01-01") & (data_scaled["time"] <= "2020-12-31")
    mask_cv_test  = (data_scaled["time"] >= "2019-01-01") & (data_scaled["time"] <= "2020-12-31")

    data_cv_train = data_scaled[mask_cv_train]
    data_cv_test  = data_scaled[mask_cv_test]


    val_indices = rng_main.choice(len(data_cv_test), size=50, replace=False)
    val_asset_indices = [rng_main.choice(dim_xi, size=399, replace=False) for _ in val_indices]

    best_eps_DRMC = cv_DR_mean_CVaR_kernel(tau, eta, data_cv_train, data_cv_test, val_indices, val_asset_indices, rho_list_DRMC)
    print(f"[CV-DRMC Finished] best_eps_DRMC = {best_eps_DRMC}")
    best_quantile_CMC = cv_cond_mean_CVaR_kernel(tau, eta, data_cv_train, data_cv_test, val_indices, val_asset_indices, neighbor_quantile_list_CMC)
    print(f"[CV_CMC] best_quantile_CMC = {best_quantile_CMC}")
    best_gamma_q_DRCMC, best_eps_DRCMC = cv_DR_Winf_conditional_mean_CVaR_kernel(
        tau, eta, data_cv_train, data_cv_test, val_indices, val_asset_indices,
        gamma_quantile_list_DRCMC, rho_quantile_list_DRCMC
    )
    print(f"[CV-DRCMC] best_gamma_q_DRCMC = {best_gamma_q_DRCMC}, best_eps_DRCMC = {best_eps_DRCMC}")
    best_quantile_OTCMC, best_eps_OTCMC = cv_DR_W2_conditional_mean_kernel(
        tau, eta, data_cv_train, data_cv_test, val_indices, val_asset_indices,
        quantile_level_list_OTCMC, rho_div_rho_min_list_OTCMC
    )
    print(f"[CV-OTCMC] best_quantile_OTCMC = {best_quantile_OTCMC}, best_eps_OTCMC = {best_eps_OTCMC}")
    best_eps_GMM, K_cv_mean = cv_GMM(
        tau, eta, data_cv_train, data_cv_test, val_indices, val_asset_indices,
        eps_list, max_K, hidden_node, hidden_layer, block_size,
        bins, total_epoch, device,
        n_jobs=-1          
    )
    print(f"[GMM-CV Finished] K={K_cv_mean}, eps={best_eps_GMM}")

    # === Define OOS train and test sets ===
    mask_oos_train = (data_scaled["time"] >= "2019-01-01") & (data_scaled["time"] <= "2022-12-31")
    data_oos_train_set = data_scaled[mask_oos_train]

    # === Define OOS test set directly ===
    mask_oos_test = (data_scaled["time"] >= "2021-01-01") & (data_scaled["time"] <= "2021-12-31")
    data_oos_test = data_scaled[mask_oos_test].reset_index(drop=True)
    val_indices = list(range(len(data_oos_test)))

    # === Run trials in parallel ===
    results = Parallel(n_jobs=-1)(   
        delayed(run_trial)(
            j, tau, eta,
            data_oos_train_set, data_oos_test,
            best_eps_DRMC, best_quantile_CMC, best_gamma_q_DRCMC, best_eps_DRCMC,
            best_quantile_OTCMC, best_eps_OTCMC, best_eps_GMM, max_K,
            hidden_node, hidden_layer, block_size, bins, total_epoch,
            device
        )
        for j in tqdm(val_indices)
    )

    # --- Clean up / Save ---
    results_cleaned = [r for r in results if isinstance(r, dict)]

    if len(results_cleaned) == 0:
        print("⚠️ No valid results; skipping save.")
        continue

    df = pd.DataFrame(results_cleaned)

    if 'Trial' not in df.columns:
        df.insert(0, 'Trial', list(range(len(df))))  

    cols = ['Trial'] + [c for c in df.columns if c != 'Trial']
    df = df[cols]

    mean_row = {'Trial': 'AVG'}
    for col in df.columns:
        if col != 'Trial':
            mean_row[col] = pd.to_numeric(df[col], errors='coerce').mean(skipna=True)
    df = pd.concat([df, pd.DataFrame([mean_row])], ignore_index=True)

    save_path = f"PF_full_eta{eta}.csv"
    df.to_csv(save_path, index=False)
    print(f"Saved to {save_path}")

  return weights / np.sum(weights)


Side info 0: selected bandwidth h = 0.0100
Side info 1: selected bandwidth h = 6.9519
Side info 2: selected bandwidth h = 3.3598
Side info 3: selected bandwidth h = 0.2637
Side info 4: selected bandwidth h = 10.0000
[DRMC] eps=0.0500, total_loss=-36.2326
[DRMC] eps=0.1000, total_loss=-34.3890
[DRMC] eps=0.2500, total_loss=-32.6525
[CV-DRMC Finished] best_eps_DRMC = 0.05
[CMC] quantile=0.05, loss=-152.6289
[CMC] quantile=0.10, loss=-231.9788
[CMC] quantile=0.25, loss=-193.0255
[CV_CMC] best_quantile_CMC = 0.1
[DRCMC] gamma=0.05, rho=0.05, loss=-29.5682
[DRCMC] gamma=0.05, rho=0.10, loss=-29.6216
[DRCMC] gamma=0.05, rho=0.25, loss=-29.6535
[DRCMC] gamma=0.10, rho=0.05, loss=-29.6651
[DRCMC] gamma=0.10, rho=0.10, loss=-29.6395
[DRCMC] gamma=0.10, rho=0.25, loss=-29.6614
[DRCMC] gamma=0.25, rho=0.05, loss=-29.6612
[DRCMC] gamma=0.25, rho=0.10, loss=-29.6673
[DRCMC] gamma=0.25, rho=0.25, loss=-29.6738
[CV-DRCMC] best_gamma_q_DRCMC = 0.25, best_eps_DRCMC = 0.25
[OTCMC] quantile=0.0500, rho/r



[GMM-CV] j=310 eps=1.0000, loss=-50.1481
⏹️ Early stopping at epoch 37, best val loss: 477.0674
[GMM-CV] j=62 eps=1.0000, loss=0.4482
[GMM-CV] j=141 eps=1.0000, loss=4.5448
[GMM-CV] j=129 eps=1.0000, loss=0.0526
[GMM-CV] j=200 eps=1.0000, loss=-3.1986
[GMM-CV] j=16 eps=1.0000, loss=-7.3060
[GMM-CV] j=456 eps=1.0000, loss=-9.8246
[GMM-CV] j=422 eps=1.0000, loss=19.5262
[GMM-CV] j=85 eps=1.0000, loss=3.5764
[GMM-CV] j=81 eps=1.0000, loss=-3.4083
⏹️ Early stopping at epoch 37, best val loss: 466.4203
[GMM-CV] j=444 eps=1.0000, loss=7.6864
⏹️ Early stopping at epoch 41, best val loss: 309.5322
⏹️ Early stopping at epoch 37, best val loss: 456.3369
⏹️ Early stopping at epoch 37, best val loss: 472.0647
[GMM-CV] j=426 eps=1.0000, loss=11.8944
[GMM-CV] j=325 eps=1.0000, loss=-33.6079
[GMM-CV] j=267 eps=0.0100, loss=6.3050
[GMM-CV] j=209 eps=0.0100, loss=-6.3383
[GMM-CV] j=353 eps=0.0100, loss=10.1324
[GMM-CV] j=209 eps=0.0500, loss=-2.6233
[GMM-CV] j=267 eps=0.0500, loss=6.6145
[GMM-CV] j=298

  8%|▊         | 20/252 [00:20<00:06, 37.25it/s]

⏹️ Early stopping at epoch 38, best val loss: 383.4891
⏹️ Early stopping at epoch 41, best val loss: 360.4428
⏹️ Early stopping at epoch 42, best val loss: 355.5120
⏹️ Early stopping at epoch 39, best val loss: 340.0187
⏹️ Early stopping at epoch 40, best val loss: 335.7787
⏹️ Early stopping at epoch 39, best val loss: 351.8180
⏹️ Early stopping at epoch 38, best val loss: 354.0795
⏹️ Early stopping at epoch 39, best val loss: 333.5549
⏹️ Early stopping at epoch 40, best val loss: 361.4333
⏹️ Early stopping at epoch 39, best val loss: 422.6474
⏹️ Early stopping at epoch 40, best val loss: 329.4129
⏹️ Early stopping at epoch 40, best val loss: 332.6748
⏹️ Early stopping at epoch 41, best val loss: 334.0247
⏹️ Early stopping at epoch 40, best val loss: 326.9452
⏹️ Early stopping at epoch 41, best val loss: 340.5678
⏹️ Early stopping at epoch 42, best val loss: 336.4263
⏹️ Early stopping at epoch 42, best val loss: 327.4332
⏹️ Early stopping at epoch 43, best val loss: 344.2090
⏹️ Early s

 16%|█▌        | 40/252 [09:14<57:34, 16.30s/it]

⏹️ Early stopping at epoch 40, best val loss: 387.8802
⏹️ Early stopping at epoch 40, best val loss: 368.6300
⏹️ Early stopping at epoch 40, best val loss: 371.1656
⏹️ Early stopping at epoch 38, best val loss: 387.0206
⏹️ Early stopping at epoch 40, best val loss: 339.2568
⏹️ Early stopping at epoch 38, best val loss: 356.5568
⏹️ Early stopping at epoch 38, best val loss: 342.5642
⏹️ Early stopping at epoch 38, best val loss: 356.3312
⏹️ Early stopping at epoch 38, best val loss: 396.7624
⏹️ Early stopping at epoch 41, best val loss: 339.3880
⏹️ Early stopping at epoch 39, best val loss: 329.9566
⏹️ Early stopping at epoch 41, best val loss: 324.4147
⏹️ Early stopping at epoch 41, best val loss: 361.1432
⏹️ Early stopping at epoch 40, best val loss: 353.1592
⏹️ Early stopping at epoch 41, best val loss: 322.9090
⏹️ Early stopping at epoch 42, best val loss: 338.3307
⏹️ Early stopping at epoch 38, best val loss: 409.3329
⏹️ Early stopping at epoch 38, best val loss: 345.3974
⏹️ Early s

 24%|██▍       | 61/252 [23:31<1:26:59, 27.33s/it]

⏹️ Early stopping at epoch 42, best val loss: 341.5740
⏹️ Early stopping at epoch 41, best val loss: 341.7833
⏹️ Early stopping at epoch 41, best val loss: 345.2389
⏹️ Early stopping at epoch 42, best val loss: 315.4269
⏹️ Early stopping at epoch 40, best val loss: 340.0246
⏹️ Early stopping at epoch 38, best val loss: 420.0184
⏹️ Early stopping at epoch 41, best val loss: 336.0327
⏹️ Early stopping at epoch 39, best val loss: 316.4702
⏹️ Early stopping at epoch 40, best val loss: 346.8635
⏹️ Early stopping at epoch 42, best val loss: 336.7057
⏹️ Early stopping at epoch 41, best val loss: 344.0802
⏹️ Early stopping at epoch 39, best val loss: 357.5720
⏹️ Early stopping at epoch 41, best val loss: 368.2577
⏹️ Early stopping at epoch 43, best val loss: 320.5492
⏹️ Early stopping at epoch 39, best val loss: 340.1085
⏹️ Early stopping at epoch 38, best val loss: 365.9744
⏹️ Early stopping at epoch 40, best val loss: 418.9615
⏹️ Early stopping at epoch 40, best val loss: 345.8032
⏹️ Early s

 33%|███▎      | 82/252 [49:51<2:19:24, 49.20s/it]

⏹️ Early stopping at epoch 40, best val loss: 332.8678
⏹️ Early stopping at epoch 40, best val loss: 369.6581
⏹️ Early stopping at epoch 40, best val loss: 343.5558
⏹️ Early stopping at epoch 39, best val loss: 340.3277
⏹️ Early stopping at epoch 41, best val loss: 377.2713
⏹️ Early stopping at epoch 39, best val loss: 339.2267
⏹️ Early stopping at epoch 40, best val loss: 359.8116
⏹️ Early stopping at epoch 41, best val loss: 345.1405
⏹️ Early stopping at epoch 39, best val loss: 365.5367
⏹️ Early stopping at epoch 42, best val loss: 349.1380
⏹️ Early stopping at epoch 42, best val loss: 327.9343
⏹️ Early stopping at epoch 42, best val loss: 359.1866
⏹️ Early stopping at epoch 40, best val loss: 356.1425
⏹️ Early stopping at epoch 39, best val loss: 331.1034
⏹️ Early stopping at epoch 40, best val loss: 320.8601
⏹️ Early stopping at epoch 41, best val loss: 344.2770
⏹️ Early stopping at epoch 38, best val loss: 333.8329
⏹️ Early stopping at epoch 39, best val loss: 393.7184
⏹️ Early s

 40%|████      | 101/252 [1:17:21<2:54:09, 69.20s/it]

⏹️ Early stopping at epoch 40, best val loss: 336.1010
⏹️ Early stopping at epoch 38, best val loss: 389.0210
⏹️ Early stopping at epoch 39, best val loss: 338.4200
⏹️ Early stopping at epoch 39, best val loss: 329.0978
⏹️ Early stopping at epoch 43, best val loss: 360.4283
⏹️ Early stopping at epoch 38, best val loss: 404.6847
⏹️ Early stopping at epoch 39, best val loss: 391.2231
⏹️ Early stopping at epoch 39, best val loss: 355.8497
⏹️ Early stopping at epoch 40, best val loss: 369.3433
⏹️ Early stopping at epoch 39, best val loss: 371.7058
⏹️ Early stopping at epoch 39, best val loss: 364.6295
⏹️ Early stopping at epoch 39, best val loss: 356.0192
⏹️ Early stopping at epoch 42, best val loss: 340.4077
⏹️ Early stopping at epoch 39, best val loss: 367.5837
⏹️ Early stopping at epoch 39, best val loss: 354.7426
⏹️ Early stopping at epoch 39, best val loss: 403.8477
⏹️ Early stopping at epoch 39, best val loss: 355.9026
⏹️ Early stopping at epoch 40, best val loss: 353.8963
⏹️ Early s

 48%|████▊     | 121/252 [1:38:52<2:23:56, 65.93s/it]

⏹️ Early stopping at epoch 41, best val loss: 362.1420
⏹️ Early stopping at epoch 41, best val loss: 370.2069
⏹️ Early stopping at epoch 39, best val loss: 377.8043
⏹️ Early stopping at epoch 38, best val loss: 379.3455
⏹️ Early stopping at epoch 39, best val loss: 355.9066
⏹️ Early stopping at epoch 43, best val loss: 331.4168
⏹️ Early stopping at epoch 38, best val loss: 387.0553
⏹️ Early stopping at epoch 40, best val loss: 353.2634
⏹️ Early stopping at epoch 42, best val loss: 381.0307
⏹️ Early stopping at epoch 41, best val loss: 351.0064
⏹️ Early stopping at epoch 40, best val loss: 345.5097
⏹️ Early stopping at epoch 39, best val loss: 330.5447
⏹️ Early stopping at epoch 42, best val loss: 349.1621
⏹️ Early stopping at epoch 42, best val loss: 339.5114
⏹️ Early stopping at epoch 42, best val loss: 377.9112
⏹️ Early stopping at epoch 41, best val loss: 353.0242
⏹️ Early stopping at epoch 38, best val loss: 343.4776
⏹️ Early stopping at epoch 39, best val loss: 355.3033
⏹️ Early s

 56%|█████▌    | 141/252 [2:05:12<2:13:04, 71.93s/it]

⏹️ Early stopping at epoch 40, best val loss: 357.7880
⏹️ Early stopping at epoch 38, best val loss: 357.6513
⏹️ Early stopping at epoch 43, best val loss: 323.9693
⏹️ Early stopping at epoch 41, best val loss: 380.1758
⏹️ Early stopping at epoch 42, best val loss: 318.3873
⏹️ Early stopping at epoch 43, best val loss: 342.7015
⏹️ Early stopping at epoch 40, best val loss: 354.7029
⏹️ Early stopping at epoch 40, best val loss: 354.8472
⏹️ Early stopping at epoch 42, best val loss: 364.1978
⏹️ Early stopping at epoch 40, best val loss: 383.1084
⏹️ Early stopping at epoch 39, best val loss: 361.2729
⏹️ Early stopping at epoch 43, best val loss: 369.2578
⏹️ Early stopping at epoch 42, best val loss: 321.2832
⏹️ Early stopping at epoch 41, best val loss: 328.2502
⏹️ Early stopping at epoch 42, best val loss: 358.3524
⏹️ Early stopping at epoch 42, best val loss: 334.7814
⏹️ Early stopping at epoch 40, best val loss: 353.2980
⏹️ Early stopping at epoch 40, best val loss: 327.3541
⏹️ Early s

 65%|██████▌   | 164/252 [2:26:19<1:19:37, 54.29s/it]

⏹️ Early stopping at epoch 40, best val loss: 353.7577
⏹️ Early stopping at epoch 40, best val loss: 355.0843
⏹️ Early stopping at epoch 39, best val loss: 368.5713
⏹️ Early stopping at epoch 42, best val loss: 360.8847
⏹️ Early stopping at epoch 39, best val loss: 362.1388
⏹️ Early stopping at epoch 41, best val loss: 365.5618
⏹️ Early stopping at epoch 40, best val loss: 380.7002
⏹️ Early stopping at epoch 42, best val loss: 349.0020
⏹️ Early stopping at epoch 40, best val loss: 349.4785
⏹️ Early stopping at epoch 40, best val loss: 348.3909
⏹️ Early stopping at epoch 41, best val loss: 337.8170
⏹️ Early stopping at epoch 40, best val loss: 342.8629
⏹️ Early stopping at epoch 42, best val loss: 345.5187
⏹️ Early stopping at epoch 40, best val loss: 359.8344
⏹️ Early stopping at epoch 40, best val loss: 370.7479
⏹️ Early stopping at epoch 41, best val loss: 331.3852
⏹️ Early stopping at epoch 43, best val loss: 343.5678
⏹️ Early stopping at epoch 43, best val loss: 316.4005
⏹️ Early s

 72%|███████▏  | 181/252 [2:53:15<1:35:12, 80.46s/it]

⏹️ Early stopping at epoch 42, best val loss: 366.4751
⏹️ Early stopping at epoch 40, best val loss: 352.5240
⏹️ Early stopping at epoch 42, best val loss: 393.7143
⏹️ Early stopping at epoch 43, best val loss: 316.8986
⏹️ Early stopping at epoch 40, best val loss: 374.9836
⏹️ Early stopping at epoch 41, best val loss: 359.2109
⏹️ Early stopping at epoch 40, best val loss: 338.8047
⏹️ Early stopping at epoch 41, best val loss: 387.1754
⏹️ Early stopping at epoch 40, best val loss: 327.7187
⏹️ Early stopping at epoch 39, best val loss: 342.9884
⏹️ Early stopping at epoch 41, best val loss: 344.7219
⏹️ Early stopping at epoch 40, best val loss: 362.9520
⏹️ Early stopping at epoch 43, best val loss: 331.9252
⏹️ Early stopping at epoch 40, best val loss: 349.8240
⏹️ Early stopping at epoch 40, best val loss: 386.1262
⏹️ Early stopping at epoch 41, best val loss: 348.6972
⏹️ Early stopping at epoch 43, best val loss: 343.5835
⏹️ Early stopping at epoch 39, best val loss: 350.2803
⏹️ Early s

 80%|████████  | 202/252 [3:20:18<1:02:40, 75.20s/it]

⏹️ Early stopping at epoch 40, best val loss: 366.5392
⏹️ Early stopping at epoch 39, best val loss: 378.4660
⏹️ Early stopping at epoch 43, best val loss: 362.1911
⏹️ Early stopping at epoch 40, best val loss: 373.5498
⏹️ Early stopping at epoch 39, best val loss: 333.2215
⏹️ Early stopping at epoch 39, best val loss: 362.7358
⏹️ Early stopping at epoch 39, best val loss: 321.7986
⏹️ Early stopping at epoch 41, best val loss: 376.4541
⏹️ Early stopping at epoch 39, best val loss: 373.5902
⏹️ Early stopping at epoch 40, best val loss: 366.9876
⏹️ Early stopping at epoch 42, best val loss: 327.3172
⏹️ Early stopping at epoch 41, best val loss: 341.9622
⏹️ Early stopping at epoch 44, best val loss: 328.8944
⏹️ Early stopping at epoch 40, best val loss: 356.5591
⏹️ Early stopping at epoch 41, best val loss: 354.7091
⏹️ Early stopping at epoch 43, best val loss: 341.9196
⏹️ Early stopping at epoch 39, best val loss: 356.8148
⏹️ Early stopping at epoch 39, best val loss: 381.5731
⏹️ Early s

 88%|████████▊ | 221/252 [3:51:41<45:46, 88.61s/it]  

⏹️ Early stopping at epoch 40, best val loss: 349.3905
⏹️ Early stopping at epoch 40, best val loss: 364.2865
⏹️ Early stopping at epoch 40, best val loss: 368.6475
⏹️ Early stopping at epoch 44, best val loss: 333.8141
⏹️ Early stopping at epoch 43, best val loss: 320.4958
⏹️ Early stopping at epoch 40, best val loss: 369.5391
⏹️ Early stopping at epoch 39, best val loss: 335.5234
⏹️ Early stopping at epoch 41, best val loss: 378.8310
⏹️ Early stopping at epoch 42, best val loss: 352.8069
⏹️ Early stopping at epoch 39, best val loss: 368.9493
⏹️ Early stopping at epoch 40, best val loss: 334.8273
⏹️ Early stopping at epoch 41, best val loss: 350.7735
⏹️ Early stopping at epoch 40, best val loss: 396.5399
⏹️ Early stopping at epoch 42, best val loss: 316.2936
⏹️ Early stopping at epoch 47, best val loss: 494.0367
⏹️ Early stopping at epoch 41, best val loss: 317.5910
⏹️ Early stopping at epoch 42, best val loss: 347.5466
⏹️ Early stopping at epoch 40, best val loss: 384.9578
⏹️ Early s

100%|██████████| 252/252 [4:21:23<00:00, 62.24s/it]


⏹️ Early stopping at epoch 45, best val loss: 332.8843
⏹️ Early stopping at epoch 41, best val loss: 335.8478
⏹️ Early stopping at epoch 38, best val loss: 403.4236
⏹️ Early stopping at epoch 39, best val loss: 336.8943
⏹️ Early stopping at epoch 41, best val loss: 356.3513
⏹️ Early stopping at epoch 40, best val loss: 373.9826
⏹️ Early stopping at epoch 40, best val loss: 400.9876
⏹️ Early stopping at epoch 39, best val loss: 398.7698
⏹️ Early stopping at epoch 38, best val loss: 367.8878
⏹️ Early stopping at epoch 42, best val loss: 367.1689
⏹️ Early stopping at epoch 40, best val loss: 368.4296
⏹️ Early stopping at epoch 40, best val loss: 398.9670
⏹️ Early stopping at epoch 40, best val loss: 361.1542
⏹️ Early stopping at epoch 41, best val loss: 370.9644
⏹️ Early stopping at epoch 39, best val loss: 406.7003
⏹️ Early stopping at epoch 39, best val loss: 389.2323
⏹️ Early stopping at epoch 40, best val loss: 346.6355
⏹️ Early stopping at epoch 40, best val loss: 404.3913
⏹️ Early s