In [1]:
import random
import numpy as np
import pandas as pd
from pandas.core.missing import validate_limit_direction
from sklearn.mixture import GaussianMixture
from sklearn.mixture import BayesianGaussianMixture
from sklearn.neighbors import KernelDensity
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from scipy.stats import multivariate_normal
from scipy.stats import truncnorm
from sklearn.neighbors import NearestNeighbors
from tqdm import tqdm
import cvxpy as cp
import gurobipy as gp
from gurobipy import GRB
from Newsvendor import Newsvendor
from joblib import Parallel, delayed
import torch
import normflows as nf
import matplotlib.pyplot as plt
from itertools import product
from torch.utils.data import TensorDataset, DataLoader, random_split
import os
import pickle
from sklearn.neighbors import KernelDensity
# np.set_printoptions(threshold=np.inf)
# np.set_printoptions(linewidth=200)



In [3]:
# -- Implementation 2 ---
######################### 2-Wass GMM DRO function ###################################
def NewsVendor_2_Wass(xi, eps, h, b):
    xi = xi.astype(float)
    N = xi.shape[0]
    lda = cp.Variable(nonneg = True)
    s = cp.Variable(N)
    theta = cp.Variable(nonneg = True)
    q = cp.Variable(nonneg = True)

    const = []
    for i in range(N):
        const.append(cp.norm2(cp.hstack([2 * lda * xi[i] + theta - h, lda * (xi[i]**2) - h * q + s[i] - lda])) <= lda * (xi[i]**2) - h * q + s[i] +lda)
        const.append(cp.norm2(cp.hstack([2 * lda * xi[i] + theta + b, lda * (xi[i]**2) + b * q + s[i] - lda])) <= lda * (xi[i]**2) + b * q + s[i] +lda)
        const.append(lda * (xi[i]**2) - h * q + s[i] >= 0)
        const.append(lda * (xi[i]**2) + b * q + s[i] >= 0)

    obj = cp.Minimize(lda * (eps**2) + (1 / N) * cp.sum(s))
    prob = cp.Problem(obj, const)
    prob.solve(solver = cp.MOSEK, verbose = False)

    return q.value

def generate_data(n, dim_s, dim_xi):
    s = np.random.uniform(-2, 2, size=(n, dim_s))
    b = 50
    W1 = 0.3 * np.ones((dim_xi, dim_s))
    W2 = 5 * np.ones((dim_xi, dim_s))
    eps1 = np.random.uniform(-2, 2, size=(n, dim_xi))
    eps2 = np.random.uniform(-2, 2, size=(n, dim_xi))
    lin  = s @ W1.T
    quad = (s**2) @ W2.T
    xi1 = lin + b + eps1
    xi2 = quad + b - 10 + eps2
    p = np.random.rand(n)
    xi = np.where(p[:, None] > 0.5, xi1, xi2)
    return s, xi, W1, W2

def transforming_conditional(s, num_components, mu_k, sig_k, p_k, dim_s):
    reg = 1e-6
    mu_cond, cov_cond, weights = [], [], []
    for k in range(num_components):
        mu = mu_k[k]
        sigma = sig_k[k]
        mu_s = mu[:dim_s]
        mu_xi = mu[dim_s:]
        sigma_ss = sigma[:dim_s, :dim_s]
        sigma_sx = sigma[:dim_s, dim_s:]
        sigma_xs = sigma[dim_s:, :dim_s]
        sigma_xx = sigma[dim_s:, dim_s:]
        sigma_ss += reg * np.eye(dim_s)
        try:
            sigma_ss_inv = np.linalg.inv(sigma_ss)
        except np.linalg.LinAlgError:
            sigma_ss_inv = np.linalg.pinv(sigma_ss)
        cond_mu = mu_xi + sigma_xs @ sigma_ss_inv @ (s - mu_s)
        cond_cov = sigma_xx - sigma_xs @ sigma_ss_inv @ sigma_sx
        cond_cov = 0.5 * (cond_cov + cond_cov.T)  
        eigvals = np.linalg.eigvalsh(cond_cov)
        if np.any(eigvals <= 1e-10):
            cond_cov += reg * np.eye(cond_cov.shape[0])
        try:
            weight = p_k[k] * multivariate_normal.pdf(s, mean=mu_s, cov=sigma_ss)
        except:
            weight = 0.0
        mu_cond.append(cond_mu)
        cov_cond.append(cond_cov)
        weights.append(weight)
    weights = np.array(weights)
    if np.any(np.isnan(weights)) or weights.sum() <= 1e-12:
        weights = np.ones_like(weights) / len(weights)
    else:
        weights /= weights.sum()
    return np.array(mu_cond), np.array(cov_cond), weights

def MC_sampling(K, N, mu_list, cov_list, p_list):
    d = mu_list.shape[1]
    samples = np.zeros((N, d))
    for i in range(N):
        k = np.random.choice(K, p=p_list)
        samples[i] = np.random.multivariate_normal(mu_list[k], cov_list[k])
    return samples

def oos_loss(q, s, h, b, W1, W2, dim_xi=1):
    n = 100000
    s = s.reshape(1, -1)     
    eps1 = np.random.uniform(-2, 2, size=(n, dim_xi))
    eps2 = np.random.uniform(-2, 2, size=(n, dim_xi))
    lin  = s @ W1.T
    quad = (s**2) @ W2.T
    xi1 = lin + 50 + eps1
    xi2 = quad + 40 + eps2
    p = np.random.rand(n)
    xi_samples = np.where(p[:, None] > 0.5, xi1, xi2)
    q = np.array(q).reshape(1, -1)
    if q.shape[1] == 1 and dim_xi > 1:
        q = np.tile(q, (1, dim_xi))
    losses = h * np.maximum(q - xi_samples, 0) + b * np.maximum(xi_samples - q, 0)
    return np.mean(losses)

def oos_loss_valid(q, xi, h, b):
    loss = h * np.maximum(q - xi, 0) + b * np.maximum(xi - q, 0)
    return np.mean(loss)

def select_K_by_AIC(z_np, max_K):
    aic_scores = []
    models = []
    for k in range(1, max_K + 1):
        gmm = GaussianMixture(n_components=k)
        gmm.fit(z_np)
        aic = gmm.aic(z_np)
        aic_scores.append(aic)
        models.append(gmm)
    best_index = np.argmin(aic_scores)
    best_K = best_index + 1
    return best_K

def cv_GMM(N, max_K, eps_list, xi, s, h, b, hidden_node, hidden_layer, block_size, num_bins, total_epoch, device):
    dim_s, dim_xi = s.shape[1], xi.shape[1]
    latent_size = dim_s+dim_xi
    X = np.concatenate([s, xi], axis=1)           
    split = int(len(X) * 0.9)
    train, val = X[:split], X[split:]

    s_train, xi_train = train[:, :dim_s], train[:, dim_s:]
    s_val,   xi_val   = val[:,   :dim_s], val[:,   dim_s:]

    scaler_s  = StandardScaler().fit(s_train)
    scaler_xi = StandardScaler().fit(xi_train)

    s_train_std  = scaler_s.transform(s_train)
    xi_train_std = scaler_xi.transform(xi_train)
    s_val_std    = scaler_s.transform(s_val)      

    data_train_std = np.concatenate([s_train_std, xi_train_std], axis=1)
    x_train_tensor = torch.tensor(data_train_std, dtype=torch.float32).to(device)
    
    best_K = select_K_by_AIC(data_train_std, max_K=max_K)

    cov_type = 'full' if dim_s <= 10 else 'diag'
    gmm_x = GaussianMixture(n_components=best_K, covariance_type=cov_type, reg_covar=1e-2).fit(data_train_std)
    mu_x, cov_x, p_x = gmm_x.means_, gmm_x.covariances_, gmm_x.weights_
    sig_x = np.array([np.diag(cov) for cov in cov_x]) if cov_type == 'diag' else cov_x

    nfm, _ = train_nf_model(latent_size, best_K, hidden_node, hidden_layer, block_size, num_bins, total_epoch, x_train_tensor, device)

    gamma_std_list = []
    for j in range(s_val_std.shape[0]):
        s_j_std = s_val_std[j].ravel()  # (dim_s,)
        mu_cond_x, cov_cond_x, w_x = transforming_conditional(s=s_j_std, num_components=best_K, mu_k=mu_x, sig_k=sig_x, p_k=p_x, dim_s=dim_s)
        xi_hat_std = (w_x[:, None] * mu_cond_x).sum(axis=0, keepdims=True)  # (1, dim_xi)
        gamma_std_list.append(np.hstack([s_j_std.reshape(1, -1), xi_hat_std]))  # (1, dim_s+dim_xi)
    s_val_aug_std = np.vstack(gamma_std_list)  # (n_val, dim_s+dim_xi)
    s_tensor = torch.tensor(s_val_aug_std, dtype=torch.float32).to(device)

    z_s_val = inverse(nfm, s_tensor)
    z_s_val = z_s_val[:, :dim_s]
    z_sample_np = inverse(nfm, x_train_tensor)

    cov_type = 'full' if dim_s <= 10 else 'diag'
    gmm_z = GaussianMixture(n_components=best_K, covariance_type=cov_type, reg_covar=1e-2).fit(z_sample_np)
    mu_z, cov_z, p_z = gmm_z.means_, gmm_z.covariances_, gmm_z.weights_
    sig_z = np.array([np.diag(cov) for cov in cov_z]) if cov_type == 'diag' else cov_z

    best_eps_result = {'eps': None, 'score': float('inf')}
    for eps in eps_list:
        valid_score = 0.0
        for j in range(z_s_val.shape[0]):
            z_s_j = z_s_val[j].ravel()  # (dim_s,)
            mu_cond, cov_cond, p_cond = transforming_conditional(s=z_s_j, num_components=best_K, mu_k=mu_z, sig_k=sig_z, p_k=p_z, dim_s=dim_s)
            z_xi_sample = MC_sampling(best_K, 500, mu_cond, cov_cond, p_cond)  # (500, dim_xi)
            z_full = np.hstack([np.repeat(z_s_j.reshape(1, -1), 500, axis=0), z_xi_sample])  # (500, dim_s+dim_xi)
            z_tensor = torch.tensor(z_full, dtype=torch.float32).to(device)
            xi_sampled_std = forward(nfm, z_tensor)[:, dim_s:]
            xi_sampled = scaler_xi.inverse_transform(xi_sampled_std)
            xi_sampled = np.maximum(xi_sampled, 0)
            q_gmm = NewsVendor_2_Wass(xi_sampled, eps, h, b)
            valid_score += oos_loss_valid(q_gmm, xi_val[j], h, b)

        # print(f"[eps={eps:.4f}] Validation Score = {valid_score:.4f}")
        if valid_score < best_eps_result['score']:
            best_eps_result.update({'eps': eps, 'score': valid_score})

    return best_K, best_eps_result['eps']

def cv_GMM_nonNF(N, max_K, eps_list, xi, s, h, b, hidden_node, hidden_layer, block_size, num_bins, total_epoch, device):
    # ----- split -----
    dim_s, dim_xi = s.shape[1], xi.shape[1]
    X = np.concatenate([s, xi], axis=1)           
    split = int(len(X) * 0.9)
    train, val = X[:split], X[split:]

    s_train, xi_train = train[:, :dim_s], train[:, dim_s:]
    s_val,   xi_val   = val[:,   :dim_s], val[:,   dim_s:]

    scaler_s  = StandardScaler().fit(s_train)
    scaler_xi = StandardScaler().fit(xi_train)

    s_train_std  = scaler_s.transform(s_train)
    xi_train_std = scaler_xi.transform(xi_train)
    s_val_std    = scaler_s.transform(s_val)      

    data_train_std = np.concatenate([s_train_std, xi_train_std], axis=1)
    best_K = select_K_by_AIC(data_train_std, max_K=max_K)

    # --- Fit GMM on x_std
    cov_type = 'full' if dim_s <= 10 else 'diag'
    gmm_x = GaussianMixture(n_components=best_K, covariance_type=cov_type, reg_covar=1e-2).fit(data_train_std)
    mu_x, cov_x, p_x = gmm_x.means_, gmm_x.covariances_, gmm_x.weights_
    sig_x = np.array([np.diag(cov) for cov in cov_x]) if cov_type == 'diag' else cov_x
    
    # --- Grid over eps to pick best by validation
    best_eps_result = {'eps': None, 'score': float('inf')}
    num_mc = 500

    for eps in eps_list:
        valid_score = 0.0

        for j in range(s_val.shape[0]):
            s_j = s_val_std[j].ravel()  
            mu_cond_x, cov_cond_x, p_cond_x = transforming_conditional(s=s_j, num_components=best_K,mu_k=mu_x, sig_k=sig_x, p_k=p_x, dim_s=dim_s)
            xi_sampled_std = MC_sampling(best_K, num_mc, mu_cond_x, cov_cond_x, p_cond_x)
            xi_sampled = scaler_xi.inverse_transform(xi_sampled_std)
            xi_sampled = np.maximum(xi_sampled, 0)
            q_gmm= NewsVendor_2_Wass(xi_sampled, eps, h, b)
            valid_score += oos_loss_valid(q_gmm, xi_val[j], h, b)

        # print(f"[NON-NF GMM] [K={best_K}] [eps={eps:.4f}] Validation Score = {valid_score:.6f}")
        if valid_score < best_eps_result['score']:
            best_eps_result.update({'eps': eps, 'score': valid_score})

    return best_K, best_eps_result['eps']

In [4]:
def train_nf_model(latent_size, best_K, hidden_node, hidden_layer, num_bins, block_size, total_epoch, x, device, batch_size=64, lr=1e-3, patience=30, val_split=0.2):

    x_np = x.cpu().numpy()
    gmm = GaussianMixture(n_components=best_K, covariance_type='diag', reg_covar=1e-2).fit(x_np)
    means = torch.tensor(gmm.means_, dtype=torch.float32, device=device)
    weights = torch.tensor(gmm.weights_, dtype=torch.float32, device=device)
    stds = torch.tensor(np.sqrt(gmm.covariances_), dtype=torch.float32, device=device)

    flows = [nf.flows.AutoregressiveRationalQuadraticSpline(latent_size, hidden_layer, hidden_node, num_bins=num_bins) for _ in range(block_size)]
    q0 = nf.distributions.GaussianMixture(n_modes=best_K, dim=latent_size, loc=means, scale=stds, weights=weights, trainable=False)
    nfm = nf.NormalizingFlow(q0=q0, flows=flows).to(device)
    optimizer = torch.optim.Adam(nfm.parameters(), lr=lr)

    dataset = TensorDataset(x)
    val_size = int(len(dataset) * val_split)
    train_size = len(dataset) - val_size
    train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

    loss_hist = []
    val_loss_hist = []

    best_val_loss = float('inf')
    patience_counter = 0
    best_model_state = None

    for epoch in tqdm(range(total_epoch), desc="Training NF", leave=False):
        nfm.train()
        train_loss_epoch = 0.0
        for batch in train_loader:
            x_batch = batch[0].to(device)
            optimizer.zero_grad()
            loss = nfm.forward_kld(x_batch)
            if not torch.isnan(loss):
                loss.backward()
                optimizer.step()
                train_loss_epoch += loss.item()

        nfm.eval()
        val_loss_epoch = 0.0
        with torch.no_grad():
            for batch in val_loader:
                x_batch = batch[0].to(device)
                loss = nfm.forward_kld(x_batch)
                if not torch.isnan(loss):
                    val_loss_epoch += loss.item()

        loss_hist.append(train_loss_epoch)
        val_loss_hist.append(val_loss_epoch)

        if val_loss_epoch < best_val_loss:
            best_val_loss = val_loss_epoch
            patience_counter = 0
            best_model_state = nfm.state_dict()
        else:
            patience_counter += 1
            if patience_counter >= patience:
                # print(f"Early stopping at epoch {epoch+1} (val loss: {val_loss_epoch:.4f})")
                break

    if best_model_state is not None:
        nfm.load_state_dict(best_model_state)

    return nfm, loss_hist

def inverse(nfm, x):
    with torch.no_grad():
        z_np = nfm.inverse(x).cpu().numpy()
    return z_np

def forward(nfm, z):
    with torch.no_grad():
        x = nfm.forward(z).cpu().numpy()
    return x


In [5]:
############################### NW function #########################
def NW_weights(gamma, gammas, H):
    N = len(gammas)
    numerator = np.zeros(N)
    for i in range(N):
        numerator[i] = np.exp(-np.linalg.norm(gammas[i] - gamma) / H)
    denominator = numerator.sum()
    weight = numerator / denominator if denominator != 0 else np.ones(N) / N
    weight = np.round(weight, 6)
    weight = weight / weight.sum() if weight.sum() != 0 else np.ones(N) / N
    return weight

def cv_lda(Cs, C_H, xi, gammas, h, b):
    valid_scores = []

    split = int(len(xi) * 0.9)
    xi_subtrain, xi_valid = xi[:split], xi[split:]
    gammas_subtrain, gammas_valid = gammas[:split], gammas[split:]
    N = xi_subtrain.shape[0]
    dim_s = gammas.shape[1]
    H = C_H * 1 / (N ** (1 / 6))

    use_kde = dim_s <= 5  # Use KDE only if the dimension is small

    if use_kde:
        kde = KernelDensity(kernel='exponential', bandwidth=1).fit(gammas_subtrain)

    for C in Cs:
        valid_score = 0
        for j in range(len(gammas_valid)):
            gamma = gammas_valid[j]
            if use_kde:
                g_gamma = np.exp(kde.score_samples(gamma.reshape(1, -1))).item()
            else:
                g_gamma = 1.0  # fallback default if KDE is not used

            lda_0 = 1 / np.sqrt(N * (H ** 2) * g_gamma)
            weight = NW_weights(gamma, gammas_subtrain, H)
            params = {
                "b": b,
                "h": h,
                "gamma": gamma,
                "xi": xi_subtrain,
                "weight": weight,
            }
            lda = lda_0 * C
            News = Newsvendor(reg=lda, verbose=False)
            News.fit(params)
            q = News.coef_
            valid_score += oos_loss_valid(q, xi_valid[j], h, b)

        valid_score /= len(gammas_valid)
        valid_scores.append(valid_score)

    idx = np.argmin(valid_scores)
    return Cs[idx]

def cv_lda2(Cs,C_H,xi,gammas,H,h,b):
    valid_scores=[]

    split = int(len(xi) * 0.9)
    xi_subtrain, xi_valid = xi[:split], xi[split:]
    gammas_subtrain, gammas_valid = gammas[:split], gammas[split:]
    
    N = xi_subtrain.shape[0]
    H=C_H * 1/(N**(1/6))
    lda_0 = 1/np.sqrt(N*(H**2))

    for i,C in enumerate(Cs):
        valid_score = 0
        for j in range(gammas_valid.shape[0]):
            gamma = gammas_valid[j]
            weight=NW_weights(gamma,gammas_subtrain,H)
            params={
            "b":b,
            "h":h,
            "gamma": gamma,
            "xi":xi_subtrain,
            "weight":weight,
            }
            lda = C * lda_0
            News=Newsvendor(reg=lda,verbose=False)
            News.fit(params)
            q=News.coef_
            valid_score += oos_loss_valid(q,np.array([xi_valid[j]]),h,b)
            # valid_score += oos_loss_valid(q,np.array([xi_valid[j]]))
        valid_scores.append(valid_score)
    # print(valid_scores)
    idx=np.argmin(valid_scores)
    return(Cs[idx])

def cv_H(Cs, xi, gammas, h, b):
    valid_scores = []

    split = int(len(xi) * 0.9)
    xi_subtrain, xi_valid = xi[:split], xi[split:]
    gammas_subtrain, gammas_valid = gammas[:split], gammas[split:]
    N = xi_subtrain.shape[0]
    H_0 = 1 / (N ** (1 / 6))

    for i, C in enumerate(Cs):
        valid_score = 0
        H = C * H_0

        for j in range(gammas_valid.shape[0]):
            gamma = gammas_valid[j]
            weight = NW_weights(gamma, gammas_subtrain, H)

            weight = np.round(weight, 6)
            weight = weight / weight.sum()
            params = {
                "b": b,
                "h": h,
                "gamma": gamma,
                "xi": xi_subtrain,
                "weight": weight,
            }

            News = Newsvendor(reg=0, verbose=False)
            News.fit(params)
            q = News.coef_
            valid_score += oos_loss_valid(q, xi_valid[j], h, b)

        valid_scores.append(valid_score)

    idx = np.argmin(valid_scores)
    return Cs[idx]

In [6]:
########################### Kannan DRO setting ##############################
def NewsVendor_1_Wass(xi, eps, h, b):
    xi = xi.astype(float)
    N = xi.shape[0]
    lda = cp.Variable(nonneg=True)
    s = cp.Variable(N)
    z = cp.Variable((N,2))
    q = cp.Variable(nonneg=True)

    const = []
    for i in range(N):
        const.append(h * q + z[i,0] * xi[i] <= s[i])
        const.append(-b * q + z[i,1] * xi[i] <= s[i])
        const.append(z[i,0] >= -h)
        const.append(z[i,1] >= b)
        for k in range(2):
            const.append(cp.norm_inf(z[i,k]) <= lda)

    obj = cp.Minimize(lda * eps  + (1 / N) * cp.sum(s))
    prob = cp.Problem(obj, const)
    prob.solve(solver = cp.MOSEK)

    return q.value 

def cv_eps_kannan(eps_list, xi, gammas, h, b):
    valid_scores = []

    split = int(len(xi) - 10)
    xi_subtrain, xi_valid = xi[:split], xi[split:]
    gammas_subtrain, gammas_valid = gammas[:split], gammas[split:]

    model = LinearRegression().fit(gammas_subtrain, xi_subtrain)
    residuals = xi_subtrain - model.predict(gammas_subtrain)

    for i_eps, eps in enumerate(eps_list):
        valid_score = 0
        for j in range(len(gammas_valid)):
            gamma = gammas_valid[j]
            xi_j = np.array([xi_valid[j]])

            f_hat = model.predict(gamma.reshape(1, -1)).item()
            xi_ER = np.maximum(residuals + f_hat, 0)
            q = NewsVendor_1_Wass(xi_ER, eps, h, b)
        
            valid_score += oos_loss_valid(q, xi_j, h, b)

        valid_scores.append((eps, valid_score))

    best_eps, _ = min(valid_scores, key=lambda x: x[1])
    return best_eps


In [7]:
 # ----------------------- Implementation 2 ----------------------------
T = 100
N = 100
dim_s_list = [5]
dim_xi = 1
eps_list = [0.01, 0.05, 0.1, 0.5, 1]
max_K = 3
Cs = CHs = [1,5,10]
h, b = 10, 2
hidden_node, hidden_layer, block_size, bins, total_epoch = 32, 1, 1, 8, 300
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

for dim_s in dim_s_list: 
    print(f"✅✅✅✅✅✅✅Running trials for dim_s = {dim_s}✅✅✅✅✅✅✅✅")
    def run_trial(tt, N):
        max_retry = 30
        random.seed(tt)
        np.random.seed(tt)
        torch.manual_seed(tt)
        torch.cuda.manual_seed_all(tt)  
        for attempt_trial in range(max_retry):
            try:
                s, xi, _, _ = generate_data(N, dim_s, dim_xi)
                gammas = s
                s_single, _, W1_oos, W2_oos = generate_data(1, dim_s, dim_xi)
                gamma = s_single[0]

                #-------------------------------- NW--------------------------------
                C_H = cv_H(CHs, xi, gammas, h, b)
                H = C_H * 1 / (N ** (1 / 6))
                kde = KernelDensity(kernel='exponential', bandwidth=1).fit(gammas)
                g_gamma = np.exp(kde.score_samples(gamma.reshape(1, -1))).item()
                weight = NW_weights(gamma, gammas, H)
                C_smart = cv_lda(Cs, C_H, xi, gammas, h, b)
                lda_0 = 1 / np.sqrt(N * (H ** 2) * g_gamma)
                lda_smart = C_smart * lda_0
                nw_model = Newsvendor(reg=lda_smart, verbose=False)
                nw_model.fit({"b": b, "h": h, "gamma": gamma, "xi": xi, "weight": weight})
                q_nw = nw_model.coef_
                loss_nw = oos_loss(q_nw, gamma, h, b, W1_oos, W2_oos, dim_xi=1)
                print(f"NW Finished {tt} trial for N = {N}, {loss_nw}")

                #--------------------------- NF-GMM-------------------------------
                best_K_NF, best_eps_NF = cv_GMM(N, max_K, eps_list, xi, gammas, h, b, hidden_node, hidden_layer, block_size, bins, total_epoch, device)
                print(f"{tt} trial : {best_K_NF}, {best_eps_NF}")
                scaler_s, scaler_xi =StandardScaler(), StandardScaler()
                s_std = scaler_s.fit_transform(gammas)
                xi_std = scaler_xi.fit_transform(xi)
                s_val = gamma.reshape(1, -1)                                # (1, dim_s)
                s_val_std = scaler_s.transform(s_val) 
                data_std = np.hstack([s_std, xi_std]) 
                x_tensor = torch.tensor(data_std, dtype=torch.float32).to(device)

                cov_type = 'full' if dim_s <= 10 else 'diag'
                gmm_x = GaussianMixture(n_components=best_K_NF, covariance_type=cov_type, reg_covar=1e-2).fit(data_std)
                mu_x, cov_x, p_x = gmm_x.means_, gmm_x.covariances_, gmm_x.weights_
                sig_x = np.array([np.diag(cov) for cov in cov_x]) if cov_type == 'diag' else cov_x


                nfm, _ = train_nf_model(latent_size=dim_s + dim_xi, best_K=best_K_NF, hidden_node=hidden_node, hidden_layer=hidden_layer, block_size=block_size, num_bins=bins, total_epoch=200, x=x_tensor, device=device)

                s_vec = s_val_std.ravel()                                 

                mu_cond_x, cov_cond_x, w_x = transforming_conditional(s=s_vec, num_components=best_K_NF, mu_k=mu_x, sig_k=sig_x, p_k=p_x, dim_s=dim_s)
                xi_hat_std = (w_x[:, None] * mu_cond_x).sum(axis=0, keepdims=True )    # (1, dim_xi)

                gamma_std = np.hstack([s_val_std, xi_hat_std])                         # (1, dim_s+dim_xi)
                gamma_tensor = torch.tensor(gamma_std, dtype=torch.float32).to(device)

                z_s = inverse(nfm, gamma_tensor)[:, :dim_s].reshape(-1)
                z_train = inverse(nfm, x_tensor)

                cov_type = 'full' if dim_s <= 10 else 'diag'
                gmm_z = GaussianMixture(n_components=best_K_NF, covariance_type=cov_type, reg_covar=1e-2).fit(z_train)
                mu_z, cov_z, p_z = gmm_z.means_, gmm_z.covariances_, gmm_z.weights_
                sig_z = np.array([np.diag(cov) for cov in cov_z]) if cov_type == 'diag' else cov_z

                mu_cond_z, cov_cond_z, w_z = transforming_conditional(s=z_s, num_components= best_K_NF, mu_k=mu_z, sig_k=sig_z, p_k=p_z, dim_s=dim_s)
                z_xi_sample = MC_sampling(best_K_NF, 1000, mu_cond_z, cov_cond_z, w_z)
                z_full = np.hstack([np.repeat(z_s.reshape(1, -1), len(z_xi_sample), axis=0), z_xi_sample])

                z_tensor = torch.tensor(z_full, dtype=torch.float32).to(device)
                xi_sampled_std = forward(nfm, z_tensor)[:, dim_s:]
                xi_sampled = scaler_xi.inverse_transform(xi_sampled_std)
                xi_sampled = np.maximum(xi_sampled, 0)        
                q_NF_gmm = NewsVendor_2_Wass(xi_sampled, best_eps_NF, h, b)
                loss_NF_gmm = oos_loss(q_NF_gmm, gamma, h, b, W1_oos, W2_oos, dim_xi=1)
                print(f"Trial {tt} : NF-GMM Finished {tt} trial for N = {N}")

                #-------------------- Non-NF-GMM--------------------------
                best_K_nonNF, best_eps_nonNF = cv_GMM_nonNF(N, max_K, eps_list, xi, gammas, h, b, hidden_node, hidden_layer, block_size, bins, total_epoch, device)
                scaler_xi, scaler_s = StandardScaler(), StandardScaler()
                s_std = scaler_s.fit_transform(gammas)
                xi_std = scaler_xi.fit_transform(xi)
                data = np.hstack([s_std, xi_std])  

                cov_type_nonNF = 'full' if dim_s <= 10 else 'diag'
                gmm_x = GaussianMixture(n_components=best_K_nonNF, covariance_type=cov_type_nonNF, reg_covar=1e-2).fit(data)
                mu_x, cov_x, p_x = gmm_x.means_, gmm_x.covariances_, gmm_x.weights_
                sig_x = np.array([np.diag(cov) for cov in cov_x]) if cov_type_nonNF == 'diag' else cov_x

                s_val = gamma.reshape(1, -1) 
                s_val_std = scaler_s.transform(s_val)  
                s_vec = s_val_std.ravel()               
                mu_cond_x, cov_cond_x, p_cond_x = transforming_conditional(s=s_vec, num_components=best_K_nonNF, mu_k=mu_x, sig_k=sig_x, p_k=p_x, dim_s=dim_s)

                num_mc = 1000
                xi_sampled_std = MC_sampling(best_K_nonNF, num_mc, mu_cond_x, cov_cond_x, p_cond_x)  
                xi_sampled = scaler_xi.inverse_transform(xi_sampled_std)
                xi_sampled = np.maximum(xi_sampled, 0)  
                q_nonNF_gmm = NewsVendor_2_Wass(xi_sampled, best_eps_nonNF, h, b)
                loss_nonNF_gmm = oos_loss(q_nonNF_gmm, gamma, h, b, W1_oos, W2_oos, dim_xi=1)
                print(f"Trial {tt} : GMM (non-NF) Finished for N = {N}")

                #---------------------------Kannan-----------------------------
                best_eps2 = cv_eps_kannan(eps_list, xi, gammas, h, b)
                print(f"Kanna Best eps at {tt} trial for N = {N} : {best_eps2}")
                model = LinearRegression().fit(gammas, xi)
                residuals = xi - model.predict(gammas)
                f_hat = model.predict(np.atleast_2d(gamma)).item()
                xi_er = np.maximum(residuals + f_hat, 0) 
                q_reg = NewsVendor_1_Wass(xi_er, best_eps2, h=h, b=b)
                loss_reg = oos_loss(q_reg, gamma, h, b, W1_oos, W2_oos, dim_xi=1)
                print(f"KANNAN Finished {tt} trial for N = {N}")

                print(f"NF-gmm,nonNF-gmm,kannan,nw : {loss_NF_gmm}, {loss_nonNF_gmm}, {loss_reg}, {loss_nw}")
                return {
                    'K_NF_GMM' : best_K_NF,
                    'K_nonNF_GMM' : best_K_nonNF,
                    'eps_NF_GMM': best_eps_NF,
                    'eps_nonNF_GMM': best_eps_nonNF,
                    'eps_kannan': best_eps2,
                    'loss_NF_GMM': loss_NF_gmm,
                    'loss_nonNF_GMM': loss_nonNF_gmm,
                    'loss_kannan': loss_reg,
                    'loss_nw': loss_nw,
                }
                
            except Exception as e:
                print(f"[Global Retry {attempt_trial+1}/{max_retry}] Trial {tt}, N={N} failed: {e}")
                continue

        print(f"⚠️ Trial {tt}, N={N} failed after {max_retry} retries.")
        return None

    results = Parallel(n_jobs=-1)(delayed(run_trial)(tt, N) for tt in tqdm(range(T)))

    # --- Eliminate None or malformed results ---
    results = [r for r in results if isinstance(r, dict) and r is not None]

    # --- Handle empty case ---
    if len(results) == 0:
        print(f"⚠️ No valid results for N={N}, skipping.")
        continue  #
  
    # --- Add Trial indices (if not already present) ---
    for i, r in enumerate(results):
        r['Trial'] = i

    # --- Construct DataFrame ---   
    df = pd.DataFrame(results)

    # --- Ensure 'Trial' column exists ---
    if 'Trial' in df.columns:
        cols = ['Trial'] + [c for c in df.columns if c != 'Trial']
        df = df[cols]
    else:
        print("⚠️ Warning: 'Trial' column not found. Columns:", df.columns)

    # --- Add AVG row ---
    mean_row = {'Trial': 'AVG'}
    for col in df.columns:
        if col != 'Trial' and pd.api.types.is_numeric_dtype(df[col]):
            mean_row[col] = df[col].mean(skipna=True)
    df = pd.concat([df, pd.DataFrame([mean_row])], ignore_index=True)

    # --- SAVE ---
    save_path = f"NV_{dim_s}d_LinQuad.csv"
    df.to_csv(save_path, index=False) 
    print(f"✅ Saved to {save_path}")

✅✅✅✅✅✅✅Running trials for dim_s = 5✅✅✅✅✅✅✅✅


 20%|██        | 20/100 [00:00<00:00, 122.42it/s]

NW Finished 4 trial for N = 100, 79.9968138282675
NW Finished 0 trial for N = 100, 28.174036068900907
NW Finished 1 trial for N = 100, 32.39322994183075
NW Finished 3 trial for N = 100, 43.541293162736594
NW Finished 14 trial for N = 100, 35.38926168951176
NW Finished 13 trial for N = 100, 26.730074811057612
NW Finished 6 trial for N = 100, 34.74164305444324
NW Finished 5 trial for N = 100, 45.640146305262036
NW Finished 9 trial for N = 100, 43.768415077073236
NW Finished 7 trial for N = 100, 37.390822545905664
NW Finished 12 trial for N = 100, 52.456282289989964
NW Finished 11 trial for N = 100, 38.747105214008464
NW Finished 16 trial for N = 100, 41.01248981311388
NW Finished 2 trial for N = 100, 33.70753876686571
NW Finished 10 trial for N = 100, 4.476112024859743
NW Finished 19 trial for N = 100, 39.42223387833402
NW Finished 8 trial for N = 100, 32.72149323015867
NW Finished 18 trial for N = 100, 51.3103712420618
NW Finished 15 trial for N = 100, 33.00909450296708
NW Finished 17 t

                                                             

4 trial : 3, 0.1


                                                              

0 trial : 1, 0.01
3 trial : 3, 0.01


                                                              

1 trial : 3, 0.05


                                                              

2 trial : 3, 1
13 trial : 1, 0.1


                                                              

8 trial : 2, 0.5


                                                              

17 trial : 1, 0.05


                                                              

18 trial : 3, 0.1
7 trial : 3, 0.01


Training NF:  21%|██        | 42/200 [00:00<00:00, 206.23it/s]

10 trial : 3, 0.1
6 trial : 3, 0.01
19 trial : 3, 0.1
14 trial : 3, 0.01


Training NF:  24%|██▍       | 48/200 [00:00<00:00, 239.64it/s]

12 trial : 3, 0.1
9 trial : 2, 0.5
5 trial : 1, 1


                                                              

11 trial : 1, 0.05
16 trial : 2, 0.01
15 trial : 1, 0.01


                                                              

Trial 4 : NF-GMM Finished 4 trial for N = 100
Trial 3 : NF-GMM Finished 3 trial for N = 100
Trial 0 : NF-GMM Finished 0 trial for N = 100
Trial 1 : NF-GMM Finished 1 trial for N = 100
Trial 2 : NF-GMM Finished 2 trial for N = 100
Trial 13 : NF-GMM Finished 13 trial for N = 100
Trial 8 : NF-GMM Finished 8 trial for N = 100
Trial 17 : NF-GMM Finished 17 trial for N = 100
Trial 18 : NF-GMM Finished 18 trial for N = 100
Trial 7 : NF-GMM Finished 7 trial for N = 100
Trial 10 : NF-GMM Finished 10 trial for N = 100
Trial 19 : NF-GMM Finished 19 trial for N = 100
Trial 6 : NF-GMM Finished 6 trial for N = 100
Trial 14 : NF-GMM Finished 14 trial for N = 100
Trial 9 : NF-GMM Finished 9 trial for N = 100
Trial 5 : NF-GMM Finished 5 trial for N = 100
Trial 11 : NF-GMM Finished 11 trial for N = 100
Trial 12 : NF-GMM Finished 12 trial for N = 100
Trial 15 : NF-GMM Finished 15 trial for N = 100
Trial 16 : NF-GMM Finished 16 trial for N = 100
Trial 4 : GMM (non-NF) Finished for N = 100
Trial 0 : GMM (n

 40%|████      | 40/100 [06:25<11:19, 11.33s/it] 

KANNAN Finished 4 trial for N = 100
NF-gmm,nonNF-gmm,kannan,nw : 98.91053443447217, 143.0314900436873, 61.40315222679702, 79.9968138282675
Kanna Best eps at 0 trial for N = 100 : 1
KANNAN Finished 0 trial for N = 100
NF-gmm,nonNF-gmm,kannan,nw : 21.161619437484035, 30.11866178295512, 24.150298437080775, 28.174036068900907
Kanna Best eps at 3 trial for N = 100 : 0.01
KANNAN Finished 3 trial for N = 100
NF-gmm,nonNF-gmm,kannan,nw : 41.85257104575132, 41.6639115871717, 47.469130984107835, 43.541293162736594
Kanna Best eps at 1 trial for N = 100 : 1
KANNAN Finished 1 trial for N = 100
NF-gmm,nonNF-gmm,kannan,nw : 26.039700840187034, 27.514180530053434, 36.19889168239909, 32.39322994183075
Kanna Best eps at 13 trial for N = 100 : 0.5
Kanna Best eps at 8 trial for N = 100 : 0.01
Kanna Best eps at 2 trial for N = 100 : 0.5
KANNAN Finished 8 trial for N = 100
NF-gmm,nonNF-gmm,kannan,nw : 25.926754708156615, 28.445296560235093, 26.216034649713816, 32.72149323015867
KANNAN Finished 13 trial for 

                                                             

NW Finished 21 trial for N = 100, 18.6409534689889


Training NF:   0%|          | 0/300 [00:00<?, ?it/s]7.75it/s]

NW Finished 22 trial for N = 100, 60.885184448255764


Training NF:   0%|          | 0/300 [00:00<?, ?it/s]         

NW Finished 23 trial for N = 100, 39.38333579581451


                                                             

NW Finished 25 trial for N = 100, 32.88708356243739
NW Finished 24 trial for N = 100, 66.41354050622854
NW Finished 26 trial for N = 100, 18.63607536294117


Training NF:   7%|▋         | 20/300 [00:00<00:08, 34.14it/s]

NW Finished 28 trial for N = 100, 14.491839450320118
NW Finished 27 trial for N = 100, 24.228451077537443


Training NF:   0%|          | 0/300 [00:00<?, ?it/s]8.67it/s]

NW Finished 29 trial for N = 100, 15.755851814667366
NW Finished 31 trial for N = 100, 46.68234395300228


Training NF:   3%|▎         | 8/300 [00:00<00:08, 35.29it/s]]

NW Finished 30 trial for N = 100, 39.52505322256829
NW Finished 32 trial for N = 100, 51.16268246722127
NW Finished 33 trial for N = 100, 44.419484831686205


Training NF:   3%|▎         | 8/300 [00:00<00:08, 36.35it/s]]

NW Finished 34 trial for N = 100, 63.2688841237084
NW Finished 35 trial for N = 100, 27.65253673203263


Training NF:   0%|          | 0/300 [00:00<?, ?it/s]7.30it/s]

NW Finished 36 trial for N = 100, 7.234555580004208
NW Finished 37 trial for N = 100, 14.178531476459641


Training NF:   6%|▌         | 18/300 [00:00<00:07, 35.84it/s]

NW Finished 38 trial for N = 100, 31.30114579778484
NW Finished 39 trial for N = 100, 44.780444239217566


                                                             

20 trial : 2, 0.5


                                                              

21 trial : 1, 0.01


                                                              

23 trial : 3, 0.5


Training NF:  22%|██▎       | 45/200 [00:00<00:00, 222.23it/s]

22 trial : 3, 0.01


                                                              

25 trial : 3, 0.05
33 trial : 3, 0.1


                                                              

24 trial : 2, 0.05
26 trial : 1, 0.5
29 trial : 3, 0.5
28 trial : 2, 1


                                                              

34 trial : 1, 1
36 trial : 3, 1
30 trial : 1, 0.01


Training NF:   0%|          | 0/200 [00:00<?, ?it/s]87.09it/s]

31 trial : 3, 1
27 trial : 3, 0.1
32 trial : 3, 0.5


Training NF:   0%|          | 0/200 [00:00<?, ?it/s]          

39 trial : 3, 1
35 trial : 3, 0.01
38 trial : 2, 0.01
37 trial : 2, 0.05


                                                              

Trial 20 : NF-GMM Finished 20 trial for N = 100
Trial 21 : NF-GMM Finished 21 trial for N = 100
Trial 23 : NF-GMM Finished 23 trial for N = 100
Trial 22 : NF-GMM Finished 22 trial for N = 100
Trial 25 : NF-GMM Finished 25 trial for N = 100
Trial 33 : NF-GMM Finished 33 trial for N = 100
Trial 28 : NF-GMM Finished 28 trial for N = 100
Trial 24 : NF-GMM Finished 24 trial for N = 100
Trial 29 : NF-GMM Finished 29 trial for N = 100
Trial 26 : NF-GMM Finished 26 trial for N = 100
Trial 36 : NF-GMM Finished 36 trial for N = 100
Trial 34 : NF-GMM Finished 34 trial for N = 100
Trial 30 : NF-GMM Finished 30 trial for N = 100
Trial 27 : NF-GMM Finished 27 trial for N = 100
Trial 31 : NF-GMM Finished 31 trial for N = 100
Trial 39 : NF-GMM Finished 39 trial for N = 100
Trial 32 : NF-GMM Finished 32 trial for N = 100
Trial 38 : NF-GMM Finished 38 trial for N = 100
Trial 35 : NF-GMM Finished 35 trial for N = 100
Trial 37 : NF-GMM Finished 37 trial for N = 100
Trial 20 : GMM (non-NF) Finished for N =

 60%|██████    | 60/100 [12:49<09:57, 14.94s/it]

KANNAN Finished 20 trial for N = 100
NF-gmm,nonNF-gmm,kannan,nw : 38.95484174760308, 37.8254871711476, 40.8784817552974, 44.79281477977628
Kanna Best eps at 21 trial for N = 100 : 0.5
KANNAN Finished 21 trial for N = 100
NF-gmm,nonNF-gmm,kannan,nw : 9.60041780047533, 9.768663887325747, 9.459277426678222, 18.6409534689889
Kanna Best eps at 23 trial for N = 100 : 0.5
KANNAN Finished 23 trial for N = 100
NF-gmm,nonNF-gmm,kannan,nw : 34.126900588711734, 34.276076462222385, 43.07813265005893, 39.38333579581451
Kanna Best eps at 28 trial for N = 100 : 0.01
Kanna Best eps at 22 trial for N = 100 : 0.5
Kanna Best eps at 33 trial for N = 100 : 0.01
KANNAN Finished 28 trial for N = 100
NF-gmm,nonNF-gmm,kannan,nw : 9.580123932613734, 8.030079572390203, 10.278002721389946, 14.491839450320118
KANNAN Finished 22 trial for N = 100
NF-gmm,nonNF-gmm,kannan,nw : 51.36625581293609, 51.803450430043746, 50.481126617408336, 60.885184448255764
KANNAN Finished 33 trial for N = 100
NF-gmm,nonNF-gmm,kannan,nw :

                                                             

NW Finished 41 trial for N = 100, 28.482721464852336


                                                             

NW Finished 42 trial for N = 100, 13.515397474365262


Training NF:   0%|          | 0/300 [00:00<?, ?it/s]5.84it/s]

NW Finished 44 trial for N = 100, 51.651187881802485
NW Finished 43 trial for N = 100, 23.055608899418786
NW Finished 45 trial for N = 100, 48.94583565230793


                                                             

NW Finished 46 trial for N = 100, 35.50490541533102


                                                             

NW Finished 47 trial for N = 100, 22.17390891158475


Training NF:   0%|          | 0/300 [00:00<?, ?it/s].01it/s]

NW Finished 49 trial for N = 100, 23.183174637738606
NW Finished 48 trial for N = 100, 48.0360057909077


Training NF:   3%|▎         | 9/300 [00:00<00:07, 38.54it/s]]

NW Finished 50 trial for N = 100, 53.907095444908265


Training NF:   1%|▏         | 4/300 [00:00<00:09, 32.68it/s] 

NW Finished 51 trial for N = 100, 43.19993688545402


Training NF:   0%|          | 0/300 [00:00<?, ?it/s]         

NW Finished 52 trial for N = 100, 39.86902925766957
NW Finished 58 trial for N = 100, 36.40302821992164


Training NF:   3%|▎         | 8/300 [00:00<00:08, 33.74it/s]]

NW Finished 53 trial for N = 100, 56.56791512356728
NW Finished 54 trial for N = 100, 9.811112139213929
NW Finished 56 trial for N = 100, 29.113885546425955
NW Finished 57 trial for N = 100, 27.721227737524924
NW Finished 59 trial for N = 100, 52.480991983644834
NW Finished 55 trial for N = 100, 25.571790096424696


                                                             

40 trial : 3, 0.05


                                                              

41 trial : 3, 0.05


                                                              

43 trial : 1, 0.05


                                                              

45 trial : 1, 0.05


Training NF:   0%|          | 0/200 [00:00<?, ?it/s]          

42 trial : 3, 1
44 trial : 2, 1
46 trial : 3, 0.05


                                                              

50 trial : 3, 0.1


Training NF:   0%|          | 0/200 [00:00<?, ?it/s]          

48 trial : 2, 0.01
47 trial : 1, 1
51 trial : 3, 0.01


Training NF:   0%|          | 0/200 [00:00<?, ?it/s]44.40it/s]

54 trial : 3, 1
49 trial : 1, 0.5
57 trial : 3, 1


Training NF:   0%|          | 0/200 [00:00<?, ?it/s]          

58 trial : 2, 0.05
55 trial : 1, 0.05


Training NF:  12%|█▏        | 23/200 [00:00<00:00, 229.12it/s]

52 trial : 1, 1
56 trial : 2, 0.01


                                                              

Trial 40 : NF-GMM Finished 40 trial for N = 100
59 trial : 1, 0.01
53 trial : 1, 1


                                                              

Trial 41 : NF-GMM Finished 41 trial for N = 100
Trial 43 : NF-GMM Finished 43 trial for N = 100
Trial 45 : NF-GMM Finished 45 trial for N = 100
Trial 42 : NF-GMM Finished 42 trial for N = 100
Trial 46 : NF-GMM Finished 46 trial for N = 100
Trial 44 : NF-GMM Finished 44 trial for N = 100
Trial 50 : NF-GMM Finished 50 trial for N = 100
Trial 48 : NF-GMM Finished 48 trial for N = 100
Trial 47 : NF-GMM Finished 47 trial for N = 100
Trial 54 : NF-GMM Finished 54 trial for N = 100
Trial 51 : NF-GMM Finished 51 trial for N = 100
Trial 49 : NF-GMM Finished 49 trial for N = 100
Trial 57 : NF-GMM Finished 57 trial for N = 100
Trial 58 : NF-GMM Finished 58 trial for N = 100
Trial 55 : NF-GMM Finished 55 trial for N = 100
Trial 52 : NF-GMM Finished 52 trial for N = 100
Trial 56 : NF-GMM Finished 56 trial for N = 100
Trial 59 : NF-GMM Finished 59 trial for N = 100
Trial 53 : NF-GMM Finished 53 trial for N = 100
Trial 40 : GMM (non-NF) Finished for N = 100
Trial 41 : GMM (non-NF) Finished for N = 10

 80%|████████  | 80/100 [19:15<05:33, 16.65s/it]

KANNAN Finished 40 trial for N = 100
NF-gmm,nonNF-gmm,kannan,nw : 3.685519405154927, 7.5800475345080915, 3.571731164705376, 10.202492038595521
Kanna Best eps at 41 trial for N = 100 : 0.05
KANNAN Finished 41 trial for N = 100
NF-gmm,nonNF-gmm,kannan,nw : 23.933655900691086, 41.49910152729637, 42.61393115378294, 28.482721464852336
Kanna Best eps at 45 trial for N = 100 : 0.1
KANNAN Finished 45 trial for N = 100
NF-gmm,nonNF-gmm,kannan,nw : 9.222569129785947, 13.6659257402875, 10.894259414121965, 48.94583565230793
Kanna Best eps at 43 trial for N = 100 : 0.01
KANNAN Finished 43 trial for N = 100
NF-gmm,nonNF-gmm,kannan,nw : 11.13989930295683, 12.998019241390956, 12.042661020736382, 23.055608899418786
Kanna Best eps at 46 trial for N = 100 : 0.05
Kanna Best eps at 42 trial for N = 100 : 0.01
KANNAN Finished 46 trial for N = 100
NF-gmm,nonNF-gmm,kannan,nw : 22.19853609438151, 76.7026466579942, 32.22708787071537, 35.50490541533102
KANNAN Finished 42 trial for N = 100
NF-gmm,nonNF-gmm,kannan

                                                             

NW Finished 61 trial for N = 100, 43.00837864748808


                                                             

NW Finished 62 trial for N = 100, 34.32819252099946
NW Finished 63 trial for N = 100, 53.823418618774674


                                                             

NW Finished 65 trial for N = 100, 31.41368018080867
NW Finished 64 trial for N = 100, 38.27107109382876


                                                             

NW Finished 66 trial for N = 100, 37.4041818148056
NW Finished 67 trial for N = 100, 63.8167475584222


Training NF:   3%|▎         | 8/300 [00:00<00:08, 33.79it/s]]

NW Finished 68 trial for N = 100, 38.477181171911454
NW Finished 69 trial for N = 100, 56.644233668815914


Training NF:   1%|▏         | 4/300 [00:00<00:07, 39.54it/s]]

NW Finished 70 trial for N = 100, 21.394512719127235


                                                             

NW Finished 71 trial for N = 100, 22.171513279990005


Training NF:   0%|          | 0/300 [00:00<?, ?it/s]8.91it/s]

NW Finished 72 trial for N = 100, 26.37480457374393
NW Finished 74 trial for N = 100, 37.60988796773214


Training NF:   3%|▎         | 8/300 [00:00<00:09, 30.89it/s]]

NW Finished 73 trial for N = 100, 26.500957247562894
NW Finished 75 trial for N = 100, 37.6926443579106
NW Finished 76 trial for N = 100, 44.053451604704655
NW Finished 77 trial for N = 100, 34.98641299864645


                                                             

NW Finished 78 trial for N = 100, 21.78381820959094


                                                             

NW Finished 79 trial for N = 100, 56.067045852063444


                                                             

60 trial : 2, 0.1


                                                              

61 trial : 1, 0.01


                                                              

62 trial : 3, 0.05
63 trial : 3, 0.1


                                                              

64 trial : 2, 0.05


Training NF:  10%|█         | 20/200 [00:00<00:00, 190.47it/s]

65 trial : 2, 0.05


                                                              

69 trial : 3, 0.05
70 trial : 1, 0.05


                                                              

68 trial : 2, 1


                                                              

66 trial : 2, 0.01
71 trial : 3, 0.05
73 trial : 3, 0.1
Trial 60 : NF-GMM Finished 60 trial for N = 100


                                                              

67 trial : 2, 0.05
72 trial : 1, 0.5
77 trial : 1, 0.1
76 trial : 1, 0.1


                                                              

75 trial : 2, 0.5


                                                              

74 trial : 3, 1


Training NF:  22%|██▏       | 43/200 [00:00<00:00, 214.89it/s]

78 trial : 2, 0.1
Trial 61 : NF-GMM Finished 61 trial for N = 100


                                                              

Trial 62 : NF-GMM Finished 62 trial for N = 100
Trial 63 : NF-GMM Finished 63 trial for N = 100
79 trial : 3, 0.05


                                                              

Trial 64 : NF-GMM Finished 64 trial for N = 100
Trial 65 : NF-GMM Finished 65 trial for N = 100
Trial 69 : NF-GMM Finished 69 trial for N = 100
Trial 70 : NF-GMM Finished 70 trial for N = 100
Trial 68 : NF-GMM Finished 68 trial for N = 100
Trial 66 : NF-GMM Finished 66 trial for N = 100
Trial 71 : NF-GMM Finished 71 trial for N = 100
Trial 73 : NF-GMM Finished 73 trial for N = 100
Trial 67 : NF-GMM Finished 67 trial for N = 100
Trial 72 : NF-GMM Finished 72 trial for N = 100
Trial 76 : NF-GMM Finished 76 trial for N = 100
Trial 77 : NF-GMM Finished 77 trial for N = 100
Trial 75 : NF-GMM Finished 75 trial for N = 100
Trial 74 : NF-GMM Finished 74 trial for N = 100
Trial 78 : NF-GMM Finished 78 trial for N = 100
Trial 79 : NF-GMM Finished 79 trial for N = 100
Trial 60 : GMM (non-NF) Finished for N = 100
Trial 61 : GMM (non-NF) Finished for N = 100
Trial 62 : GMM (non-NF) Finished for N = 100
Trial 63 : GMM (non-NF) Finished for N = 100
Trial 64 : GMM (non-NF) Finished for N = 100
Trial 6

100%|██████████| 100/100 [25:39<00:00, 15.40s/it]

KANNAN Finished 60 trial for N = 100
NF-gmm,nonNF-gmm,kannan,nw : 21.225144631171716, 21.339413818957816, 33.835555431375724, 32.78809543760148





Kanna Best eps at 61 trial for N = 100 : 0.01
KANNAN Finished 61 trial for N = 100
NF-gmm,nonNF-gmm,kannan,nw : 33.22649895440464, 40.68468969500056, 32.435963782815485, 43.00837864748808
Kanna Best eps at 62 trial for N = 100 : 0.01
KANNAN Finished 62 trial for N = 100
NF-gmm,nonNF-gmm,kannan,nw : 28.16625779033389, 25.932062846042587, 26.338176888902687, 34.32819252099946
Kanna Best eps at 63 trial for N = 100 : 0.1
KANNAN Finished 63 trial for N = 100
NF-gmm,nonNF-gmm,kannan,nw : 45.351272874771745, 59.97457724273698, 45.031075125019626, 53.823418618774674
Kanna Best eps at 64 trial for N = 100 : 0.01
Kanna Best eps at 65 trial for N = 100 : 0.01
KANNAN Finished 64 trial for N = 100
NF-gmm,nonNF-gmm,kannan,nw : 16.138320246717264, 22.538723602577477, 20.28858581076228, 38.27107109382876
KANNAN Finished 65 trial for N = 100
NF-gmm,nonNF-gmm,kannan,nw : 17.540413554557368, 17.592964103372772, 17.437400794553046, 31.41368018080867
Kanna Best eps at 70 trial for N = 100 : 1
KANNAN Finis

Training NF:   8%|▊         | 25/300 [00:00<00:06, 44.59it/s]

NW Finished 81 trial for N = 100, 73.11109449910221


                                                             

NW Finished 82 trial for N = 100, 27.94271141404337


Training NF:   0%|          | 0/300 [00:00<?, ?it/s]7.32it/s]

NW Finished 83 trial for N = 100, 14.928979592671704


                                                             

NW Finished 84 trial for N = 100, 27.309406963289454


Training NF:   0%|          | 0/300 [00:00<?, ?it/s]3.49it/s]

NW Finished 85 trial for N = 100, 27.455146383553455


Training NF:   0%|          | 0/300 [00:00<?, ?it/s]7.76it/s]

NW Finished 86 trial for N = 100, 55.090205569418686


Training NF:   6%|▌         | 17/300 [00:00<00:08, 35.12it/s]

NW Finished 87 trial for N = 100, 36.28186186553669


Training NF:   6%|▌         | 18/300 [00:00<00:07, 36.13it/s]

NW Finished 88 trial for N = 100, 19.01273445103027
NW Finished 89 trial for N = 100, 37.09306289207218


Training NF:   1%|▏         | 4/300 [00:00<00:09, 30.72it/s]]

NW Finished 90 trial for N = 100, 8.58001518972735


Training NF:   4%|▍         | 12/300 [00:00<00:08, 33.42it/s]

NW Finished 91 trial for N = 100, 42.26361131240396
NW Finished 92 trial for N = 100, 59.070185802611114


Training NF:   2%|▏         | 5/300 [00:00<00:07, 39.38it/s]]

NW Finished 93 trial for N = 100, 74.20261946662075


Training NF:   4%|▍         | 13/300 [00:00<00:07, 37.04it/s]

NW Finished 94 trial for N = 100, 32.992208751398486


Training NF:   7%|▋         | 20/300 [00:00<00:07, 35.99it/s]

NW Finished 96 trial for N = 100, 39.86952797898474


Training NF:   1%|▏         | 4/300 [00:00<00:08, 34.77it/s]]

NW Finished 95 trial for N = 100, 53.38284580175383


                                                             

NW Finished 97 trial for N = 100, 41.54275997457156


Training NF:   6%|▌         | 17/300 [00:00<00:07, 36.10it/s]

NW Finished 98 trial for N = 100, 41.247032189121704


                                                             

NW Finished 99 trial for N = 100, 40.48627454941964


                                                             

80 trial : 3, 0.1


                                                              

81 trial : 3, 0.05


                                                              

82 trial : 3, 0.01


                                                              

83 trial : 1, 1


                                                              

84 trial : 3, 0.01
85 trial : 3, 0.1


                                                              

88 trial : 3, 0.01


                                                              

86 trial : 3, 0.5
91 trial : 3, 0.5
Trial 80 : NF-GMM Finished 80 trial for N = 100
87 trial : 3, 0.5


Training NF:   0%|          | 0/200 [00:00<?, ?it/s]25.61it/s]

89 trial : 2, 0.1


                                                              

Trial 81 : NF-GMM Finished 81 trial for N = 100
93 trial : 1, 1
92 trial : 2, 0.1


Training NF:  24%|██▍       | 48/200 [00:00<00:00, 225.39it/s]

94 trial : 1, 0.01
90 trial : 2, 1


                                                              

96 trial : 1, 0.05
95 trial : 1, 0.05


                                                              

Trial 82 : NF-GMM Finished 82 trial for N = 100
97 trial : 3, 0.5


                                                              

98 trial : 2, 0.05
Trial 83 : NF-GMM Finished 83 trial for N = 100


                                                              

Trial 84 : NF-GMM Finished 84 trial for N = 100
Trial 85 : NF-GMM Finished 85 trial for N = 100
99 trial : 2, 0.01


                                                              

Trial 88 : NF-GMM Finished 88 trial for N = 100
Trial 86 : NF-GMM Finished 86 trial for N = 100
Trial 91 : NF-GMM Finished 91 trial for N = 100
Trial 87 : NF-GMM Finished 87 trial for N = 100
Trial 89 : NF-GMM Finished 89 trial for N = 100
Trial 93 : NF-GMM Finished 93 trial for N = 100
Trial 92 : NF-GMM Finished 92 trial for N = 100
Trial 90 : NF-GMM Finished 90 trial for N = 100
Trial 94 : NF-GMM Finished 94 trial for N = 100
Trial 96 : NF-GMM Finished 96 trial for N = 100
Trial 95 : NF-GMM Finished 95 trial for N = 100
Trial 97 : NF-GMM Finished 97 trial for N = 100
Trial 98 : NF-GMM Finished 98 trial for N = 100
Trial 99 : NF-GMM Finished 99 trial for N = 100
Trial 80 : GMM (non-NF) Finished for N = 100
Trial 81 : GMM (non-NF) Finished for N = 100
Trial 82 : GMM (non-NF) Finished for N = 100
Trial 83 : GMM (non-NF) Finished for N = 100
Trial 88 : GMM (non-NF) Finished for N = 100
Trial 84 : GMM (non-NF) Finished for N = 100
Trial 85 : GMM (non-NF) Finished for N = 100
Trial 91 : GM