In [8]:
import numpy as np
import pandas as pd
import pickle
import pyvinecopulib as pv
from scipy.stats import rankdata, kendalltau
from statsmodels.stats.diagnostic import acorr_ljungbox
from scipy.stats import chi2
from scipy.spatial.distance import cdist
import matplotlib.pyplot as plt
import seaborn as sns
import os
from tqdm import tqdm
import json
import importlib
import sys
sys.path.append('../src')

import validation
importlib.reload(validation)
from validation import (
    rolling_var_backtest, kupiec_test, christoffersen_test, plot_rolling_var, 
    plot_pit_diagnostics, compare_dependence, compare_tail_dependence, vine_aic_bic_loglik,
    energy_distance, convert_for_json
)

In [9]:
# === Setup ===
std_resids_path = "../models/garch/standardized_residuals.csv"
evt_param_path = "../models/evt/marginal_distributions.pkl"
copula_path = "../models/copula/best_copula.json"
out_dir = "../validation"
os.makedirs(out_dir, exist_ok=True)

std_resids = pd.read_csv(std_resids_path, index_col=0)
with open(evt_param_path, "rb") as f:
    evt_params = pickle.load(f)

In [10]:
validation_summary = {}

# === 1. Rolling VaR backtest ===
rolling_var_path = os.path.join(out_dir, "rolling_var_results.pkl")
if os.path.exists(rolling_var_path):
    with open(rolling_var_path, "rb") as f:
        results, var_values, idx_list = pickle.load(f)
    print("Loaded rolling VaR backtest results from file.")
else:
    print("Running rolling VaR backtest...")
    results, var_values, idx_list = rolling_var_backtest(std_resids, evt_params, window=500, alpha=0.05, step=1)
    with open(rolling_var_path, "wb") as f:
        pickle.dump((results, var_values, idx_list), f)
    print(f"Saved rolling VaR backtest results to {rolling_var_path}")

validation_summary["rolling_var"] = results
validation_summary["var_values"] = var_values
validation_summary["idx_list"] = idx_list

Running rolling VaR backtest...


Rolling VaR: 100%|██████████| 2111/2111 [13:09<00:00,  2.67it/s]

Saved rolling VaR backtest results to ../validation\rolling_var_results.pkl





In [12]:
# 1a. Kupiec test for VaR
kupiec_summary = {}
print("Kupiec test for VaR:")
for col, res in results.items():
    # Lấy lại chuỗi giá trị thực và VaR dự báo
    # VaR dự báo: var_values[col], giá trị thực: std_resids.iloc[idx_list][col].values
    var_series = np.array(var_values[col])
    realized = std_resids[col].iloc[idx_list].values
    kupiec_summary[col] = kupiec_test(realized, var_series, alpha=0.05)
    print(f"{col}: {kupiec_summary[col]}")
validation_summary["kupiec"] = kupiec_summary

Kupiec test for VaR:
FPT_DATA: {'n': 2111, 'violations': 117, 'expected': 105.55000000000001, 'hit_rate': 0.05542396968261488, 'LR_pof': 1.2649614408878733, 'p_value': 0.26071404266587583, 'reject_H0_5%': False}
HPG_DATA: {'n': 2111, 'violations': 115, 'expected': 105.55000000000001, 'hit_rate': 0.05447655139744197, 'LR_pof': 0.8664914549672176, 'p_value': 0.3519284569546528, 'reject_H0_5%': False}
MBB_DATA: {'n': 2111, 'violations': 118, 'expected': 105.55000000000001, 'hit_rate': 0.05589767882520132, 'LR_pof': 1.4914157177291827, 'p_value': 0.22199693311787916, 'reject_H0_5%': False}
MWG_DATA: {'n': 2111, 'violations': 107, 'expected': 105.55000000000001, 'hit_rate': 0.050686878256750355, 'LR_pof': 0.020877525918876927, 'p_value': 0.8851130993588041, 'reject_H0_5%': False}
VIC_DATA: {'n': 2111, 'violations': 114, 'expected': 105.55000000000001, 'hit_rate': 0.05400284225485552, 'LR_pof': 0.694772039171994, 'p_value': 0.40454596065561155, 'reject_H0_5%': False}


In [15]:
# 1b. Christoffersen test for VaR
christoffersen_summary = {}
for col, res in results.items():
    violations = res.get("violations", None)
    if violations is not None:
        christoffersen_summary[col] = christoffersen_test(
            violations, alpha=0.05
        )
print(f"{col}: {christoffersen_summary[col]}")
validation_summary["christoffersen"] = christoffersen_summary
print("Christoffersen test and Kupiec test for VaR calculated.")

VIC_DATA: {'LRuc': 0.6947720349678548, 'p_uc': 0.40454596207727933, 'LRind': 0.9535745777894569, 'p_ind': 0.32881108527245984, 'LRcc': 1.6483466127573116, 'p_cc': 0.4385974283289368, 'n_violate': 114, 'n_test': 2111, 'hit_rate': 0.05400284225485552, 'expected': 105.55000000000001, 'n00': 1886, 'n01': 110, 'n10': 110, 'n11': 4}
Christoffersen test and Kupiec test for VaR calculated.


In [16]:
# 1c. Plot rolling VaR backtest results
var_violations = {}
for col in var_values:
    if "violations" in results.get(col, {}):
        var_violations[col] = results[col]["violations"]
    else:
        var_violations[col] = [False] * len(var_values[col])
plot_rolling_var(std_resids, var_values, idx_list, var_violations, out_dir=os.path.join(out_dir, "rolling_var_plots"))
print(f"Rolling VaR plots saved to {os.path.join(out_dir, 'rolling_var_plots')}")

Rolling VaR plots saved to ../validation\rolling_var_plots


In [18]:
# === 2. Fit Vinecop on the entire dataset ===
pit_df = pd.read_csv('../models/copula/pit_data.csv', index_col=0)
pit_df = pit_df[std_resids.columns]  # Ensure column order matches
u = pit_df
vine = pv.Vinecop(u.shape[1])
family_set = [
    getattr(pv.BicopFamily, "gaussian", 1),
    getattr(pv.BicopFamily, "student", 2),
    getattr(pv.BicopFamily, "clayton", 3),
    getattr(pv.BicopFamily, "gumbel", 4),
]
controls = pv.FitControlsVinecop(family_set=family_set)
u_clip = np.clip(u.values.astype(float), 1e-6, 1-1e-6)
vine.select(u_clip, controls=controls)

In [19]:
# === 3. PIT diagnostics ===
print("=== Rosenblatt PIT diagnostics ===")
pit = vine.rosenblatt(u.values)
plot_pit_diagnostics(pit, out_dir)
pit_ljungbox_pvalues = []
for i in range(pit.shape[1]): # type: ignore
    lb_p = acorr_ljungbox(pit[:, i], lags=[10], return_df=True)["lb_pvalue"].iloc[0] # type: ignore
    pit_ljungbox_pvalues.append(lb_p)
validation_summary["pit_ljungbox_pvalues"] = pit_ljungbox_pvalues

=== Rosenblatt PIT diagnostics ===
PIT dim 1: Ljung-Box p-value = 0.0657
PIT dim 2: Ljung-Box p-value = 0.2602
PIT dim 3: Ljung-Box p-value = 0.6607
PIT dim 4: Ljung-Box p-value = 0.0418
PIT dim 5: Ljung-Box p-value = 0.2149


In [21]:
# === 4. Dependence metrics ===
u_sim = vine.simulate(len(u))
frob_norm = compare_dependence(pd.DataFrame(u.values), pd.DataFrame(u_sim), out_dir) # type: ignore
validation_summary["frob_norm_kendalltau"] = frob_norm
compare_tail_dependence(pd.DataFrame(u.values), pd.DataFrame(u_sim), out_dir, q=0.95) # type: ignore
validation_summary["tail_dependence_heatmap"] = [
    os.path.join(out_dir, "delta_taildep_lower.png"),
    os.path.join(out_dir, "delta_taildep_upper.png"),
]

# === 5. Model selection metrics (AIC/BIC/loglik) ===
vine_metrics = vine_aic_bic_loglik(vine, u.values)
validation_summary["vine_aic_bic_loglik"] = vine_metrics
print("AIC/BIC/loglik of Vinecop:", vine_metrics)

# === 6. Distance metrics ===
e_dist = energy_distance(u.values, u_sim)
validation_summary["energy_distance"] = e_dist
print(f"Energy distance (real U vs simulated U): {e_dist:.4f}")

Frobenius norm of Kendall's tau difference: 0.0517
AIC/BIC/loglik of Vinecop: {'loglik': 1442.8261011173695, 'aic': -2865.652202234739, 'bic': -2806.9773165477477, 'n_params': 10}
Energy distance (real U vs simulated U): 0.0004


In [22]:
# === 7. Save summary ===
validation_summary_json = {
    k: convert_for_json(v) for k, v in validation_summary.items()
}
with open(
    os.path.join(out_dir, "validation_summary.json"), "w", encoding="utf-8"
) as f:
    json.dump(validation_summary_json, f, indent=2, ensure_ascii=False)

print(f"Saved summary results to {os.path.join(out_dir, 'validation_summary.json')}")
print(f"Validation plots saved to {out_dir}")

Saved summary results to ../validation\validation_summary.json
Validation plots saved to ../validation
