## In-Sample Analysis 

**Phase 1: Model Estimation (Full Sample)**  
(Goal: Estimate parameters for your primary model and all benchmark models using the entire dataset)

### Load Data:
- Load the preprocessed returns file `vw_returns_final.csv` (N=95 series, T=726 observations) into a Polars DataFrame and convert it to a JAX array.

In [1]:
import polars as pl
import numpy as np
import os
from bellman_filter_dfsv.filters.bellman_information import DFSVBellmanInformationFilter
from bellman_filter_dfsv.models.dfsv import DFSVParamsDataclass
from bellman_filter_dfsv.utils.optimization import run_optimization
from bellman_filter_dfsv.filters.particle import DFSVParticleFilter
from bellman_filter_dfsv.utils.optimization import FilterType
from bellman_filter_dfsv.utils.optimization_helpers import create_stable_initial_params
import time
import jax
import jax.numpy as jnp
import cloudpickle
import pickle
# Model dimensions: N observed return series, K latent factors.
N = 95
K = 5

# Enable 64-bit floats in JAX before the data is converted to a JAX array.
jax.config.update("jax_enable_x64", True)

# Read the returns panel from CSV and convert it to a JAX array.
df = pl.read_csv("../vw_returns_final.csv")
returns_jax = df.to_jax()

# Last expression of the cell: displays the dtype of the converted array.
returns_jax.dtype

dtype('float64')

In [2]:
#Initial parameter guess
def _rescale_if_unstable(matrix, label, threshold=0.99, target=0.98):
    """Return ``matrix`` shrunk to spectral radius ``target`` if it reaches ``threshold``.

    The check uses a plain Python ``if`` on a JAX scalar, so it needs a
    concrete value: call it eagerly (as done for initialization), not from
    inside a ``jax.jit``-traced function.

    Args:
        matrix: Square transition matrix to check.
        label: Name used in the warning message (e.g. ``"Phi_f"``).
        threshold: Largest-eigenvalue modulus at or above which to rescale.
        target: Largest-eigenvalue modulus after rescaling.

    Returns:
        ``matrix`` unchanged, or ``matrix`` multiplied by a scalar factor.
    """
    spectral_radius = jnp.max(jnp.abs(jnp.linalg.eigvals(matrix)))
    if spectral_radius >= threshold:
        print(f"Warning: Initial {label} max eigenvalue {spectral_radius:.3f} >= {threshold}. Scaling down.")
        # Uniform scaling multiplies every eigenvalue by the same factor.
        matrix = matrix * (target / spectral_radius)
    return matrix


def create_realistic_initial_params(
    N: int,
    K: int,
    portfolio_sample_variances: np.ndarray # Accept numpy array for convenience
    ) -> DFSVParamsDataclass:
    """
    Creates realistic initial parameter guesses for the DFSV model optimization using JAX.

    Args:
        N: Number of observed series (assets).
        K: Number of latent factors.
        portfolio_sample_variances: One-dimensional array-like of length N holding
                                     the full-sample variance of each return series.

    Returns:
        DFSVParamsDataclass: Initial parameter values suitable for starting optimization.

    Raises:
        ValueError: If ``portfolio_sample_variances`` is not one-dimensional or
            its length differs from ``N``.
    """
    variances = jnp.asarray(portfolio_sample_variances, dtype=jnp.float64)
    # A column vector of shape (N, 1) would pass a bare length check and then
    # yield a wrongly shaped sigma2, so the rank is checked explicitly.
    if variances.ndim != 1:
        raise ValueError(
            f"portfolio_sample_variances must be one-dimensional, got shape {variances.shape}"
        )
    if variances.shape[0] != N:
        raise ValueError(f"Length of portfolio_sample_variances ({variances.shape[0]}) must match N ({N})")

    # --- 1. Factor Loadings (lambda_r) ---
    # Rectangular identity: ones on the main diagonal of the top block, zeros
    # elsewhere (so the upper triangle is zero, as the identification
    # constraint requires). Also covers N < K.
    lambda_r_init = jnp.eye(N, K, dtype=jnp.float64)

    # --- 2. Factor Transition Matrix (Phi_f) ---
    # Moderate diagonal persistence with small off-diagonal entries.
    diag_phi_f = 0.5
    off_diag_phi_f = 0.01
    phi_f_init = jnp.full((K, K), off_diag_phi_f, dtype=jnp.float64)
    phi_f_init = phi_f_init.at[jnp.diag_indices(K)].set(diag_phi_f)
    # The row sums grow with K, so the largest eigenvalue can reach the unit
    # circle for large K; rescale in that case.
    phi_f_init = _rescale_if_unstable(phi_f_init, "Phi_f")

    # --- 3. Log-Volatility Transition Matrix (Phi_h) ---
    # High diagonal persistence, no cross-dependence initially.
    diag_phi_h = 0.98
    phi_h_init = jnp.eye(K, dtype=jnp.float64) * diag_phi_h
    # Not expected to trigger for a diagonal of 0.98; kept as a safeguard.
    phi_h_init = _rescale_if_unstable(phi_h_init, "Phi_h")

    # --- 4. Log-Volatility Mean Vector (mu) ---
    # Author's choice, described as based on typical monthly volatility
    # expressed as log(sigma^2).
    mu_init = jnp.full(K, -6.0, dtype=jnp.float64)

    # --- 5. Log-Volatility Innovation Covariance (Q_h) ---
    # Diagonal; std dev of log-vol shocks ~ 0.2 -> variance ~ 0.04.
    q_h_diag_val = 0.04
    q_h_init = jnp.eye(K, dtype=jnp.float64) * q_h_diag_val

    # --- 6. Idiosyncratic Variances (sigma2 - diagonal of Sigma_epsilon) ---
    # A fixed share of each sample variance, floored to stay strictly positive.
    proportion_idiosyncratic = 0.15
    sigma2_init = jnp.maximum(proportion_idiosyncratic * variances, 1e-8)

    return DFSVParamsDataclass(
        N=N,
        K=K,
        lambda_r=lambda_r_init,
        Phi_f=phi_f_init,
        Phi_h=phi_h_init,
        mu=mu_init,
        sigma2=sigma2_init, # This is the vector of diagonal variances
        Q_h=q_h_init
    )
# Variance of each column of the returns array (axis=0), one value per series.
sample_var = jnp.var(returns_jax, axis=0)

# Build the starting point for the optimizer from the model dimensions and
# the per-series variances.
init_params = create_realistic_initial_params(
    N=N,
    K=K,
    portfolio_sample_variances=sample_var,
)

# Last expression of the cell: displays the initial parameter set.
init_params

DFSVParamsDataclass(N=95, K=5, lambda_r=Array([[1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
   

### Estimate DFSV-BIF Model:
- Define the DFSV model structure (N=95, K=5, VAR(1), lower-triangular Λ constraint).
- Set initial parameter values.
- Run BIF-based MLE (e.g., DampedTrustRegionBFGS) on t=1 to 726.

In [None]:
# Initial parameters: `init_params` built in the cell above.

# NOTE: the optimization takes around 3hrs to run, so it is kept commented out
# as a record of the call; the stored result is loaded from a pickle file below.
# bif_filter=DFSVBellmanInformationFilter(N,K)
# # Run optimization with BIF
# start_time = time.time()
# results_bif = run_optimization(
#     filter_type=FilterType.BIF,
#     returns=returns_jax,  # Your returns data
#     initial_params=init_params,
#     optimizer_name="DampedTrustRegionBFGS",  
#     use_transformations=True,  # Enable parameter transformations for better stability
#     max_steps=500,  # Reasonable number of steps based on your examples
#     stability_penalty_weight=1000.0,  # Default penalty weight from your codebase
#     verbose=True,
#     log_params=False,  # Enable parameter logging for analysis
#     log_interval=1,
#     fix_mu=False
# )

# Load results
# NOTE(review): unpickling can execute arbitrary code; only load result files
# that were produced by this project.
# NOTE(review): the save cell reports writing this file under
# outputs/empirical/insample/ — confirm the working directory/path matches.
bif_pkl = "bif_full_result_20250425_144625.pkl"
with open(bif_pkl, "rb") as f:
    results_bif = cloudpickle.load(f)
print(results_bif)

# Read the summary metrics from the loaded result object. The run itself is
# commented out above, so there is no local `start_time`/`result`; the stored
# result carries its own wall-clock duration in `time_taken`.
time_bif = results_bif.time_taken
theta_bif = results_bif.final_params  # Final optimized parameters
loglik_bif = -results_bif.final_loss  # Convert minimization objective to log-likelihood
conv_bif = results_bif.success  # Convergence status
print(f"Estimated parameters: {theta_bif}")
print(f"BIF Optimization completed in {time_bif:.2f} seconds")
print(f"Convergence status: {conv_bif}")
print(f"Final log-likelihood: {loglik_bif:.4f}")

OptimizerResult(filter_type=<FilterType.BIF: 1>, optimizer_name='DampedTrustRegionBFGS', uses_transformations=True, fix_mu=False, prior_config_name='No Priors', success=Array(True, dtype=bool), result_code=optimistix._solution.RESULTS<>, final_loss=Array(-222084.10371758, dtype=float64), steps=Array(289, dtype=int64, weak_type=True), time_taken=10511.067908525467, error_message=None, final_params=DFSVParamsDataclass(N=95, K=5, lambda_r=Array([[ 1.00000000e+00,  0.00000000e+00,  0.00000000e+00,
         0.00000000e+00,  0.00000000e+00],
       [ 8.64240933e-01,  1.00000000e+00,  0.00000000e+00,
         0.00000000e+00,  0.00000000e+00],
       [ 9.44707338e-01, -8.46640119e-02,  1.00000000e+00,
         0.00000000e+00,  0.00000000e+00],
       [ 7.96250567e-01,  1.99145606e-02, -9.74564516e-03,
         1.00000000e+00,  0.00000000e+00],
       [ 8.18062280e-01,  1.74249316e-02,  1.00618529e-01,
         2.76201051e-01,  1.00000000e+00],
       [ 7.39165123e-01, -4.19852043e-02,  2.81170

### Record Metrics (DFSV-BIF):
- Estimated parameters `Θ^_BIF`
- Final pseudo-log-likelihood `L_BIF`
- Number of free parameters `p_DFSV`
- Total estimation time
- Convergence status/flags

In [None]:
# Keep the estimated parameter set of the loaded BIF result under a short
# name for later cells (no file is loaded in this cell; `results_bif` must
# already exist).


bif_params=results_bif.final_params

OptimizerResult(filter_type=<FilterType.BIF: 1>, optimizer_name='DampedTrustRegionBFGS', uses_transformations=True, fix_mu=False, prior_config_name='No Priors', success=Array(False, dtype=bool), result_code=optimistix._solution.RESULTS<The maximum number of steps was reached in the nonlinear solver. The problem may not be solveable (e.g., a root-find on a function that has no roots), or you may need to increase `max_steps`.>, final_loss=Array(-76371.57597898, dtype=float64), steps=Array(10, dtype=int64, weak_type=True), time_taken=64.89801907539368, error_message=None, final_params=DFSVParamsDataclass(N=95, K=5, lambda_r=Array([[1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0.,

In [4]:
# Save result
from pathlib import Path
from model_fitting import save_result

# Create the output directory (including missing parents); no error if it
# already exists.
output_dir = Path("outputs/empirical/insample")
output_dir.mkdir(parents=True, exist_ok=True)

# The BIF result object in this notebook is `results_bif`; the name `result`
# is not defined by any cell here.
save_result(results_bif, output_dir)

Full result object saved to outputs/empirical/insample/bif_full_result_20250425_144625.pkl


### Estimate DFSV-PF Model (Benchmark 1):
- Define DFSV model structure (same as BIF).
- Set initial params, specify particles (e.g., P=10000).
- Run PF-based MLE (e.g., ArmijoBFGS).

In [21]:
# Show the BIF estimates for reference while the PF benchmark is pending.
print(bif_params)

# TODO: Define model, set params, run PF MLE
# Until then, every PF metric is an Ellipsis placeholder.
theta_pf, loglik_pf, time_pf, conv_pf = ..., ..., ..., ...

DFSVParamsDataclass(N=95, K=5, lambda_r=Array([[1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
   

### Record Metrics (DFSV-PF):
- Estimated parameters `Θ^_PF`
- Final log-likelihood `L_PF`
- Number of free parameters `p_DFSV`
- Total estimation time
- Convergence status/flags

In [10]:
# TODO: Display DFSV-PF results
# Prints the four DFSV-PF metrics on one line; while they are still Ellipsis
# placeholders the output is "Ellipsis Ellipsis Ellipsis Ellipsis".
print(theta_pf, loglik_pf, time_pf, conv_pf)

Ellipsis Ellipsis Ellipsis Ellipsis


### Estimate DCC-GARCH Model (Benchmark 2):
- Use `arch` library for univariate GARCH(1,1).
- Specify DCC(1,1) model.
- Fit combined DCC-GARCH on t=1 to 726.

In [11]:
# TODO: Define and fit DCC-GARCH model
# Until then, every DCC-GARCH metric is an Ellipsis placeholder.
theta_dcc, loglik_dcc, time_dcc, conv_dcc = ..., ..., ..., ...

### Record Metrics (DCC-GARCH):
- Estimated parameters `Θ^_DCC`
- Final log-likelihood `L_DCC`
- Number of free parameters `p_DCC`
- Total estimation time
- Convergence status/flags

In [12]:
# TODO: Display DCC-GARCH results
# Prints the four DCC-GARCH metrics on one line; while they are still
# Ellipsis placeholders the output is "Ellipsis Ellipsis Ellipsis Ellipsis".
print(theta_dcc, loglik_dcc, time_dcc, conv_dcc)

Ellipsis Ellipsis Ellipsis Ellipsis


### Estimate Factor-CV Model (Benchmark 3):
- (Optional) PCA pre-step for initial Λ and f_t with K=5.
- Define state-space model in `statsmodels.tsa.statespace`.
- Fit model using Kalman Filter MLE.

In [13]:
# TODO: Define and fit Factor-CV state-space model
# Until then, every Factor-CV metric is an Ellipsis placeholder.
theta_cv, loglik_cv, time_cv, conv_cv = ..., ..., ..., ...

### Record Metrics (Factor-CV):
- Estimated parameters `Θ^_CV`
- Final log-likelihood `L_CV`
- Number of free parameters `p_CV`
- Total estimation time
- Convergence status/flags