## Data Simulation

In [8]:
import numpy as np
from numpy.random import default_rng
from sklearn.covariance import EmpiricalCovariance

# Shared generator with a fixed seed (123) so draws made from it are reproducible.
rng = default_rng(123)

In [18]:
def simulate_sigma(d, rho=0.5):
    """
    Build a d x d equicorrelation matrix.

    Every off-diagonal entry equals ``rho`` and every diagonal entry equals
    ``rho + (1 - rho)``. This is a simple stand-in for the vine-generated Σ
    used in Künzel et al.
    """
    # Constant background shared by every pair of features.
    shared_part = rho * np.ones((d, d))
    # Diagonal top-up that brings each variance back to rho + (1 - rho).
    diagonal_part = (1 - rho) * np.eye(d)
    return shared_part + diagonal_part

def mu0(x):
    """Example control response function: the first feature column of ``x``."""
    first_feature = x[:, 0]
    return first_feature

def mu1(x):
    """
    Example treated response function.

    Returns the first feature column plus half of the second feature column,
    so ``x`` needs at least two columns.
    """
    first_feature = x[:, 0]
    second_feature = x[:, 1]
    return first_feature + 0.5 * second_feature

def propensity_e(x, alpha=0.5):
    """
    Propensity score e(x) = P(W=1 | X=x).

    The score is a logistic function of the first feature column scaled by
    ``alpha``, which controls the strength of confounding. With ``alpha = 0``
    every unit gets probability 0.5, i.e. completely random assignment.
    """
    scaled_feature = alpha * x[:, 0]
    # Logistic link: 1 / (1 + exp(-z)).
    denominator = 1 + np.exp(-scaled_feature)
    return 1 / denominator

def simulate_dataset(N, d=5, alpha=0.5, rho=0.5, rng=None):
    """
    Simulate one observational dataset with known potential outcomes.

    Features are drawn as X ~ N(0, Σ) with Σ = simulate_sigma(d, rho). Both
    potential outcomes are built from mu0 / mu1 plus independent N(0, 1)
    noise. Treatment is assigned as W ~ Bernoulli(e(X)) with
    e = propensity_e(X, alpha), and only the potential outcome matching the
    assigned treatment is revealed in Y.

    Parameters
    ----------
    N : int
        Number of samples to draw.
    d : int
        Number of features (columns of X).
    alpha : float
        Confounding strength forwarded to propensity_e.
    rho : float
        Correlation parameter forwarded to simulate_sigma.
    rng : numpy.random.Generator or None
        Source of randomness. When None, a fresh unseeded default_rng() is
        created, so results are not reproducible across calls.

    Returns
    -------
    X : features, one row per sample
    W : 0/1 treatment indicator
    Y : observed outcome (Y1 where W == 1, otherwise Y0)
    Y0, Y1 : potential outcomes under control / treatment (noise included)
    tau : true CATE evaluated at each row, mu1(X) - mu0(X) (noise-free)
    e_x : true propensity score of each row
    """
    # Explicit None check rather than truthiness of the generator object.
    if rng is None:
        rng = default_rng()
    Sigma = simulate_sigma(d, rho=rho)

    # 1. Generate features X ~ N(0, Σ)
    X = rng.multivariate_normal(mean=np.zeros(d), cov=Sigma, size=N)

    # 2. Generate potential outcomes (noise is drawn in the order eps0, eps1)
    eps0 = rng.normal(0, 1, size=N)
    eps1 = rng.normal(0, 1, size=N)
    m0 = mu0(X)
    m1 = mu1(X)
    Y0 = m0 + eps0
    Y1 = m1 + eps1

    # True conditional average treatment effect (CATE): the difference of the
    # noise-free response surfaces. Y1 - Y0 would instead be the individual
    # effect, which also carries the irreducible noise eps1 - eps0; callers
    # who need it can compute it from the returned Y0 and Y1.
    tau = m1 - m0

    # 3. Generate treatment assignment W ~ Bern(e(X))
    e_x = propensity_e(X, alpha=alpha)
    W = rng.binomial(1, e_x)

    # Observed outcome: reveal only the potential outcome that was assigned
    Y = np.where(W == 1, Y1, Y0)

    return X, W, Y, Y0, Y1, tau, e_x
