In [129]:
import numpy as np
import itertools
import pandas as pd
from scipy.stats import multivariate_normal, norm
import gurobipy as gp
from gurobipy import GRB
from IPython.display import display, Math

##### 2.1 Problem Setup and Weighting Framework

Calculate the true factorial effects, both main effects and two-way interaction effects, in the simulation.

In [204]:
# Set up the parameters
np.random.seed(42)  # Fix the global RNG so the printed estimates are reproducible on re-run

K = 3  # Number of binary factors
Q = 2 ** K  # Number of possible treatment combinations
N = 1000  # Number of subjects

# Generate all possible treatment combinations (one row per combination, entries in {-1, +1})
treatment_combinations = np.array(list(itertools.product([-1, 1], repeat=K)))

# Simulate covariates X (D-dimensional, here D=3 for simplicity)
D = 3
X = np.random.normal(0, 1, size=(N, D))

# Simulate potential outcomes for each individual and each treatment combination
# Assume potential outcomes depend on treatment and covariates linearly
beta_treatment = np.random.randn(Q)  # Random effect size for each treatment combination
beta_covariate = np.random.randn(D)  # Effect size for covariates

# Y_potential[i, q] = beta_treatment[q] + X[i] . beta_covariate + N(0, 1) noise,
# built in one vectorized expression instead of an N x Q Python loop
Y_potential = (
    beta_treatment[np.newaxis, :]
    + (X @ beta_covariate)[:, np.newaxis]
    + np.random.normal(0, 1, size=(N, Q))
)

# Assign each individual a random treatment from the possible combinations
Z_indices = np.random.choice(Q, size=N)  # Random treatment assignment
Y_observed = Y_potential[np.arange(N), Z_indices]  # Observed outcome

# Mean observed outcome per treatment combination. Computed once because both the
# main-effect and the interaction contrasts below use it. A combination with no
# assigned units falls back to 0.
E_Y = np.array([
    Y_observed[Z_indices == q].mean() if np.any(Z_indices == q) else 0
    for q in range(Q)
])

# Estimate main effects
# Contrast vector for factor k is column k of the +/-1 design
main_effects = []
for k in range(K):
    g_k = treatment_combinations[:, k]
    tau_k = (1 / (2 ** (K - 1))) * g_k @ E_Y
    main_effects.append(tau_k)

# Estimate interaction effects between two factors
interaction_effects = {}
for k1, k2 in itertools.combinations(range(K), 2):
    g_k1 = treatment_combinations[:, k1]
    g_k2 = treatment_combinations[:, k2]
    g_interaction = g_k1 * g_k2  # Element-wise product gives the two-way interaction contrast
    tau_k1k2 = (1 / (2 ** (K - 1))) * g_interaction @ E_Y
    interaction_effects[(k1, k2)] = tau_k1k2

# Present results
results = {
    "Main Effects": main_effects,
    "Interaction Effects": interaction_effects
}

# Convert results to DataFrame for display
main_effects_df = pd.DataFrame({
    'Factor': [f'z_{k+1}' for k in range(K)],
    'Estimated Main Effect': main_effects
})

interaction_effects_df = pd.DataFrame(
    [(f'z_{k1+1} & z_{k2+1}', effect) for (k1, k2), effect in interaction_effects.items()],
    columns=['Interaction', 'Estimated Effect']
)

# Display the DataFrames using standard methods
print("Main Effects Estimates:\n")
print(main_effects_df)

print("\nInteraction Effects Estimates:\n")
print(interaction_effects_df)


Main Effects Estimates:

  Factor  Estimated Main Effect
0    z_1               0.097909
1    z_2               0.370757
2    z_3               0.510474

Interaction Effects Estimates:

  Interaction  Estimated Effect
0   z_1 & z_2          0.974234
1   z_1 & z_3          0.829300
2   z_2 & z_3          0.143967


##### 2.2 Weighting for Observational Factorial Studies

Calculate the estimated factorial effect tau_hat in observational factorial study by applying a weighting function

In [131]:

# Simulate f(X) and f_z(X) for weighting function
# Assume f(X) is standard normal and f_z(X) shifts mean for treatment groups
# NOTE(review): Z_indices is expected to come from the setup cell, where it is drawn with
# np.random.choice independently of X; the mean shift assumed here is therefore not the
# mechanism that generated the assignments -- confirm this is intended.
f_X = norm.pdf(X)

# Density of each unit's covariates under its own group's shifted mean (0.5 * the +/-1
# treatment vector). Broadcasting X (N, D) against the (N, K) means requires D == K.
shifted_means = treatment_combinations[Z_indices] * 0.5
f_z = np.prod(norm.pdf(X, loc=shifted_means, scale=1), axis=1)

# Weighting function w_z(X) = f(X) / f_z(X)
w_z = np.prod(f_X, axis=1) / f_z
# Rescale by N / (2^(K-1) * N_z), where N_z is the size of the unit's own treatment group
w_i = N * w_z / (2 ** (K - 1) * np.bincount(Z_indices, minlength=Q)[Z_indices])

# Calculate the weighting estimator for factorial effects
weighted_effects = []
for k in range(K):
    g_k = treatment_combinations[:, k]
    g_k_pos = np.maximum(g_k, 0)
    g_k_neg = np.maximum(-g_k, 0)

    # A_iK^+ and A_iK^-: look up each unit's treatment combination in the positive /
    # negative part of the contrast (replaces an O(N*Q) Python double loop)
    A_iK_pos = g_k_pos[Z_indices]
    A_iK_neg = g_k_neg[Z_indices]

    # Weighted estimators for tau_K^+ and tau_K^-
    tau_k_pos = (1 / N) * np.sum(w_i * A_iK_pos * Y_observed)
    tau_k_neg = (1 / N) * np.sum(w_i * A_iK_neg * Y_observed)

    # Combine to get the weighted factorial effect
    tau_k_weighted = tau_k_pos - tau_k_neg
    weighted_effects.append(tau_k_weighted)

# Interaction effects using the weighting estimator
interaction_effects_weighted = {}
for k1, k2 in itertools.combinations(range(K), 2):
    g_k1 = treatment_combinations[:, k1]
    g_k2 = treatment_combinations[:, k2]
    g_interaction = g_k1 * g_k2

    g_interaction_pos = np.maximum(g_interaction, 0)
    g_interaction_neg = np.maximum(-g_interaction, 0)

    A_iK_pos = g_interaction_pos[Z_indices]
    A_iK_neg = g_interaction_neg[Z_indices]

    tau_inter_pos = (1 / N) * np.sum(w_i * A_iK_pos * Y_observed)
    tau_inter_neg = (1 / N) * np.sum(w_i * A_iK_neg * Y_observed)

    tau_inter_weighted = tau_inter_pos - tau_inter_neg
    interaction_effects_weighted[(k1, k2)] = tau_inter_weighted

# Prepare results
weighted_main_effects_df = pd.DataFrame({
    'Factor': [f'z_{k+1}' for k in range(K)],
    'Weighted Effect': weighted_effects
})

weighted_interaction_effects_df = pd.DataFrame(
    [(f'z_{k1+1} & z_{k2+1}', effect) for (k1, k2), effect in interaction_effects_weighted.items()],
    columns=['Interaction', 'Weighted Effect']
)

# Display results
print("Weighted Main Effects Using Weighting Estimator:\n", weighted_main_effects_df)
print("\nWeighted Interaction Effects Using Weighting Estimator:\n", weighted_interaction_effects_df)


Weighted Main Effects Using Weighting Estimator:
   Factor  Weighted Effect
0    z_1        -2.987851
1    z_2         4.016794
2    z_3         0.297760

Weighted Interaction Effects Using Weighting Estimator:
   Interaction  Weighted Effect
0   z_1 & z_2        -1.370218
1   z_1 & z_3        -3.425903
2   z_2 & z_3         0.728277


#### 3.1 Weighting for Estimating a Single Factorial Effect 

##### 3.1.1 General Additive Outcome Model

The model assumes outcomes are affected by both covariates and factorial interactions.

The weighting balances both: Covariates across treatment assignments and Higher-order interactions that can bias the factorial effect estimation.

In [132]:

# Parameters
K = 3  # Number of binary factors
Q = 2 ** K  # Number of treatment combinations
N = 1000  # Number of individuals

# Generate treatment combinations (one row per combination, entries in {-1, +1})
treatment_combinations = np.array(list(itertools.product([-1, 1], repeat=K)))

# Simulate covariates X (D-dimensional)
D = 3
X = np.random.normal(0, 1, size=(N, D))

# Simulate potential outcomes using a general additive model
S = 3  # Number of basis functions
alpha = np.random.randn(S)
beta = np.random.randn(Q)

# Define basis functions h_s(X) = X^s (simple polynomial basis)
def h_s(X, s):
    """Return X raised element-wise to the power s + 1 (s is a zero-based basis index)."""
    return X ** (s + 1)

# Compute mu(X) and nu(z)
mu_X = np.sum([alpha[s] * h_s(X, s).sum(axis=1) for s in range(S)], axis=0)
# nu(z) = beta_q * prod(z) depends only on the treatment combination, so compute the
# Q values once and repeat them for every unit instead of looping over N x Q entries
nu_z = np.tile(beta * np.prod(treatment_combinations, axis=1), (N, 1))

# Simulate potential outcomes with reshaping for broadcasting
Y_potential = mu_X[:, np.newaxis] + nu_z + np.random.normal(0, 1, size=(N, Q))


# Random assignment of treatments
Z_indices = np.random.choice(Q, size=N)
Y_observed = Y_potential[np.arange(N), Z_indices]

# Simulate f(X) and f_z(X) for weighting
# NOTE(review): Z_indices above is drawn independently of X, so the mean shift assumed for
# f_z is not the mechanism that generated the assignments -- confirm this is intended.
f_X = norm.pdf(X)
# Density of each unit's covariates under its own group's shifted mean (requires D == K)
shifted_means = treatment_combinations[Z_indices] * 0.5
f_z = np.prod(norm.pdf(X, loc=shifted_means, scale=1), axis=1)

# Weighting function
w_z = np.prod(f_X, axis=1) / f_z
w_i = N * w_z / (2 ** (K - 1) * np.bincount(Z_indices, minlength=Q)[Z_indices])

# Calculate the new weighting estimator
weighted_effects = []
for k in range(K):
    g_k = treatment_combinations[:, k]
    g_k_pos = np.maximum(g_k, 0)
    g_k_neg = np.maximum(-g_k, 0)

    # A_iK^+ and A_iK^-: index the positive / negative contrast parts by each unit's
    # treatment combination (replaces an O(N*Q) Python double loop)
    A_iK_pos = g_k_pos[Z_indices]
    A_iK_neg = g_k_neg[Z_indices]

    # Weighted estimators for tau_K^+ and tau_K^-
    tau_k_pos = (1 / N) * np.sum(w_i * A_iK_pos * Y_observed)
    tau_k_neg = (1 / N) * np.sum(w_i * A_iK_neg * Y_observed)

    # Combine to get the weighted factorial effect
    tau_k_weighted = tau_k_pos - tau_k_neg
    weighted_effects.append(tau_k_weighted)

# Interaction effects using the new weighting estimator
interaction_effects_weighted = {}
for k1, k2 in itertools.combinations(range(K), 2):
    g_k1 = treatment_combinations[:, k1]
    g_k2 = treatment_combinations[:, k2]
    g_interaction = g_k1 * g_k2

    g_interaction_pos = np.maximum(g_interaction, 0)
    g_interaction_neg = np.maximum(-g_interaction, 0)

    A_iK_pos = g_interaction_pos[Z_indices]
    A_iK_neg = g_interaction_neg[Z_indices]

    tau_inter_pos = (1 / N) * np.sum(w_i * A_iK_pos * Y_observed)
    tau_inter_neg = (1 / N) * np.sum(w_i * A_iK_neg * Y_observed)

    tau_inter_weighted = tau_inter_pos - tau_inter_neg
    interaction_effects_weighted[(k1, k2)] = tau_inter_weighted

# Prepare results
weighted_main_effects_df = pd.DataFrame({
    'Factor': [f'z_{k+1}' for k in range(K)],
    'Weighted Effect': weighted_effects
})

weighted_interaction_effects_df = pd.DataFrame(
    [(f'z_{k1+1} & z_{k2+1}', effect) for (k1, k2), effect in interaction_effects_weighted.items()],
    columns=['Interaction', 'Weighted Effect']
)

# Display results
print("Weighted Main Effects Using New Weighting Estimator:\n", weighted_main_effects_df)
print("\nWeighted Interaction Effects Using New Weighting Estimator:\n", weighted_interaction_effects_df)


Weighted Main Effects Using New Weighting Estimator:
   Factor  Weighted Effect
0    z_1       -24.132946
1    z_2       -23.197599
2    z_3       -31.894444

Weighted Interaction Effects Using New Weighting Estimator:
   Interaction  Weighted Effect
0   z_1 & z_2       -12.284402
1   z_1 & z_3        -1.435613
2   z_2 & z_3       -12.994322


##### 3.1.2 Outcome Model with Treatment Effect Heterogeneity

In [133]:
# Parameters
K = 3  # Number of binary factors
Q = 2 ** K  # Number of treatment combinations
N = 1000  # Number of individuals

# Generate treatment combinations (one row per combination, entries in {-1, +1})
treatment_combinations = np.array(list(itertools.product([-1, 1], repeat=K)))

# Simulate covariates X (D-dimensional)
D = 3
X = np.random.normal(0, 1, size=(N, D))

# Simulate potential outcomes using heterogeneous treatment effects
S = 3  # Number of basis functions
alpha = np.random.randn(S, Q)  # Different coefficients for each treatment
beta = np.random.randn(Q)

# Define basis functions h_s(X) = X^s (polynomial basis)
def h_s(X, s):
    """Return X raised element-wise to the power s + 1 (s is a zero-based basis index)."""
    return X ** (s + 1)

# Define heterogeneous basis functions q_sJ(X, z)
def q_sJ(X, z, s):
    """Return h_s(X) multiplied by the product of the entries of z (not called in this cell)."""
    return h_s(X, s) * np.prod(z)

# Compute mu(X, z) with heterogeneity:
# mu_XZ[i, q] = sum_s alpha[s, q] * sum_d X[i, d]^(s+1), written as one matrix product
basis_sums = np.column_stack([h_s(X, s).sum(axis=1) for s in range(S)])  # shape (N, S)
mu_XZ = basis_sums @ alpha  # shape (N, Q)

# Simulate nu(z): depends only on the treatment combination, so compute the Q values
# once and repeat them for every unit
nu_z = np.tile(beta * np.prod(treatment_combinations, axis=1), (N, 1))

# Generate potential outcomes
Y_potential = mu_XZ + nu_z + np.random.normal(0, 1, size=(N, Q))

# Random assignment of treatments
Z_indices = np.random.choice(Q, size=N)
Y_observed = Y_potential[np.arange(N), Z_indices]

# Simulate f(X) and f_z(X) for weighting function
# NOTE(review): Z_indices above is drawn independently of X, so the mean shift assumed for
# f_z is not the mechanism that generated the assignments -- confirm this is intended.
f_X = norm.pdf(X)
# Density of each unit's covariates under its own group's shifted mean (requires D == K)
shifted_means = treatment_combinations[Z_indices] * 0.5
f_z = np.prod(norm.pdf(X, loc=shifted_means, scale=1), axis=1)

# Weighting function
w_z = np.prod(f_X, axis=1) / f_z
w_i = N * w_z / (2 ** (K - 1) * np.bincount(Z_indices, minlength=Q)[Z_indices])

# Calculate the weighting estimator with heterogeneity
weighted_effects = []
for k in range(K):
    g_k = treatment_combinations[:, k]
    g_k_pos = np.maximum(g_k, 0)
    g_k_neg = np.maximum(-g_k, 0)

    # A_iK^+ and A_iK^-: index the positive / negative contrast parts by each unit's
    # treatment combination (replaces an O(N*Q) Python double loop)
    A_iK_pos = g_k_pos[Z_indices]
    A_iK_neg = g_k_neg[Z_indices]

    # Weighted estimators for tau_K^+ and tau_K^-
    tau_k_pos = (1 / N) * np.sum(w_i * A_iK_pos * Y_observed)
    tau_k_neg = (1 / N) * np.sum(w_i * A_iK_neg * Y_observed)

    # Combine to get the weighted factorial effect
    tau_k_weighted = tau_k_pos - tau_k_neg
    weighted_effects.append(tau_k_weighted)

# Interaction effects with heterogeneity
interaction_effects_weighted = {}
for k1, k2 in itertools.combinations(range(K), 2):
    g_k1 = treatment_combinations[:, k1]
    g_k2 = treatment_combinations[:, k2]
    g_interaction = g_k1 * g_k2

    g_interaction_pos = np.maximum(g_interaction, 0)
    g_interaction_neg = np.maximum(-g_interaction, 0)

    A_iK_pos = g_interaction_pos[Z_indices]
    A_iK_neg = g_interaction_neg[Z_indices]

    tau_inter_pos = (1 / N) * np.sum(w_i * A_iK_pos * Y_observed)
    tau_inter_neg = (1 / N) * np.sum(w_i * A_iK_neg * Y_observed)

    tau_inter_weighted = tau_inter_pos - tau_inter_neg
    interaction_effects_weighted[(k1, k2)] = tau_inter_weighted

# Prepare results
weighted_main_effects_df = pd.DataFrame({
    'Factor': [f'z_{k+1}' for k in range(K)],
    'Weighted Effect': weighted_effects
})

weighted_interaction_effects_df = pd.DataFrame(
    [(f'z_{k1+1} & z_{k2+1}', effect) for (k1, k2), effect in interaction_effects_weighted.items()],
    columns=['Interaction', 'Weighted Effect']
)

# Display results
print("Weighted Main Effects with Heterogeneity:\n", weighted_main_effects_df)
print("\nWeighted Interaction Effects with Heterogeneity:\n", weighted_interaction_effects_df)


Weighted Main Effects with Heterogeneity:
   Factor  Weighted Effect
0    z_1         0.482502
1    z_2        -5.983616
2    z_3         6.097514

Weighted Interaction Effects with Heterogeneity:
   Interaction  Weighted Effect
0   z_1 & z_2        -1.208426
1   z_1 & z_3        -1.007165
2   z_2 & z_3         7.781101


#### Simulate the proposed weighting estimator using additive balance constraints under a general additive model. 
##### The proposed weighting estimator uses balance constraints under the general additive model assumption (additive balance constraints) with covariate basis functions $h_s(\mathbf{X}) = X_s$, $s = 1, \dots, 5$.

I wrote out the potential outcomes $Y_1$, $Y_2$, $Y_3$ following the simulation setting in Section 5 and then calculated their expectations $\mathbb{E}[\mathbf{Y}]$. Following Section 3.1.1, the weights $w_i$ are obtained by using Gurobi to minimize the weights subject to the balance constraints below. After optimization, I extract the optimized weights and first calculate the true factorial effects $\tau$ using the formula $\tau_k = \frac{1}{2^{K-1}} \mathbf{g}_k^{\mathrm{T}} \mathbb{E}[\mathbf{Y}]$, and then calculate the estimated weighted factorial effects $\hat\tau$ as in Section 2.2. Lastly, I calculate the RMSE, which is the square root of the average of $(\hat\tau-\tau)^2$.


$$
\sum_{i=1}^{N} w_i A_{i \kappa}^{\Omega} h_s(\mathbf{X}_i) = \sum_{i=1}^{N} h_s(\mathbf{X}_i), \quad \text{for } \Omega = +, - \text{ and } s = 1, \dots, S,
$$


In [295]:
np.random.seed(42)

# Step 1: Generate Xi from a multivariate normal distribution
def generate_covariates(N, mu, rho):
    """Draw N covariate vectors from a multivariate normal with equicorrelated covariance.

    Parameters
    ----------
    N : int
        Number of draws passed to ``multivariate_normal.rvs`` as ``size``.
    mu : array-like
        Mean vector; its length sets the covariate dimension.
    rho : float
        Common off-diagonal entry of the covariance matrix.

    Returns
    -------
    The sample returned by ``multivariate_normal.rvs``.
    """
    dim = len(mu)
    identity = np.eye(dim)
    # rho everywhere plus (1 - rho) on the diagonal
    covariance = rho * np.ones((dim, dim)) + (1 - rho) * identity
    return multivariate_normal.rvs(mean=mu, cov=covariance, size=N)

# Step 2: Generate 3 treatment assignments Zik based on logistic regression
def generate_treatment(X, betas):
    """Draw one binary treatment column per coefficient vector in ``betas``.

    For each vector, the assignment probability of every row of ``X`` is the logistic
    function of ``X @ beta``; the column is then sampled with ``np.random.binomial``.

    Parameters
    ----------
    X : ndarray
        Covariate matrix with one row per unit.
    betas : sequence of ndarray
        One coefficient vector per treatment factor.

    Returns
    -------
    ndarray of shape (X.shape[0], len(betas)) holding 0.0 / 1.0 assignments.
    """
    n_units = X.shape[0]
    Z = np.zeros((n_units, len(betas)))

    # Factors are drawn one after another, each from its own logistic model
    for column, coefficients in enumerate(betas):
        linear_score = X @ coefficients
        assignment_prob = 1 / (1 + np.exp(-linear_score))
        Z[:, column] = np.random.binomial(1, assignment_prob)

    return Z

# Step 3: Simulate potential outcomes Yi1, Yi2, Yi3
def simulate_outcomes(X, Z):
    """Simulate the three outcome vectors Y1, Y2, Y3 from covariates X and treatments Z.

    Y1 is additive in the covariate sum and the treatment sum, Y2 replaces the treatment
    sum with products of the first three covariates and the treatments, and Y3 is a
    nonlinear function of the first two covariates and the three treatment columns.
    Each outcome gets its own standard-normal noise vector.

    Returns
    -------
    tuple (Y1, Y2, Y3) of 1-D arrays with one entry per row of X.
    """
    n_units = X.shape[0]
    # Noise is drawn in the order Y1, Y2, Y3
    noise_1, noise_2, noise_3 = (np.random.normal(0, 1, n_units) for _ in range(3))

    x_first, x_second = X[:, 0], X[:, 1]

    # Simulate outcomes based on the paper's specifications
    Y1 = 2 * np.sum(X, axis=1) + np.sum(Z, axis=1) + noise_1

    covariate_by_treatment = np.sum(X[:, :3] * Z, axis=1)  # Interaction with first 3 covariates
    Y2 = 2 * np.sum(X, axis=1) + covariate_by_treatment + noise_2

    Y3 = (
        np.sin(x_first)
        + np.cos(x_second)
        + (np.minimum(1, x_first) + x_second) * Z[:, 0]
        + np.sum(X[:, :2] * Z[:, 1:3], axis=1)
        + noise_3
    )

    return Y1, Y2, Y3

# Step 4: Optimize weights using Gurobi with variance minimization
def optimize_weights(X, Z):
    """Solve a quadratic program for non-negative weights that balance the covariates.

    One variable w_i >= 0 is created per row of X. For every covariate column s the model
    requires sum_i w_i * X[i, s] == sum_i X[i, s], and the objective minimizes
    sum_i w_i ** 2.

    NOTE(review): ``Z`` is accepted but never read, so the constraints do not involve the
    treatment indicators A_i^+/A_i^-; setting every w_i to 1 already satisfies them.
    Confirm against the intended balance conditions.

    Returns
    -------
    ndarray of the optimized weights, or None when Gurobi reports the model infeasible.
    """
    n_units, n_covariates = X.shape
    model = gp.Model("WeightOptimization")
    model.setParam('OutputFlag', 1)

    w = model.addVars(n_units, lb=0, name="w")  # Non-negative weights

    # Balance constraints: weighted covariate totals must equal the unweighted totals
    for s in range(n_covariates):
        column = X[:, s]
        weighted_total = gp.quicksum(w[i] * column[i] for i in range(n_units))
        model.addConstr(weighted_total == column.sum())

    # Objective: sum of squared weights
    model.setObjective(gp.quicksum(w[i] * w[i] for i in range(n_units)), GRB.MINIMIZE)

    model.optimize()

    if model.status == GRB.INFEASIBLE:
        print("The model is infeasible.")
        return None

    # Extract optimized weights
    return np.array([w[i].X for i in range(n_units)])

# Step 5: Calculate the true factorial main effects tau for each treatment factor
def calculate_tau(Y, Z, K):
    """Compute main-effect contrasts of the per-combination mean outcomes.

    Each row of Z (0/1 entries) is mapped to a treatment-combination index by reading the
    row as a binary number, most significant bit first. ``E_Y[q]`` is the mean of Y over
    the units in combination q, or 0 when no unit has that combination. For factor k the
    contrast g_k is +1 where bit k of the combination is 1 and -1 otherwise, and
    tau_k = g_k @ E_Y / 2**(K-1).

    Parameters
    ----------
    Y : ndarray
        Outcome per unit.
    Z : ndarray
        Treatment matrix with one 0/1 column per factor.
    K : int
        Number of factors.

    Returns
    -------
    ndarray of length K with one effect per factor.
    """
    Q = 2 ** K

    # Row q holds the K-bit binary expansion of q (most significant bit first)
    bit_shifts = np.arange(K - 1, -1, -1)
    treatment_combinations = (np.arange(Q)[:, np.newaxis] >> bit_shifts) & 1

    # Assign each individual to a treatment combination index
    levels = np.asarray(Z).astype(int)
    Z_indices = levels @ (1 << np.arange(levels.shape[1] - 1, -1, -1))

    # Mean outcome per treatment combination; identical for every factor, so computed once
    E_Y = np.array([
        Y[Z_indices == q].mean() if np.any(Z_indices == q) else 0
        for q in range(Q)
    ])

    main_effects = []
    for k in range(K):  # For each treatment factor
        g_k = 2 * treatment_combinations[:, k] - 1  # Contrast vector for the kth factor
        main_effects.append((1 / (2 ** (K - 1))) * g_k @ E_Y)

    return np.array(main_effects)

# Step 6: Calculate estimated factorial effects tau_hat for each treatment factor with weights
def calculate_tau_hat(Y, weights, Z, K):
    """Compute the weighted main-effect estimate for each of the K factors.

    For factor k, A^+ marks units whose treatment combination has bit k equal to 1 and
    A^- marks the rest. The estimate is
    (1/N) * sum(weights * A^+ * Y) - (1/N) * sum(weights * A^- * Y).

    Parameters
    ----------
    Y : ndarray
        Outcome per unit.
    weights : ndarray
        One weight per unit.
    Z : ndarray
        Treatment matrix with one 0/1 column per factor.
    K : int
        Number of factors.

    Returns
    -------
    ndarray of length K with one estimate per factor.
    """
    N = len(Y)
    Q = 2 ** K  # Number of treatment combinations

    # Row q holds the K-bit binary expansion of q (most significant bit first)
    bit_shifts = np.arange(K - 1, -1, -1)
    treatment_combinations = (np.arange(Q)[:, np.newaxis] >> bit_shifts) & 1

    # Assign each individual to a treatment combination index
    levels = np.asarray(Z).astype(int)
    Z_indices = levels @ (1 << np.arange(levels.shape[1] - 1, -1, -1))

    weighted_effects = []
    for k in range(K):
        # Create contrast vector specific to treatment factor k
        g_k = 2 * treatment_combinations[:, k] - 1

        # A_iK^+ and A_iK^-: index the positive / negative contrast parts by each unit's
        # combination (replaces an O(N*Q) Python double loop per factor)
        A_iK_pos = np.maximum(g_k, 0)[Z_indices]
        A_iK_neg = np.maximum(-g_k, 0)[Z_indices]

        # Weighted estimators for tau_K^+ and tau_K^-
        tau_k_pos = (1 / N) * np.sum(weights * A_iK_pos * Y)
        tau_k_neg = (1 / N) * np.sum(weights * A_iK_neg * Y)

        # Combine to get the weighted factorial effect
        weighted_effects.append(tau_k_pos - tau_k_neg)

    return np.array(weighted_effects)

# Step 7: Calculate RMSE
def calculate_rmse(tau_hat, tau):
    """Return the root of the mean squared difference between ``tau_hat`` and ``tau``."""
    residual = tau_hat - tau
    mean_squared_error = np.mean(residual ** 2)
    return np.sqrt(mean_squared_error)

# Step 8: Check covariate balance by calculating standardized mean differences (SMD)

In [296]:
# Running the simulation
np.random.seed(42)

N = 200
mu = np.array([0.1, 0.1, 0.1, 0, 0]).T  # Mean vector for 5 covariates
rho = 0
repetitions = 100
beta1 = np.array([1/4, 2/4, 0, 3/4, 1])
beta2 = np.array([3/4, 1/4, 1, 0, 2/4])
beta3 = np.array([1, 0, 3/4, 2/4, 1/4])
betas = [beta1, beta2, beta3]
K = 3
model_names = ['Additive Outcome (Y1)', 'Heterogeneous Treatment Effect (Y2)', 'Misspecified Outcome (Y3)']

# outcome_results[model, repetition, 1, factor] holds the squared error; slot 0 of the
# third axis is left at zero
outcome_results = np.zeros((3, repetitions, 2, K))
# Per-model, per-repetition effects, so the summary reports each model's own values
# rather than whatever the last loop iteration left behind
tau_true_runs = np.zeros((len(model_names), repetitions, K))
tau_hat_runs = np.zeros((len(model_names), repetitions, K))

for rep in range(repetitions):
    X = generate_covariates(N, mu, rho)
    Z = generate_treatment(X, betas)
    Y1,Y2,Y3 = simulate_outcomes(X, Z)
    additive_weights = optimize_weights(X, Z)
    if additive_weights is None:
        # optimize_weights returns None when the model is infeasible; fail with a clear message
        raise RuntimeError(f"Weight optimization returned no weights in repetition {rep}.")

    outcomes = [Y1, Y2, Y3]
    for idx, Y in enumerate(outcomes):
        tau_true = calculate_tau(Y, Z, K)
        tau_hat = calculate_tau_hat(Y, additive_weights, Z, K)
        tau_true_runs[idx, rep] = tau_true
        tau_hat_runs[idx, rep] = tau_hat
        outcome_results[idx, rep, 1, :] = (tau_hat - tau_true) ** 2  # Squared error for RMSE calculation

# Aggregate results: root of the mean squared error over repetitions, per model and factor
average_rmse = np.sqrt(np.mean(outcome_results[:, :, 1, :], axis=1))

# Display results
for idx, model in enumerate(model_names):
    print(f"\nModel: {model}")
    print(f"True factorial effects (tau), mean over repetitions: {tau_true_runs[idx].mean(axis=0)}")
    print(f"Estimated factorial effects (tau_hat), mean over repetitions: {tau_hat_runs[idx].mean(axis=0)}")
    print("Average RMSE for each factor:", average_rmse[idx])

Set parameter OutputFlag to value 1
Gurobi Optimizer version 12.0.1 build v12.0.1rc0 (mac64[x86] - Darwin 23.6.0 23G93)

CPU model: Intel(R) Core(TM) i5-1038NG7 CPU @ 2.00GHz
Thread count: 4 physical cores, 8 logical processors, using up to 8 threads

Optimize a model with 5 rows, 200 columns and 1000 nonzeros
Model fingerprint: 0xe9f29f77
Model has 200 quadratic objective terms
Coefficient statistics:
  Matrix range     [3e-03, 4e+00]
  Objective range  [0e+00, 0e+00]
  QObjective range [2e+00, 2e+00]
  Bounds range     [0e+00, 0e+00]
  RHS range        [1e+01, 2e+01]
Presolve time: 0.02s
Presolved: 5 rows, 200 columns, 1000 nonzeros
Presolved model has 200 quadratic objective terms
Ordering time: 0.00s

Barrier statistics:
 AA' NZ     : 1.000e+01
 Factor NZ  : 1.500e+01
 Factor Ops : 5.500e+01 (less than 1 second per iteration)
 Threads    : 1

                  Objective                Residual
Iter       Primal          Dual         Primal    Dual     Compl     Time
   0   5.000624

#### Proposed Weighting Estimator with Treatment Effect Heterogeneity: Incorporates interaction balance constraints.

##### The proposed weighting estimator uses balance constraints under the outcome model assumption with treatment effect heterogeneity (interaction balance constraints) and the same set of basis functions as (iii).

I wrote out the potential outcomes $Y_1$, $Y_2$, $Y_3$ following the simulation setting in Section 5 and then calculated their expectations $\mathbb{E}[\mathbf{Y}]$. Following Section 3.1.2, the weights $w_i$ are obtained by using Gurobi to minimize the weights subject to the balance constraints below. After optimization, I extract the optimized weights and first calculate the true factorial effects $\tau$ using the formula $\tau_k = \frac{1}{2^{K-1}} \mathbf{g}_k^{\mathrm{T}} \mathbb{E}[\mathbf{Y}]$, and then calculate the estimated weighted factorial effects $\hat\tau$ as in Section 2.2. Lastly, I calculate the RMSE, which is the square root of the average of $(\hat\tau-\tau)^2$.

$$
\sum_{i=1}^{N} w_i A_{i \kappa}^{\Omega} \mathbf{q}(\mathbf{X}_i, \mathbf{Z}_i) = \frac{1}{2^{K-1}} \sum_{\mathbf{z} \in \mathcal{Z}} g_{\kappa \mathbf{z}}^{\Omega} \sum_{i=1}^{N} \mathbf{q}(\mathbf{X}_i, \mathbf{z}), \quad \text{for } \Omega = +, -.
$$


In [287]:
np.random.seed(42)

def generate_covariates(N, mu, rho):
    """Draw N covariate vectors from a multivariate normal with equicorrelated covariance.

    The covariance has ``rho`` off the diagonal and ``rho + (1 - rho)`` on it; its size is
    taken from ``len(mu)``. Returns the sample produced by ``multivariate_normal.rvs``.
    """
    dim = len(mu)
    all_ones = np.ones((dim, dim))
    covariance = rho * all_ones + (1 - rho) * np.eye(dim)
    sample = multivariate_normal.rvs(mean=mu, cov=covariance, size=N)
    return sample

def generate_treatment(X, betas):
    """Draw one binary treatment column per coefficient vector in ``betas``.

    For each vector, the assignment probability of every row of ``X`` is the logistic
    function of ``X @ beta``; the column is then sampled with ``np.random.binomial``.
    The number of factors is taken from ``len(betas)`` rather than being fixed at 3, so
    any number of coefficient vectors is handled.

    Parameters
    ----------
    X : ndarray
        Covariate matrix with one row per unit.
    betas : sequence of ndarray
        One coefficient vector per treatment factor.

    Returns
    -------
    ndarray of shape (X.shape[0], len(betas)) holding 0.0 / 1.0 assignments.
    """
    N = X.shape[0]
    K = len(betas)  # Number of factors
    Z = np.zeros((N, K))  # Initialize treatment matrix

    # Generate treatments independently for each factor
    for k in range(K):
        logits = X @ betas[k]  # Linear score fed to the logistic function
        prob = 1 / (1 + np.exp(-logits))
        Z[:, k] = np.random.binomial(1, prob)  # Generate binary assignment

    return Z

# Simulate all three outcome models
def simulate_all_outcomes(X, Z):
    """Simulate the three outcome vectors Y1, Y2, Y3 from covariates X and treatments Z.

    Each outcome receives its own standard-normal noise vector, drawn in the order
    Y1, Y2, Y3. Returns the tuple (Y1, Y2, Y3).
    """
    n_units = X.shape[0]
    noise_1, noise_2, noise_3 = (np.random.normal(0, 1, n_units) for _ in range(3))

    x_first, x_second = X[:, 0], X[:, 1]

    # Model 1: Additive outcome
    Y1 = 2 * np.sum(X, axis=1) + np.sum(Z, axis=1) + noise_1

    # Model 2: Heterogeneous treatment effect (interaction between X and Z)
    covariate_by_treatment = np.sum((X[:, :3] * Z), axis=1)
    Y2 = 2 * np.sum(X, axis=1) + covariate_by_treatment + noise_2

    # Model 3: Misspecified nonlinear outcome
    Y3 = (np.sin(x_first) + np.cos(x_second) +
          (np.minimum(1, x_first) + x_second) * Z[:, 0] +
          np.sum(X[:, :2] * Z[:, 1:3], axis=1) + noise_3)

    return Y1, Y2, Y3

def optimize_weights_with_heterogeneity(X, Z):
    """Solve a quadratic program for non-negative weights balancing covariates and X*Z terms.

    One variable w_i >= 0 is created per row of X. Constraints, in the order added:
      * for every covariate column s: sum_i w_i * X[i, s] == sum_i X[i, s];
      * for every treatment column k and covariate column s:
        sum_i w_i * X[i, s] * Z[i, k] == sum_i X[i, s] * Z[i, k].
    The objective minimizes sum_i w_i ** 2.

    NOTE(review): every constraint is satisfied by w_i = 1 for all i, and none of them
    uses the A_i^+/A_i^- indicators -- confirm against the intended interaction balance
    conditions.

    Returns
    -------
    ndarray of the optimized weights, or None when Gurobi reports the model infeasible.
    """
    n_units, n_covariates = X.shape
    n_factors = Z.shape[1]
    model = gp.Model("HeterogeneousWeightOptimization")
    model.setParam('OutputFlag', 1)  # Enable Gurobi output

    # Define variables
    w = model.addVars(n_units, lb=0, name="w")  # Non-negative weights

    def add_balance_constraint(values):
        # Weighted total of `values` must equal its unweighted total
        model.addConstr(gp.quicksum(w[i] * values[i] for i in range(n_units)) == np.sum(values))

    # Covariate balance
    for s in range(n_covariates):
        add_balance_constraint(X[:, s])

    # Balance of each covariate-by-treatment product
    for k in range(n_factors):
        for s in range(n_covariates):
            add_balance_constraint(X[:, s] * Z[:, k])

    # Objective: sum of squared weights
    model.setObjective(gp.quicksum(w[i] * w[i] for i in range(n_units)), GRB.MINIMIZE)

    # Run optimization
    model.optimize()

    if model.status == GRB.INFEASIBLE:
        print("Model is infeasible.")
        return None

    # Extract optimized weights
    return np.array([w[i].X for i in range(n_units)])

def calculate_tau(Y, Z, K):
    """Compute main-effect contrasts of the per-combination mean outcomes.

    Each row of Z (0/1 entries) is mapped to a treatment-combination index by reading the
    row as a binary number, most significant bit first. ``E_Y[q]`` is the mean of Y over
    the units in combination q, or 0 when no unit has that combination. For factor k the
    contrast g_k is +1 where bit k of the combination is 1 and -1 otherwise, and
    tau_k = g_k @ E_Y / 2**(K-1).

    Parameters
    ----------
    Y : ndarray
        Outcome per unit.
    Z : ndarray
        Treatment matrix with one 0/1 column per factor.
    K : int
        Number of factors.

    Returns
    -------
    ndarray of length K with one effect per factor.
    """
    Q = 2 ** K

    # Row q holds the K-bit binary expansion of q (most significant bit first)
    bit_shifts = np.arange(K - 1, -1, -1)
    treatment_combinations = (np.arange(Q)[:, np.newaxis] >> bit_shifts) & 1

    # Assign each individual to a treatment combination index
    levels = np.asarray(Z).astype(int)
    Z_indices = levels @ (1 << np.arange(levels.shape[1] - 1, -1, -1))

    # Mean outcome per treatment combination; identical for every factor, so computed once
    E_Y = np.array([
        Y[Z_indices == q].mean() if np.any(Z_indices == q) else 0
        for q in range(Q)
    ])

    main_effects = []
    for k in range(K):  # For each treatment factor
        g_k = 2 * treatment_combinations[:, k] - 1  # Contrast vector for the kth factor
        main_effects.append((1 / (2 ** (K - 1))) * g_k @ E_Y)

    return np.array(main_effects)

def calculate_tau_hat(Y, weights, Z, K):
    """Compute the weighted main-effect estimate for each of the K factors.

    For factor k, A^+ marks units whose treatment combination has bit k equal to 1 and
    A^- marks the rest. The estimate is
    (1/N) * sum(weights * A^+ * Y) - (1/N) * sum(weights * A^- * Y).

    Parameters
    ----------
    Y : ndarray
        Outcome per unit.
    weights : ndarray
        One weight per unit.
    Z : ndarray
        Treatment matrix with one 0/1 column per factor.
    K : int
        Number of factors.

    Returns
    -------
    ndarray of length K with one estimate per factor.
    """
    N = len(Y)
    Q = 2 ** K  # Number of treatment combinations

    # Row q holds the K-bit binary expansion of q (most significant bit first)
    bit_shifts = np.arange(K - 1, -1, -1)
    treatment_combinations = (np.arange(Q)[:, np.newaxis] >> bit_shifts) & 1

    # Assign each individual to a treatment combination index
    levels = np.asarray(Z).astype(int)
    Z_indices = levels @ (1 << np.arange(levels.shape[1] - 1, -1, -1))

    weighted_effects = []
    for k in range(K):
        # Create contrast vector specific to treatment factor k
        g_k = 2 * treatment_combinations[:, k] - 1

        # A_iK^+ and A_iK^-: index the positive / negative contrast parts by each unit's
        # combination (replaces an O(N*Q) Python double loop per factor)
        A_iK_pos = np.maximum(g_k, 0)[Z_indices]
        A_iK_neg = np.maximum(-g_k, 0)[Z_indices]

        # Weighted estimators for tau_K^+ and tau_K^-
        tau_k_pos = (1 / N) * np.sum(weights * A_iK_pos * Y)
        tau_k_neg = (1 / N) * np.sum(weights * A_iK_neg * Y)

        # Combine to get the weighted factorial effect
        weighted_effects.append(tau_k_pos - tau_k_neg)

    return np.array(weighted_effects)


def calculate_rmse(tau_hat, tau):
    """Return the root of the mean squared difference between ``tau_hat`` and ``tau``."""
    squared_errors = (tau_hat - tau) ** 2
    return np.sqrt(np.mean(squared_errors))


# Check covariate balance using optimized weights


In [292]:
np.random.seed(42)

N = 200  # Sample size
mu = np.array([0.1, 0.1, 0.1, 0, 0]).T  # Mean vector for 5 covariates
rho = 0  # Covariance
repetitions = 100
K = 3
beta1 = np.array([1/4, 2/4, 0, 3/4, 1])
beta2 = np.array([3/4, 1/4, 1, 0, 2/4])
beta3 = np.array([1, 0, 3/4, 2/4, 1/4])
betas = [beta1, beta2, beta3]
# Defined before the loop so this cell does not rely on a value left behind by another cell
model_names = ['Additive Outcome (Y1)', 'Heterogeneous Treatment Effect (Y2)', 'Misspecified Outcome (Y3)']

# outcome_results[model, repetition, 1, factor] holds the squared error; slot 0 of the
# third axis is left at zero
outcome_results = np.zeros((3, repetitions, 2, K))
# Per-model, per-repetition effects, so the summary reports each model's own values
# rather than whatever the last loop iteration left behind
tau_true_runs = np.zeros((len(model_names), repetitions, K))
tau_hat_runs = np.zeros((len(model_names), repetitions, K))

for rep in range(repetitions):
    X = generate_covariates(N, mu, rho)
    Z = generate_treatment(X, betas)
    Y1,Y2,Y3 = simulate_all_outcomes(X, Z)
    weights = optimize_weights_with_heterogeneity(X, Z)
    if weights is None:
        # The optimizer returns None when the model is infeasible; fail with a clear message
        raise RuntimeError(f"Weight optimization returned no weights in repetition {rep}.")

    outcomes = [Y1, Y2, Y3]
    for idx, Y in enumerate(outcomes):
        tau_true = calculate_tau(Y, Z, K)
        tau_hat = calculate_tau_hat(Y, weights, Z, K)
        tau_true_runs[idx, rep] = tau_true
        tau_hat_runs[idx, rep] = tau_hat
        outcome_results[idx, rep, 1, :] = (tau_hat - tau_true) ** 2  # Squared error for RMSE calculation

# Aggregate results: root of the mean squared error over repetitions, per model and factor
average_rmse = np.sqrt(np.mean(outcome_results[:, :, 1, :], axis=1))

# Display results
for idx, model in enumerate(model_names):
    print(f"\nModel: {model}")
    print(f"True factorial effects (tau), mean over repetitions: {tau_true_runs[idx].mean(axis=0)}")
    print(f"Estimated factorial effects (tau_hat), mean over repetitions: {tau_hat_runs[idx].mean(axis=0)}")
    print("Average RMSE for each factor:", average_rmse[idx])


Set parameter OutputFlag to value 1
Gurobi Optimizer version 12.0.1 build v12.0.1rc0 (mac64[x86] - Darwin 23.6.0 23G93)

CPU model: Intel(R) Core(TM) i5-1038NG7 CPU @ 2.00GHz
Thread count: 4 physical cores, 8 logical processors, using up to 8 threads

Optimize a model with 20 rows, 200 columns and 2560 nonzeros
Model fingerprint: 0xfbc4cb83
Model has 200 quadratic objective terms
Coefficient statistics:
  Matrix range     [3e-03, 4e+00]
  Objective range  [0e+00, 0e+00]
  QObjective range [2e+00, 2e+00]
  Bounds range     [0e+00, 0e+00]
  RHS range        [2e+00, 5e+01]
Presolve time: 0.02s
Presolved: 20 rows, 200 columns, 2560 nonzeros
Presolved model has 200 quadratic objective terms
Ordering time: 0.00s

Barrier statistics:
 AA' NZ     : 1.900e+02
 Factor NZ  : 2.100e+02
 Factor Ops : 2.870e+03 (less than 1 second per iteration)
 Threads    : 1

                  Objective                Residual
Iter       Primal          Dual         Primal    Dual     Compl     Time
   0   5.0097