#### Matching part

Balance Constrains: Balancing univariate moments by introducing additional decision 

In [68]:
import gurobipy as gp
from gurobipy import GRB

# Define sets
T = [1, 2, 3]  # Treated units
C = [4, 5, 6, 7, 8, 9]  # Control units
I = [1, 2]  # Covariates
m = 2  # Number of controls per treated unit

# Example distances δ_t,c
delta = {(t, c): abs(t - c) for t in T for c in C}

# Example covariate values (randomized for illustration)
x = {
    (4, 1): 10, (4, 2): 20, (5, 1): 15, (5, 2): 25,
    (6, 1): 12, (6, 2): 22, (7, 1): 11, (7, 2): 21,
    (8, 1): 14, (8, 2): 24, (9, 1): 13, (9, 2): 23
}

# Mean covariate values for treated group (example values)
x_T = {1: 13, 2: 23}  # Mean for each covariate in treated group

# Covariate importance weights (example)
omega = {1: 1.0, 2: 1.0}

# Create Gurobi model
model = gp.Model("Optimal_Matching_Balance")

# Decision variables
a = model.addVars(T, C, vtype=GRB.BINARY, name="a")
z = model.addVars(I, vtype=GRB.CONTINUOUS, name="z")

# Objective function: minimize sum of distances and covariate imbalance
model.setObjective(
    gp.quicksum(delta[t, c] * a[t, c] for t in T for c in C) +
    gp.quicksum(omega[i] * z[i] for i in I),
    GRB.MINIMIZE
)

# Constraint: Each treated unit is matched to exactly m controls
for t in T:
    model.addConstr(gp.quicksum(a[t, c] for c in C) == m, f"Match_{t}")

# Constraint: Each control is used at most once
for c in C:
    model.addConstr(gp.quicksum(a[t, c] for t in T) <= 1, f"Control_{c}")

# Constraint: Covariate balance constraints
for i in I:
    model.addConstr(
        z[i] >= gp.quicksum(a[t, c] * x[c, i] for t in T for c in C) / (m * len(T)) - x_T[i],
        f"Balance_Pos_{i}"
    )
    model.addConstr(
        z[i] >= -gp.quicksum(a[t, c] * x[c, i] for t in T for c in C) / (m * len(T)) + x_T[i],
        f"Balance_Neg_{i}"
    )

# Solve the model
model.optimize()

# Print results
if model.status == GRB.OPTIMAL:
    print("Optimal Matching Found:")
    for t in T:
        for c in C:
            if a[t, c].x > 0.5:  # Selected Matches
                print(f"Treated {t} matched with Control {c}")
    print("\nCovariate imbalance values:")
    for i in I:
        print(f"z[{i}] = {z[i].x}")
else:
    print("No optimal solution found.")


Gurobi Optimizer version 12.0.1 build v12.0.1rc0 (mac64[x86] - Darwin 23.6.0 23G93)

CPU model: Intel(R) Core(TM) i5-1038NG7 CPU @ 2.00GHz
Thread count: 4 physical cores, 8 logical processors, using up to 8 threads

Optimize a model with 13 rows, 20 columns and 112 nonzeros
Model fingerprint: 0x0a26da86
Variable types: 2 continuous, 18 integer (18 binary)
Coefficient statistics:
  Matrix range     [1e+00, 4e+00]
  Objective range  [1e+00, 8e+00]
  Bounds range     [1e+00, 1e+00]
  RHS range        [1e+00, 2e+01]
Found heuristic solution: objective 28.0000000
Presolve removed 3 rows and 1 columns
Presolve time: 0.00s
Presolved: 10 rows, 19 columns, 52 nonzeros
Variable types: 0 continuous, 19 integer (18 binary)

Root relaxation: cutoff, 10 iterations, 0.00 seconds (0.00 work units)

    Nodes    |    Current Node    |     Objective Bounds      |     Work
 Expl Unexpl |  Obj  Depth IntInf | Incumbent    BestBd   Gap | It/Node Time

     0     0     cutoff    0        28.00000   28.00000

Balance Constrain: Balance the means, second moments (variance), and cross-product correlation of two covariates

In [69]:
import gurobipy as gp
from gurobipy import GRB

# Define sets
T = [1, 2, 3]  # Treated units
C = [4, 5, 6, 7, 8, 9]  # Control units
P = [1, 2]  # Covariates (p1 and p2)
m = 2  # Number of controls per treated unit

# Example distances δ_t,c
delta = {(t, c): abs(t - c) for t in T for c in C}

# Example covariate values (randomized)
x = {
    (4, 1): 10, (4, 2): 20, (5, 1): 15, (5, 2): 25,
    (6, 1): 12, (6, 2): 22, (7, 1): 11, (7, 2): 21,
    (8, 1): 14, (8, 2): 24, (9, 1): 13, (9, 2): 23
}

# Compute moments for treated units (Example)
x_T = {1: 13, 2: 23}  # Mean
x2_T = {1: 170, 2: 530}  # Second moment (squared)
x_cross_T = 299  # Mean of cross-product x_p1 * x_p2

# Covariate importance weights (Ensure all keys 1 to 5 exist)
omega = {1: 1.0, 2: 1.0, 3: 1.0, 4: 1.0, 5: 1.0}

# Create Gurobi model
model = gp.Model("Optimal_Matching_Multivariate")

# Decision variables
a = model.addVars(T, C, vtype=GRB.BINARY, name="a")
z = model.addVars(range(1, 6), vtype=GRB.CONTINUOUS, name="z")  # Ensured indices 1 to 5

# Objective function: Minimize sum of distances and covariate imbalances
model.setObjective(
    gp.quicksum(delta[t, c] * a[t, c] for t in T for c in C) + gp.quicksum(omega[i] * z[i] for i in range(1, 6)),
    GRB.MINIMIZE
)

# Constraint: Each treated unit is matched to exactly m controls
for t in T:
    model.addConstr(gp.quicksum(a[t, c] for c in C) == m, f"Match_{t}")

# Constraint: Each control is used at most once
for c in C:
    model.addConstr(gp.quicksum(a[t, c] for t in T) <= 1, f"Control_{c}")

# Covariate balance constraints (Means, Second Moments, Cross Product)
model.addConstr(
    z[1] >= gp.quicksum(a[t, c] * x[c, 1] for t in T for c in C) / (m * len(T)) - x_T[1],
    "Mean_Balance_p1_Pos"
)
model.addConstr(
    z[1] >= -gp.quicksum(a[t, c] * x[c, 1] for t in T for c in C) / (m * len(T)) + x_T[1],
    "Mean_Balance_p1_Neg"
)

model.addConstr(
    z[2] >= gp.quicksum(a[t, c] * x[c, 1]**2 for t in T for c in C) / (m * len(T)) - x2_T[1],
    "SecondMoment_Balance_p1_Pos"
)
model.addConstr(
    z[2] >= -gp.quicksum(a[t, c] * x[c, 1]**2 for t in T for c in C) / (m * len(T)) + x2_T[1],
    "SecondMoment_Balance_p1_Neg"
)

model.addConstr(
    z[3] >= gp.quicksum(a[t, c] * x[c, 2] for t in T for c in C) / (m * len(T)) - x_T[2],
    "Mean_Balance_p2_Pos"
)
model.addConstr(
    z[3] >= -gp.quicksum(a[t, c] * x[c, 2] for t in T for c in C) / (m * len(T)) + x_T[2],
    "Mean_Balance_p2_Neg"
)

model.addConstr(
    z[4] >= gp.quicksum(a[t, c] * x[c, 2]**2 for t in T for c in C) / (m * len(T)) - x2_T[2],
    "SecondMoment_Balance_p2_Pos"
)
model.addConstr(
    z[4] >= -gp.quicksum(a[t, c] * x[c, 2]**2 for t in T for c in C) / (m * len(T)) + x2_T[2],
    "SecondMoment_Balance_p2_Neg"
)

model.addConstr(
    z[5] >= gp.quicksum(a[t, c] * x[c, 1] * x[c, 2] for t in T for c in C) / (m * len(T)) - x_cross_T,
    "CrossProduct_Balance_Pos"
)
model.addConstr(
    z[5] >= -gp.quicksum(a[t, c] * x[c, 1] * x[c, 2] for t in T for c in C) / (m * len(T)) + x_cross_T,
    "CrossProduct_Balance_Neg"
)

# Solve the model
model.optimize()

# Print results
if model.status == GRB.OPTIMAL:
    print("Optimal Matching Found:")
    for t in T:
        for c in C:
            if a[t, c].x > 0.5:
                print(f"Treated {t} matched with Control {c}")
    print("\nCovariate imbalance values:")
    for i in range(1, 6):
        print(f"z[{i}] = {z[i].x}")
else:
    print("No optimal solution found.")


Gurobi Optimizer version 12.0.1 build v12.0.1rc0 (mac64[x86] - Darwin 23.6.0 23G93)

CPU model: Intel(R) Core(TM) i5-1038NG7 CPU @ 2.00GHz
Thread count: 4 physical cores, 8 logical processors, using up to 8 threads

Optimize a model with 19 rows, 23 columns and 226 nonzeros
Model fingerprint: 0x09b7484f
Variable types: 5 continuous, 18 integer (18 binary)
Coefficient statistics:
  Matrix range     [1e+00, 1e+02]
  Objective range  [1e+00, 8e+00]
  Bounds range     [1e+00, 1e+00]
  RHS range        [1e+00, 5e+02]
Found heuristic solution: objective 74.5000000
Presolve removed 6 rows and 1 columns
Presolve time: 0.00s
Presolved: 13 rows, 22 columns, 100 nonzeros
Variable types: 0 continuous, 22 integer (18 binary)

Root relaxation: infeasible, 13 iterations, 0.00 seconds (0.00 work units)

    Nodes    |    Current Node    |     Objective Bounds      |     Work
 Expl Unexpl |  Obj  Depth IntInf | Incumbent    BestBd   Gap | It/Node Time

     0     0 infeasible    0        74.50000   74.

Balance Constrain: Incorporate quantile balance and the Kolmogorov-Smirnov (K-S) statistic

In [70]:
import gurobipy as gp
from gurobipy import GRB

# Define sets
T = [1, 2, 3]  # Treated units
C = [4, 5, 6, 7, 8, 9]  # Control units
P = [1, 2]  # Covariates
m = 2  # Number of controls per treated unit

# Define quantiles for each covariate
G = {1: [10, 15, 20], 2: [20, 25, 30]}  # Example quantile grid for p1 and p2

# Example distances δ_t,c
delta = {(t, c): abs(t - c) for t in T for c in C}

# Example covariate values (randomized)
x = {
    (4, 1): 10, (4, 2): 20, (5, 1): 15, (5, 2): 25,
    (6, 1): 12, (6, 2): 22, (7, 1): 11, (7, 2): 21,
    (8, 1): 14, (8, 2): 24, (9, 1): 13, (9, 2): 23
}

# Compute empirical CDF for treated units at quantiles h_g
h = {
    1: {10: 0.2, 15: 0.5, 20: 0.8},  # Empirical CDF for covariate 1
    2: {20: 0.25, 25: 0.6, 30: 0.9}  # Empirical CDF for covariate 2
}

# Covariate importance weights
omega = {1: 1.0, 2: 1.0}

# Create Gurobi model
model = gp.Model("Optimal_Matching_KS")

# Decision variables
a = model.addVars(T, C, vtype=GRB.BINARY, name="a")
z = model.addVars(P, vtype=GRB.CONTINUOUS, name="z")

# Objective function: Minimize sum of distances and Kolmogorov-Smirnov imbalances
model.setObjective(
    gp.quicksum(delta[t, c] * a[t, c] for t in T for c in C) +
    gp.quicksum(omega[p] * z[p] for p in P),
    GRB.MINIMIZE
)

# Constraint: Each treated unit is matched to exactly m controls
for t in T:
    model.addConstr(gp.quicksum(a[t, c] for c in C) == m, f"Match_{t}")

# Constraint: Each control is used at most once
for c in C:
    model.addConstr(gp.quicksum(a[t, c] for t in T) <= 1, f"Control_{c}")

# Kolmogorov-Smirnov Constraints
for p in P:
    for g_p in G[p]:
        # Compute indicator function: 1 if x[c, p] < g_p, 0 otherwise
        indicator_sum = gp.quicksum(a[t, c] * (1 if x[c, p] < g_p else 0) for t in T for c in C) / (m * len(T))
        
        # KS Upper Bound Constraint
        model.addConstr(omega[p] * z[p] >= h[p][g_p] - indicator_sum, f"KS_Upper_{p}_{g_p}")

        # KS Lower Bound Constraint
        model.addConstr(omega[p] * z[p] >= -h[p][g_p] + indicator_sum, f"KS_Lower_{p}_{g_p}")

# Solve the model
model.optimize()

# Print results
if model.status == GRB.OPTIMAL:
    print("Optimal Matching Found:")
    for t in T:
        for c in C:
            if a[t, c].x > 0.5:
                print(f"Treated {t} matched with Control {c}")
    print("\nKolmogorov-Smirnov imbalance values:")
    for p in P:
        print(f"z[{p}] = {z[p].x}")
else:
    print("No optimal solution found.")


Gurobi Optimizer version 12.0.1 build v12.0.1rc0 (mac64[x86] - Darwin 23.6.0 23G93)

CPU model: Intel(R) Core(TM) i5-1038NG7 CPU @ 2.00GHz
Thread count: 4 physical cores, 8 logical processors, using up to 8 threads

Optimize a model with 21 rows, 20 columns and 180 nonzeros
Model fingerprint: 0x750ad862
Variable types: 2 continuous, 18 integer (18 binary)
Coefficient statistics:
  Matrix range     [2e-01, 1e+00]
  Objective range  [1e+00, 8e+00]
  Bounds range     [1e+00, 1e+00]
  RHS range        [2e-01, 2e+00]
Found heuristic solution: objective 27.5833333
Presolve removed 9 rows and 0 columns
Presolve time: 0.00s
Presolved: 12 rows, 20 columns, 84 nonzeros
Variable types: 0 continuous, 20 integer (18 binary)

Root relaxation: cutoff, 13 iterations, 0.00 seconds (0.00 work units)

    Nodes    |    Current Node    |     Objective Bounds      |     Work
 Expl Unexpl |  Obj  Depth IntInf | Incumbent    BestBd   Gap | It/Node Time

     0     0     cutoff    0        27.58333   27.58333

Balance Constrain: Exact and near-exact matching constraints

In [71]:
import gurobipy as gp
from gurobipy import GRB

# Define sets
T = [1, 2, 3]  # Treated units
C = [4, 5, 6, 7, 8, 9]  # Control units
B = [0, 1]  # Categories of a nominal covariate
m = 2  # Number of controls per treated unit
xi = 2  # Allowed deviation for near-exact matching

# Example categorical covariate values for treated and control groups
x_cat = {
    1: 0, 2: 1, 3: 0,  # Treated
    4: 0, 5: 1, 6: 1, 7: 0, 8: 0, 9: 1  # Control
}

# Example distances δ_t,c
delta = {(t, c): abs(t - c) for t in T for c in C}

# Create Gurobi model
model = gp.Model("Optimal_Matching_Exact")

# Decision variables
a = model.addVars(T, C, vtype=GRB.BINARY, name="a")
u = model.addVars(B, vtype=GRB.CONTINUOUS, name="u")  # Auxiliary variables for near-exact matching

# Objective function: Minimize total distance
model.setObjective(
    gp.quicksum(delta[t, c] * a[t, c] for t in T for c in C),
    GRB.MINIMIZE
)

# Constraint: Each treated unit is matched to exactly m controls
for t in T:
    model.addConstr(gp.quicksum(a[t, c] for c in C) == m, f"Match_{t}")

# Constraint: Each control is used at most once
for c in C:
    model.addConstr(gp.quicksum(a[t, c] for t in T) <= 1, f"Control_{c}")

# **Exact Matching Constraint**
for b in B:
    model.addConstr(
        gp.quicksum(a[t, c] * (1 if x_cat[t] == b and x_cat[c] == b else 0) for t in T for c in C)
        == m * sum(1 for t in T if x_cat[t] == b),
        f"Exact_Matching_{b}"
    )

# **Near-Exact Matching Constraint using auxiliary variable**
for b in B:
    model.addConstr(
        u[b] >= gp.quicksum(a[t, c] * (1 if x_cat[t] == b and x_cat[c] == b else 0) for t in T for c in C)
        - m * sum(1 for t in T if x_cat[t] == b),
        f"Near_Exact_Matching_Upper_{b}"
    )

    model.addConstr(
        u[b] >= -gp.quicksum(a[t, c] * (1 if x_cat[t] == b and x_cat[c] == b else 0) for t in T for c in C)
        + m * sum(1 for t in T if x_cat[t] == b),
        f"Near_Exact_Matching_Lower_{b}"
    )

    model.addConstr(u[b] <= xi, f"Near_Exact_Bound_{b}")

# Solve the model
model.optimize()

# Print results
if model.status == GRB.OPTIMAL:
    print("Optimal Matching Found:")
    for t in T:
        for c in C:
            if a[t, c].x > 0.5:
                print(f"Treated {t} matched with Control {c}")
else:
    print("No optimal solution found.")


Gurobi Optimizer version 12.0.1 build v12.0.1rc0 (mac64[x86] - Darwin 23.6.0 23G93)

CPU model: Intel(R) Core(TM) i5-1038NG7 CPU @ 2.00GHz
Thread count: 4 physical cores, 8 logical processors, using up to 8 threads

Optimize a model with 17 rows, 20 columns and 69 nonzeros
Model fingerprint: 0xec2582d3
Variable types: 2 continuous, 18 integer (18 binary)
Coefficient statistics:
  Matrix range     [1e+00, 1e+00]
  Objective range  [1e+00, 8e+00]
  Bounds range     [1e+00, 1e+00]
  RHS range        [1e+00, 4e+00]
Presolve removed 8 rows and 6 columns
Presolve time: 0.00s

Explored 0 nodes (0 simplex iterations) in 0.01 seconds (0.00 work units)
Thread count was 1 (of 8 available processors)

Solution count 0

Model is infeasible
Best objective -, best bound -, gap -
No optimal solution found.


### Balancing Weights

Problem Setup and Weighting Framework

In [83]:
rseed = 42

In [84]:
import numpy as np
import itertools
import pandas as pd

# Set up the parameters
K = 3  # Number of binary factors
Q = 2 ** K  # Number of possible treatment combinations
N = 1000  # Number of subjects

# Generate all possible treatment combinations
treatment_combinations = np.array(list(itertools.product([-1, 1], repeat=K)))

# Simulate covariates X (D-dimensional, here D=3 for simplicity)
D = 3
X = np.random.normal(0, 1, size=(N, D))

# Simulate potential outcomes for each individual and each treatment combination
# Assume potential outcomes depend on treatment and covariates linearly
beta_treatment = np.random.randn(Q)  # Random effect size for each treatment combination
beta_covariate = np.random.randn(D)  # Effect size for covariates

# For each subject, generate potential outcomes
Y_potential = np.zeros((N, Q))
for i in range(N):
    for q in range(Q):
        Y_potential[i, q] = beta_treatment[q] + X[i].dot(beta_covariate) + np.random.normal(0, 1)

# Assign each individual a random treatment from the possible combinations
Z_indices = np.random.choice(Q, size=N)  # Random treatment assignment
Y_observed = np.array([Y_potential[i, Z_indices[i]] for i in range(N)])  # Observed outcome

# Estimate main effects
# Contrast vector for each factor
main_effects = []
for k in range(K):
    g_k = np.array([1 if comb[k] == 1 else -1 for comb in treatment_combinations])
    E_Y = np.array([np.mean(Y_observed[Z_indices == q]) if np.any(Z_indices == q) else 0 for q in range(Q)])
    tau_k = (1 / (2 ** (K - 1))) * g_k.T @ E_Y
    main_effects.append(tau_k)

# Estimate interaction effects between two factors
interaction_effects = {}
for k1, k2 in itertools.combinations(range(K), 2):
    g_k1 = np.array([1 if comb[k1] == 1 else -1 for comb in treatment_combinations])
    g_k2 = np.array([1 if comb[k2] == 1 else -1 for comb in treatment_combinations])
    g_interaction = g_k1 * g_k2
    tau_k1k2 = (1 / (2 ** (K - 1))) * g_interaction @ E_Y
    interaction_effects[(k1, k2)] = tau_k1k2

# Present results
results = {
    "Main Effects": main_effects,
    "Interaction Effects": interaction_effects
}

# Convert results to DataFrame for display
main_effects_df = pd.DataFrame({
    'Factor': [f'z_{k+1}' for k in range(K)],
    'Estimated Main Effect': main_effects
})

interaction_effects_df = pd.DataFrame(
    [(f'z_{k1+1} & z_{k2+1}', effect) for (k1, k2), effect in interaction_effects.items()],
    columns=['Interaction', 'Estimated Effect']
)

# Display the DataFrames using standard methods
print("Main Effects Estimates:\n")
print(main_effects_df)

print("\nInteraction Effects Estimates:\n")
print(interaction_effects_df)


Main Effects Estimates:

  Factor  Estimated Main Effect
0    z_1               1.006173
1    z_2              -0.137883
2    z_3              -0.429061

Interaction Effects Estimates:

  Interaction  Estimated Effect
0   z_1 & z_2          0.173369
1   z_1 & z_3          0.471227
2   z_2 & z_3         -0.198338


Weighting for Observational Factorial Studies

In [85]:
import numpy as np
import itertools
import pandas as pd
from scipy.stats import norm

# Parameters
K = 3  # Number of binary factors
Q = 2 ** K  # Number of treatment combinations
N = 1000  # Number of individuals

# Generate treatment combinations
treatment_combinations = np.array(list(itertools.product([-1, 1], repeat=K)))

# Simulate covariates X (D-dimensional)
D = 3
X = np.random.normal(0, 1, size=(N, D))

# Simulate potential outcomes
beta_treatment = np.random.randn(Q)  # Random effects for each treatment combination
beta_covariate = np.random.randn(D)  # Effects for covariates

Y_potential = np.zeros((N, Q))
for i in range(N):
    for q in range(Q):
        Y_potential[i, q] = beta_treatment[q] + X[i].dot(beta_covariate) + np.random.normal(0, 1)

# Random assignment of treatments
Z_indices = np.random.choice(Q, size=N)  # Random treatment assignment
Y_observed = np.array([Y_potential[i, Z_indices[i]] for i in range(N)])  # Observed outcomes

# Simulate f(X) and f_z(X) for weighting function
# Assume f(X) is standard normal and f_z(X) shifts mean for treatment groups
f_X = norm.pdf(X)
f_z = np.zeros(N)
for i in range(N):
    z_idx = Z_indices[i]
    treatment_effect = treatment_combinations[z_idx]
    shifted_mean = treatment_effect * 0.5  # Shifted by treatment effect
    f_z[i] = np.prod(norm.pdf(X[i], loc=shifted_mean, scale=1))

# Weighting function w_z(X) = f(X) / f_z(X)
w_z = np.prod(f_X, axis=1) / f_z
w_i = N * w_z / (2 ** (K - 1) * np.bincount(Z_indices, minlength=Q)[Z_indices])

# Calculate the weighting estimator for factorial effects
weighted_effects = []
for k in range(K):
    g_k = np.array([1 if comb[k] == 1 else -1 for comb in treatment_combinations])
    g_k_pos = np.maximum(g_k, 0)
    g_k_neg = np.maximum(-g_k, 0)

    # A_iK^+ and A_iK^-
    A_iK_pos = np.array([np.sum([g_k_pos[q] * (Z_indices[i] == q) for q in range(Q)]) for i in range(N)])
    A_iK_neg = 1 - A_iK_pos

    # Weighted estimators for tau_K^+ and tau_K^-
    tau_k_pos = (1 / N) * np.sum(w_i * A_iK_pos * Y_observed)
    tau_k_neg = (1 / N) * np.sum(w_i * A_iK_neg * Y_observed)

    # Combine to get the weighted factorial effect
    tau_k_weighted = tau_k_pos - tau_k_neg
    weighted_effects.append(tau_k_weighted)

# Interaction effects using the weighting estimator
interaction_effects_weighted = {}
for k1, k2 in itertools.combinations(range(K), 2):
    g_k1 = np.array([1 if comb[k1] == 1 else -1 for comb in treatment_combinations])
    g_k2 = np.array([1 if comb[k2] == 1 else -1 for comb in treatment_combinations])
    g_interaction = g_k1 * g_k2

    g_interaction_pos = np.maximum(g_interaction, 0)
    g_interaction_neg = np.maximum(-g_interaction, 0)

    A_iK_pos = np.array([np.sum([g_interaction_pos[q] * (Z_indices[i] == q) for q in range(Q)]) for i in range(N)])
    A_iK_neg = 1 - A_iK_pos

    tau_inter_pos = (1 / N) * np.sum(w_i * A_iK_pos * Y_observed)
    tau_inter_neg = (1 / N) * np.sum(w_i * A_iK_neg * Y_observed)

    tau_inter_weighted = tau_inter_pos - tau_inter_neg
    interaction_effects_weighted[(k1, k2)] = tau_inter_weighted

# Prepare results
weighted_main_effects_df = pd.DataFrame({
    'Factor': [f'z_{k+1}' for k in range(K)],
    'Weighted Effect': weighted_effects
})

weighted_interaction_effects_df = pd.DataFrame(
    [(f'z_{k1+1} & z_{k2+1}', effect) for (k1, k2), effect in interaction_effects_weighted.items()],
    columns=['Interaction', 'Weighted Effect']
)

# Display results
print("Weighted Main Effects Using Weighting Estimator:\n", weighted_main_effects_df)
print("\nWeighted Interaction Effects Using Weighting Estimator:\n", weighted_interaction_effects_df)


Weighted Main Effects Using Weighting Estimator:
   Factor  Weighted Effect
0    z_1         1.709458
1    z_2         0.775540
2    z_3        -0.650236

Weighted Interaction Effects Using Weighting Estimator:
   Interaction  Weighted Effect
0   z_1 & z_2         0.997883
1   z_1 & z_3         0.549006
2   z_2 & z_3        -1.254974


3.1 Weighting for Estimating a Single Factorial Effect 

3.1.1 General Additive Outcome Model

The model assumes outcomes are affected by both covariates and factorial interactions.

The weighting balances both: Covariates across treatment assignments and Higher-order interactions that can bias the factorial effect estimation.

In [86]:
import numpy as np
import itertools
import pandas as pd
from scipy.stats import norm

# Parameters
K = 3  # Number of binary factors
Q = 2 ** K  # Number of treatment combinations
N = 1000  # Number of individuals

# Generate treatment combinations
treatment_combinations = np.array(list(itertools.product([-1, 1], repeat=K)))

# Simulate covariates X (D-dimensional)
D = 3
X = np.random.normal(0, 1, size=(N, D))

# Simulate potential outcomes using a general additive model
S = 3  # Number of basis functions
alpha = np.random.randn(S)
beta = np.random.randn(Q)

# Define basis functions h_s(X) = X^s (simple polynomial basis)
def h_s(X, s):
    return X ** (s + 1)

# Compute mu(X) and nu(z)
mu_X = np.sum([alpha[s] * h_s(X, s).sum(axis=1) for s in range(S)], axis=0)
nu_z = np.zeros((N, Q))
for i in range(N):
    for q in range(Q):
        z = treatment_combinations[q]
        nu_z[i, q] = beta[q] * np.prod(z)

# Simulate potential outcomes with reshaping for broadcasting
Y_potential = mu_X[:, np.newaxis] + nu_z + np.random.normal(0, 1, size=(N, Q))


# Random assignment of treatments
Z_indices = np.random.choice(Q, size=N)
Y_observed = np.array([Y_potential[i, Z_indices[i]] for i in range(N)])

# Simulate f(X) and f_z(X) for weighting
f_X = norm.pdf(X)
f_z = np.zeros(N)
for i in range(N):
    z_idx = Z_indices[i]
    treatment_effect = treatment_combinations[z_idx]
    shifted_mean = treatment_effect * 0.5
    f_z[i] = np.prod(norm.pdf(X[i], loc=shifted_mean, scale=1))

# Weighting function
w_z = np.prod(f_X, axis=1) / f_z
w_i = N * w_z / (2 ** (K - 1) * np.bincount(Z_indices, minlength=Q)[Z_indices])

# Calculate the new weighting estimator
weighted_effects = []
for k in range(K):
    g_k = np.array([1 if comb[k] == 1 else -1 for comb in treatment_combinations])
    g_k_pos = np.maximum(g_k, 0)
    g_k_neg = np.maximum(-g_k, 0)

    # A_iK^+ and A_iK^-
    A_iK_pos = np.array([np.sum([g_k_pos[q] * (Z_indices[i] == q) for q in range(Q)]) for i in range(N)])
    A_iK_neg = 1 - A_iK_pos

    # Weighted estimators for tau_K^+ and tau_K^-
    tau_k_pos = (1 / N) * np.sum(w_i * A_iK_pos * Y_observed)
    tau_k_neg = (1 / N) * np.sum(w_i * A_iK_neg * Y_observed)

    # Combine to get the weighted factorial effect
    tau_k_weighted = tau_k_pos - tau_k_neg
    weighted_effects.append(tau_k_weighted)

# Interaction effects using the new weighting estimator
interaction_effects_weighted = {}
for k1, k2 in itertools.combinations(range(K), 2):
    g_k1 = np.array([1 if comb[k1] == 1 else -1 for comb in treatment_combinations])
    g_k2 = np.array([1 if comb[k2] == 1 else -1 for comb in treatment_combinations])
    g_interaction = g_k1 * g_k2

    g_interaction_pos = np.maximum(g_interaction, 0)
    g_interaction_neg = np.maximum(-g_interaction, 0)

    A_iK_pos = np.array([np.sum([g_interaction_pos[q] * (Z_indices[i] == q) for q in range(Q)]) for i in range(N)])
    A_iK_neg = 1 - A_iK_pos

    tau_inter_pos = (1 / N) * np.sum(w_i * A_iK_pos * Y_observed)
    tau_inter_neg = (1 / N) * np.sum(w_i * A_iK_neg * Y_observed)

    tau_inter_weighted = tau_inter_pos - tau_inter_neg
    interaction_effects_weighted[(k1, k2)] = tau_inter_weighted

# Prepare results
weighted_main_effects_df = pd.DataFrame({
    'Factor': [f'z_{k+1}' for k in range(K)],
    'Weighted Effect': weighted_effects
})

weighted_interaction_effects_df = pd.DataFrame(
    [(f'z_{k1+1} & z_{k2+1}', effect) for (k1, k2), effect in interaction_effects_weighted.items()],
    columns=['Interaction', 'Weighted Effect']
)

# Display results
print("Weighted Main Effects Using New Weighting Estimator:\n", weighted_main_effects_df)
print("\nWeighted Interaction Effects Using New Weighting Estimator:\n", weighted_interaction_effects_df)


Weighted Main Effects Using New Weighting Estimator:
   Factor  Weighted Effect
0    z_1        -2.700076
1    z_2        -4.358818
2    z_3        -2.682912

Weighted Interaction Effects Using New Weighting Estimator:
   Interaction  Weighted Effect
0   z_1 & z_2        -1.531859
1   z_1 & z_3        -0.467510
2   z_2 & z_3        -1.249908


3.1.2 Outcome Model with Treatment Effect Heterogeneity

In [87]:
import numpy as np
import itertools
import pandas as pd
from scipy.stats import norm

# Parameters
K = 3  # Number of binary factors
Q = 2 ** K  # Number of treatment combinations
N = 1000  # Number of individuals

# Generate treatment combinations
treatment_combinations = np.array(list(itertools.product([-1, 1], repeat=K)))

# Simulate covariates X (D-dimensional)
D = 3
X = np.random.normal(0, 1, size=(N, D))

# Simulate potential outcomes using heterogeneous treatment effects
S = 3  # Number of basis functions
alpha = np.random.randn(S, Q)  # Different coefficients for each treatment
beta = np.random.randn(Q)

# Define basis functions h_s(X) = X^s (polynomial basis)
def h_s(X, s):
    return X ** (s + 1)

# Define heterogeneous basis functions q_sJ(X, z)
def q_sJ(X, z, s):
    return h_s(X, s) * np.prod(z)

# Compute mu(X, z) with heterogeneity
mu_XZ = np.zeros((N, Q))
for i in range(N):
    for q in range(Q):
        z = treatment_combinations[q]
        mu_XZ[i, q] = sum([alpha[s, q] * h_s(X[i], s).sum() for s in range(S)])

# Simulate nu(z)
nu_z = np.zeros((N, Q))
for i in range(N):
    for q in range(Q):
        z = treatment_combinations[q]
        nu_z[i, q] = beta[q] * np.prod(z)

# Generate potential outcomes
Y_potential = mu_XZ + nu_z + np.random.normal(0, 1, size=(N, Q))

# Random assignment of treatments
Z_indices = np.random.choice(Q, size=N)
Y_observed = np.array([Y_potential[i, Z_indices[i]] for i in range(N)])

# Simulate f(X) and f_z(X) for weighting function
f_X = norm.pdf(X)
f_z = np.zeros(N)
for i in range(N):
    z_idx = Z_indices[i]
    treatment_effect = treatment_combinations[z_idx]
    shifted_mean = treatment_effect * 0.5
    f_z[i] = np.prod(norm.pdf(X[i], loc=shifted_mean, scale=1))

# Weighting function
w_z = np.prod(f_X, axis=1) / f_z
w_i = N * w_z / (2 ** (K - 1) * np.bincount(Z_indices, minlength=Q)[Z_indices])

# Calculate the weighting estimator with heterogeneity
weighted_effects = []
for k in range(K):
    g_k = np.array([1 if comb[k] == 1 else -1 for comb in treatment_combinations])
    g_k_pos = np.maximum(g_k, 0)
    g_k_neg = np.maximum(-g_k, 0)

    # A_iK^+ and A_iK^-
    A_iK_pos = np.array([np.sum([g_k_pos[q] * (Z_indices[i] == q) for q in range(Q)]) for i in range(N)])
    A_iK_neg = 1 - A_iK_pos

    # Weighted estimators for tau_K^+ and tau_K^-
    tau_k_pos = (1 / N) * np.sum(w_i * A_iK_pos * Y_observed)
    tau_k_neg = (1 / N) * np.sum(w_i * A_iK_neg * Y_observed)

    # Combine to get the weighted factorial effect
    tau_k_weighted = tau_k_pos - tau_k_neg
    weighted_effects.append(tau_k_weighted)

# Interaction effects with heterogeneity
interaction_effects_weighted = {}
for k1, k2 in itertools.combinations(range(K), 2):
    g_k1 = np.array([1 if comb[k1] == 1 else -1 for comb in treatment_combinations])
    g_k2 = np.array([1 if comb[k2] == 1 else -1 for comb in treatment_combinations])
    g_interaction = g_k1 * g_k2

    g_interaction_pos = np.maximum(g_interaction, 0)
    g_interaction_neg = np.maximum(-g_interaction, 0)

    A_iK_pos = np.array([np.sum([g_interaction_pos[q] * (Z_indices[i] == q) for q in range(Q)]) for i in range(N)])
    A_iK_neg = 1 - A_iK_pos

    tau_inter_pos = (1 / N) * np.sum(w_i * A_iK_pos * Y_observed)
    tau_inter_neg = (1 / N) * np.sum(w_i * A_iK_neg * Y_observed)

    tau_inter_weighted = tau_inter_pos - tau_inter_neg
    interaction_effects_weighted[(k1, k2)] = tau_inter_weighted

# Prepare results
weighted_main_effects_df = pd.DataFrame({
    'Factor': [f'z_{k+1}' for k in range(K)],
    'Weighted Effect': weighted_effects
})

weighted_interaction_effects_df = pd.DataFrame(
    [(f'z_{k1+1} & z_{k2+1}', effect) for (k1, k2), effect in interaction_effects_weighted.items()],
    columns=['Interaction', 'Weighted Effect']
)

# Display results
print("Weighted Main Effects with Heterogeneity:\n", weighted_main_effects_df)
print("\nWeighted Interaction Effects with Heterogeneity:\n", weighted_interaction_effects_df)


Weighted Main Effects with Heterogeneity:
   Factor  Weighted Effect
0    z_1       -11.910414
1    z_2       -11.442368
2    z_3        -4.497607

Weighted Interaction Effects with Heterogeneity:
   Interaction  Weighted Effect
0   z_1 & z_2       -19.377291
1   z_1 & z_3        -8.112643
2   z_2 & z_3        -2.841704


3.2 Weighting for Estimating Multiple Factorial Effects Simultaneously

In [88]:
import numpy as np
import pandas as pd
from scipy.stats import multivariate_normal, norm
import gurobipy as gp
from gurobipy import GRB

# Parameters
N = 200  # Sample size
K = 3  # Number of binary factors
D = 5  # Number of covariates
rho = 0.2  # Covariance correlation

# Generate multivariate normal covariates X with mean mu and covariance Sigma
mu = np.array([0.1, 0.1, 0.1, 0.0, 0.0])
Sigma = np.full((D, D), rho)
np.fill_diagonal(Sigma, 1)

# Logistic regression coefficients for Z
beta1 = np.array([1/4, 2/4, 0, 3/4, 1])
beta2 = np.array([3/4, 1/4, 1, 0, 2/4])
beta3 = np.array([1, 0, 3/4, 2/4, 1/4])

# Generate covariates X
X = multivariate_normal.rvs(mean=mu, cov=Sigma, size=N)

# Generate treatment assignment Z based on logistic regression
def logistic_prob(X, beta):
    logits = X @ beta
    return 1 / (1 + np.exp(-logits))

Z = np.zeros((N, K))
for k, beta in enumerate([beta1, beta2, beta3]):
    probs = logistic_prob(X, beta)
    Z[:, k] = np.random.binomial(1, probs)

Z_expanded = np.repeat(Z, D // K + 1, axis=1)[:, :D] 

# Generate potential outcomes
epsilon = np.random.normal(0, 1, size=(N, 3))
Y1 = 2 * np.sum(X, axis=1) + np.sum(Z, axis=1) + epsilon[:, 0]
Y2 = 2 * np.sum(X, axis=1) + np.sum(X * Z_expanded, axis=1) + epsilon[:, 1]
Y3 = np.sin(X[:, 0]) + np.cos(X[:, 1]) + (np.minimum(1, Y1) + X[:, 1]) * Z[:, 0] + np.sum(X * Z_expanded, axis=1) + epsilon[:, 2]
# Initialize Gurobi model
model = gp.Model("weight_optimization")

# Add variables (weights)
w = model.addVars(N, lb=0.0, name="w")

# Objective: Minimize variance of weights (sum of squared weights)
model.setObjective(gp.quicksum(w[i] * w[i] for i in range(N)), GRB.MINIMIZE)

# Balance constraints (Standardized Mean Differences close to 0)
# Linear balance constraints (avoid non-linear operations)
for d in range(D):
    treated_sum = gp.quicksum(w[i] * X[i, d] * Z[i, 0] for i in range(N))
    control_sum = gp.quicksum(w[i] * X[i, d] * (1 - Z[i, 0]) for i in range(N))
    model.addConstr(treated_sum - control_sum == 0, name=f"balance_{d}")

# Solve the optimization problem
model.optimize()

# Extract the optimal weights
if model.status == GRB.OPTIMAL:
    w_opt = np.array([w[i].X for i in range(N)])
else:
    raise ValueError("Optimization did not converge!")

# Apply optimal weights to outcomes
weighted_Y1 = np.sum(w_opt * Y1) / np.sum(w_opt)
weighted_Y2 = np.sum(w_opt * Y2) / np.sum(w_opt)
weighted_Y3 = np.sum(w_opt * Y3) / np.sum(w_opt)

# Check covariate balance using standardized mean differences after weighting
def compute_smd_weighted(X, Z_col, weights):
    treated_mean = np.average(X[Z_col == 1], axis=0, weights=weights[Z_col == 1])
    control_mean = np.average(X[Z_col == 0], axis=0, weights=weights[Z_col == 0])
    pooled_sd = np.sqrt((np.average((X[Z_col == 1] - treated_mean) ** 2, axis=0, weights=weights[Z_col == 1]) +
                         np.average((X[Z_col == 0] - control_mean) ** 2, axis=0, weights=weights[Z_col == 0])) / 2)
    smd = np.abs(treated_mean - control_mean) / pooled_sd
    return smd

# Evaluate SMDs after weighting
smd_weighted_results = {}
for k in range(K):
    smd_weighted_results[f'Z{k+1}'] = compute_smd_weighted(X, Z[:, k], w_opt)

smd_weighted_df = pd.DataFrame(smd_weighted_results)

# Display results
print("Weighted Expected Outcomes:")
print(f"Weighted E[Y1]: {weighted_Y1}")
print(f"Weighted E[Y2]: {weighted_Y2}")
print(f"Weighted E[Y3]: {weighted_Y3}")

print("\nCovariate Balance After Weighting (Standardized Mean Differences):")
print(smd_weighted_df)



Gurobi Optimizer version 12.0.1 build v12.0.1rc0 (mac64[x86] - Darwin 23.6.0 23G93)

CPU model: Intel(R) Core(TM) i5-1038NG7 CPU @ 2.00GHz
Thread count: 4 physical cores, 8 logical processors, using up to 8 threads

Optimize a model with 5 rows, 200 columns and 1000 nonzeros
Model fingerprint: 0x11a6f028
Model has 200 quadratic objective terms
Coefficient statistics:
  Matrix range     [1e-03, 4e+00]
  Objective range  [0e+00, 0e+00]
  QObjective range [2e+00, 2e+00]
  Bounds range     [0e+00, 0e+00]
  RHS range        [0e+00, 0e+00]
Presolve time: 0.01s
Presolved: 5 rows, 200 columns, 1000 nonzeros
Presolved model has 200 quadratic objective terms
Ordering time: 0.00s

Barrier statistics:
 AA' NZ     : 1.000e+01
 Factor NZ  : 1.500e+01
 Factor Ops : 5.500e+01 (less than 1 second per iteration)
 Threads    : 1

                  Objective                Residual
Iter       Primal          Dual         Primal    Dual     Compl     Time
   0   5.00000000e+07 -5.00000000e+07  3.87e+04 0.0

In [89]:
print("Weighted Expected Outcomes:")
print(f"Weighted E[Y1]: {weighted_Y1}")
print(f"Weighted E[Y2]: {weighted_Y2}")
print(f"Weighted E[Y3]: {weighted_Y3}")

print("\nCovariate Balance After Weighting (Standardized Mean Differences):")
print(smd_weighted_df)

Weighted Expected Outcomes:
Weighted E[Y1]: 2.246950101786268
Weighted E[Y2]: 1.2929983228108735
Weighted E[Y3]: 0.9268150563338297

Covariate Balance After Weighting (Standardized Mean Differences):
         Z1        Z2        Z3
0  0.010293  0.903805  0.914416
1  0.014170  0.606045  0.315237
2  0.007236  1.179601  0.874816
3  0.006528  0.317556  0.571239
4  0.008002  0.432347  0.554586


In [90]:
# Import required libraries
import numpy as np
import pandas as pd
from scipy.stats import multivariate_normal
import gurobipy as gp
from gurobipy import GRB
from sklearn.metrics import mean_squared_error

# Simulation parameters
N = 200  # Sample size
K = 3  # Number of binary treatment factors
D = 5  # Number of covariates
rho = 0.2  # Correlation coefficient

# Generate covariates X from a multivariate normal distribution
mu = np.array([0.1, 0.1, 0.1, 0.0, 0.0])
Sigma = np.full((D, D), rho)
np.fill_diagonal(Sigma, 1)
X = multivariate_normal.rvs(mean=mu, cov=Sigma, size=N)

# Logistic regression coefficients
beta1 = np.array([1/4, 2/4, 0, 3/4, 1])
beta2 = np.array([3/4, 1/4, 1, 0, 2/4])
beta3 = np.array([1, 0, 3/4, 2/4, 1/4])

# Function to compute logistic probabilities
def logistic_prob(X, beta):
    logits = X @ beta
    return 1 / (1 + np.exp(-logits))

# Generate treatment assignments
Z = np.zeros((N, K))
for k, beta in enumerate([beta1, beta2, beta3]):
    probs = logistic_prob(X, beta)
    Z[:, k] = np.random.binomial(1, probs)

# Generate potential outcomes
epsilon = np.random.normal(0, 1, size=(N, 3))
Z_expanded = np.repeat(Z, D // K + 1, axis=1)[:, :D]

# Define outcome models
# General Additive Outcome Model
Y1 = 2 * np.sum(X, axis=1) + np.sum(Z, axis=1) + epsilon[:, 0]

# Outcome Model with Treatment Effect Heterogeneity
Y2 = 2 * np.sum(X, axis=1) + np.sum(X * Z_expanded, axis=1) + epsilon[:, 1]

# Misspecified Outcome Model
Y3 = np.sin(X[:, 0]) + np.cos(X[:, 1]) + (np.minimum(1, Y1) + X[:, 1]) * Z[:, 0] + np.sum(X * Z_expanded, axis=1) + epsilon[:, 2]

# Initialize Gurobi model for optimization
model = gp.Model("FactorialEffectOptimization")
w = model.addVars(N, lb=0.0, name="w")

# Objective function: Minimize entropy (x * log(x)) as m(w)
model.setObjective(gp.quicksum(w[i] * w[i] for i in range(N)), GRB.MINIMIZE)

# Defining balance constraints according to the equation in the image
# Balance constraint for factorial effects
for d in range(D):
    treated_sum = gp.quicksum(w[i] * X[i, d] * Z[i, 0] for i in range(N))
    control_sum = gp.quicksum(w[i] * X[i, d] * (1 - Z[i, 0]) for i in range(N))
    model.addConstr(treated_sum - control_sum == 0, name=f"balance_{d}")

# Solve the optimization problem
model.optimize()

# Extract optimal weights
if model.status == GRB.OPTIMAL:
    w_opt = np.array([w[i].X for i in range(N)])
else:
    raise ValueError("Optimization did not converge!")

# Apply optimal weights to outcomes
weighted_Y1 = np.sum(w_opt * Y1) / np.sum(w_opt)
weighted_Y2 = np.sum(w_opt * Y2) / np.sum(w_opt)
weighted_Y3 = np.sum(w_opt * Y3) / np.sum(w_opt)

# Function to compute standardized mean differences (SMD)
def compute_smd_weighted(X, Z_col, weights):
    treated_mean = np.average(X[Z_col == 1], axis=0, weights=weights[Z_col == 1])
    control_mean = np.average(X[Z_col == 0], axis=0, weights=weights[Z_col == 0])
    pooled_sd = np.sqrt((np.average((X[Z_col == 1] - treated_mean) ** 2, axis=0, weights=weights[Z_col == 1]) +
                         np.average((X[Z_col == 0] - control_mean) ** 2, axis=0, weights=weights[Z_col == 0])) / 2)
    smd = np.abs(treated_mean - control_mean) / pooled_sd
    return smd

# Compute SMDs after weighting
smd_weighted_results = {}
for k in range(K):
    smd_weighted_results[f'Z{k+1}'] = compute_smd_weighted(X, Z[:, k], w_opt)

smd_weighted_df = pd.DataFrame(smd_weighted_results)


Gurobi Optimizer version 12.0.1 build v12.0.1rc0 (mac64[x86] - Darwin 23.6.0 23G93)

CPU model: Intel(R) Core(TM) i5-1038NG7 CPU @ 2.00GHz
Thread count: 4 physical cores, 8 logical processors, using up to 8 threads

Optimize a model with 5 rows, 200 columns and 1000 nonzeros
Model fingerprint: 0xa082a03b
Model has 200 quadratic objective terms
Coefficient statistics:
  Matrix range     [3e-04, 3e+00]
  Objective range  [0e+00, 0e+00]
  QObjective range [2e+00, 2e+00]
  Bounds range     [0e+00, 0e+00]
  RHS range        [0e+00, 0e+00]
Presolve time: 0.00s
Presolved: 5 rows, 200 columns, 1000 nonzeros
Presolved model has 200 quadratic objective terms
Ordering time: 0.00s

Barrier statistics:
 AA' NZ     : 1.000e+01
 Factor NZ  : 1.500e+01
 Factor Ops : 5.500e+01 (less than 1 second per iteration)
 Threads    : 1

                  Objective                Residual
Iter       Primal          Dual         Primal    Dual     Compl     Time
   0   5.00000000e+07 -5.00000000e+07  6.04e+04 0.0

In [91]:
# Calculate RMSE for outcomes
# Define true main effects based on simulation setup (as per Y1)
true_main_effects = np.array([2] * D)  # Assuming true effects are 2 for all covariates in Y1
true_interaction_effects = np.array([0] * (K * (K - 1) // 2))  # No true interaction effects assumed in Y1

# Extract estimated effects (replace with actual estimates from your model)
estimated_main_effects = np.array([weighted_Y1, weighted_Y2, weighted_Y3])

# Calculate RMSE for main effects
rmse_main_effects = np.sqrt(np.mean((estimated_main_effects - true_main_effects[:3])**2))

# Assuming zero true interaction effects for simplicity in the additive model
estimated_interaction_effects = np.array([0.0, 0.0, 0.0])  # Replace with actual interaction effect estimates
rmse_interaction_effects = np.sqrt(np.mean((estimated_interaction_effects - true_interaction_effects) ** 2))

# Display RMSE
print(f"RMSE for Main Effects: {rmse_main_effects}")
print(f"RMSE for Interaction Effects: {rmse_interaction_effects}")


# Prepare results for output
results_df = pd.DataFrame({
    'Outcome Model': ['Additive', 'Heterogeneous', 'Misspecified'],
    'Weighted Expected Outcome': [weighted_Y1, weighted_Y2, weighted_Y3],
})

# Display results
print("Weighted Outcomes and RMSE:\n", results_df)
print("\nCovariate Balance SMDs After Weighting:\n", smd_weighted_df)


RMSE for Main Effects: 0.5326814892645563
RMSE for Interaction Effects: 0.0
Weighted Outcomes and RMSE:
    Outcome Model  Weighted Expected Outcome
0       Additive                   2.564412
1  Heterogeneous                   1.650722
2   Misspecified                   1.359147

Covariate Balance SMDs After Weighting:
          Z1        Z2        Z3
0  0.024732  0.572232  1.031147
1  0.021637  0.513987  0.183483
2  0.004735  1.345759  1.005478
3  0.026358  0.102858  0.449505
4  0.009602  0.471160  0.035176
