In [12]:
!pip install econml



In [13]:
import numpy as np
import pandas as pd

from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression

from econml.dml import LinearDML

In [14]:
np.random.seed(42)

n = 2000  # sample size

# Confounders (covariates)
X = np.random.normal(0, 1, (n, 5))

# Treatment assignment (depends on confounders)
T = (X[:, 0] + X[:, 1] + np.random.normal(0, 1, n) > 0).astype(int)

# Outcome variable with true treatment effect = 3
Y = 3 * T + X[:, 2] + 0.5 * X[:, 3] + np.random.normal(0, 1, n)

In [15]:
# Confounders
X_covariates = X

# Treatment
T_treatment = T

# Outcome
Y_outcome = Y

In [16]:
# Model to estimate E[Y | X]
model_y = RandomForestRegressor(
    n_estimators=100,
    max_depth=6,
    random_state=42
)

# Model to estimate E[T | X]
model_t = RandomForestRegressor(
    n_estimators=100,
    max_depth=6,
    random_state=42
)

In [17]:
dml = LinearDML(
    model_y=model_y,
    model_t=model_t,
    discrete_treatment=True,
    random_state=42
)

In [18]:
dml.fit(
    Y_outcome,
    T_treatment,
    X=X_covariates
)



<econml.dml.dml.LinearDML at 0x7fb041796c00>

In [19]:
ate = dml.ate(X_covariates)

print("Estimated Average Treatment Effect (ATE):", ate)

Estimated Average Treatment Effect (ATE): 2.929363982560356


In [20]:
ate_interval = dml.ate_interval(X_covariates)

print("ATE 95% Confidence Interval:", ate_interval)

ATE 95% Confidence Interval: (np.float64(2.8004550219185225), np.float64(3.05827294320219))


In [21]:
ols = LinearRegression()

ols.fit(
    np.column_stack([T_treatment, X_covariates]),
    Y_outcome
)

print("OLS Estimated Treatment Effect:", ols.coef_[0])

OLS Estimated Treatment Effect: 3.0171246764784527
