In [39]:
# Third-party imports
import numpy as np
import pandas as pd
from tqdm import tqdm
from econml.dr import LinearDRLearner
def create_data():
        units = []
        for i in range(1000):
            A0 = np.random.randint(0, 2) # 50% 0 50% 1
            L1_chance = 0.2 if A0 else 0.6
            
            U = np.random.normal() 
            
            L1_chance += U
            L1 = 1 if np.random.random() < L1_chance else 0            
            A1_chance = 0.9 if L1 else 0.3
            A1 = 1 if np.random.random() < A1_chance else 0
            
            Y = 1 if np.random.random() < (0.5 * U**2) else 0 #1 if np.random.random() < U else 0
            units.append([A0, L1, A1, Y])

        columns = ['A0', 'L1', 'A1', 'Y']
        df = pd.DataFrame(units, columns=columns)

        df_new = df.copy()
        condition = (df['A0'] == 1) & (df['A1'] == 1)
        df_new['always_treated'] = np.where(condition, 1, np.where((df['A0'] == 0) & (df['A1'] == 0), 0, np.nan))
        df_new = df_new.dropna()
        return df_new

errors = [[], [], [], []]
for iteration in tqdm(range(500)):
    # Print the current iteration number
    df = create_data()

    # Instantiate a LinearDRLearner object. 
    est = LinearDRLearner()

    # Extract and reshape the necessary columns from the DataFrame for modeling.
    Y = np.array(df['Y']).reshape(-1, 1)
    T = np.array(df['always_treated']).reshape(-1, 1)
    X = np.array(df['L1']).reshape(-1, 1)

    # Fit the model with the prepared data.
    est.fit(Y.ravel(), T, X=X, W=None)
    est.effect(X, T0=0, T1=1)

    # Group by the 'always_treated' column and calculate the mean of 'Y' for each group.
    grouped = df.groupby('always_treated')['Y'].mean()

    # Calculate the difference in means between the two groups.
    unadjusted_ate = grouped.diff().dropna().iloc[0]

    # Group by both 'L1' and 'always_treated' columns, then calculate the mean of 'Y' for each group.
    grouped = df.groupby(['L1', 'always_treated'])['Y'].mean().reset_index()

    # Initialize a list to store adjusted ATEs.
    adjusted_ates = []

    # Iterate over unique values of 'L1' and calculate the ATE within each stratum of 'L1'.
    for l1_value in grouped['L1'].unique():
        treated_mean = grouped.loc[(grouped['L1'] == l1_value) & (grouped['always_treated'] == 1), 'Y'].values[0]
        control_mean = grouped.loc[(grouped['L1'] == l1_value) & (grouped['always_treated'] == 0), 'Y'].values[0]
        ate = treated_mean - control_mean
        adjusted_ates.append(ate)

    # Get the Average Treatment Effect estimated by the Double Robust method.
    double_robust_error = est.ate(X=X)

    # Prepare a list of errors for this iteration.
    iteration_errors = [double_robust_error, unadjusted_ate, adjusted_ates[0], adjusted_ates[1]]

    # Append the calculated errors for each method to the respective lists
    for j in range(4):
        errors[j].append(iteration_errors[j])
errors = np.array(errors)
import numpy as np
from scipy import stats

cases = ["double_robust_error", "unadjusted_ate", "adjusted_ates_L_is_0", "adjusted_ates_L_is_1"]

for i, results in enumerate(errors):
    # Calculate mean and standard deviation
    mean = np.mean(results)
    std_dev = np.std(results)

    # Calculate standard error
    std_error = stats.sem(results)

    # Calculate 95% confidence interval
    ci = stats.t.interval(0.95, len(results) - 1, loc=mean, scale=std_error)
    print(ci[0])
    print(cases[i])
    print(f"Mean: {mean:.3} ")
    print(f"95% Confidence Interval ({ci[0]:.3f}, {ci[1]:.3f})")


100%|██████████| 500/500 [06:47<00:00,  1.23it/s]
Degrees of freedom <= 0 for slice
invalid value encountered in scalar divide


TypeError: object of type 'int' has no len()

In [42]:
for i, results in enumerate(errors):
    # Calculate mean and standard deviation
    mean = np.mean(results)
    std_dev = np.std(results)

    # Calculate standard error
    std_error = stats.sem(results)

    # Calculate 95% confidence interval
    ci = stats.t.interval(0.95, len(results) - 1, loc=mean, scale=std_error)
    print(ci[0])
    print(cases[i])
    print(f"Mean: {mean:.3} ")
    print(f"95% Confidence Interval ({ci[0]:.3f}, {ci[1]:.3f})")

-0.00042874812346096157
double_robust_error
Mean: 0.00427 
95% Confidence Interval (-0.000, 0.009)
0.020498675113559434
unadjusted_ate
Mean: 0.0246 
95% Confidence Interval (0.020, 0.029)
-0.08945439186424788
adjusted_ates_L_is_0
Mean: -0.0838 
95% Confidence Interval (-0.089, -0.078)
0.11476890273913297
adjusted_ates_L_is_1
Mean: 0.123 
95% Confidence Interval (0.115, 0.132)


In [51]:
est.models_propensity

[[LogisticRegression(C=0.3593813663804626, intercept_scaling=1.0),
  LogisticRegression(C=0.3593813663804626, intercept_scaling=1.0)]]

In [52]:
est.models_regression

[[<econml.sklearn_extensions.linear_model.WeightedLassoWrapper at 0x7f7f2ab79dd0>,
  <econml.sklearn_extensions.linear_model.WeightedLassoWrapper at 0x7f7f2aad6590>]]

In [57]:
est.model_cate()