# Applying Strata on Synthetic Data

In [1]:
import sys

sys.path.append("../")

In [2]:
import pandas as pd

from utils.bootstrap import bootstrap_experiment
from utils.prop_utils import prop_probs
from utils.strat_utils import calculate_stratified_effects

In [3]:
# Read the three synthetic CSV exports in one pass; the names mirror the files.
synthetic_1000, synthetic_10000, synthetic_100000 = map(
    pd.read_csv,
    (
        "../output/synthetic_1000.csv",
        "../output/synthetic_10000.csv",
        "../output/synthetic_100000.csv",
    ),
)

for frame in (synthetic_1000, synthetic_10000, synthetic_100000):
    # Outcome used by the estimators below: change in earnings, re78 minus re75.
    frame["re78_re75"] = frame["re78"] - frame["re75"]
    # Expose the row index as an explicit `id` column; later cells merge on it.
    frame["id"] = frame.reset_index()["index"]

In [4]:
# Make this cell safe to re-run: if a previous execution already merged a
# `propensity` column, a second merge would produce `propensity_x` /
# `propensity_y` and the later `prop_col="propensity"` lookup would fail.
# Dropping it first is a no-op on a fresh kernel.
# NOTE(review): assumes `prop_probs` returns `id` plus a `propensity` column
# (the bootstrap cell reads `prop_col="propensity"`) — confirm in utils.prop_utils.
synthetic_1000 = synthetic_1000.drop(columns=["propensity"], errors="ignore")

# Score `treat` against the listed covariates.
propensity_probabilities = prop_probs(
    synthetic_1000,
    "treat",
    ["age", "education", "black", "hispanic", "married", "re74", "re75"],
)
# Attach the scores back onto the data, matching rows on `id`.
synthetic_1000 = pd.merge(synthetic_1000, propensity_probabilities, on="id")

In [5]:
# Settings forwarded verbatim to `bootstrap_experiment`; seven strata here.
stratified_kwargs = dict(
    num_exp=1,
    n=50,
    ci=95,
    prop_col="propensity",
    treatment_col="treat",
    outcome_col="re78_re75",
    num_strata=7,
)
ci = bootstrap_experiment(synthetic_1000, calculate_stratified_effects, **stratified_kwargs)

# Only the first entry of the result is reported.
# NOTE(review): presumably (estimate, lower bound, upper bound), judging by the
# printed message — confirm against utils.bootstrap.
point_estimate, lower_bound, upper_bound = ci[0][0], ci[0][1], ci[0][2]

print(
    "Causal estimate of `Re78` - `Re75` in Synthetic data as a difference of means is "
    f"US${point_estimate:.2f} ± [{lower_bound:.2f}, {upper_bound:.2f}]"
)

Running experiments: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  6.98it/s]

Causal estimate of `Re78` - `Re75` in Synthetic data as a difference of means is US$4356.52 ± [3177.33, 5391.32]





In [6]:
# Make this cell safe to re-run: if a previous execution already merged a
# `propensity` column, a second merge would produce `propensity_x` /
# `propensity_y` and the later `prop_col="propensity"` lookup would fail.
# Dropping it first is a no-op on a fresh kernel.
# NOTE(review): assumes `prop_probs` returns `id` plus a `propensity` column
# (the bootstrap cell reads `prop_col="propensity"`) — confirm in utils.prop_utils.
synthetic_10000 = synthetic_10000.drop(columns=["propensity"], errors="ignore")

# Score `treat` against the listed covariates.
propensity_probabilities = prop_probs(
    synthetic_10000,
    "treat",
    ["age", "education", "black", "hispanic", "married", "re74", "re75"],
)
# Attach the scores back onto the data, matching rows on `id`.
synthetic_10000 = pd.merge(synthetic_10000, propensity_probabilities, on="id")

In [7]:
# Settings forwarded verbatim to `bootstrap_experiment`; seven strata here.
stratified_kwargs = dict(
    num_exp=1,
    n=50,
    ci=95,
    prop_col="propensity",
    treatment_col="treat",
    outcome_col="re78_re75",
    num_strata=7,
)
ci = bootstrap_experiment(synthetic_10000, calculate_stratified_effects, **stratified_kwargs)

# Only the first entry of the result is reported.
# NOTE(review): presumably (estimate, lower bound, upper bound), judging by the
# printed message — confirm against utils.bootstrap.
point_estimate, lower_bound, upper_bound = ci[0][0], ci[0][1], ci[0][2]

print(
    "Causal estimate of `Re78` - `Re75` in Synthetic data as a difference of means is "
    f"US${point_estimate:.2f} ± [{lower_bound:.2f}, {upper_bound:.2f}]"
)

Running experiments: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.69it/s]

Causal estimate of `Re78` - `Re75` in Synthetic data as a difference of means is US$4717.56 ± [4307.82, 5068.40]





In [8]:
# Make this cell safe to re-run: if a previous execution already merged a
# `propensity` column, a second merge would produce `propensity_x` /
# `propensity_y` and the later `prop_col="propensity"` lookup would fail.
# Dropping it first is a no-op on a fresh kernel.
# NOTE(review): assumes `prop_probs` returns `id` plus a `propensity` column
# (the bootstrap cell reads `prop_col="propensity"`) — confirm in utils.prop_utils.
synthetic_100000 = synthetic_100000.drop(columns=["propensity"], errors="ignore")

# Score `treat` against the listed covariates.
propensity_probabilities = prop_probs(
    synthetic_100000,
    "treat",
    ["age", "education", "black", "hispanic", "married", "re74", "re75"],
)
# Attach the scores back onto the data, matching rows on `id`.
synthetic_100000 = pd.merge(synthetic_100000, propensity_probabilities, on="id")

In [9]:
# Settings forwarded verbatim to `bootstrap_experiment`; seven strata here.
stratified_kwargs = dict(
    num_exp=1,
    n=50,
    ci=95,
    prop_col="propensity",
    treatment_col="treat",
    outcome_col="re78_re75",
    num_strata=7,
)
ci = bootstrap_experiment(synthetic_100000, calculate_stratified_effects, **stratified_kwargs)

# Only the first entry of the result is reported.
# NOTE(review): presumably (estimate, lower bound, upper bound), judging by the
# printed message — confirm against utils.bootstrap.
point_estimate, lower_bound, upper_bound = ci[0][0], ci[0][1], ci[0][2]

print(
    "Causal estimate of `Re78` - `Re75` in Synthetic data as a difference of means is "
    f"US${point_estimate:.2f} ± [{lower_bound:.2f}, {upper_bound:.2f}]"
)

Running experiments: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.26it/s]

Causal estimate of `Re78` - `Re75` in Synthetic data as a difference of means is US$4913.45 ± [4773.73, 5030.68]





## Naive E[Y|A]

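This is the same stratified estimator run with a single stratum (`num_strata=1`): every unit falls into one group, so no adjustment by propensity stratum is applied and the estimate is the naive comparison of outcomes by treatment.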

In [10]:
# Settings forwarded verbatim to `bootstrap_experiment`; a single stratum here.
naive_kwargs = dict(
    num_exp=1,
    n=50,
    ci=95,
    prop_col="propensity",
    treatment_col="treat",
    outcome_col="re78_re75",
    num_strata=1,
)
ci = bootstrap_experiment(synthetic_1000, calculate_stratified_effects, **naive_kwargs)

# Only the first entry of the result is reported.
# NOTE(review): presumably (estimate, lower bound, upper bound), judging by the
# printed message — confirm against utils.bootstrap.
point_estimate, lower_bound, upper_bound = ci[0][0], ci[0][1], ci[0][2]

print(
    "Causal estimate of `Re78` - `Re75` in Synthetic data as a difference of means is "
    f"US${point_estimate:.2f} ± [{lower_bound:.2f}, {upper_bound:.2f}]"
)

Running experiments: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 14.50it/s]

Causal estimate of `Re78` - `Re75` in Synthetic data as a difference of means is US$4420.95 ± [3393.72, 5395.73]





In [11]:
# Settings forwarded verbatim to `bootstrap_experiment`; a single stratum here.
naive_kwargs = dict(
    num_exp=1,
    n=50,
    ci=95,
    prop_col="propensity",
    treatment_col="treat",
    outcome_col="re78_re75",
    num_strata=1,
)
ci = bootstrap_experiment(synthetic_10000, calculate_stratified_effects, **naive_kwargs)

# Only the first entry of the result is reported.
# NOTE(review): presumably (estimate, lower bound, upper bound), judging by the
# printed message — confirm against utils.bootstrap.
point_estimate, lower_bound, upper_bound = ci[0][0], ci[0][1], ci[0][2]

print(
    "Causal estimate of `Re78` - `Re75` in Synthetic data as a difference of means is "
    f"US${point_estimate:.2f} ± [{lower_bound:.2f}, {upper_bound:.2f}]"
)

Running experiments: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  9.06it/s]

Causal estimate of `Re78` - `Re75` in Synthetic data as a difference of means is US$5004.29 ± [4594.87, 5358.68]





In [12]:
# Settings forwarded verbatim to `bootstrap_experiment`; a single stratum here.
naive_kwargs = dict(
    num_exp=1,
    n=50,
    ci=95,
    prop_col="propensity",
    treatment_col="treat",
    outcome_col="re78_re75",
    num_strata=1,
)
ci = bootstrap_experiment(synthetic_100000, calculate_stratified_effects, **naive_kwargs)

# Only the first entry of the result is reported.
# NOTE(review): presumably (estimate, lower bound, upper bound), judging by the
# printed message — confirm against utils.bootstrap.
point_estimate, lower_bound, upper_bound = ci[0][0], ci[0][1], ci[0][2]

print(
    "Causal estimate of `Re78` - `Re75` in Synthetic data as a difference of means is "
    f"US${point_estimate:.2f} ± [{lower_bound:.2f}, {upper_bound:.2f}]"
)

Running experiments: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.66it/s]

Causal estimate of `Re78` - `Re75` in Synthetic data as a difference of means is US$5287.46 ± [5165.69, 5397.89]



