In [2]:
from numpy.random import binomial, normal, seed
from numpy import asarray, mean, unique

# RCT or A/B TEST

## Data Simulation

In [3]:
# Randomized experiment: treatment is an independent fair coin flip.
seed(1234)
n = 10000
treatment = binomial(1, 0.5, size=n)
# Draw the outcome under treatment first, then under control, so the seeded
# random stream is consumed in the same order as a single combined expression.
outcome_if_treated = binomial(1, 0.3, size=n)
outcome_if_control = binomial(1, 0.5, size=n)
outcome = outcome_if_treated*treatment + outcome_if_control*(1 - treatment)

## Results

P(O=1|T=1)−P(O=1|T=0)

In [4]:
# Observed contrast: mean outcome among the treated minus mean outcome among the untreated.
is_treated = treatment == 1
is_control = treatment == 0
condition_prob_diff = mean(outcome[is_treated]) - mean(outcome[is_control])
print(condition_prob_diff)

-0.1810218929047634


adjustment(1) − adjustment(0) does not apply: treatment is randomized, so there is no confounder to adjust for.

ATE = 0.3 - 0.5 = -0.2

$\widehat{ATE} = P(O=1 \mid do(T=1)) - P(O=1 \mid do(T=0))$

In [5]:
# Treatment was randomized, so the observed difference in means is reported as the ATE estimate.
print(condition_prob_diff)

-0.1810218929047634


# CONFOUNDER

## Data Simulation

In [6]:
# Confounded setting: the binary confounder raises the chance of treatment
# (0.6 vs 0.2) and also shifts the outcome by one unit.
seed(1234)
n = 10000
confounder = binomial(1, 0.8, size=n)

# Each draw below happens in the same order as in a single combined expression,
# so the seeded random stream (and therefore the data) is unchanged.
treatment_if_confounded = binomial(1, 0.6, size=n)
treatment_if_unconfounded = binomial(1, 0.2, size=n)
treatment = treatment_if_confounded*confounder + treatment_if_unconfounded*(1 - confounder)

outcome_if_treated = binomial(1, 0.3, size=n)
outcome_if_control = binomial(1, 0.5, size=n)
noise = normal(size=n)
outcome = outcome_if_treated*treatment + outcome_if_control*(1 - treatment) + confounder + noise

### Adjustment Formula

In [7]:
def adjustment(t, o, z, t0):
    """Estimate E[O | do(T = t0)] with the adjustment formula.

    Computes ``sum over z_ of mean(o[t == t0 and z == z_]) * mean(z == z_)``,
    where ``z_`` ranges over the distinct values present in ``z``.

    Parameters
    ----------
    t, o, z : array-like
        Treatment, outcome and adjustment variable, all of the same shape.
        Lists and other sequences are accepted; they are converted to NumPy
        arrays so that the element-wise comparisons and boolean masks work.
    t0 : scalar
        Treatment value whose interventional mean is estimated.

    Returns
    -------
    float
        The adjusted mean outcome. If some stratum of ``z`` contains no
        observation with ``t == t0``, the mean over that empty selection is
        NaN (NumPy emits a RuntimeWarning) and the result is NaN.
    """
    # Coerce once up front: with plain lists, `t == t0` would be a single
    # bool instead of an element-wise mask.
    t, o, z = asarray(t), asarray(o), asarray(z)
    has_t0 = t == t0

    adjusted_prob = 0
    for z_ in unique(z):
        in_stratum = z == z_
        # Stratum-specific mean outcome, weighted by the stratum's share of the sample.
        adjusted_prob = adjusted_prob + mean(o[has_t0 & in_stratum])*mean(in_stratum)
    return adjusted_prob

## Results

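$E[O \mid T=1] - E[O \mid T=0]$ (the outcome is continuous here, so we compare conditional means rather than probabilities)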

In [8]:
# Unadjusted contrast of mean outcomes between the two treatment groups.
is_treated = treatment == 1
is_control = treatment == 0
condition_prob_diff = mean(outcome[is_treated]) - mean(outcome[is_control])
print(condition_prob_diff)

0.02107032530615749


adjustment(1)−adjustment(0)

In [9]:
# Adjustment-formula estimate of each interventional mean, stratifying on the confounder.
adjusted_treated = adjustment(treatment, outcome, confounder, 1)
adjusted_control = adjustment(treatment, outcome, confounder, 0)
adj_result = adjusted_treated - adjusted_control
print(adj_result)

-0.21901378090633217


ATE = 0.3 - 0.5 = -0.2

$\widehat{ATE} = E[O \mid do(T=1)] - E[O \mid do(T=0)]$

In [10]:
# The confounder is observed, so the adjusted difference is reported as the ATE estimate.
print(adj_result)

-0.21901378090633217


# MEDIATOR

## Data Simulation

In [23]:
# Mediator setting: treatment changes the mediator (0.6 vs 0.2), and the
# mediator in turn adds one unit to the outcome.
seed(1234)
n = 10000

treatment = binomial(1, 0.4, size=n)

# Draws are made in the same order as in a single combined expression so the
# seeded random stream is unchanged.
mediator_if_treated = binomial(1, 0.6, size=n)
mediator_if_control = binomial(1, 0.2, size=n)
mediator = mediator_if_treated*treatment + mediator_if_control*(1 - treatment)

outcome_if_treated = binomial(1, 0.4, size=n)
outcome_if_control = binomial(1, 0.5, size=n)
noise = normal(size=n)
outcome = outcome_if_treated*treatment + outcome_if_control*(1 - treatment) + mediator + noise

## Results

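$E[O \mid T=1] - E[O \mid T=0]$ (the outcome is continuous here, so we compare conditional means rather than probabilities)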

In [24]:
# Unadjusted contrast of mean outcomes between the two treatment groups.
is_treated = treatment == 1
is_control = treatment == 0
condition_prob_diff = mean(outcome[is_treated]) - mean(outcome[is_control])
print(condition_prob_diff)

0.29470156607738807


adjustment(1)−adjustment(0)

In [25]:
# Same adjustment formula, but stratifying on the mediator; computed for
# comparison with the unadjusted difference.
adjusted_treated = adjustment(treatment, outcome, mediator, 1)
adjusted_control = adjustment(treatment, outcome, mediator, 0)
adj_result = adjusted_treated - adjusted_control
print(adj_result)

-0.10511006573608273


ATE = (0.6 + 0.4) - (0.2 + 0.5) = 0.3

$\widehat{ATE} = E[O \mid do(T=1)] - E[O \mid do(T=0)]$

In [26]:
# The mediator is itself caused by the treatment, so the unadjusted difference
# (not the mediator-adjusted one) is reported as the ATE estimate.
print(condition_prob_diff)

0.29470156607738807


# PREDICTOR

## Data Simulation

In [27]:
# Predictor setting: the predictor is drawn independently of treatment and
# only enters the outcome (adding one unit).
seed(1234)
n = 10000

treatment = binomial(1, 0.4, size=n)
predictor = binomial(1, 0.4, size=n)

# Draws are made in the same order as in a single combined expression so the
# seeded random stream is unchanged.
outcome_if_treated = binomial(1, 0.4, size=n)
outcome_if_control = binomial(1, 0.5, size=n)
noise = normal(size=n)
outcome = outcome_if_treated*treatment + outcome_if_control*(1 - treatment) + predictor + noise

## Results

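$E[O \mid T=1] - E[O \mid T=0]$ (the outcome is continuous here, so we compare conditional means rather than probabilities)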

In [28]:
# Unadjusted contrast of mean outcomes between the two treatment groups.
is_treated = treatment == 1
is_control = treatment == 0
condition_prob_diff = mean(outcome[is_treated]) - mean(outcome[is_control])
print(condition_prob_diff)

-0.07386507504710504


adjustment(1)−adjustment(0)

In [29]:
# Adjustment-formula estimate of each interventional mean, stratifying on the predictor.
adjusted_treated = adjustment(treatment, outcome, predictor, 1)
adjusted_control = adjustment(treatment, outcome, predictor, 0)
adj_result = adjusted_treated - adjusted_control
print(adj_result)

-0.08193645550537232


ATE = 0.4 - 0.5 = -0.1

$\widehat{ATE} = E[O \mid do(T=1)] - E[O \mid do(T=0)]$ has two unbiased estimators

In [30]:
# The predictor does not influence treatment, so both the unadjusted and the
# predictor-adjusted differences are reported as ATE estimates.
print(condition_prob_diff)
print(adj_result)

-0.07386507504710504
-0.08193645550537232


# UNOBSERVED CONFOUNDER

## Data Simulation

In [15]:
# Confounded data-generating process: the confounder raises the chance of
# treatment (0.6 vs 0.2) and shifts the outcome by one unit. In this section
# the confounder is treated as unobserved by the analysis.
seed(1234)
n = 10000
confounder = binomial(1, 0.8, size=n)

# Draws are made in the same order as in a single combined expression so the
# seeded random stream is unchanged.
treatment_if_confounded = binomial(1, 0.6, size=n)
treatment_if_unconfounded = binomial(1, 0.2, size=n)
treatment = treatment_if_confounded*confounder + treatment_if_unconfounded*(1 - confounder)

outcome_if_treated = binomial(1, 0.3, size=n)
outcome_if_control = binomial(1, 0.5, size=n)
noise = normal(size=n)
outcome = outcome_if_treated*treatment + outcome_if_control*(1 - treatment) + confounder + noise

-0.07386507504710504
-0.08193645550537232


## Results

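$E[O \mid T=1] - E[O \mid T=0]$ (the outcome is continuous here, so we compare conditional means rather than probabilities)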

In [8]:
# Unadjusted contrast of mean outcomes between the two treatment groups; it is
# the only quantity that can be computed without access to the confounder.
is_treated = treatment == 1
is_control = treatment == 0
condition_prob_diff = mean(outcome[is_treated]) - mean(outcome[is_control])
print(condition_prob_diff)

0.02107032530615749


adjustment(1) − adjustment(0) cannot be calculated, because the confounder it would stratify on is not observed

ATE = 0.3 - 0.5 = -0.2

$\widehat{ATE} = E[O \mid do(T=1)] - E[O \mid do(T=0)]$ should be the result of the adjustment formula, but it cannot be calculated because the confounder is unobserved