In [1]:
## Reference: https://www.youtube.com/watch?v=xekqR10lQNo
## For personal study purposes only

In [2]:
from dowhy import datasets

import pandas as pd
import numpy as np

from causalinference import CausalModel

In [3]:
# Seed NumPy's global RNG so the synthetic dataset -- and every estimate
# computed from it below -- is reproducible under Restart & Run All.
# NOTE(review): assumes dowhy's generator draws from NumPy's global random
# state -- confirm against the installed dowhy version.
SEED = 42
np.random.seed(SEED)

# Parameters of the simulated study.
TRUE_EFFECT = 10      # passed as `beta`; presumably the true treatment effect
NUM_CONFOUNDERS = 4   # number of common causes requested (used below as W0..W3)
NUM_SAMPLES = 10_000

data = datasets.linear_dataset(
    beta=TRUE_EFFECT,
    num_common_causes=NUM_CONFOUNDERS,
    num_samples=NUM_SAMPLES,
    treatment_is_binary=True,
    outcome_is_binary=False,
)

# Build the analysis frame in one chained expression: give the generated
# treatment/outcome columns descriptive names and store the binary
# treatment as integers.
df = (
    data['df']
    .rename(columns={'v0': 'treatment', 'y': 'outcome'})
    .assign(treatment=lambda frame: frame['treatment'].astype(int))
)

In [4]:
# Columns of `df` used as the covariate matrix X.
covariate_columns = ['W0', 'W1', 'W2', 'W3']

# Pull plain NumPy arrays out of the frame before handing them to the model.
outcome_values = df['outcome'].values
treatment_values = df['treatment'].values
covariate_matrix = df[covariate_columns].values

causal = CausalModel(Y=outcome_values, D=treatment_values, X=covariate_matrix)

# Per-group means / standard deviations of the outcome and covariates,
# with raw and normalized differences between treated and controls.
print(causal.summary_stats)


Summary Statistics

                      Controls (N_c=5073)        Treated (N_t=4927)             
       Variable         Mean         S.d.         Mean         S.d.     Raw-diff
--------------------------------------------------------------------------------
              Y       -2.984        2.626       13.151        2.565       16.135

                      Controls (N_c=5073)        Treated (N_t=4927)             
       Variable         Mean         S.d.         Mean         S.d.     Nor-diff
--------------------------------------------------------------------------------
             X0        0.044        0.902        0.960        0.903        1.015
             X1       -1.345        0.906       -0.460        0.888        0.986
             X2       -0.085        0.896        0.817        0.915        0.996
             X3        0.221        1.002        0.284        1.004        0.063



In [5]:
# Baseline matching estimate. `bias_adj=False` is the library default and is
# spelled out here only to contrast with the bias-adjusted run that follows.
causal.est_via_matching(bias_adj=False)

# Table of ATE / ATC / ATT estimates with standard errors and 95% intervals.
print(causal.estimates)


Treatment Effect Estimates: Matching

                     Est.       S.e.          z      P>|z|      [95% Conf. int.]
--------------------------------------------------------------------------------
           ATE     12.085      0.129     93.948      0.000     11.833     12.337
           ATC     12.103      0.180     67.285      0.000     11.750     12.455
           ATT     12.066      0.175     68.905      0.000     11.723     12.410



In [6]:
## Matching again, this time with the estimator's bias adjustment enabled.
## The call stores a fresh set of matching estimates on `causal`, which is
## what gets printed below.

use_bias_adjustment = True
causal.est_via_matching(bias_adj=use_bias_adjustment)

print(causal.estimates)


Treatment Effect Estimates: Matching

                     Est.       S.e.          z      P>|z|      [95% Conf. int.]
--------------------------------------------------------------------------------
           ATE      9.999      0.001  10691.670      0.000      9.998     10.001
           ATC     10.002      0.001   7565.703      0.000      9.999     10.004
           ATT      9.997      0.001   7948.781      0.000      9.995      9.999



  return np.linalg.lstsq(X, Y)[0][1:]  # don't need intercept coef
