In [None]:
# Install a pip package in the current Jupyter kernel
import sys
!{sys.executable} -m pip install causalinference
#!{sys.executable} -m pip install --upgrade pip

In [1]:
from causalinference import CausalModel

In [2]:
from causalinference.utils import random_data

In [3]:
# Draw a synthetic data set: outcome Y, treatment indicator D, covariates X.
# NOTE(review): no random seed is set, so the outputs stored in this notebook
# will differ on a fresh run -- consider seeding before this call.
Y, D, X = random_data()

In [5]:
# Display the outcome array Y (NumPy elides the middle of long arrays).
Y

array([ 3.68701283, -5.2109153 , -0.23568421, ...,  8.68049672,
       -1.79617721, -1.50194103])

In [6]:
# Display the 0/1 treatment-indicator array D.
D

array([1, 0, 0, ..., 1, 0, 0])

In [13]:
# Sum of the 0/1 indicator, i.e. the number of treated units.
D.sum()

2442

In [14]:
# Total number of units in the sample.
len(D)

5000

In [7]:
# Display the covariate matrix X (one row per unit; three columns in this run).
X

array([[ 0.11619304, -0.69909487,  0.66829198],
       [-0.44591012, -1.05486457, -3.08208161],
       [ 0.11038862,  0.22503751, -1.07373702],
       ...,
       [ 0.25157802,  1.52187466,  0.24940663],
       [-0.70794981, -0.59754784,  0.11156497],
       [ 0.57657545, -1.19623804, -0.76249627]])

In [9]:
# Build the causal model from the outcome, treatment indicator and covariates.
causal = CausalModel(Y, D, X)

In [10]:
# Interactive look at the CausalModel API. Exploratory only: the output is
# long, and this cell can be dropped from a finished notebook.
help(causal)

Help on CausalModel in module causalinference.causal object:

class CausalModel(builtins.object)
 |  Class that provides the main tools of Causal Inference.
 |  
 |  Methods defined here:
 |  
 |  __init__(self, Y, D, X)
 |      Initialize self.  See help(type(self)) for accurate signature.
 |  
 |  est_propensity(self, lin='all', qua=None)
 |      Estimates the propensity scores given list of covariates to
 |      include linearly or quadratically.
 |      
 |      The propensity score is the conditional probability of
 |      receiving the treatment given the observed covariates.
 |      Estimation is done via a logistic regression.
 |      
 |      Parameters
 |      ----------
 |      lin: string or list, optional
 |              Column numbers (zero-based) of variables of
 |              the original covariate matrix X to include
 |              linearly. Defaults to the string 'all', which
 |              uses whole covariate matrix.
 |      qua: list, optional
 |              Tu

In [11]:
# Summary statistics on the full sample: per-group mean and S.d. of Y and of
# each covariate, with the Raw-diff / Nor-diff columns comparing the groups.
print(causal.summary_stats)


Summary Statistics

                      Controls (N_c=2558)        Treated (N_t=2442)             
       Variable         Mean         S.d.         Mean         S.d.     Raw-diff
--------------------------------------------------------------------------------
              Y       -0.960        1.719        5.007        3.107        5.966

                      Controls (N_c=2558)        Treated (N_t=2442)             
       Variable         Mean         S.d.         Mean         S.d.     Nor-diff
--------------------------------------------------------------------------------
             X0       -0.300        0.939        0.330        0.940        0.670
             X1       -0.315        0.951        0.334        0.964        0.678
             X2       -0.366        0.947        0.332        0.948        0.736



In [19]:
# Estimate treatment effects by OLS; the result is read back from
# causal.estimates in the next cell.
causal.est_via_ols()

  olscoef = np.linalg.lstsq(Z, Y)[0]


In [20]:
# Show the estimates table (ATE / ATC / ATT rows) produced so far.
print(causal.estimates)


Treatment Effect Estimates: OLS

                     Est.       S.e.          z      P>|z|      [95% Conf. int.]
--------------------------------------------------------------------------------
           ATE      2.977      0.034     88.070      0.000      2.911      3.043
           ATC      2.005      0.039     51.076      0.000      1.928      2.082
           ATT      3.995      0.038    103.955      0.000      3.920      4.070



In [21]:
# Fit the propensity-score model. The coefficient table printed next contains
# an X2*X0 interaction, so this variant presumably chooses which terms to
# include on its own -- confirm against the library documentation.
causal.est_propensity_s()

In [22]:
# Show the coefficient table of the fitted propensity-score model.
print(causal.propensity)


Estimated Parameters of Propensity Score

                    Coef.       S.e.          z      P>|z|      [95% Conf. int.]
--------------------------------------------------------------------------------
     Intercept     -0.032      0.036     -0.898      0.369     -0.102      0.038
            X2      1.030      0.041     25.007      0.000      0.949      1.111
            X1      0.967      0.041     23.845      0.000      0.887      1.046
            X0      0.970      0.041     23.623      0.000      0.889      1.050
         X2*X0      0.077      0.041      1.885      0.059     -0.003      0.157



In [23]:
# Fitted propensity score for each unit, stored under the "fitted" key.
causal.propensity["fitted"]

array([0.52480921, 0.0104286 , 0.30524781, ..., 0.87485764, 0.23375874,
       0.19029035])

In [12]:
# Not run: plain propensity fit with the defaults (lin='all', qua=None).
# causal.est_propensity()

In [24]:
# Trim the sample. Judging by the outputs that follow, this sets causal.cutoff
# and drops units in place (5000 -> 4011 in this run), so re-running this cell
# on an already-trimmed model may not be idempotent -- TODO confirm.
causal.trim_s()

In [25]:
# Cutoff value in effect after the trimming step above.
causal.cutoff

0.09889325356871048

In [26]:
# Re-print the summary statistics, now on the trimmed sample, to compare
# covariate balance against the earlier full-sample table.
print(causal.summary_stats)


Summary Statistics

                      Controls (N_c=2063)        Treated (N_t=1948)             
       Variable         Mean         S.d.         Mean         S.d.     Raw-diff
--------------------------------------------------------------------------------
              Y       -0.515        1.429        4.071        2.278        4.587

                      Controls (N_c=2063)        Treated (N_t=1948)             
       Variable         Mean         S.d.         Mean         S.d.     Nor-diff
--------------------------------------------------------------------------------
             X0       -0.153        0.883        0.186        0.890        0.382
             X1       -0.170        0.892        0.183        0.896        0.395
             X2       -0.212        0.882        0.161        0.871        0.426



In [27]:
# Split the (trimmed) sample into propensity-score strata; the table printed
# next shows nine strata for this run.
causal.stratify_s()

In [29]:
# Show each stratum's propensity range, group sizes, average propensity and
# raw outcome difference.
print(causal.strata)


Stratification Summary

              Propensity Score         Sample Size     Ave. Propensity   Outcome
   Stratum      Min.      Max.  Controls   Treated  Controls   Treated  Raw-diff
--------------------------------------------------------------------------------
         1     0.099     0.180       429        74     0.138     0.142     1.253
         2     0.180     0.227       205        46     0.204     0.204     1.796
         3     0.227     0.276       179        71     0.252     0.254     2.070
         4     0.277     0.372       327       174     0.321     0.326     2.154
         5     0.372     0.482       302       199     0.425     0.429     2.796
         6     0.482     0.595       241       261     0.534     0.539     3.234
         7     0.595     0.699       165       336     0.642     0.646     3.675
         8     0.699     0.796       136       365     0.744     0.749     4.291
         9     0.797     0.901        79       422     0.845     0.850     4.651



In [31]:
# Run the OLS estimator separately inside every propensity-score stratum.
for block in causal.strata:
    block.est_via_ols(adj=1)

  olscoef = np.linalg.lstsq(Z, Y)[0]


In [32]:
# Estimate treatment effects via blocking; the result shows up under
# "Blocking" when causal.estimates is printed.
causal.est_via_blocking()

  olscoef = np.linalg.lstsq(Z, Y)[0]


In [34]:
# Show the estimates collected so far (Blocking at this point).
print(causal.estimates)


Treatment Effect Estimates: Blocking

                     Est.       S.e.          z      P>|z|      [95% Conf. int.]
--------------------------------------------------------------------------------
           ATE      2.962      0.037     80.550      0.000      2.890      3.034
           ATC      2.471      0.042     58.361      0.000      2.388      2.553
           ATT      3.482      0.040     86.123      0.000      3.402      3.561



In [35]:
# Estimate treatment effects via matching; the result is added alongside the
# existing entries in causal.estimates.
causal.est_via_matching()

In [36]:
# Show the estimates collected so far (Blocking and Matching).
print(causal.estimates)


Treatment Effect Estimates: Blocking

                     Est.       S.e.          z      P>|z|      [95% Conf. int.]
--------------------------------------------------------------------------------
           ATE      2.962      0.037     80.550      0.000      2.890      3.034
           ATC      2.471      0.042     58.361      0.000      2.388      2.553
           ATT      3.482      0.040     86.123      0.000      3.402      3.561

Treatment Effect Estimates: Matching

                     Est.       S.e.          z      P>|z|      [95% Conf. int.]
--------------------------------------------------------------------------------
           ATE      3.042      0.070     43.732      0.000      2.906      3.178
           ATC      2.569      0.082     31.423      0.000      2.409      2.730
           ATT      3.543      0.082     43.092      0.000      3.381      3.704



In [37]:
# Estimate treatment effects via weighting; the result is added alongside the
# existing entries in causal.estimates.
causal.est_via_weighting()

  wlscoef = np.linalg.lstsq(Z_w, Y_w)[0]


In [38]:
# Show all estimates side by side (Blocking, Matching and Weighting).
print(causal.estimates)


Treatment Effect Estimates: Blocking

                     Est.       S.e.          z      P>|z|      [95% Conf. int.]
--------------------------------------------------------------------------------
           ATE      2.962      0.037     80.550      0.000      2.890      3.034
           ATC      2.471      0.042     58.361      0.000      2.388      2.553
           ATT      3.482      0.040     86.123      0.000      3.402      3.561

Treatment Effect Estimates: Matching

                     Est.       S.e.          z      P>|z|      [95% Conf. int.]
--------------------------------------------------------------------------------
           ATE      3.042      0.070     43.732      0.000      2.906      3.178
           ATC      2.569      0.082     31.423      0.000      2.409      2.730
           ATT      3.543      0.082     43.092      0.000      3.381      3.704

Treatment Effect Estimates: Weighting

                     Est.       S.e.          z      P>|z|      [95% Con