added IPW ATE (#46)
* added IPW ATE

* working WLS regression

* added tests
Adam Kelleher committed Nov 4, 2017
1 parent 282ea28 commit 8e3222f
Showing 4 changed files with 136 additions and 24 deletions.
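In brief, the commit adds an InverseProbabilityWeightedLS estimator: it fits a logistic propensity score model, turns the fitted scores into inverse-probability weights for the chosen estimand (ATE, ATT, or ATC), and runs a weighted least squares regression of the outcome on the treatment plus confounders. A minimal usage sketch, modeled on the new unit test below (the toy data and variable names are illustrative, not part of the commit):

import numpy as np
import pandas as pd
from causality.estimation.parametric import InverseProbabilityWeightedLS

# Toy data: z confounds both treatment assignment and the outcome.
N = 2000
z = np.random.normal(size=N)
d = np.random.binomial(1, p=1. / (1. + np.exp(-z)))
y = 2. * d * (1. + z) + np.random.normal(size=N)
X = pd.DataFrame({'d': d, 'z': z, 'y': y})

ipw = InverseProbabilityWeightedLS()
# Returns (lower confidence bound, point estimate, upper confidence bound).
lower, ate, upper = ipw.estimate_ATE(X, 'd', 'y', confounder_types={'z': 'c'})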
6 changes: 0 additions & 6 deletions .idea/vcs.xml

This file was deleted.

104 changes: 92 additions & 12 deletions causality/estimation/parametric.py
@@ -1,5 +1,5 @@
import pandas as pd
-from statsmodels.regression.linear_model import OLS
+from statsmodels.regression.linear_model import OLS, WLS
from statsmodels.robust.robust_linear_model import RLM
from statsmodels.discrete.discrete_model import Logit
from sklearn.neighbors import NearestNeighbors
@@ -34,18 +34,18 @@ def average_treatment_effect(self, X, start='Start', end='End', assignment='assignment'):
        control_final = control[end]
        del test, control

        df = pd.DataFrame({'y' : test_initial,
                           assignment : [1. for i in test_initial],
                           't' :[0. for i in test_initial] })
        df = df.append(pd.DataFrame({'y' : test_final,
                                     assignment : [1. for i in test_final],
                                     't' :[1. for i in test_final] }))

        df = df.append(pd.DataFrame({'y' : control_initial,
                                     assignment : [0. for i in control_initial],
                                     't' :[0. for i in control_initial] }))

        df = df.append(pd.DataFrame({'y' : control_final,
                                     assignment : [0. for i in control_final],
                                     't' :[1. for i in control_final] }))
        del test_initial, test_final, control_initial, control_final
@@ -57,7 +57,7 @@ def average_treatment_effect(self, X, start='Start', end='End', assignment='assignment'):
        conf_int = result.conf_int().ix['did']
        expected = result.params['did']
        return conf_int[0], expected, conf_int[1]

    def test_parallel_trend(self, X, start='Start', end='End', assignment='assignment'):
        """
        This will find the average treatment effect on
@@ -66,7 +66,7 @@ def test_parallel_trend(self, X, start='Start', end='End', assignment='assignment'):
        that the average treatment effect between the test
        and control groups when neither is treated is 0.
        The format for this dataset is the same as that
        for the real estimation task, except that the start
        time is some time before the experiment is run, and
        the end time is the starting point for the experiment.
@@ -76,14 +76,12 @@
            return True
        return False


-class PropensityScoreMatching(object):
+class PropensityScoringModel(object):
    def __init__(self):
        # change the model if there are multiple matches per treated!
        self.propensity_score_model = None


-    def score(self, X, confounder_types, assignment='assignment', store_model_fit=False, intercept=True):
+    def score(self, X, confounder_types, assignment='assignment', store_model_fit=False, intercept=True, propensity_score_name='propensity score'):
"""
Fit a propensity score model using the data in X and the confounders listed in confounder_types. This adds
the propensity scores to the dataframe, and returns the new dataframe.
@@ -120,9 +118,14 @@ def score(self, X, confounder_types, assignment='assignment', store_model_fit=False, intercept=True, propensity_score_name='propensity score'):
        model = logit.fit()
        if store_model_fit:
            self.propensity_score_model = model
-        X.loc[:, 'propensity score'] = model.predict(df[regression_confounders])
+        X.loc[:, propensity_score_name] = model.predict(df[regression_confounders])
        return X

class PropensityScoreMatching(PropensityScoringModel):
    def __init__(self):
        # change the model if there are multiple matches per treated!
        self.propensity_score_model = None

    def match(self, X, assignment='assignment', score='propensity score', n_neighbors=2, treated_value=1,
              control_value=0, match_to='treated'):
        """
@@ -376,3 +379,80 @@ def check_support(self, X, assignment, confounder_types=None):
        pp.show()


class InverseProbabilityWeightedLS(PropensityScoringModel):
    def __init__(self):
        self.propensity_score_model = None
        self.wls_model = None

    def estimate_effect(self, X, assignment, outcome, confounder_types, propensity_score_name='propensity score',
                        additional_weight_column=None, weight_name='weights', ols_intercept=True, effect='ATE'):
        X = self.compute_weights(X,
                                 assignment,
                                 outcome,
                                 confounder_types,
                                 propensity_score_name=propensity_score_name,
                                 additional_weight_column=additional_weight_column,
                                 weight_name=weight_name,
                                 effect=effect)
        self.fit_WLS(X, assignment, outcome, confounder_types, weight_name=weight_name, intercept=ols_intercept)
        # Return (lower confidence bound, point estimate, upper confidence bound) for the treatment coefficient.
        conf_int = self.wls_model.conf_int().transpose()[assignment]
        return conf_int[0], self.wls_model.params[assignment], conf_int[1]

    def estimate_ATE(self, X, assignment, outcome, confounder_types, propensity_score_name='propensity score',
                     additional_weight_column=None, weight_name='weights', ols_intercept=True):
        # Forward the caller's arguments instead of re-specifying the defaults, so overrides take effect.
        return self.estimate_effect(X, assignment, outcome, confounder_types,
                                    propensity_score_name=propensity_score_name,
                                    additional_weight_column=additional_weight_column,
                                    weight_name=weight_name, ols_intercept=ols_intercept, effect='ATE')

    def estimate_ATC(self, X, assignment, outcome, confounder_types, propensity_score_name='propensity score',
                     additional_weight_column=None, weight_name='weights', ols_intercept=True):
        return self.estimate_effect(X, assignment, outcome, confounder_types,
                                    propensity_score_name=propensity_score_name,
                                    additional_weight_column=additional_weight_column,
                                    weight_name=weight_name, ols_intercept=ols_intercept, effect='ATC')

    def estimate_ATT(self, X, assignment, outcome, confounder_types, propensity_score_name='propensity score',
                     additional_weight_column=None, weight_name='weights', ols_intercept=True):
        return self.estimate_effect(X, assignment, outcome, confounder_types,
                                    propensity_score_name=propensity_score_name,
                                    additional_weight_column=additional_weight_column,
                                    weight_name=weight_name, ols_intercept=ols_intercept, effect='ATT')

    def compute_weights(self, X, assignment, outcome, confounder_types, propensity_score_name='propensity score',
                        additional_weight_column=None, weight_name='weights', effect='ATE'):
        X = self.score(X,
                       confounder_types,
                       assignment=assignment,
                       store_model_fit=True,
                       intercept=True,
                       propensity_score_name=propensity_score_name)
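        # Weighting logic (notation added for readability, not from the commit): with fitted
        # propensity score e = P(T=1|Z) and treatment indicator T, the branches below compute
        #   ATE: w = T/e + (1-T)/(1-e)
        #   ATC: w = T*(1-e)/e + (1-T)
        #   ATT: w = T + (1-T)*e/(1-e)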
        if effect == 'ATE':
            X.loc[:, weight_name] = (X[assignment] == 1) / X[propensity_score_name] + (X[assignment] == 0) / (1. - X[propensity_score_name])
        elif effect == 'ATC':
            X.loc[:, weight_name] = (X[assignment] == 1) * (1. - X[propensity_score_name]) / X[propensity_score_name] + (X[assignment] == 0) * 1.
        elif effect == 'ATT':
            X.loc[:, weight_name] = (X[assignment] == 1) * 1. + (X[assignment] == 0) * X[propensity_score_name] / (1. - X[propensity_score_name])
        else:
            raise ValueError('Effect {} not recognized'.format(effect))

        if additional_weight_column:
            X.loc[:, weight_name] = X[weight_name] * X[additional_weight_column]
        return X

    def fit_WLS(self, X, assignment, outcome, confounder_types, weight_name='weights', intercept=True):
        df = X[[assignment, outcome]].copy()
        regression_confounders = []
        for confounder, var_type in confounder_types.items():
            if var_type == 'o' or var_type == 'u':
                # Ordinal or unordered categorical confounders enter as dummy variables,
                # dropping one level to avoid collinearity when there is more than one.
                c_dummies = pd.get_dummies(X[[confounder]], prefix=confounder)
                if len(c_dummies.columns) == 1:
                    df = pd.concat([df, c_dummies[c_dummies.columns]], axis=1)
                    regression_confounders.extend(c_dummies.columns)
                else:
                    df = pd.concat([df, c_dummies[c_dummies.columns[1:]]], axis=1)
                    regression_confounders.extend(c_dummies.columns[1:])
            else:
                regression_confounders.append(confounder)
                df.loc[:, confounder] = X[confounder].copy()
        if intercept:
            df.loc[:, 'intercept'] = 1.
            regression_confounders.append('intercept')
        model = WLS(df[outcome], df[[assignment] + regression_confounders], weights=X[weight_name])
        result = model.fit()
        self.wls_model = result
        return result
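Design note (editorial, not part of the commit): with ATE weights and no covariates in the design, the WLS point estimate is exactly the inverse-probability-weighted difference of group means; the regression form just adds confounders and a convenient confidence interval. A self-contained sketch, using the true propensity score rather than a fitted one:

import numpy as np
import pandas as pd
from statsmodels.regression.linear_model import WLS

np.random.seed(0)
N = 5000
z = np.random.normal(size=N)
e = 1. / (1. + np.exp(-z))                 # true propensity score
d = np.random.binomial(1, e)
y = 2. * d + z + np.random.normal(size=N)  # true ATE is 2

w = d / e + (1 - d) / (1 - e)              # ATE weights

# Inverse-probability-weighted difference of means...
ht = np.average(y[d == 1], weights=w[d == 1]) - np.average(y[d == 0], weights=w[d == 0])

# ...equals the WLS coefficient on treatment when the design is just (d, intercept).
design = pd.DataFrame({'d': d, 'intercept': 1.})
wls = WLS(y, design[['d', 'intercept']], weights=w).fit()
print(ht, wls.params['d'])                 # the two point estimates coincide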
2 changes: 1 addition & 1 deletion setup.py
@@ -9,7 +9,7 @@
setup(
    name='causality',

-    version='0.0.4',
+    version='0.0.5',

    description='Tools for causal analysis',
    long_description=long_description,
48 changes: 43 additions & 5 deletions tests/unit/parametric.py
@@ -1,11 +1,13 @@
import pandas as pd
import numpy as np

-from causality.estimation.parametric import DifferenceInDifferences, PropensityScoreMatching
+from causality.estimation.parametric import (DifferenceInDifferences,
+                                             PropensityScoreMatching,
+                                             InverseProbabilityWeightedLS)
from tests.unit import TestAPI


class TestDID(TestAPI):
    def setUp(self):
        SIZE = 2000
        assignment = np.random.binomial(1, 0.5, size=SIZE)
@@ -26,11 +28,11 @@ def test_did_estimator(self):
        lower, exp, upper = self.did.average_treatment_effect(self.X)
        assert 1.8 <= exp <= 2.2
        assert lower <= exp <= upper

        self.did = DifferenceInDifferences(robust=True)
        lower, exp, upper = self.did.average_treatment_effect(self.X)
        assert 1.8 <= exp <= 2.2
        assert lower <= exp <= upper


class TestPropScore(TestAPI):
@@ -78,3 +80,39 @@ def test_at_estimators(self):
        matcher = PropensityScoreMatching()
        ATE = matcher.estimate_ATE(X, 'd', 'y', {'z1': 'c', 'z2': 'c', 'z3': 'c'})
        assert 0.9 <= ATE <= 1.1

class TestIPW(TestAPI):
    def test_estimators(self):
        N = 2000
        z = np.random.normal(size=N)
        d = np.random.binomial(1, p=1. / (1. + np.exp(-z)))
        y0 = np.random.normal(size=N)
        y1 = y0 + 2. * (1 + z)
        y = (d == 1) * y1 + (d == 0) * y0
        X = pd.DataFrame({'d': d, 'z': z, 'y': y, 'y0': y0, 'y1': y1})
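        # Note on the simulated data (editorial comment, not in the commit): treatment
        # probability is logistic in z and the unit-level effect is 2*(1 + z), so the true
        # ATE is 2; treated units tend to have larger z, so the true ATT exceeds 2 and the
        # true ATC falls below it, which is what the per-estimand assertions below check.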

        assignment = 'd'
        confounder_types = {'z': 'c'}
        outcome = 'y'
        ipw_model = InverseProbabilityWeightedLS()
        atc_lower, atc_exp, atc_upper = ipw_model.estimate_ATC(X,
                                                               assignment,
                                                               outcome,
                                                               confounder_types,
                                                               propensity_score_name='propensity score')
        assert 0.9 * atc_lower <= (X[X['d'] == 0]['y1'] - X[X['d'] == 0]['y0']).mean() <= 1.1 * atc_upper

        att_lower, att_exp, att_upper = ipw_model.estimate_ATT(X,
                                                               assignment,
                                                               outcome,
                                                               confounder_types,
                                                               propensity_score_name='propensity score')
        assert 0.9 * att_lower <= (X[X['d'] == 1]['y1'] - X[X['d'] == 1]['y0']).mean() <= 1.1 * att_upper

        ate_lower, ate_exp, ate_upper = ipw_model.estimate_ATE(X,
                                                               assignment,
                                                               outcome,
                                                               confounder_types,
                                                               propensity_score_name='propensity score')
        assert 0.9 * ate_lower <= (X['y1'] - X['y0']).mean() <= 1.1 * ate_upper
