added IPW ATE (#46)
* added IPW ATE

* working WLS regression

* added tests
Adam Kelleher committed Nov 4, 2017
1 parent 282ea28 commit 8e3222f
Showing 4 changed files with 136 additions and 24 deletions.
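In brief, the commit adds an InverseProbabilityWeightedLS estimator: it fits a logistic propensity score model, turns the fitted scores into inverse-probability weights for the chosen estimand (ATE, ATT, or ATC), and runs a weighted least squares regression of the outcome on the treatment plus confounders. A minimal usage sketch, modeled on the new unit test below (the toy data and variable names are illustrative, not part of the commit):

import numpy as np
import pandas as pd
from causality.estimation.parametric import InverseProbabilityWeightedLS

# Toy data: z confounds both treatment assignment and the outcome.
N = 2000
z = np.random.normal(size=N)
d = np.random.binomial(1, p=1. / (1. + np.exp(-z)))
y = 2. * d * (1. + z) + np.random.normal(size=N)
X = pd.DataFrame({'d': d, 'z': z, 'y': y})

ipw = InverseProbabilityWeightedLS()
# Returns (lower confidence bound, point estimate, upper confidence bound).
lower, ate, upper = ipw.estimate_ATE(X, 'd', 'y', confounder_types={'z': 'c'})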
6 changes: 0 additions & 6 deletions .idea/vcs.xml

This file was deleted.

104 changes: 92 additions & 12 deletions causality/estimation/parametric.py
@@ -1,5 +1,5 @@
import pandas as pd
-from statsmodels.regression.linear_model import OLS
+from statsmodels.regression.linear_model import OLS, WLS
from statsmodels.robust.robust_linear_model import RLM
from statsmodels.discrete.discrete_model import Logit
from sklearn.neighbors import NearestNeighbors
@@ -34,18 +34,18 @@ def average_treatment_effect(self, X, start='Start', end='End', assignment='assignment'):
        control_final = control[end]
        del test, control

        df = pd.DataFrame({'y' : test_initial,
                           assignment : [1. for i in test_initial],
                           't' :[0. for i in test_initial] })
        df = df.append(pd.DataFrame({'y' : test_final,
                                     assignment : [1. for i in test_final],
                                     't' :[1. for i in test_final] }))

        df = df.append(pd.DataFrame({'y' : control_initial,
                                     assignment : [0. for i in control_initial],
                                     't' :[0. for i in control_initial] }))

        df = df.append(pd.DataFrame({'y' : control_final,
                                     assignment : [0. for i in control_final],
                                     't' :[1. for i in control_final] }))
        del test_initial, test_final, control_initial, control_final
@@ -57,7 +57,7 @@ def average_treatment_effect(self, X, start='Start', end='End', assignment='assignment'):
        conf_int = result.conf_int().ix['did']
        expected = result.params['did']
        return conf_int[0], expected, conf_int[1]

    def test_parallel_trend(self, X, start='Start', end='End', assignment='assignment'):
        """
        This will find the average treatment effect on
@@ -66,7 +66,7 @@ def test_parallel_trend(self, X, start='Start', end='End', assignment='assignment'):
        that the average treatment effect between the test
        and control groups when neither is treated is 0.
        The format for this dataset is the same as that
        for the real estimation task, except that the start
        time is some time before the experiment is run, and
        the end time is the starting point for the experiment.
@@ -76,14 +76,12 @@
            return True
        return False


-class PropensityScoreMatching(object):
+class PropensityScoringModel(object):
    def __init__(self):
        # change the model if there are multiple matches per treated!
        self.propensity_score_model = None


-    def score(self, X, confounder_types, assignment='assignment', store_model_fit=False, intercept=True):
+    def score(self, X, confounder_types, assignment='assignment', store_model_fit=False, intercept=True, propensity_score_name='propensity score'):
"""
Fit a propensity score model using the data in X and the confounders listed in confounder_types. This adds
the propensity scores to the dataframe, and returns the new dataframe.
@@ -120,9 +118,14 @@ def score(self, X, confounder_types, assignment='assignment', store_model_fit=False, intercept=True, propensity_score_name='propensity score'):
        model = logit.fit()
        if store_model_fit:
            self.propensity_score_model = model
-        X.loc[:, 'propensity score'] = model.predict(df[regression_confounders])
+        X.loc[:, propensity_score_name] = model.predict(df[regression_confounders])
        return X

class PropensityScoreMatching(PropensityScoringModel):
    def __init__(self):
        # change the model if there are multiple matches per treated!
        self.propensity_score_model = None

    def match(self, X, assignment='assignment', score='propensity score', n_neighbors=2, treated_value=1,
              control_value=0, match_to='treated'):
        """
@@ -376,3 +379,80 @@ def check_support(self, X, assignment, confounder_types=None):
        pp.show()


class InverseProbabilityWeightedLS(PropensityScoringModel):
    def __init__(self):
        self.propensity_score_model = None
        self.wls_model = None

    def estimate_effect(self, X, assignment, outcome, confounder_types, propensity_score_name='propensity score',
                        additional_weight_column=None, weight_name='weights', ols_intercept=True, effect='ATE'):
        X = self.compute_weights(X,
                                 assignment,
                                 outcome,
                                 confounder_types,
                                 propensity_score_name=propensity_score_name,
                                 additional_weight_column=additional_weight_column,
                                 weight_name=weight_name,
                                 effect=effect)
        self.fit_WLS(X, assignment, outcome, confounder_types, weight_name=weight_name, intercept=ols_intercept)
        # Return (lower confidence bound, point estimate, upper confidence bound) for the treatment coefficient.
        conf_int = self.wls_model.conf_int().transpose()[assignment]
        return conf_int[0], self.wls_model.params[assignment], conf_int[1]

    def estimate_ATE(self, X, assignment, outcome, confounder_types, propensity_score_name='propensity score',
                     additional_weight_column=None, weight_name='weights', ols_intercept=True):
        # Forward the caller's arguments instead of re-specifying the defaults, so overrides take effect.
        return self.estimate_effect(X, assignment, outcome, confounder_types,
                                    propensity_score_name=propensity_score_name,
                                    additional_weight_column=additional_weight_column,
                                    weight_name=weight_name, ols_intercept=ols_intercept, effect='ATE')

    def estimate_ATC(self, X, assignment, outcome, confounder_types, propensity_score_name='propensity score',
                     additional_weight_column=None, weight_name='weights', ols_intercept=True):
        return self.estimate_effect(X, assignment, outcome, confounder_types,
                                    propensity_score_name=propensity_score_name,
                                    additional_weight_column=additional_weight_column,
                                    weight_name=weight_name, ols_intercept=ols_intercept, effect='ATC')

    def estimate_ATT(self, X, assignment, outcome, confounder_types, propensity_score_name='propensity score',
                     additional_weight_column=None, weight_name='weights', ols_intercept=True):
        return self.estimate_effect(X, assignment, outcome, confounder_types,
                                    propensity_score_name=propensity_score_name,
                                    additional_weight_column=additional_weight_column,
                                    weight_name=weight_name, ols_intercept=ols_intercept, effect='ATT')

    def compute_weights(self, X, assignment, outcome, confounder_types, propensity_score_name='propensity score',
                        additional_weight_column=None, weight_name='weights', effect='ATE'):
        X = self.score(X,
                       confounder_types,
                       assignment=assignment,
                       store_model_fit=True,
                       intercept=True,
                       propensity_score_name=propensity_score_name)
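        # Weighting logic (notation added for readability, not from the commit): with fitted
        # propensity score e = P(T=1|Z) and treatment indicator T, the branches below compute
        #   ATE: w = T/e + (1-T)/(1-e)
        #   ATC: w = T*(1-e)/e + (1-T)
        #   ATT: w = T + (1-T)*e/(1-e)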
        if effect == 'ATE':
            X.loc[:, weight_name] = (X[assignment] == 1) / X[propensity_score_name] + (X[assignment] == 0) / (1. - X[propensity_score_name])
        elif effect == 'ATC':
            X.loc[:, weight_name] = (X[assignment] == 1) * (1. - X[propensity_score_name]) / X[propensity_score_name] + (X[assignment] == 0) * 1.
        elif effect == 'ATT':
            X.loc[:, weight_name] = (X[assignment] == 1) * 1. + (X[assignment] == 0) * X[propensity_score_name] / (1. - X[propensity_score_name])
        else:
            raise ValueError('Effect {} not recognized'.format(effect))

        if additional_weight_column:
            X.loc[:, weight_name] = X[weight_name] * X[additional_weight_column]
        return X

    def fit_WLS(self, X, assignment, outcome, confounder_types, weight_name='weights', intercept=True):
        df = X[[assignment, outcome]].copy()
        regression_confounders = []
        for confounder, var_type in confounder_types.items():
            if var_type == 'o' or var_type == 'u':
                # Ordinal or unordered categorical confounders enter as dummy variables,
                # dropping one level to avoid collinearity when there is more than one.
                c_dummies = pd.get_dummies(X[[confounder]], prefix=confounder)
                if len(c_dummies.columns) == 1:
                    df = pd.concat([df, c_dummies[c_dummies.columns]], axis=1)
                    regression_confounders.extend(c_dummies.columns)
                else:
                    df = pd.concat([df, c_dummies[c_dummies.columns[1:]]], axis=1)
                    regression_confounders.extend(c_dummies.columns[1:])
            else:
                regression_confounders.append(confounder)
                df.loc[:, confounder] = X[confounder].copy()
        if intercept:
            df.loc[:, 'intercept'] = 1.
            regression_confounders.append('intercept')
        model = WLS(df[outcome], df[[assignment] + regression_confounders], weights=X[weight_name])
        result = model.fit()
        self.wls_model = result
        return result
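Design note (editorial, not part of the commit): with ATE weights and no covariates in the design, the WLS point estimate is exactly the inverse-probability-weighted difference of group means; the regression form just adds confounders and a convenient confidence interval. A self-contained sketch, using the true propensity score rather than a fitted one:

import numpy as np
import pandas as pd
from statsmodels.regression.linear_model import WLS

np.random.seed(0)
N = 5000
z = np.random.normal(size=N)
e = 1. / (1. + np.exp(-z))                 # true propensity score
d = np.random.binomial(1, e)
y = 2. * d + z + np.random.normal(size=N)  # true ATE is 2

w = d / e + (1 - d) / (1 - e)              # ATE weights

# Inverse-probability-weighted difference of means...
ht = np.average(y[d == 1], weights=w[d == 1]) - np.average(y[d == 0], weights=w[d == 0])

# ...equals the WLS coefficient on treatment when the design is just (d, intercept).
design = pd.DataFrame({'d': d, 'intercept': 1.})
wls = WLS(y, design[['d', 'intercept']], weights=w).fit()
print(ht, wls.params['d'])                 # the two point estimates coincide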
2 changes: 1 addition & 1 deletion setup.py
@@ -9,7 +9,7 @@
setup(
    name='causality',

-    version='0.0.4',
+    version='0.0.5',

    description='Tools for causal analysis',
    long_description=long_description,
48 changes: 43 additions & 5 deletions tests/unit/parametric.py
@@ -1,11 +1,13 @@
import pandas as pd
import numpy as np

-from causality.estimation.parametric import DifferenceInDifferences, PropensityScoreMatching
+from causality.estimation.parametric import (DifferenceInDifferences,
+                                             PropensityScoreMatching,
+                                             InverseProbabilityWeightedLS)
from tests.unit import TestAPI


class TestDID(TestAPI):
    def setUp(self):
        SIZE = 2000
        assignment = np.random.binomial(1, 0.5, size=SIZE)
@@ -26,11 +28,11 @@ def test_did_estimator(self):
        lower, exp, upper = self.did.average_treatment_effect(self.X)
        assert 1.8 <= exp <= 2.2
        assert lower <= exp <= upper

        self.did = DifferenceInDifferences(robust=True)
        lower, exp, upper = self.did.average_treatment_effect(self.X)
        assert 1.8 <= exp <= 2.2
        assert lower <= exp <= upper


class TestPropScore(TestAPI):
@@ -78,3 +80,39 @@ def test_at_estimators(self):
        matcher = PropensityScoreMatching()
        ATE = matcher.estimate_ATE(X, 'd', 'y', {'z1': 'c', 'z2': 'c', 'z3': 'c'})
        assert 0.9 <= ATE <= 1.1

class TestIPW(TestAPI):
    def test_estimators(self):
        N = 2000
        z = np.random.normal(size=N)
        d = np.random.binomial(1, p=1. / (1. + np.exp(-z)))
        y0 = np.random.normal(size=N)
        y1 = y0 + 2. * (1 + z)
        y = (d == 1) * y1 + (d == 0) * y0
        X = pd.DataFrame({'d': d, 'z': z, 'y': y, 'y0': y0, 'y1': y1})
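        # Note on the simulated data (editorial comment, not in the commit): treatment
        # probability is logistic in z and the unit-level effect is 2*(1 + z), so the true
        # ATE is 2; treated units tend to have larger z, so the true ATT exceeds 2 and the
        # true ATC falls below it, which is what the per-estimand assertions below check.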

        assignment = 'd'
        confounder_types = {'z': 'c'}
        outcome = 'y'
        ipw_model = InverseProbabilityWeightedLS()
        atc_lower, atc_exp, atc_upper = ipw_model.estimate_ATC(X,
                                                               assignment,
                                                               outcome,
                                                               confounder_types,
                                                               propensity_score_name='propensity score')
        assert 0.9 * atc_lower <= (X[X['d'] == 0]['y1'] - X[X['d'] == 0]['y0']).mean() <= 1.1 * atc_upper

        att_lower, att_exp, att_upper = ipw_model.estimate_ATT(X,
                                                               assignment,
                                                               outcome,
                                                               confounder_types,
                                                               propensity_score_name='propensity score')
        assert 0.9 * att_lower <= (X[X['d'] == 1]['y1'] - X[X['d'] == 1]['y0']).mean() <= 1.1 * att_upper

        ate_lower, ate_exp, ate_upper = ipw_model.estimate_ATE(X,
                                                               assignment,
                                                               outcome,
                                                               confounder_types,
                                                               propensity_score_name='propensity score')
        assert 0.9 * ate_lower <= (X['y1'] - X['y0']).mean() <= 1.1 * ate_upper
