In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# import seaborn as sb
from sklearn.linear_model import LogisticRegression

from causallib.datasets import load_nhefs
from causallib.estimation import IPW, Matching, PropensityMatching
from causallib.evaluation import PropensityEvaluator
from causallib.evaluation.weight_evaluator import calculate_covariate_balance
from causallib.preprocessing.transformers import PropensityTransformer, MatchingTransformer
%matplotlib inline

In [2]:
def binarize(df, column_name):
    """Return a new frame in which ``column_name`` is a 0/1 indicator.

    The cut-off is chosen among the column median and its two neighbours
    (median - 1, median, median + 1): the one that leaves the share of rows
    strictly below it closest to one half wins, with ties going to the
    earliest candidate in that order. The column is overwritten with 1 where
    the value is strictly below the cut-off and 0 otherwise, and is renamed
    to ``column_name + "<" + cut-off`` (e.g. ``"education<3.0"``).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing ``column_name``; it is not modified.
    column_name : str
        Name of the column to binarize.

    Returns
    -------
    pandas.DataFrame
        New frame with the same column order and the binarized, renamed column.
    """
    values = df[column_name]
    n_rows = len(df)
    median = values.median()

    def imbalance(threshold):
        # Distance of the "strictly below threshold" share from a 50/50 split.
        share_below = (values < threshold).sum() / n_rows
        return np.abs(0.5 - share_below)

    candidates = [median - 1, median, median + 1]
    mstar = min(candidates, key=imbalance)

    indicator = (values < mstar).astype(int)
    renamed = column_name + f"<{mstar}"
    # assign() builds a new frame, so the caller's frame is left untouched.
    return df.assign(**{column_name: indicator}).rename(columns={column_name: renamed})


def get_matching_data():
    """Load NHEFS with ``onehot=False, augment=False`` and binarize three covariates.

    The ``education``, ``exercise`` and ``active`` columns of ``data.X`` are
    each passed through ``binarize`` in that order, and the loaded data object
    is returned with the updated ``X``.
    """
    data = load_nhefs(onehot=False, augment=False)
    for covariate in ("education", "exercise", "active"):
        data.X = binarize(data.X, covariate)
    return data


# Build the binarized dataset once and expose its covariates (X),
# treatment assignment (a) and outcome (y) under short names.
binarized_data = get_matching_data()
X = binarized_data.X
a = binarized_data.a
y = binarized_data.y

In [3]:
# Preview the first rows of covariates, treatment and outcome side by side.
(
    binarized_data.X
    .join(binarized_data.a)
    .join(binarized_data.y)
    .head()
)

Unnamed: 0,active<1.0,age,education<3.0,exercise<2.0,race,sex,smokeintensity,smokeyrs,wt71,qsmk,wt82_71
0,1,42,1,0,1,0,30,29,79.04,0,-10.09396
1,1,36,1,1,0,0,20,24,58.63,0,2.60497
2,1,56,1,0,1,1,20,26,56.81,0,9.414486
3,0,68,1,0,1,0,3,53,59.42,0,4.990117
4,0,40,1,1,0,0,20,19,87.09,0,4.989251


In [4]:
# Euclidean-metric matching: fit, then estimate the outcome columns per sample.
m_euclid = Matching(metric="euclidean").fit(X, a, y)
Y_euclid = m_euclid.estimate_individual_outcome(X, a)

# Same procedure with the Mahalanobis metric, using a separate estimator.
m_mahalanobis = Matching(metric="mahalanobis").fit(X, a, y)
Y_mahalanobis = m_mahalanobis.estimate_individual_outcome(X, a)

In [5]:
# Column means of the Euclidean estimates, plus the mean of their
# per-sample difference (column 1 minus column 0) reported as ATE.
Y_euclid.assign(ATE=lambda outcomes: outcomes[1] - outcomes[0]).mean()

0      1.702105
1      5.562541
ATE    3.860437
dtype: float64

In [6]:
# Column means of the Mahalanobis estimates, plus the mean of their
# per-sample difference (column 1 minus column 0) reported as ATE.
Y_mahalanobis.assign(ATE=lambda outcomes: outcomes[1] - outcomes[0]).mean()

0      1.852420
1      5.084078
ATE    3.231658
dtype: float64

In [7]:
# Side-by-side view of the per-sample estimates from both metrics together
# with the treatment column `a`, for 10 randomly drawn rows.
# A fixed random_state makes the same rows appear on every run, so the
# displayed table is reproducible under Restart & Run All.
Y_euclid.join(Y_mahalanobis, lsuffix="_euclidean",
              rsuffix="_mahalanobis").join(a).sample(10, random_state=0)

Unnamed: 0_level_0,0_euclidean,1_euclidean,0_mahalanobis,1_mahalanobis,qsmk
sample_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1062,7.712741,19.50692,7.712741,3.510894,0
1488,-1.819004,3.629735,6.578474,3.629735,1
413,-10.884546,13.720219,-10.884546,7.488372,0
960,11.680321,17.576245,11.680321,6.578283,0
1582,15.76557,11.003239,15.76557,-4.312685,0
1539,1.359149,10.890219,1.359149,9.982066,0
177,4.99255,0.225072,4.99255,4.64692,0
1064,11.680321,15.985468,-2.037552,15.985468,1
1225,-16.673769,-1.926482,-7.373183,-1.926482,1
1323,-1.929297,-20.751731,-1.929297,-20.751731,0


In [8]:
# Propensity transformer backed by a logistic regression that uses the
# liblinear solver and balanced class weights.
# NOTE(review): include_covariates=False presumably means only the propensity
# score, not the original covariates, is handed on to matching — confirm
# against the causallib documentation.
propensity_transform = PropensityTransformer(
    learner=LogisticRegression(
        solver="liblinear",
        class_weight="balanced"),
    include_covariates=False)

In [9]:
# Reload NHEFS with the loader's default arguments (the earlier cell passed
# onehot=False, augment=False). Note that X, a and y are rebound here, so
# from this cell on they no longer refer to the binarized data.
augmented_data = load_nhefs()
X = augmented_data.X
a = augmented_data.a
y = augmented_data.y

# Matching that goes through the propensity transformer defined earlier.
matcher = Matching(propensity_transform=propensity_transform)
matcher.fit(X, a, y)
matcher.estimate_population_outcome(X, a)

0    1.675577
1    5.320829
dtype: float64

In [10]:
# PropensityMatching built with the same logistic-regression settings
# (liblinear solver, balanced class weights) used for `propensity_transform`;
# fit on the current X, a, y and report the population outcomes.
pm = PropensityMatching(learner=LogisticRegression(
    solver="liblinear",
    class_weight="balanced"))
pm.fit(X, a, y)
pm.estimate_population_outcome(X, a)

0    1.675577
1    5.320829
dtype: float64

In [11]:
# Sweep the number of neighbours from 1 to 9, refitting the same `matcher`
# each time and printing the difference between the two population outcomes.
# Gotcha: `matcher` is mutated in place, so it keeps n_neighbors=9 (and the
# corresponding fit) after this cell has run.
for n in range(1, 10):
    matcher.n_neighbors = n
    matcher.fit(X, a, y)
    Y = matcher.estimate_population_outcome(X, a)
    print(f"Using {n} neighbors, the effect is: {(Y[1] - Y[0]):.3f}")

Using 1 neighbors, the effect is: 3.645
Using 2 neighbors, the effect is: 3.471
Using 3 neighbors, the effect is: 3.288
Using 4 neighbors, the effect is: 3.254
Using 5 neighbors, the effect is: 3.181
Using 6 neighbors, the effect is: 3.105
Using 7 neighbors, the effect is: 3.131
Using 8 neighbors, the effect is: 3.172
Using 9 neighbors, the effect is: 3.211
