<a href="https://colab.research.google.com/github/dmachlanski/ncrm-causality-2021/blob/main/Day_2_CI_in_practice.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install econml

Collecting econml
[?25l  Downloading https://files.pythonhosted.org/packages/fe/4d/a94c40f3af6fabc3160d4be7378913dcb652fe8a930e3f9ae523934aab2f/econml-0.10.0-cp37-cp37m-manylinux2010_x86_64.whl (3.0MB)
[K     |████████████████████████████████| 3.0MB 5.2MB/s 
Collecting sparse
[?25l  Downloading https://files.pythonhosted.org/packages/46/20/c409a0d0ea7623a936080e0038eb77a02e62629a07944706c26f24ebcbb8/sparse-0.12.0-py2.py3-none-any.whl (76kB)
[K     |████████████████████████████████| 81kB 7.0MB/s 
Collecting shap~=0.38.1
[?25l  Downloading https://files.pythonhosted.org/packages/44/20/54381999efe3000f70a7f68af79ba857cfa3f82278ab0e02e6ba1c06b002/shap-0.38.1.tar.gz (352kB)
[K     |████████████████████████████████| 358kB 42.5MB/s 
[?25hCollecting dowhy
[?25l  Downloading https://files.pythonhosted.org/packages/c4/e0/c1480994d39eaa66faa7d4106a4daea7b345dcc31ad83196460c2676701c/dowhy-0.6-py3-none-any.whl (123kB)
[K     |████████████████████████████████| 133kB 43.0MB/s 
Collecting sli

In [None]:
from econml.data.dgps import ihdp_surface_B
from econml.metalearners import XLearner
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd

In [None]:
# Generate the IHDP "surface B" data from econml. The generator is seeded so the
# data (and therefore every metric computed below) is the same on each
# "Restart & Run All"; previously only the split was seeded.
Y, T, X, expected_te = ihdp_surface_B(random_state=1)

# Hold out 20% of the rows for out-of-sample evaluation. Stratifying on T keeps
# the treatment proportions equal in both partitions, and expected_te is split
# alongside the other arrays so the reference effects stay row-aligned.
y_train, y_test, t_train, t_test, x_train, x_test, te_train, te_test = train_test_split(
    Y, T, X, expected_te, test_size=0.2, random_state=1, stratify=T
)

In [None]:
# X-Learner
# Random forests are stochastic (bootstrap + feature sub-sampling), so both the
# outcome models and the propensity model are seeded to make the estimated
# effects reproducible across runs.
xl = XLearner(
    models=RandomForestRegressor(random_state=1),
    propensity_model=RandomForestClassifier(random_state=1),
)
xl.fit(y_train, t_train, X=x_train)

# Estimated treatment effects on the training rows (in-sample) and on the
# held-out rows (out-of-sample).
xl_te_in = xl.effect(x_train)
xl_te_out = xl.effect(x_test)

In [None]:
# Turn the treatment vectors into single-column 2-D arrays so they can be
# concatenated to the covariates as one extra feature column.
t_train_2d = t_train.reshape(-1, 1)
t_test_2d = t_test.reshape(-1, 1)

# Random Forest
# One regressor is fitted on [covariates, treatment] -> outcome. It is seeded so
# that the fitted forest, and hence the predictions below, are reproducible.
rf = RandomForestRegressor(random_state=1)
rf.fit(np.concatenate([x_train, t_train_2d], axis=1), y_train)

# Predict each row's outcome twice: with the treatment column forced to 0 and
# forced to 1. The covariates are left unchanged.
rf_y0_in = rf.predict(np.concatenate([x_train, np.zeros_like(t_train_2d)], axis=1))
rf_y1_in = rf.predict(np.concatenate([x_train, np.ones_like(t_train_2d)], axis=1))

rf_y0_out = rf.predict(np.concatenate([x_test, np.zeros_like(t_test_2d)], axis=1))
rf_y1_out = rf.predict(np.concatenate([x_test, np.ones_like(t_test_2d)], axis=1))

# Per-row effect estimate = predicted outcome under treatment minus under control.
rf_te_in = rf_y1_in - rf_y0_in
rf_te_out = rf_y1_out - rf_y0_out

In [None]:
def rmse(a, b):
    """Return the root-mean-squared difference between ``a`` and ``b``.

    Parameters
    ----------
    a, b : array-like
        Values to compare. Both are converted with ``np.asarray`` so plain
        Python lists and tuples are accepted as well as NumPy arrays. The
        subtraction follows NumPy broadcasting, so callers should pass
        inputs of matching shape.

    Returns
    -------
    numpy floating scalar
        ``sqrt(mean((a - b) ** 2))`` taken over all elements.
    """
    # Coerce first: ``list - list`` is a TypeError in plain Python.
    diff = np.asarray(a) - np.asarray(b)
    return np.sqrt((diff ** 2).mean())

def ate_error(pred_te, true_te):
    """Absolute difference between the mean predicted and mean true effect.

    Parameters
    ----------
    pred_te : array-like
        Predicted treatment effects.
    true_te : array-like
        Reference treatment effects.

    Returns
    -------
    numpy floating scalar
        ``|mean(pred_te) - mean(true_te)|``.
    """
    mean_predicted = np.mean(pred_te)
    mean_true = np.mean(true_te)
    return np.abs(mean_predicted - mean_true)

def pehe_error(pred_te, true_te):
    """Root-mean-squared error between predicted and true per-row effects.

    Parameters
    ----------
    pred_te : array-like
        Predicted treatment effects.
    true_te : array-like
        Reference treatment effects.

    Returns
    -------
    The value of ``rmse(true_te, pred_te)``.
    """
    pehe = rmse(true_te, pred_te)
    return pehe

In [None]:
# X-Learner: error of the average effect (ATE) and per-row effect error (PEHE),
# first on the training rows, then on the held-out rows.
xl_ate_in = ate_error(xl_te_in, te_train)
xl_pehe_in = pehe_error(xl_te_in, te_train)
xl_ate_out = ate_error(xl_te_out, te_test)
xl_pehe_out = pehe_error(xl_te_out, te_test)

# Random Forest: the same four metrics.
rf_ate_in = ate_error(rf_te_in, te_train)
rf_pehe_in = pehe_error(rf_te_in, te_train)
rf_ate_out = ate_error(rf_te_out, te_test)
rf_pehe_out = pehe_error(rf_te_out, te_test)

In [None]:
# Column order must match the order of the values in each results row.
cols = ['Method', 'ATE in', 'PEHE in', 'ATE out', 'PEHE out']

# One row per method: label followed by in-sample and out-of-sample errors.
results = [
    ['XL', xl_ate_in, xl_pehe_in, xl_ate_out, xl_pehe_out],
    ['RF', rf_ate_in, rf_pehe_in, rf_ate_out, rf_pehe_out],
]

df = pd.DataFrame(results, columns=cols)
df

Unnamed: 0,Method,ATE in,PEHE in,ATE out,PEHE out
0,XL,0.228419,2.491936,0.110626,2.286824
1,RF,0.060529,1.376601,0.114791,1.751148
