# Project 2 - Meta Learners and Doubly Robust

Data from: Heyes, Anthony, and Soodeh Saberian. 2019. "Temperature and Decisions: Evidence from 207,000 Court Cases." American Economic Journal: Applied Economics, 11 (2): 238–65.

Notebooks used throughout the code:
- CIBT-11-Propensity-Score
- CIBT-21 Meta-Learners
- CIBT-12-Doubly-Robust-Estimation
- Doubly Robust Learner and Interpretability-econml notebook

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import LinearRegression
import seaborn as sns
from sklearn.linear_model import LassoCV
!pip install econml
from econml.dr import LinearDRLearner
from joblib import Parallel, delayed 
import shap
from sklearn.preprocessing import StandardScaler
from econml.dml import CausalForestDML
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from lightgbm import LGBMRegressor
from sklearn.model_selection import train_test_split

## Data Description 

In [None]:
# Load the Stata data file into a DataFrame.
df = pd.read_stata('matched_corrected.dta')
# Summary statistics, displayed as the cell output.
df.describe()

In [None]:
def _equals_flag(target):
    """Return a function that maps a value to 1 when it equals ``target`` and to 0 otherwise."""
    return lambda value: 1 if value == target else 0

# Dummy for asylum: 1 when the asylum type is 'E'
df['dummy_asylum'] = df['c_asy_type'].apply(_equals_flag('E'))
# Dummy for gender: 1 when the gender is 'female'
df['dummy_gender'] = df['gender'].apply(_equals_flag('female'))

In [None]:
# As outlined in the correction article, exclude the observations whose nationality is China
is_china = df['nat_name'] == 'CHINA'
df = df[~is_china]

In [None]:
# Get unique values to identify variables for the dummy variables
# (distinct nationality names and hearing locations present in the data).
unique__names = df['nat_name'].unique()
# NOTE: `locations` is rebound to the list of region names in the year/region interaction cell.
locations = df['location'].unique()

In [None]:
# Create categorical (0/1) variables for nationality, grouped by world region

middle_eastern_countries = ["BAHRAIN", "CYPRUS", "EGYPT", "IRAN", "IRAQ", "ISRAEL", "JORDAN",
    "KUWAIT", "LEBANON", "OMAN", "PALESTINE", "QATAR", "SAUDI ARABIA",
    "SYRIA", "TURKEY", "UNITED ARAB EMIRATES", "YEMEN"]

africa = ["ERITREA", "RWANDA", "SOMALIA", "SUDAN", "CONGO", "ETHIOPIA", "LIBYA",
    "MALI", "ANGOLA", "BURUNDI", "TANZANIA", "NIGERIA", "GABON", "GHANA",
    "SENEGAL", "CHAD", "DJIBOUTI", "CAMEROON", "UGANDA", "KENYA",
    "ZAMBIA", "MAURITANIA", "SOUTH AFRICA", "GUINEA", "BURKINA FASO",
    "MOROCCO", "ALGERIA", "COMORO ISLANDS", "EQUATORIAL GUINEA",
    "CENTRAL AFRICAN REPUBLIC", "CAPE VERDE", "LESOTHO", "SWAZILAND",
    "GAMBIA", "SIERRA LEONE", "GUINEA BISSAU"]

america = ["GUATEMALA", "EL SALVADOR", "PANAMA", "COLOMBIA",
    "ARGENTINA", "HAITI", "VENEZUELA", "MEXICO", "CUBA", "DOMINICAN REPUBLIC",
    "BRAZIL", "CHILE", "SURINAME", "TRINIDAD AND TOBAGO", "JAMAICA",
    "CANADA", "USA", "ST. KITTS, WEST INDIES", "ANTIGUA AND BARBUDA",
    "BARBADOS", "BAHAMAS", "BELIZE", "DOMINICA", "GRENADA",
    "NICARAGUA", "URUGUAY", "PARAGUAY", "ST. LUCIA", "ST. VINCENT AND THE GRENADINES"]

# "IRAN" is listed only under the Middle East. It used to appear in this list as well,
# which flagged Iranian cases as both `middleast` and `asia`; the regions must be
# mutually exclusive for the dummies to be usable as a categorical encoding.
asia = ["PAKISTAN", "VIETNAM", "INDONESIA", "AFGHANISTAN",
    "BANGLADESH", "PHILIPPINES", "TAIWAN", "MALAYSIA",
    "KAZAKHSTAN", "KYRGYZSTAN", "THAILAND", "TURKMENISTAN", "UZBEKISTAN",
    "MONGOLIA", "SRI LANKA", "BHUTAN", "LAOS", "NEPAL",
    "MYANMAR", "KAMPUCHEA", "BRUNEI", "BURMA", "KOREA", "NORTH KOREA"]

europe = ["RUSSIA", "ARMENIA", "ALBANIA", "YUGOSLAVIA", "UNITED KINGDOM",
    "BULGARIA", "ROMANIA", "HUNGARY", "POLAND", "CZECH REPUBLIC",
    "SLOVAK REPUBLIC", "GERMANY", "FRANCE", "ITALY", "SPAIN",
    "SWEDEN", "DENMARK", "FINLAND", "AUSTRIA", "SWITZERLAND",
    "BELGIUM", "GREECE", "NETHERLANDS", "CROATIA", "SLOVENIA",
    "MONACO", "LITHUANIA", "LATVIA", "ESTONIA", "ICELAND"]

# Region column name -> countries belonging to it (insertion order fixes the column order)
region_countries = {
    'middleast': middle_eastern_countries,
    'america': america,
    'africa': africa,
    'asia': asia,
    'europe': europe,
}

# One 0/1 indicator column per region
for region_name, countries in region_countries.items():
    df[region_name] = df['nat_name'].isin(countries).astype(int)

# Create interaction terms: region indicator multiplied by the `temp6t4` column
for region_name in region_countries:
    df[f'{region_name}_dev'] = df[region_name] * df['temp6t4']

In [None]:
# Create a categorical variable for location and group locations into regions.
# Fixes relative to the earlier version of these lists:
#   * a missing comma after '*JESSUP' silently concatenated it with '*BOP ALLENWOOD'
#     (Python joins adjacent string literals), so neither location was ever matched;
#   * 'CLEVELAND' was listed under both northeast and midwest; it is kept in midwest only
#     so that every location belongs to exactly one region;
#   * duplicated entries ('YORK COUNTY DET', 'SAN ANTONIO DET') were removed.
# NOTE(review): some assignments look geographically debatable (e.g. '*WISCONSIN DOC' in
# northeast, 'BATAVIA SPC' in south) and are left as they were; confirm they are intended.
northeast = ['NEWARK', 'BOSTON', 'NEW YORK CITY', 'BUFFALO', 'PHILADELPHIA',
    'NEW YORK ANNEX', 'NY DET (VARICK ST.)', 'HARTFORD',
    '*PA DOC.', '*BOP  DANBURY', '*RI  DOC',
    '*WISCONSIN DOC', '*NH  DOC', '*SUFFOLK COUNTY', '*NEWARK VIDEO HEARINGS', '*JESSUP',
    '*BOP ALLENWOOD', '*NORTHERN STATE NJ DOC', 'YORK COUNTY DET']

midwest = ['CHICAGO', 'DETROIT', 'CINCINNATI', 'CLEVELAND', 'ST. LOUIS',
    'MEMPHIS', 'KANSAS CITY', 'OMAHA', '*MI  DOC',
    '*IL DOC - STATESVILLE', '*MO DOC', '*OHIO DOC',
    '*INDIANA YOUTH CENTER']

south = ['ARLINGTON', 'DALLAS', 'HOUSTON', 'MIAMI', 'ATLANTA',
    'NEW ORLEANS', 'SAN ANTONIO', 'DALLAS DET', 'SAN ANTONIO DET',
    'HOUSTON DET', 'ATLANTA DET', '*GEORGIA DOC', '*VA DOC',
    '*DADE COUNTY FL DOC', '*BROWARD  FL DOC', 'ORLANDO', 'KROME DET',
    'PORT ISABEL DET', 'EL PASO', 'EL PASO DET', '*TX DOC',
    'LOUISVILLE', 'OKLAHOMA CITY', 'OKLAHOMA CITY DET',
    'BATAVIA SPC', 'BROWARD TRANS CTR', 'ST. THOMAS', 'ST. CROIX', 'ROLLING PLAINS DETENTION CENTER',
    '*BOP BIG SPRING AIRPARK', 'BRADENTON DET']

west = ['DENVER', 'SAN DIEGO', 'LOS ANGELES', 'SAN FRANCISCO',
    'PHOENIX', 'LAS VEGAS', 'RENO', 'SALT LAKE CITY', 'OTAY MESA',
    'TUCSON', 'HONOLULU', 'SAN JUAN', 'SEATTLE', 'PORTLAND',
    'SAN FRANCISCO DET', 'DENVER DET', 'SAN DIEGO DETAINED',
    'MIRA LOMA DET', 'HONOLULU DET', '*CO DOC', '*AZ DOC',
    '*WA DOC', '*AK DOC', 'ANCHORAGE', 'SAN PEDRO',
    'IMPERIAL', '*NM DOC', 'PORTLAND DET', '*MONROE WA DOC', 'SAN FRANCISCO ANNEX']

# Region column name -> hearing locations belonging to it
region_locations = {
    'northeast': northeast,
    'midwest': midwest,
    'south': south,
    'west': west,
}

# One 0/1 indicator column per region
for region_name, region_members in region_locations.items():
    df[region_name] = df['location'].isin(region_members).astype(int)

In [None]:
# Create dummy variables for the months: the month number of `date` is one-hot encoded
# into `month_<n>` columns (the intermediate `month` column is consumed by get_dummies).
month_number = df['date'].dt.month
df = pd.get_dummies(
    df.assign(month=month_number),
    columns=['month'],
    prefix='month',
    drop_first=False,
)

In [None]:
# Create a date categorical variable: calendar year of `date`
df['year'] = df['date'].dt.year

years = [2000, 2001, 2002, 2003, 2004]
locations = ['northeast', 'midwest', 'south', 'west']

# One 0/1 indicator column per year (year2000 ... year2004)
for year in years:
    df[f'year{year}'] = (df['year'] == year).astype(int)

# Interaction term for location and year: region indicator times year indicator
for year in years:
    year_flag = df[f'year{year}']
    for location in locations:
        df[f'{location}_year{year}'] = df[location] * year_flag

In [None]:
# Clean the dataset

# Keep only asylum cases classified as 'E' or 'I' (drops cases with no classification)
has_known_type = df['c_asy_type'].isin(['E', 'I'])
df = df.loc[has_known_type]

# Drop every row that has a missing value in any column
df_final = df.dropna(axis=0, how='any')

## IPW

In [None]:
# Code in this section based on the notebook: CIBT-11-Propensity-Score

# The treatment is defined on the temperature deviation rather than on the raw temperature:
# with raw temperature alone, specific regions (such as Texas) might always be treated.
above_threshold = df_final['deviation'].gt(0.000095)
df_final['T_binary'] = above_threshold.astype(int)
print(df_final['T_binary'].value_counts())

In [None]:
# Column names shared by the estimators in this notebook: treatment, outcome, covariates.
T = 'T_binary'
Y = 'res'
# 'asia', 'west', 'year2004' and 'month_12' are not listed, presumably as reference categories.
X = ['chair', 'dummy_asylum', 'dummy_gender',
                     'middleast', 'america', 'africa', 'europe', 'northeast', 'midwest',
                     'south', 'year2000', 'year2001', 'year2002',
                     'year2003','month_1',
                     'month_2','month_3','month_4','month_5','month_6','month_7','month_8',
                     'month_9','month_10','month_11']

# Propensity model P(T=1 | X). C=1e6 makes the regularisation negligible. max_iter is raised
# above the default of 100, as the other propensity fits in this notebook do, so the
# optimiser is not stopped before convergence.
ps_model = LogisticRegression(C=1e6, max_iter=2000).fit(df_final[X], df_final[T])

# Attach the estimated probability of treatment to every observation
data_ps = df_final.assign(propensity_score=ps_model.predict_proba(df_final[X])[:, 1])

data_ps[["T_binary", "res", "propensity_score"]].head()

In [None]:
# Inverse-probability weights: 1/e(x) for treated rows and 1/(1-e(x)) for control rows
weight_t = 1/data_ps.query("T_binary==1")["propensity_score"]
weight_nt = 1/(1-data_ps.query("T_binary==0")["propensity_score"])
# The reference size must come from the frame the weights were built on (data_ps);
# `df` still contains the rows that were dropped for missing values.
print("Original Sample Size", data_ps.shape[0])
print("Treated Population Sample Size", sum(weight_t))
print("Untreated Population Sample Size", sum(weight_nt))

In [None]:
# Overlap check: histogram of the propensity score for the control and the treated group
for treatment_value, group_label in ((0, "Non Treated"), (1, "Treated")):
    group_scores = data_ps.query(f"T_binary=={treatment_value}")["propensity_score"]
    sns.distplot(group_scores, kde=False, label=group_label)
plt.legend();

In [None]:
# Remove observations whose propensity score is exactly 0 or 1: their inverse-probability
# weight is undefined (division by zero).
valid_score = (data_ps["propensity_score"] > 0) & (data_ps["propensity_score"] < 1)
data_ps = data_ps[valid_score]

treated_data = data_ps.query("T_binary == 1")
control_data = data_ps.query("T_binary == 0")

# Recompute the weights on the filtered data so they are aligned with the rows used below
weight_t = 1 / treated_data["propensity_score"]
weight_nt = 1 / (1 - control_data["propensity_score"])

# IPW estimates of E[Y(1)] and E[Y(0)]: the OUTCOME `res` is weighted (not the treatment
# indicator) and each sum is normalised by the full sample size.
n_obs = len(data_ps)
y1 = (treated_data["res"] * weight_t).sum() / n_obs
y0 = (control_data["res"] * weight_nt).sum() / n_obs

ate = y1 - y0

print(ate)


In [None]:
# Calculate weights for treated and control groups.
# The groups are defined by the treatment indicator `T_binary`; splitting on the outcome
# `res` (as was done before) mixes treated and untreated cases in both groups.
treated_data = data_ps.query("T_binary == 1")
control_data = data_ps.query("T_binary == 0")

if not treated_data.empty:
    # Treated rows are weighted by 1 / e(x)
    weight_t = 1 / treated_data["propensity_score"]
    print("Weight_t:", weight_t)

if not control_data.empty:
    # Control rows are weighted by 1 / (1 - e(x))
    weight_nt = 1 / (1 - control_data["propensity_score"])
    print("Weight_nt:", weight_nt)


In [None]:
# Smallest and largest estimated propensity score
score_min, score_max = data_ps["propensity_score"].agg(["min", "max"])
print(score_min, score_max)

In [None]:
def run_ps(df_final, X, T, y):
    """Inverse-probability-weighting estimate of the average treatment effect.

    Fits a logistic propensity model of column ``T`` on columns ``X`` and returns the
    mean of ``(T - ps) / (ps * (1 - ps)) * y`` over the rows of ``df_final``.
    """
    propensity_model = LogisticRegression(C=1e6, max_iter=2000, solver='liblinear')
    propensity_model.fit(df_final[X], df_final[T])
    ps = propensity_model.predict_proba(df_final[X])[:, 1]
    treatment = df_final[T]
    ipw = (treatment - ps) / (ps * (1 - ps))
    return np.mean(ipw * df_final[y])

# One bootstrap draw (same size as the data, with replacement) and its IPW estimate
sample_df = df_final.sample(frac=1, replace=True)
ate = run_ps(sample_df, X, T, Y)
print(ate)

In [None]:
# Draw a 10% sample (with replacement) of the cleaned data.
# NOTE: no seed is set in this cell, so the draw is not reproducible on its own.
sample_df = df_final.sample(frac=0.1, replace=True)

In [None]:
def run_ps(sample_df, X, T, y):
    """IPW estimate of the ATE on ``sample_df``.

    A logistic regression of ``T`` on ``X`` provides the propensity score ``ps``;
    the estimate is the mean of ``(T - ps) / (ps * (1 - ps)) * y``.
    """
    features, treatment = sample_df[X], sample_df[T]
    ps = (LogisticRegression(C=1e6, max_iter=2000, solver='liblinear')
          .fit(features, treatment)
          .predict_proba(features)[:, 1])
    weight = (treatment - ps) / (ps * (1 - ps))
    return np.mean(weight * sample_df[y])

# Bootstrap the IPW estimate on a 10% subsample of the cleaned data
np.random.seed(88)
df_sampled = df_final.sample(frac=0.1, replace=True)
bootstrap_sample = 100
bootstrap_tasks = (delayed(run_ps)(df_sampled.sample(frac=1, replace=True), X, T, Y)
                   for _ in range(bootstrap_sample))
ates = np.array(Parallel(n_jobs=4)(bootstrap_tasks))
ates

In [None]:
# Bootstrap distribution of the IPW estimate with its 2.5% / 97.5% percentiles
ci_low, ci_high = np.percentile(ates, [2.5, 97.5])
sns.distplot(ates, kde=False)
plt.vlines(ci_low, 0, 30, linestyles="dotted")
plt.vlines(ci_high, 0, 30, linestyles="dotted", label="95% CI")
plt.legend();

## Learners 

In [None]:
#Code in this section based on the notebook: CIBT-21 Meta-Learners
!pip install lightgbm

In [None]:
# Split the cleaned data in half into a train and a test set; random_state fixes the split.
df_train, df_test = train_test_split(df_final, test_size=0.5, random_state=42)

In [None]:
np.random.seed(123)
# S-learner: a single outcome model that takes the covariates plus the treatment as features
s_learner_features = X + [T]
s_learner = LGBMRegressor(max_depth=3, min_child_samples=30)
s_learner.fit(df_train[s_learner_features], df_train[Y]);

In [None]:
def _s_learner_cate(frame):
    """Predicted outcome with the treatment set to 1 minus the prediction with it set to 0."""
    covariates = frame[X]
    return (s_learner.predict(covariates.assign(**{T: 1})) -
            s_learner.predict(covariates.assign(**{T: 0})))

s_learner_cate_train = _s_learner_cate(df_train)
s_learner_cate_test = df_test.assign(cate=_s_learner_cate(df_test))

# Average of the test-set CATE predictions
ATE_test = s_learner_cate_test['cate'].mean()
print(ATE_test)

In [None]:
# S-learner CATE predictions on the test set, plotted against the `chair` column
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(df_test['chair'], s_learner_cate_test['cate'], alpha=0.5, color='lightblue')
ax.set_xlabel('Judge Identification')
ax.set_ylabel('CATE')
ax.grid(True)
plt.show()

In [None]:
def elast(data, y, t):
    """Slope of the simple linear regression of column ``y`` on column ``t``.

    Computed as sum((t - mean(t)) * (y - mean(y))) / sum((t - mean(t))**2).
    """
    t_centered = data[t] - data[t].mean()
    y_centered = data[y] - data[y].mean()
    return np.sum(t_centered * y_centered) / np.sum(t_centered ** 2)

def cumulative_gain(dataset, prediction, y, t, min_periods=30, steps=100):
    """Cumulative gain curve of a CATE prediction.

    Rows are sorted by ``prediction`` in descending order; for a growing number of top
    rows, the elasticity of ``y`` with respect to ``t`` is computed and scaled by the
    share of rows used.

    Parameters
    ----------
    dataset : DataFrame containing the columns ``prediction``, ``y`` and ``t``.
    prediction, y, t : column names.
    min_periods : number of rows in the first evaluation point.
    steps : approximate number of evaluation points.

    Returns
    -------
    numpy array with one value per evaluation point; the last point uses all rows.
    """
    size = dataset.shape[0]
    ordered_df = dataset.sort_values(prediction, ascending=False).reset_index(drop=True)
    # size // steps is 0 when there are fewer rows than steps, and range() rejects a
    # zero step; fall back to a step of one row in that case.
    step = max(size // steps, 1)
    n_rows = list(range(min_periods, size, step)) + [size]
    return np.array([elast(ordered_df.head(rows), y, t) * (rows/size) for rows in n_rows])

In [None]:
# Cumulative gain of the S-learner CATE ordering on the test and the train set
gain_curve_test = cumulative_gain(s_learner_cate_test, "cate", y="res", t="T_binary")
gain_curve_train = cumulative_gain(df_train.assign(cate=s_learner_cate_train), "cate",
                                   y="res", t="T_binary")
for curve, colour, curve_label in ((gain_curve_test, "C0", "Test"),
                                   (gain_curve_train, "C1", "Train")):
    plt.plot(curve, color=colour, label=curve_label)
# Straight line from 0 to the elasticity computed on the whole test set
baseline_gain = elast(df_test, y="res", t="T_binary")
plt.plot([0, 100], [0, baseline_gain], linestyle="--", color="black", label="Baseline")
plt.legend()

In [None]:
def compute_ate_s_learner(df, X, T, Y):
    """Fit an S-learner on ``df`` and return the mean of its CATE predictions on ``df``.

    The model regresses ``Y`` on ``X + [T]``; the CATE is the prediction with ``T`` set
    to 1 minus the prediction with ``T`` set to 0.
    """
    model = LGBMRegressor(max_depth=3, min_child_samples=30)
    model.fit(df[X + [T]], df[Y])
    covariates = df[X]
    outcome_if_treated = model.predict(covariates.assign(**{T: 1}))
    outcome_if_control = model.predict(covariates.assign(**{T: 0}))
    return (outcome_if_treated - outcome_if_control).mean()

bootstrap_samples = 1000

# Seed BEFORE drawing the 10% subsample: seeding afterwards left the subsample itself
# unseeded, so the bootstrap results changed from run to run.
np.random.seed(88)
df_sampled = df_test.sample(frac=0.1, replace=True)

# Each task refits the S-learner on a resample (with replacement) of the subsample
ates_s_learner = Parallel(n_jobs=4)(delayed(compute_ate_s_learner)(
    df_sampled.sample(frac=1, replace=True), X, T, Y
) for _ in range(bootstrap_samples))

ates_s_learner = np.array(ates_s_learner)

In [None]:
# Bootstrap distribution of the S-learner ATE with its 2.5% / 97.5% percentiles
ci_low, ci_high = np.percentile(ates_s_learner, [2.5, 97.5])
sns.distplot(ates_s_learner, kde=False)
plt.vlines(ci_low, 0, 20, linestyles="dotted")
plt.vlines(ci_high, 0, 20, linestyles="dotted", label="95% CI")
plt.legend();

## T-Learner 

In [None]:
np.random.seed(123)

# T-learner: separate outcome models for the control (T == 0) and treated (T == 1) rows
train_control = df_train.query(f"{T}==0")
train_treated = df_train.query(f"{T}==1")

m0 = LGBMRegressor(max_depth=2, min_child_samples=60)
m1 = LGBMRegressor(max_depth=2, min_child_samples=60)
m0.fit(train_control[X], train_control[Y])
m1.fit(train_treated[X], train_treated[Y])

# CATE = treated-model prediction minus control-model prediction
t_learner_cate_train = m1.predict(df_train[X]) - m0.predict(df_train[X])
t_learner_cate_test = df_test.assign(cate=m1.predict(df_test[X]) - m0.predict(df_test[X]))

In [None]:
# Average of the T-learner CATE predictions on the test set
ATE_test_t = t_learner_cate_test['cate'].mean()
print(ATE_test_t)

In [None]:
# T-learner CATE predictions on the test set, plotted against the `chair` column
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(df_test['chair'], t_learner_cate_test['cate'], alpha=0.5, color='lightblue')
ax.set_xlabel('Judge Identification')
ax.set_ylabel('CATE')
ax.grid(True)
plt.show()

In [None]:
# Cumulative gain of the T-learner CATE ordering on the test and the train set
gain_curve_test = cumulative_gain(t_learner_cate_test, "cate", y="res", t="T_binary")
gain_curve_train = cumulative_gain(df_train.assign(cate=t_learner_cate_train), "cate",
                                   y="res", t="T_binary")
for curve, colour, curve_label in ((gain_curve_test, "C0", "Test"),
                                   (gain_curve_train, "C1", "Train")):
    plt.plot(curve, color=colour, label=curve_label)
# Straight line from 0 to the elasticity computed on the whole test set
baseline_gain = elast(df_test, "res", "T_binary")
plt.plot([0, 100], [0, baseline_gain], linestyle="--", color="black", label="Baseline")
plt.legend();

In [None]:
def compute_ate_bootstrap(df, X, T, Y, m0, m1):
    """T-learner estimate of the ATE on ``df``.

    Copies of ``m0`` and ``m1`` are fitted on the rows with ``T == 0`` and ``T == 1``
    respectively; the return value is the mean of the treated-model prediction minus
    the control-model prediction over all rows of ``df``.

    The estimators passed in are NOT modified: fitting them in place would overwrite
    models the caller may already have trained on other data.
    """
    from copy import deepcopy  # local import keeps this cell self-contained

    control = df.query(f"{T}==0")
    treated = df.query(f"{T}==1")

    model_control = deepcopy(m0)
    model_treated = deepcopy(m1)
    model_control.fit(control[X], control[Y])
    model_treated.fit(treated[X], treated[Y])

    cate = model_treated.predict(df[X]) - model_control.predict(df[X])
    return cate.mean()

bootstrap_samples = 1000

# Seed BEFORE drawing the 10% subsample: seeding afterwards left the subsample itself
# unseeded, so the bootstrap results changed from run to run.
np.random.seed(88)
df_sampled = df_test.sample(frac=0.1, replace=True)

# Each task fits the T-learner on a resample (with replacement) of the subsample
ates_tlearner = Parallel(n_jobs=4)(delayed(compute_ate_bootstrap)(
    df_sampled.sample(frac=1, replace=True), X, T, Y, m0, m1
) for _ in range(bootstrap_samples))

ates_tlearner = np.array(ates_tlearner)

In [None]:
# Bootstrap distribution of the T-learner ATE with its 2.5% / 97.5% percentiles
ci_low, ci_high = np.percentile(ates_tlearner, [2.5, 97.5])
sns.distplot(ates_tlearner, kde=False)
plt.vlines(ci_low, 0, 20, linestyles="dotted")
plt.vlines(ci_high, 0, 20, linestyles="dotted", label="95% CI")
plt.legend();

## X-Learner

In [None]:
np.random.seed(123)

# Stage 1 outcome models: m0 on the control rows, m1 on the treated rows
m0 = LGBMRegressor(max_depth=2, min_child_samples=30)
m1 = LGBMRegressor(max_depth=2, min_child_samples=30)

# Propensity model. The string penalty='none' is rejected by recent scikit-learn releases;
# a very large C makes the penalty negligible on every version and matches the other
# propensity models in this notebook.
g = LogisticRegression(solver="lbfgs", C=1e6, max_iter=1000)

m0.fit(df_train.query(f"{T}==0")[X], df_train.query(f"{T}==0")[Y])
m1.fit(df_train.query(f"{T}==1")[X], df_train.query(f"{T}==1")[Y])

g.fit(df_train[X], df_train[T]);

In [None]:
# Imputed treatment effects: for control rows m1(x) - y, for treated rows y - m0(x)
is_control_train = df_train[T] == 0
is_treated_train = df_train[T] == 1
d_train = np.where(is_control_train,
                   m1.predict(df_train[X]) - df_train[Y],
                   df_train[Y] - m0.predict(df_train[X]))

# Stage 2 models: regress the imputed effects on the covariates, separately per group
mx0 = LGBMRegressor(max_depth=2, min_child_samples=30)
mx1 = LGBMRegressor(max_depth=2, min_child_samples=30)

mx0.fit(df_train.loc[is_control_train, X], d_train[is_control_train])
mx1.fit(df_train.loc[is_treated_train, X], d_train[is_treated_train]);

In [None]:
def ps_predict(df, t):
    """Column ``t`` of the class probabilities predicted by the propensity model ``g``."""
    class_probabilities = g.predict_proba(df[X])
    return class_probabilities[:, t]


def _x_learner_blend(frame):
    """Propensity-weighted combination of the two stage-2 models' predictions."""
    return (ps_predict(frame, 1) * mx0.predict(frame[X]) +
            ps_predict(frame, 0) * mx1.predict(frame[X]))


x_cate_train = _x_learner_blend(df_train)

x_cate_test = df_test.assign(cate=_x_learner_blend(df_test))

In [None]:
# Average of the X-learner CATE predictions on the test set
ATE_test_x = x_cate_test['cate'].mean()
print(ATE_test_x)

In [None]:
# X-learner CATE predictions on the test set, plotted against the `chair` column
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(df_test['chair'], x_cate_test['cate'], alpha=0.5, color='lightblue')
ax.set_xlabel('Judge Identification')
ax.set_ylabel('CATE')
ax.grid(True)
plt.show()

In [None]:
def compute_ate_x_learner(df, X, T, Y):
    """Fit an X-learner on ``df`` and return the mean of its CATE predictions on ``df``.

    Steps:
      1. outcome models ``m0`` / ``m1`` fitted on the control / treated rows;
      2. imputed effects: ``m1(x) - y`` for control rows, ``y - m0(x)`` for treated rows;
      3. second-stage models ``mx0`` / ``mx1`` fitted on the imputed effects per group;
      4. CATE = P(T=1|x) * mx0(x) + P(T=0|x) * mx1(x), using a logistic propensity model.
    """
    control = df.query(f"{T}==0")
    treated = df.query(f"{T}==1")

    m0 = LGBMRegressor(max_depth=2, min_child_samples=30)
    m1 = LGBMRegressor(max_depth=2, min_child_samples=30)
    m0.fit(control[X], control[Y])
    m1.fit(treated[X], treated[Y])

    d_train = np.where(
        df[T] == 0,
        m1.predict(df[X]) - df[Y],
        df[Y] - m0.predict(df[X])
    )

    # penalty='none' is rejected by recent scikit-learn releases; a very large C makes
    # the penalty negligible on every version.
    g = LogisticRegression(solver="lbfgs", C=1e6, max_iter=1000)
    g.fit(df[X], df[T])

    def ps_predict(data, t):
        """Column ``t`` of the predicted class probabilities."""
        return g.predict_proba(data[X])[:, t]

    mx0 = LGBMRegressor(max_depth=2, min_child_samples=30)
    mx1 = LGBMRegressor(max_depth=2, min_child_samples=30)

    mx0.fit(control[X], d_train[df[T] == 0])
    mx1.fit(treated[X], d_train[df[T] == 1])

    cate = (ps_predict(df, 1) * mx0.predict(df[X]) +
            ps_predict(df, 0) * mx1.predict(df[X]))

    return cate.mean()

bootstrap_samples = 100

# Seed BEFORE drawing the 10% subsample: seeding afterwards left the subsample itself
# unseeded, so the bootstrap results changed from run to run.
np.random.seed(123)
df_sampled = df_test.sample(frac=0.1, replace=True)

# Each task refits the X-learner on a resample (with replacement) of the subsample
x_ates_x = Parallel(n_jobs=4)(delayed(compute_ate_x_learner)(
    df_sampled.sample(frac=1, replace=True), X, T, Y
) for _ in range(bootstrap_samples))

x_ates_x_learner = np.array(x_ates_x)

In [None]:
# Bootstrap distribution of the X-learner ATE with its 2.5% / 97.5% percentiles
ci_low, ci_high = np.percentile(x_ates_x_learner, [2.5, 97.5])
sns.distplot(x_ates_x_learner, kde=False)
plt.vlines(ci_low, 0, 20, linestyles="dotted")
plt.vlines(ci_high, 0, 20, linestyles="dotted", label="95% CI")
plt.legend();

In [None]:
# Cumulative gain of the X-learner CATE ordering on the test and the train set
gain_curve_test = cumulative_gain(x_cate_test, "cate", y="res", t="T_binary")
gain_curve_train = cumulative_gain(df_train.assign(cate=x_cate_train), "cate",
                                   y="res", t="T_binary")
for curve, colour, curve_label in ((gain_curve_test, "C0", "Test"),
                                   (gain_curve_train, "C1", "Train")):
    plt.plot(curve, color=colour, label=curve_label)
# Straight line from 0 to the elasticity computed on the whole test set
baseline_gain = elast(df_test, "res", "T_binary")
plt.plot([0, 100], [0, baseline_gain], linestyle="--", color="black", label="Baseline")
plt.legend();

## Doubly Robust 

In [None]:
# first half of the code based on: CIBT-12-Doubly-Robust-Estimation
def doubly_robust(df, X, T, Y):
    """Doubly robust estimate of the average treatment effect.

    Combines a logistic propensity model of ``T`` on ``X`` with two linear outcome
    models (fitted on the control and on the treated rows) and returns the estimated
    E[Y(1)] minus the estimated E[Y(0)].
    """
    treatment, outcome = df[T], df[Y]
    control_rows = df.query(f"{T}==0")
    treated_rows = df.query(f"{T}==1")

    ps = LogisticRegression(C=1e6, max_iter=1000).fit(df[X], treatment).predict_proba(df[X])[:, 1]
    mu0 = LinearRegression().fit(control_rows[X], control_rows[Y]).predict(df[X])
    mu1 = LinearRegression().fit(treated_rows[X], treated_rows[Y]).predict(df[X])

    expected_treated = np.mean(treatment*(outcome - mu1)/ps + mu1)
    expected_control = np.mean((1-treatment)*(outcome - mu0)/(1-ps) + mu0)
    return expected_treated - expected_control

In [None]:
# Doubly robust ATE estimate on the full cleaned sample (displayed as the cell output)
doubly_robust(df_final, X, T, Y)

In [None]:
# Bootstrap the doubly robust estimate on a 10% subsample of the cleaned data
np.random.seed(88)
bootstrap_sample = 100
df_sampled = df_final.sample(frac=0.1, replace=True)
dr_tasks = (delayed(doubly_robust)(df_sampled.sample(frac=1, replace=True), X, T, Y)
            for _ in range(bootstrap_sample))
ates_double = np.array(Parallel(n_jobs=4)(dr_tasks))

In [None]:
# Bootstrap distribution of the doubly robust ATE with its 2.5% / 97.5% percentiles
ci_low, ci_high = np.percentile(ates_double, [2.5, 97.5])
sns.distplot(ates_double, kde=False)
plt.vlines(ci_low, 0, 20, linestyles="dotted")
plt.vlines(ci_high, 0, 20, linestyles="dotted", label="95% CI")
plt.legend();

In [None]:
# Code based on Doubly Robust Learner and Interpretability-econml notebook

# Nuisance models: Lasso (3-fold CV) for the outcome, logistic regression for the propensity
outcome_model = LassoCV(cv=3)
propensity_model = LogisticRegression(C=1e6, max_iter=1000, solver='liblinear')
est = LinearDRLearner(model_regression=outcome_model,
                      model_propensity=propensity_model)

# Outcome, treatment and covariates taken from the cleaned data
Y_value, T_value, X_value = df_final[Y], df_final[T], df_final[X]

est.fit(Y_value, T_value, X=X_value)

In [None]:
# `est` is fitted in the cell above; fitting it again here only repeated the expensive
# estimation, so the already fitted model is reused.
treatment_effects = est.effect(X_value)

# Average of the individual effect predictions
average_treatment_effect = np.mean(treatment_effects)

print(average_treatment_effect)

In [None]:
# SHAP values of the fitted effect model, for outcome 'res' and treatment 'T_binary_1'
shap_values = est.shap_values(X_value)
shap_values_for_target = shap_values['res']['T_binary_1'].values

shap_values_for_target = shap_values_for_target.astype(float)

# Guard against non-finite values, which the plot cannot handle
if np.any(np.isnan(shap_values_for_target)) or np.any(np.isinf(shap_values_for_target)):
    print("NaN or Inf values found in SHAP values, replacing them with zeros.")
    shap_values_for_target = np.nan_to_num(shap_values_for_target)

print(f"Shape of shap_values_for_target after adjustments: {shap_values_for_target.shape}")

# The raw SHAP values are plotted. Standardising each column (as was done before) gives
# every feature zero mean and unit variance, which erases exactly the differences in
# magnitude that the summary plot uses to rank and compare features.
# The feature names are taken from X (the columns of X_value) instead of a second
# hard-coded copy of the same list.
shap.summary_plot(shap_values_for_target, feature_names=X)


In [None]:
# Causal forest with random-forest nuisance models for the outcome and the binary treatment
outcome_forest = RandomForestRegressor(n_estimators=50)
treatment_forest = RandomForestClassifier(n_estimators=50)
est = CausalForestDML(model_y=outcome_forest,
                      model_t=treatment_forest,
                      discrete_treatment=True)

est.fit(Y_value, T_value, X=X_value)

# Individual effect predictions and their average
treatment_effects = est.effect(X_value)

print(treatment_effects.mean())

In [None]:
def causal_forest_ate(df, X, T, Y):
    """Fit a CausalForestDML on ``df`` and return the mean of its effect predictions.

    ``delayed`` needs a callable; the fitted estimator ``est`` is not one, so each
    bootstrap task fits its own forest with the same settings as the cell above.
    """
    forest = CausalForestDML(model_y=RandomForestRegressor(n_estimators=50),
                             model_t=RandomForestClassifier(n_estimators=50),
                             discrete_treatment=True)
    forest.fit(df[Y], df[T], X=df[X])
    return forest.effect(df[X]).mean()

np.random.seed(88)
bootstrap_sample = 10
# Each task refits the forest on a resample (with replacement) of `sample_df`
ates_double_causal = Parallel(n_jobs=4)(delayed(causal_forest_ate)(sample_df.sample(frac=1, replace=True), X, T, Y)
                          for _ in range(bootstrap_sample))
ates_double_causal = np.array(ates_double_causal)

In [None]:
# Bootstrap distribution of the causal-forest ATE. The histogram must show the same
# estimates the percentile lines are computed from (`ates_double_causal`); it previously
# showed the doubly robust estimates (`ates_double`).
sns.distplot(ates_double_causal, kde=False)
plt.vlines(np.percentile(ates_double_causal, 2.5), 0, 20, linestyles="dotted")
plt.vlines(np.percentile(ates_double_causal, 97.5), 0, 20, linestyles="dotted", label="95% CI")
plt.legend();