In [None]:
# import packages
import numpy as np
from numpy import random

import pandas as pd

from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression, LogisticRegressionCV
from sklearn.linear_model import Lasso, LassoCV
from sklearn.preprocessing import PolynomialFeatures

import time

from sklearn.model_selection import KFold

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.saving import load_model

from abc import ABC, abstractmethod

In [None]:
# set global seed
# (one call that seeds Python's `random`, NumPy and TensorFlow, so the NumPy
# shuffle and the Keras training below are repeatable)
tf.keras.utils.set_random_seed(8953)

In [None]:
# Load the example dataset (no header row; the first CSV column is used as the
# row index) and convert it directly to a NumPy array.
# NOTE(review): absolute, machine-specific path - adjust before running elsewhere.
data = pd.read_csv(
    '/Users/arberimbibaj/dataset_example_indicatorCATE.csv',
    header=None,
    index_col=[0],
).to_numpy()

In [None]:
# N: number of observations (rows).
# d: number of covariates, i.e. all columns except the three non-feature
#    columns (outcome, treatment indicator, true effect) sliced off below.
N = data.shape[0]
d = data.shape[1] - 3

In [None]:
# Train/test split: shuffle the rows of `data` in place, then take the first
# 700 rows for training and every remaining row for testing.
random.shuffle(data)
training = data[:700]
test = data[700:]

In [None]:
# Column layout used throughout: 0 = outcome Y, 1..25 = covariates X,
# 26 = treatment indicator W, 27 = true treatment effect tau.

# --- rows split by treatment status -----------------------------------------
training_control = training[training[:, 26] == 0]
training_treatment = training[training[:, 26] == 1]
test_control = test[test[:, 26] == 0]
test_treatment = test[test[:, 26] == 1]

# --- full training set (not split by treatment status) ----------------------
Y_train = training[:, 0]
X_train = training[:, 1:26]
W_train = training[:, 26]

# --- full test set -----------------------------------------------------------
Y_test = test[:, 0]
X_test = test[:, 1:26]
W_test = test[:, 26]
tau_test = test[:, 27]

# --- training set, per treatment arm -----------------------------------------
Y_train_control, X_train_control = training_control[:, 0], training_control[:, 1:26]
Y_train_treatment, X_train_treatment = training_treatment[:, 0], training_treatment[:, 1:26]

# --- test set, per treatment arm ---------------------------------------------
Y_test_control, X_test_control = test_control[:, 0], test_control[:, 1:26]
Y_test_treatment, X_test_treatment = test_treatment[:, 0], test_treatment[:, 1:26]
tau_test_control = test_control[:, 27]
tau_test_treatment = test_treatment[:, 27]

In [None]:
# Training and test features for the S-Learner, which treats W as just another
# covariate: columns 1..26 are the covariates X followed by the treatment W.
X_W_train = training[:,1:27]
X_W_test = test[:,1:27]

# Counterfactual test features: the test covariates with W forced to 0 and to 1.
# The appended column is sized from the test set itself rather than a
# hard-coded 300, so changing the split size cannot break the concatenation.
X_test_0 = np.concatenate((test[:,1:26], np.zeros((len(test),1))), axis=1)
X_test_1 = np.concatenate((test[:,1:26], np.ones((len(test),1))), axis=1)

In [None]:
# inspect the S-Learner training features (covariates plus the treatment column)
X_W_train

In [None]:
# inspect the covariates of the control units in the training set
X_train_control

In [None]:
# inspect the covariates of the treated units in the training set
X_train_treatment

In [None]:
# inspect the outcomes of the control units in the training set
Y_train_control

In [None]:
# inspect the outcomes of the treated units in the training set
Y_train_treatment

In [None]:
# inspect the covariates of the full training set
X_train

In [None]:
# inspect the treatment indicator of the training set
W_train

In [None]:
# inspect the outcomes of the full training set
Y_train

In [None]:
# inspect the covariates of the test set
X_test

In [None]:
# inspect the treatment indicator of the test set
W_test

In [None]:
# inspect the outcomes of the test set
Y_test

In [None]:
# inspect the true treatment effects of the test set (column 27 of the data)
tau_test

# T-Learner

In [None]:
# T-Learner with Random Forest base learners

# Outcome model for the control arm (mu_0), trained on control units only
t_learner_mu0 = RandomForestRegressor(random_state=0, max_depth=100)
t_learner_mu0.fit(X_train_control, Y_train_control)

# Outcome model for the treated arm (mu_1), trained on treated units only
t_learner_mu1 = RandomForestRegressor(random_state=0, max_depth=100)
t_learner_mu1.fit(X_train_treatment, Y_train_treatment)

# Predict both potential outcomes for every test unit
t_mu_0_hat = t_learner_mu0.predict(X_test)
t_mu_1_hat = t_learner_mu1.predict(X_test)

# CATE estimate = mu_1 - mu_0
t_tau_hat = t_mu_1_hat - t_mu_0_hat
t_tau_hat

In [None]:
# mean squared error of the T-Learner estimate against the true effects
np.mean(np.square(t_tau_hat - tau_test))

# S-Learner

In [None]:
### S-Learner

In [None]:
# inspect the test features with the treatment column set to 0
X_test_0

In [None]:
# inspect the test features with the treatment column set to 1
X_test_1

In [None]:
# inspect the training outcomes used to fit the S-Learner
Y_train

In [None]:
# S-Learner with a Random Forest base learner

# Single outcome model mu(x, w): W enters as an ordinary feature
s_learner = RandomForestRegressor(random_state=0, max_depth=100)
s_learner.fit(X_W_train, Y_train)

# Predict each test unit's outcome with W forced to 0 and to 1
s_mu_0_hat = s_learner.predict(X_test_0)
s_mu_1_hat = s_learner.predict(X_test_1)

# CATE estimate = difference of the two counterfactual predictions
s_tau_hat = s_mu_1_hat - s_mu_0_hat
s_tau_hat


In [None]:
# mean squared error of the S-Learner estimate against the true effects
np.mean(np.square(s_tau_hat - tau_test))

# X-Learner

In [None]:
### X-Learner (Random Forest base learners)

# Stage 1: one outcome model per arm (same first stage as the T-Learner)
x_learner_mu0 = RandomForestRegressor(max_depth=100, random_state=0).fit(X_train_control, Y_train_control)
x_learner_mu1 = RandomForestRegressor(max_depth=100, random_state=0).fit(X_train_treatment, Y_train_treatment)

# Stage 2: imputed treatment effects
#   D_1 (treated units): observed outcome minus the predicted control outcome
#   D_0 (control units): predicted treated outcome minus the observed outcome
imputed_1 = Y_train_treatment - x_learner_mu0.predict(X_train_treatment)
imputed_0 = x_learner_mu1.predict(X_train_control) - Y_train_control

# Stage 3: regress the imputed effects on the covariates within each arm
x_tau_0_hat = RandomForestRegressor(max_depth=100, random_state=0).fit(X_train_control, imputed_0)
x_tau_1_hat = RandomForestRegressor(max_depth=100, random_state=0).fit(X_train_treatment, imputed_1)

# Propensity model e(x), used as the weighting function g(x)
g_x_hat = RandomForestClassifier(max_depth=100, random_state=0).fit(X_train, W_train)
probabilities = g_x_hat.predict_proba(X_test)
probas_0, probas_1 = probabilities[:, 0], probabilities[:, 1]

# Final estimate: g(x) * tau_0(x) + (1 - g(x)) * tau_1(x)
x_tau_hat = (
    probas_1 * x_tau_0_hat.predict(X_test)
    + probas_0 * x_tau_1_hat.predict(X_test)
)
x_tau_hat


In [None]:
# mean squared error of the X-Learner estimate (much lower here!)
np.mean(np.square(x_tau_hat - tau_test))

# R-Learner

In [None]:
### R-Learner (Random Forest base learners)

# Propensity model e(x), evaluated in-sample on the training covariates
r_learner_e_x = RandomForestClassifier(max_depth=100, random_state=0).fit(X_train, W_train)
r_probas = r_learner_e_x.predict_proba(X_train)
r_probas_0, r_probas_1 = r_probas[:, 0], r_probas[:, 1]  # P(W=0 | x), P(W=1 | x)

# Marginal outcome model mu(x) = E[Y | X=x]
r_learner_mu_x = RandomForestRegressor(max_depth=100, random_state=0).fit(X_train, Y_train)

# R pseudo-outcome (outcome residual / treatment residual) and its weights
# NOTE(review): in-sample forest probabilities can equal W exactly, which would
# make the treatment residual zero - cross-fitting would avoid that; confirm.
r_learner_pseudo_outcomes = (Y_train - r_learner_mu_x.predict(X_train)) / (W_train - r_probas_1)
r_learner_weights = (W_train - r_probas_1) ** 2

# Weighted regression of the pseudo-outcomes on X, weights (W - e(x))^2
r_learner_tau = RandomForestRegressor(max_depth=100, random_state=0)
r_learner_tau.fit(X_train, r_learner_pseudo_outcomes, sample_weight=r_learner_weights)

# CATE estimate on the test covariates
r_tau_hats = r_learner_tau.predict(X_test)
r_tau_hats

In [None]:
# mean squared error of the R-Learner estimate against the true effects
np.mean(np.square(r_tau_hats - tau_test))

# DR-Learner

In [None]:
### DR-Learner (Random Forest base learners)

# TODO: APPLY CROSS-FITTING?
# Propensity model e(x), evaluated in-sample on the training covariates
dr_learner_e_x = RandomForestClassifier(n_estimators=100, max_depth=100, random_state=0)
dr_learner_e_x.fit(X_train, W_train)
dr_probas = dr_learner_e_x.predict_proba(X_train)
dr_probas_0, dr_probas_1 = dr_probas[:, 0], dr_probas[:, 1]  # P(W=0 | x), P(W=1 | x)

# Outcome models per arm
dr_learner_mu_0 = RandomForestRegressor(n_estimators=100, max_depth=100, random_state=0)
dr_learner_mu_0.fit(X_train_control, Y_train_control)
dr_learner_mu_1 = RandomForestRegressor(n_estimators=100, max_depth=100, random_state=0)
dr_learner_mu_1.fit(X_train_treatment, Y_train_treatment)

# mu_w: the prediction for each unit's own arm (mu_1 for treated, mu_0 for control)
mu_w = (
    W_train * dr_learner_mu_1.predict(X_train)
    + (1 - W_train) * dr_learner_mu_0.predict(X_train)
)

# Doubly robust pseudo-outcome: propensity-weighted residual plus the
# difference of the two outcome-model predictions
dr_pseudo_outcomes = (
    (W_train - dr_probas_1) / (dr_probas_1 * dr_probas_0) * (Y_train - mu_w)
    + dr_learner_mu_1.predict(X_train)
    - dr_learner_mu_0.predict(X_train)
)

# Regress the pseudo-outcomes on X  # TODO: USE "Test Set" for this estimation
dr_learner_tau_hat = RandomForestRegressor(n_estimators=100, max_depth=100, random_state=0)
dr_learner_tau_hat.fit(X_train, dr_pseudo_outcomes)

# CATE estimate on the test covariates
dr_tau_hat = dr_learner_tau_hat.predict(X_test)
dr_tau_hat

In [None]:
# mean squared error of the DR-Learner estimate against the true effects
np.mean(np.square(dr_tau_hat - tau_test))

# RA-Learner

In [None]:
### RA-Learner (Random Forest base learners)

# Outcome models per arm (same first stage as the T-Learner)
ra_learner_mu0 = RandomForestRegressor(max_depth=100, random_state=0).fit(X_train_control, Y_train_control)
ra_learner_mu1 = RandomForestRegressor(max_depth=100, random_state=0).fit(X_train_treatment, Y_train_treatment)

# Propensity model e(x); fitted here but not used by the pseudo-outcome below
ra_learner_e_x = RandomForestClassifier(max_depth=100, random_state=0).fit(X_train, W_train)

# RA pseudo-outcome:
#   treated units: Y - mu_0(x);  control units: mu_1(x) - Y
ra_pseudo_outcome = (
    W_train * (Y_train - ra_learner_mu0.predict(X_train))
    + (1 - W_train) * (ra_learner_mu1.predict(X_train) - Y_train)
)

# Regress the pseudo-outcome on X and predict the CATE on the test set
ra_tau_hat_learner = RandomForestRegressor(max_depth=100, random_state=0).fit(X_train, ra_pseudo_outcome)
ra_tau_hat = ra_tau_hat_learner.predict(X_test)
ra_tau_hat

In [None]:
# mean squared error of the RA-Learner estimate against the true effects
np.mean(np.square(ra_tau_hat - tau_test))

# PW-Learner

In [None]:
### PW-Learner (Random Forest base learners)

# Outcome models per arm; fitted here but not used by the pseudo-outcome below
pw_learner_mu0 = RandomForestRegressor(max_depth=100, random_state=0).fit(X_train_control, Y_train_control)
pw_learner_mu1 = RandomForestRegressor(max_depth=100, random_state=0).fit(X_train_treatment, Y_train_treatment)

# Propensity model e(x)
pw_learner_e_x = RandomForestClassifier(max_depth=100, random_state=0).fit(X_train, W_train)

# PW pseudo-outcome: (W / e(x) - (1 - W) / (1 - e(x))) * Y
# NOTE(review): in-sample forest probabilities of exactly 0 or 1 would produce
# a division by zero here - confirm they cannot occur.
pw_pseudo_outcome = (
    W_train / pw_learner_e_x.predict_proba(X_train)[:, 1]
    - (1 - W_train) / (pw_learner_e_x.predict_proba(X_train)[:, 0])
) * Y_train

# Regress the pseudo-outcome on X and predict the CATE on the test set
pw_tau_hat_learner = RandomForestRegressor(max_depth=100, random_state=0).fit(X_train, pw_pseudo_outcome)
pw_tau_hat = pw_tau_hat_learner.predict(X_test)
pw_tau_hat

In [None]:
# mean squared error of the PW-Learner estimate against the true effects
np.mean(np.square(pw_tau_hat - tau_test))

# F-Learner (equivalent to the PW-Learner)

In [None]:
### F-Learner
# mu_0 (same procedure as for t-learner, maybe can speed up process)
# (fitted here but not used by the pseudo-outcome below)
f_learner_mu0 = RandomForestRegressor(max_depth=100, random_state=0)
f_learner_mu0.fit(X_train_control,Y_train_control)

# mu_1 (same procedure as for t-learner, maybe can speed up process)
# (fitted here but not used by the pseudo-outcome below)
f_learner_mu1 = RandomForestRegressor(max_depth=100, random_state=0)
f_learner_mu1.fit(X_train_treatment,Y_train_treatment)

# e_x
f_learner_e_x = RandomForestClassifier(max_depth=100, random_state=0)
f_learner_e_x.fit(X_train,W_train)

# f-pseudo-outcome: (W / e(x) - (1 - W) / (1 - e(x))) * Y
f_pseudo_outcome = (W_train/f_learner_e_x.predict_proba(X_train)[:,1] - (1 - W_train)/(f_learner_e_x.predict_proba(X_train)[:,0]))*Y_train

# tau_hat: regress this learner's own pseudo-outcome on X, so the cell no
# longer depends on `pw_pseudo_outcome` from the PW-Learner cell
f_tau_hat_learner = RandomForestRegressor(max_depth=100, random_state=0)
f_tau_hat_learner.fit(X_train, f_pseudo_outcome)
f_tau_hat = f_tau_hat_learner.predict(X_test)
f_tau_hat

In [None]:
# mean squared error of the F-Learner estimate against the true effects
print(np.mean(np.square(f_tau_hat - tau_test)))
print("Same as for PW-Learner")

# U-Learner

In [None]:
### U-Learner (Random Forest base learners)

# Propensity model e(x) and marginal outcome model mu(x)
u_learner_e_x = RandomForestClassifier(max_depth=100, random_state=0).fit(X_train, W_train)
u_learner_mu_x = RandomForestRegressor(max_depth=100, random_state=0).fit(X_train, Y_train)

# U pseudo-outcome: outcome residual divided by treatment residual
# NOTE(review): in-sample forest probabilities can equal W exactly, which would
# make the denominator zero - confirm.
u_learner_residuals = (
    (Y_train - u_learner_mu_x.predict(X_train))
    / (W_train - u_learner_e_x.predict_proba(X_train)[:, 1])
)

# Regress the residual ratio on X and predict the CATE on the test set
u_tau_hat_learner = RandomForestRegressor(max_depth=100, random_state=0).fit(X_train, u_learner_residuals)
u_tau_hats = u_tau_hat_learner.predict(X_test)
u_tau_hats


In [None]:
# mean squared error of the U-Learner estimate against the true effects
np.mean(np.square(u_tau_hats - tau_test))

# Just some lasso tests

In [None]:
# Degree-4 polynomial expansion (all powers and interactions, no bias column)
# of the training and test covariates, each with its own transformer object.
poly_train = PolynomialFeatures(degree=4, interaction_only=False, include_bias=False)
poly_test = PolynomialFeatures(degree=4, interaction_only=False, include_bias=False)

X_poly_train = poly_train.fit(X_train).transform(X_train)
X_poly_test = poly_test.fit(X_test).transform(X_test)
X_poly_train

In [None]:
# Lasso with the regularisation strength chosen by 10-fold cross-validation;
# the loose tolerance (1e-2) trades precision for a shorter fit
lasso_poly = LassoCV(cv=10, random_state=0, tol=1e-2)

In [None]:
# Fit the cross-validated Lasso on the polynomial training features. The model
# must be fitted before `predict` is called in the next cell; without this
# call a fresh "Restart & Run All" stops with a NotFittedError.
# degree 3: 10 seconds to fit (100% cpu)
# degree 4: 90 seconds to fit (100% cpu)
lasso_poly.fit(X_poly_train, Y_train)

In [None]:
# predict the test outcomes from the polynomial test features
y_predictions = lasso_poly.predict(X_poly_test)

In [None]:
# mean squared error of the Lasso outcome predictions on the test set
np.mean(np.square(y_predictions - Y_test))

# just some Neural Network test

In [None]:
# Build the regression network on the d covariates:
# three hidden layers of 200 units and two of 100 units (all ReLU),
# followed by a single linear output unit.
model = keras.Sequential(name="Dense_Neural_Network")
model.add(keras.Input(shape=(d,)))
model.add(layers.Dense(200, activation="relu", name="layer1"))
model.add(layers.Dense(200, activation="relu", name="layer2"))
model.add(layers.Dense(200, activation="relu", name="layer3"))
model.add(layers.Dense(100, activation="relu", name="layer4"))
model.add(layers.Dense(100, activation="relu", name="layer5"))
model.add(layers.Dense(1, activation="linear", name="layer6"))
model.summary()

In [None]:
# Compile for regression: Adam optimizer, MSE as the loss to minimise, and MSE
# again as the monitored metric.
model.compile(
    optimizer=keras.optimizers.Adam(),
    loss=keras.losses.MeanSquaredError(),
    metrics=[keras.metrics.MeanSquaredError()],
)

In [None]:
# early stopping: halt training once the validation loss has not improved for
# 2 consecutive epochs, monitoring from the very first epoch
callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=2, start_from_epoch=0)

In [None]:
print("Training Model")
# Store the Keras History object under its own name. Assigning it to
# `training` would overwrite the training data array, so re-running any of the
# data-slicing cells afterwards would fail.
# NOTE(review): the test set doubles as validation data for early stopping,
# so the reported test error is optimistic - confirm this is acceptable.
history = model.fit(
    X_train,
    Y_train,
    batch_size=100,
    epochs=10,
    validation_data=(X_test, Y_test),
    callbacks=[callback] # include early stopping
)

In [None]:
# `predictions` is created (and displayed) in the next cell; referencing it
# here raises a NameError on a fresh kernel, so this cell displays nothing.

In [None]:
print("Generate predictions for test samples")
# `model.predict` returns shape (n_test, 1); flatten to (n_test,) without
# hard-coding the test-set size so the cell survives a different split.
predictions = model.predict(X_test).reshape(-1)
print("predictions shape:", predictions.shape)
predictions

In [None]:
# evaluate the compiled loss and metric (both MSE) on the test set
results = model.evaluate(X_test, Y_test, batch_size=100)

In [None]:
# mean squared error of the network's outcome predictions on the test set
np.mean(np.square(predictions - Y_test))

# Same meta-learners with Lasso and neural-network base learners

## With Lasso (or L1-loss for logistic regression)

In [None]:
# Degree-3 polynomial expansion (all powers and interactions, no bias column)
# of the training and test covariates, each with its own transformer object.
poly_train = PolynomialFeatures(degree=3, interaction_only=False, include_bias=False)
poly_test = PolynomialFeatures(degree=3, interaction_only=False, include_bias=False)

X_poly_train = poly_train.fit(X_train).transform(X_train)
X_poly_test = poly_test.fit(X_test).transform(X_test)
X_poly_train

In [None]:
# compute degree-3 polynomial features for the treatment and control groups in
# the training set, and for the test set
poly_train_treatment = PolynomialFeatures(degree=3, interaction_only=False, include_bias=False)
X_poly_train_treatment = poly_train_treatment.fit_transform(X_train_treatment)

# use the control group's own transformer (the treatment transformer was being
# re-fitted here by mistake, leaving `poly_train_control` unfitted)
poly_train_control = PolynomialFeatures(degree=3, interaction_only=False, include_bias=False)
X_poly_train_control = poly_train_control.fit_transform(X_train_control)

poly_test = PolynomialFeatures(degree=3, interaction_only=False, include_bias=False)
X_poly_test = poly_test.fit_transform(X_test)

In [None]:
# inspect the polynomial features of the control units in the training set
X_poly_train_control

In [None]:
# inspect the polynomial features of the treated units in the training set
X_poly_train_treatment


## T-learner Lasso

In [None]:

# T-Learner with cross-validated Lasso on polynomial features
tic = time.perf_counter()

# Outcome model for the control arm (mu_0)
t_learner_mu0 = LassoCV(cv=10, tol=1e-2, random_state=0)
t_learner_mu0.fit(X_poly_train_control, Y_train_control)

# Outcome model for the treated arm (mu_1)
t_learner_mu1 = LassoCV(cv=10, tol=1e-2, random_state=0)
t_learner_mu1.fit(X_poly_train_treatment, Y_train_treatment)

# Predict both potential outcomes for every test unit
t_mu_0_hat = t_learner_mu0.predict(X_poly_test)
t_mu_1_hat = t_learner_mu1.predict(X_poly_test)

# CATE estimate = mu_1 - mu_0
t_tau_hat = t_mu_1_hat - t_mu_0_hat
toc = time.perf_counter()
t_tau_hat

In [None]:
# wall-clock time of the T-Learner Lasso cell above, in seconds
print(f'Time for calculations: {toc-tic}') # took 69 seconds

In [None]:
# mean squared error of the T-Learner (Lasso) estimate against the true effects
np.mean(np.square(t_tau_hat - tau_test))

## S-learner Lasso

In [None]:
# compute degree-4 polynomial features of the S-learner inputs (covariates + W)
# for the training set and for the two counterfactual test sets.
# Each matrix is expanded with its own degree-4 transformer. The training
# features used to go through the degree-3 `poly_train_treatment`, which gave
# them a different number of columns than the test features and made
# `predict` fail.
xw_poly_train = PolynomialFeatures(degree=4, interaction_only=False, include_bias=False)
X_W_poly_train = xw_poly_train.fit_transform(X_W_train)

xw_poly_test_0 = PolynomialFeatures(degree=4, interaction_only=False, include_bias=False)
X_poly_test_0 = xw_poly_test_0.fit_transform(X_test_0)

xw_poly_test_1 = PolynomialFeatures(degree=4, interaction_only=False, include_bias=False)
X_poly_test_1 = xw_poly_test_1.fit_transform(X_test_1)

In [None]:
# S-Learner with cross-validated Lasso on polynomial features
tic = time.perf_counter()

# Single outcome model mu(x, w): W enters as an ordinary feature
s_learner = LassoCV(cv=10, tol=1e-2, random_state=0)
s_learner.fit(X_W_poly_train, Y_train)

# Predict each test unit's outcome with W forced to 0 and to 1
s_mu_0_hat = s_learner.predict(X_poly_test_0)
s_mu_1_hat = s_learner.predict(X_poly_test_1)

# CATE estimate = difference of the two counterfactual predictions
s_tau_hat = s_mu_1_hat - s_mu_0_hat

toc = time.perf_counter()
print(f'Time for computation: {toc-tic}')

In [None]:
# mean squared error of the S-Learner (Lasso) estimate against the true effects
np.mean(np.square(s_tau_hat - tau_test))

## X-learner with lasso (or l1-penalty)
### Note: this cell takes a long time to run (about two minutes)

In [None]:
### X-Learner (Lasso / L1-penalised logistic regression on polynomial features)

tic = time.perf_counter()

# Stage 1: one outcome model per arm (same first stage as the T-Learner)
x_learner_mu0 = LassoCV(cv=10, tol=1, random_state=0).fit(X_poly_train_control, Y_train_control)
x_learner_mu1 = LassoCV(cv=10, tol=1, random_state=0).fit(X_poly_train_treatment, Y_train_treatment)

# Stage 2: imputed treatment effects
#   D_1 (treated units): observed outcome minus the predicted control outcome
#   D_0 (control units): predicted treated outcome minus the observed outcome
imputed_1 = Y_train_treatment - x_learner_mu0.predict(X_poly_train_treatment)
imputed_0 = x_learner_mu1.predict(X_poly_train_control) - Y_train_control

# Stage 3: regress the imputed effects on the features within each arm
x_tau_0_hat = LassoCV(cv=10, tol=1, random_state=0).fit(X_poly_train_control, imputed_0)
x_tau_1_hat = LassoCV(cv=10, tol=1, random_state=0).fit(X_poly_train_treatment, imputed_1)

# Propensity model e(x), used as the weighting function g(x)
g_x_hat = LogisticRegressionCV(cv=KFold(10), penalty='l1', solver='saga', tol=1, random_state=0)
g_x_hat.fit(X_poly_train, W_train)
probabilities = g_x_hat.predict_proba(X_poly_test)
probas_0, probas_1 = probabilities[:, 0], probabilities[:, 1]

# Final estimate: g(x) * tau_0(x) + (1 - g(x)) * tau_1(x)
x_tau_hat = (
    probas_1 * x_tau_0_hat.predict(X_poly_test)
    + probas_0 * x_tau_1_hat.predict(X_poly_test)
)

toc = time.perf_counter()

print(f'Time for computation: {toc-tic}') # 127 seconds

In [None]:
# mean squared error of the X-Learner (Lasso) estimate against the true effects
np.mean(np.square(x_tau_hat - tau_test))

## R-learner with lasso (or l1-penalty)

In [None]:
### R-Learner (Lasso / L1-penalised logistic regression on polynomial features)

tic = time.perf_counter()

# Propensity model e(x), evaluated in-sample on the training features
r_learner_e_x = LogisticRegressionCV(cv=KFold(10), penalty='l1', solver='saga', tol=1, random_state=0)
r_learner_e_x.fit(X_poly_train, W_train)
r_probas = r_learner_e_x.predict_proba(X_poly_train)
r_probas_0, r_probas_1 = r_probas[:, 0], r_probas[:, 1]  # P(W=0 | x), P(W=1 | x)

# Marginal outcome model mu(x) = E[Y | X=x]
r_learner_mu_x = LassoCV(cv=10, tol=1, random_state=0).fit(X_poly_train, Y_train)

# R pseudo-outcome (outcome residual / treatment residual) and its weights
r_learner_pseudo_outcomes = (
    (Y_train - r_learner_mu_x.predict(X_poly_train)) / (W_train - r_probas_1)
)
r_learner_weights = (W_train - r_probas_1) ** 2

# Weighted regression of the pseudo-outcomes on the features, weights (W - e(x))^2
r_learner_tau = LassoCV(cv=10, tol=1, random_state=0)
r_learner_tau.fit(X_poly_train, r_learner_pseudo_outcomes, sample_weight=r_learner_weights)

# CATE estimate on the test features
r_tau_hats = r_learner_tau.predict(X_poly_test)
r_tau_hats

toc = time.perf_counter()

print(f'Time for computation: {toc-tic} seconds') # 98 seconds

In [None]:
# mean squared error of the R-Learner (Lasso) estimate against the true effects
np.mean(np.square(r_tau_hats - tau_test))

## Dr-learner with lasso (l1-penalty)

In [None]:
### DR-Learner (Lasso / L1-penalised logistic regression on polynomial features)

tic = time.perf_counter()

# TODO: APPLY CROSS-FITTING?
# Propensity model e(x), evaluated in-sample on the training features
dr_learner_e_x = LogisticRegressionCV(cv=KFold(10), penalty='l1', solver='saga', tol=1, random_state=0)
dr_learner_e_x.fit(X_poly_train, W_train)
dr_probas = dr_learner_e_x.predict_proba(X_poly_train)
dr_probas_0, dr_probas_1 = dr_probas[:, 0], dr_probas[:, 1]  # P(W=0 | x), P(W=1 | x)

# Outcome models per arm
dr_learner_mu_0 = LassoCV(cv=10, tol=1, random_state=0).fit(X_poly_train_control, Y_train_control)
dr_learner_mu_1 = LassoCV(cv=10, tol=1, random_state=0).fit(X_poly_train_treatment, Y_train_treatment)

# mu_w: the prediction for each unit's own arm (mu_1 for treated, mu_0 for control)
mu_w = (
    W_train * dr_learner_mu_1.predict(X_poly_train)
    + (1 - W_train) * dr_learner_mu_0.predict(X_poly_train)
)

# Doubly robust pseudo-outcome: propensity-weighted residual plus the
# difference of the two outcome-model predictions
dr_pseudo_outcomes = (
    (W_train - dr_probas_1) / (dr_probas_1 * dr_probas_0) * (Y_train - mu_w)
    + dr_learner_mu_1.predict(X_poly_train)
    - dr_learner_mu_0.predict(X_poly_train)
)

# Regress the pseudo-outcomes on the features  # TODO: USE "Test Set" for this estimation
dr_learner_tau_hat = LassoCV(cv=10, tol=1, random_state=0).fit(X_poly_train, dr_pseudo_outcomes)

# CATE estimate on the test features
dr_tau_hat = dr_learner_tau_hat.predict(X_poly_test)

toc = time.perf_counter()

print(f'Time needed for computation: {toc-tic} seconds') # 104 seconds

In [None]:
# mean squared error of the DR-Learner (Lasso) estimate against the true effects
np.mean(np.square(dr_tau_hat - tau_test))

## Ra-learner with lasso

In [None]:
### RA-Learner (Lasso on polynomial features)

tic = time.perf_counter()

# Outcome models per arm (same first stage as the T-Learner)
ra_learner_mu0 = LassoCV(cv=10, tol=1, random_state=0).fit(X_poly_train_control, Y_train_control)
ra_learner_mu1 = LassoCV(cv=10, tol=1, random_state=0).fit(X_poly_train_treatment, Y_train_treatment)

# Propensity model e(x); fitted here but not used by the pseudo-outcome below
ra_learner_e_x = LogisticRegressionCV(cv=KFold(10), penalty='l1', solver='saga', tol=1, random_state=0)
ra_learner_e_x.fit(X_poly_train, W_train)

# RA pseudo-outcome:
#   treated units: Y - mu_0(x);  control units: mu_1(x) - Y
ra_pseudo_outcome = (
    W_train * (Y_train - ra_learner_mu0.predict(X_poly_train))
    + (1 - W_train) * (ra_learner_mu1.predict(X_poly_train) - Y_train)
)

# Regress the pseudo-outcome on the features and predict the CATE on the test set
ra_tau_hat_learner = LassoCV(cv=10, tol=1, random_state=0).fit(X_poly_train, ra_pseudo_outcome)
ra_tau_hat = ra_tau_hat_learner.predict(X_poly_test)

toc = time.perf_counter()

print(f'Time for computation: {toc-tic} seconds.') # 121 seconds

In [None]:
# mean squared error of the RA-Learner (Lasso) estimate against the true effects
np.mean(np.square(ra_tau_hat - tau_test))

## PW-learner with lasso

In [None]:
### PW-Learner (Lasso / L1-penalised logistic regression on polynomial features)

tic = time.perf_counter()

# Outcome models per arm; fitted here but not used by the pseudo-outcome below
pw_learner_mu0 = LassoCV(cv=10, tol=1, random_state=0).fit(X_poly_train_control, Y_train_control)
pw_learner_mu1 = LassoCV(cv=10, tol=1, random_state=0).fit(X_poly_train_treatment, Y_train_treatment)

# Propensity model e(x)
pw_learner_e_x = LogisticRegressionCV(cv=KFold(10), penalty='l1', solver='saga', tol=1, random_state=0)
pw_learner_e_x.fit(X_poly_train, W_train)

# PW pseudo-outcome: (W / e(x) - (1 - W) / (1 - e(x))) * Y
pw_pseudo_outcome = (
    W_train / pw_learner_e_x.predict_proba(X_poly_train)[:, 1]
    - (1 - W_train) / (pw_learner_e_x.predict_proba(X_poly_train)[:, 0])
) * Y_train

# Regress the pseudo-outcome on the features and predict the CATE on the test set
pw_tau_hat_learner = LassoCV(cv=10, tol=1, random_state=0).fit(X_poly_train, pw_pseudo_outcome)
pw_tau_hat = pw_tau_hat_learner.predict(X_poly_test)

toc = time.perf_counter()

print(f'Time for computation: {toc-tic} seconds.') # 117 seconds

In [None]:
# mean squared error of the PW-Learner (Lasso) estimate against the true effects
np.mean(np.square(pw_tau_hat - tau_test))

## U-learner with lasso

In [None]:
### U-Learner (Lasso / L1-penalised logistic regression on polynomial features)

tic = time.perf_counter()

# Propensity model e(x)
u_learner_e_x = LogisticRegressionCV(cv=KFold(10), penalty='l1', solver='saga', tol=1, random_state=0)
u_learner_e_x.fit(X_poly_train, W_train)

# Marginal outcome model mu(x) = E[Y | X=x]
u_learner_mu_x = LassoCV(cv=10, tol=1, random_state=0).fit(X_poly_train, Y_train)

# U pseudo-outcome: outcome residual divided by treatment residual
u_learner_residuals = (
    (Y_train - u_learner_mu_x.predict(X_poly_train))
    / (W_train - u_learner_e_x.predict_proba(X_poly_train)[:, 1])
)

# Regress the residual ratio on the features and predict the CATE on the test set
u_tau_hat_learner = LassoCV(cv=10, tol=1, random_state=0).fit(X_poly_train, u_learner_residuals)
u_tau_hats = u_tau_hat_learner.predict(X_poly_test)

toc = time.perf_counter()

print(f'Time for computation: {toc-tic} seconds.') # 98 seconds


In [None]:
# mean squared error of the U-Learner (Lasso) estimate against the true effects
np.mean(np.square(u_tau_hats - tau_test))

# Meta-learners with neural networks


In [None]:
# Regression network template for the 25 covariates:
# three hidden layers of 200 units and two of 100 units (all ReLU),
# followed by a single linear output unit.
model_25 = keras.Sequential(name="Dense_Neural_Network")
model_25.add(keras.Input(shape=(25,)))
model_25.add(layers.Dense(200, activation="relu", name="layer1"))
model_25.add(layers.Dense(200, activation="relu", name="layer2"))
model_25.add(layers.Dense(200, activation="relu", name="layer3"))
model_25.add(layers.Dense(100, activation="relu", name="layer4"))
model_25.add(layers.Dense(100, activation="relu", name="layer5"))
model_25.add(layers.Dense(1, activation="linear", name="layer6"))

In [None]:
# Compile for regression: Adam optimizer, MSE as the loss to minimise, and MSE
# again as the monitored metric.
model_25.compile(
    optimizer=keras.optimizers.Adam(),
    loss=keras.losses.MeanSquaredError(),
    metrics=[keras.metrics.MeanSquaredError()],
)

In [None]:
# save the untrained, compiled model; each learner below reloads a fresh copy
# with load_model('model_25') so that every fit starts from the same weights
model_25.save('model_25')

In [None]:
# Same architecture with 26 inputs (25 covariates + treatment indicator);
# this template is the one loaded by the S-Learner:
# three hidden layers of 200 units and two of 100 units (all ReLU),
# followed by a single linear output unit.
model_26 = keras.Sequential(name="Dense_Neural_Network")
model_26.add(keras.Input(shape=(26,)))
model_26.add(layers.Dense(200, activation="relu", name="layer1"))
model_26.add(layers.Dense(200, activation="relu", name="layer2"))
model_26.add(layers.Dense(200, activation="relu", name="layer3"))
model_26.add(layers.Dense(100, activation="relu", name="layer4"))
model_26.add(layers.Dense(100, activation="relu", name="layer5"))
model_26.add(layers.Dense(1, activation="linear", name="layer6"))

In [None]:
# Compile for regression: Adam optimizer, MSE as the loss to minimise, and MSE
# again as the monitored metric.
model_26.compile(
    optimizer=keras.optimizers.Adam(),
    loss=keras.losses.MeanSquaredError(),
    metrics=[keras.metrics.MeanSquaredError()],
)

In [None]:
# save the untrained, compiled 26-input model so it can be reloaded with
# load_model('model_26')
model_26.save('model_26')

In [None]:
# Propensity-model template on the 25 covariates: same hidden layers as the
# regression network, with a single linear output unit that is interpreted as
# a logit (the loss is compiled with from_logits=True).
model_ex = keras.Sequential(name="Dense_Neural_Network_Classification")
model_ex.add(keras.Input(shape=(25,)))
model_ex.add(layers.Dense(200, activation="relu", name="layer1"))
model_ex.add(layers.Dense(200, activation="relu", name="layer2"))
model_ex.add(layers.Dense(200, activation="relu", name="layer3"))
model_ex.add(layers.Dense(100, activation="relu", name="layer4"))
model_ex.add(layers.Dense(100, activation="relu", name="layer5"))
model_ex.add(layers.Dense(1, activation="linear", name="layer6"))

In [None]:
# Compile for binary classification on logits: Adam optimizer, binary
# cross-entropy with label smoothing 0.5, binary accuracy as the metric.
# NOTE(review): BinaryAccuracy thresholds predictions at 0.5 by default, but
# this model outputs logits (decision boundary at 0), so the reported accuracy
# is likely off - confirm.
model_ex.compile(
    optimizer=keras.optimizers.Adam(),
    loss=keras.losses.BinaryCrossentropy(from_logits=True, label_smoothing=0.5),
    metrics=keras.metrics.BinaryAccuracy(),
)

In [None]:
# save the untrained, compiled propensity model so it can be reloaded with
# load_model('model_ex')
model_ex.save('model_ex')

In [None]:
# early stopping that only starts monitoring after epoch 30 and stops at the
# first epoch without improvement in validation loss (patience=0)
# NOTE(review): the fits below pass callbacks=None, so this callback appears unused
callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=0, start_from_epoch=30)

# Break

In [None]:
# Variant of the propensity-model template whose output unit applies a sigmoid,
# so it predicts probabilities directly instead of logits.
model_ex_sigmoid = keras.Sequential(name="Dense_Neural_Network_Classification")
model_ex_sigmoid.add(keras.Input(shape=(25,)))
model_ex_sigmoid.add(layers.Dense(200, activation="relu", name="layer1"))
model_ex_sigmoid.add(layers.Dense(200, activation="relu", name="layer2"))
model_ex_sigmoid.add(layers.Dense(200, activation="relu", name="layer3"))
model_ex_sigmoid.add(layers.Dense(100, activation="relu", name="layer4"))
model_ex_sigmoid.add(layers.Dense(100, activation="relu", name="layer5"))
model_ex_sigmoid.add(layers.Dense(1, activation="sigmoid", name="layer6"))

In [None]:
# Compile for binary classification on probabilities: Adam optimizer, binary
# cross-entropy (from_logits=False) with label smoothing 0.5, binary accuracy
# as the metric.
model_ex_sigmoid.compile(
    optimizer=keras.optimizers.Adam(),
    loss=keras.losses.BinaryCrossentropy(from_logits=False, label_smoothing=0.5),
    metrics=keras.metrics.BinaryAccuracy(),
)

In [None]:
# save the untrained, compiled sigmoid-output propensity model under
# 'model_ex_sigmoid'
model_ex_sigmoid.save('model_ex_sigmoid')

In [None]:
# T-Learner with neural-network base learners
# (statement order is kept as-is: training draws from the global random state)

tic = time.perf_counter()

# mu_0: fresh copy of the 25-input template, trained on control units only
t_learner_mu0 = load_model('model_25')
print('Training mu0')
t_learner_mu0.fit(
    X_train_control,
    Y_train_control,
    batch_size=100,
    epochs=100,
    validation_data=(X_test, Y_test),
    callbacks=None,  # no callbacks: early stopping is not used
)
t_mu_0_hat = t_learner_mu0.predict(X_test)

# mu_1: fresh copy of the 25-input template, trained on treated units only
t_learner_mu1 = load_model('model_25')
print('Training mu1')
t_learner_mu1.fit(
    X_train_treatment,
    Y_train_treatment,
    batch_size=100,
    epochs=100,
    validation_data=(X_test, Y_test),
    callbacks=None,  # no callbacks: early stopping is not used
)
t_mu_1_hat = t_learner_mu1.predict(X_test)

# CATE estimate = mu_1 - mu_0
t_tau_hat = t_mu_1_hat - t_mu_0_hat

toc = time.perf_counter()

print(f'Time for computation: {toc-tic} seconds.') # 3 seconds


In [None]:
# mean squared error of the T-Learner (NN) estimate; the (n_test, 1) network
# output is flattened using the length of tau_test instead of a hard-coded 300
((np.reshape(t_tau_hat,(len(tau_test),)) - tau_test)**2).mean() # 3.18

## S-learner with NN

In [None]:
# S-Learner with a neural-network base learner

# Single outcome model mu(x, w): fresh copy of the 26-input template
s_learner = load_model('model_26')
s_learner.fit(
    X_W_train,
    Y_train,
    batch_size=100,
    epochs=100,
    validation_data=(X_W_test, Y_test),
    callbacks=None,  # no callbacks: early stopping is not used
)

# Predict each test unit's outcome with W forced to 0 and to 1
s_mu_0_hat = s_learner.predict(X_test_0)
s_mu_1_hat = s_learner.predict(X_test_1)

# CATE estimate = difference of the two counterfactual predictions
s_tau_hat = s_mu_1_hat - s_mu_0_hat


In [None]:
# mean squared error of the S-Learner (NN) estimate; the (n_test, 1) network
# output is flattened using the length of tau_test instead of a hard-coded 300
((np.reshape(s_tau_hat,(len(tau_test),)) - tau_test)**2).mean() # 1.98

# X-learner with Neural Network

In [None]:
### X-Learner (neural-network base learners)
# The statement order matters: every fit draws from the global random state,
# so reordering the fits would change the results.

# mu_0 (same procedure as for t-learner, maybe can speed up process)
x_learner_mu0 = load_model('model_25')
x_learner_mu0.fit(X_train_control,Y_train_control,
    batch_size=100,
    epochs=100,
    validation_data=(X_test_control, Y_test_control),
    callbacks=None # no callbacks: early stopping is not used
)

# d_1: imputed effect for treated units = observed outcome - predicted control outcome
# (the (n, 1) network output is flattened to (n,) before subtracting)
imputed_1 = Y_train_treatment - np.reshape(x_learner_mu0.predict(X_train_treatment),(len(Y_train_treatment),))

# mu_1 (same procedure as for t-learner, maybe can speed up process)
x_learner_mu1 = load_model('model_25')
x_learner_mu1.fit(X_train_treatment,Y_train_treatment,
    batch_size=100,
    epochs=100,
    validation_data=(X_test_treatment, Y_test_treatment),
    callbacks=None # no callbacks: early stopping is not used
)

# d_0: imputed effect for control units = predicted treated outcome - observed outcome
imputed_0 = np.reshape(x_learner_mu1.predict(X_train_control),(len(Y_train_control),)) - Y_train_control


# regress imputed on X

# tau_hat_1 (validation monitors the loss against the true effects of the treated test units)
x_tau_1_hat = load_model('model_25')
x_tau_1_hat.fit(X_train_treatment,imputed_1,
    batch_size=100,
    epochs=100,
    validation_data=(X_test_treatment, tau_test_treatment),
    callbacks=None # no callbacks: early stopping is not used
)

x_tau_1_hat_predicts = np.reshape(x_tau_1_hat.predict(X_test),(len(X_test),))

# tau_hat_0 (validation monitors the loss against the true effects of the control test units)
x_tau_0_hat = load_model('model_25')
x_tau_0_hat.fit(X_train_control,imputed_0,
    batch_size=100,
    epochs=100,
    validation_data=(X_test_control, tau_test_control),
    callbacks=None # no callbacks: early stopping is not used
)

x_tau_0_hat_predicts = np.reshape(x_tau_0_hat.predict(X_test),(len(X_test),))

# estimate e_x to use as g_x
g_x_hat = load_model('model_ex')
g_x_hat.fit(X_train,W_train,
    batch_size=100,
    epochs=100,
    validation_data=(X_test, W_test),
    callbacks=None # no callbacks: early stopping is not used
)
# model_ex outputs logits, so apply a sigmoid to obtain P(W=1 | x), flattened to 1-D
x_probabilities = g_x_hat.predict(X_test)
x_probs_1 = np.reshape(keras.activations.sigmoid(x_probabilities),(len(x_probabilities,)))
x_probs_0 = 1 - x_probs_1

# final estimator of tau: g(x) * tau_0(x) + (1 - g(x)) * tau_1(x)
x_tau_hat = x_probs_1 * x_tau_0_hat_predicts + x_probs_0 * x_tau_1_hat_predicts

In [None]:
# mean squared error of the X-Learner (NN) estimate; the reshape uses the
# length of tau_test instead of a hard-coded 300
((np.reshape(x_tau_hat,(len(tau_test),)) - tau_test)**2).mean() # 3.1614 with smoothing of 0.5

# R-learner with NN

In [None]:
### R-Learner (neural-network base learners)
# The statement order matters: every fit draws from the global random state,
# so reordering the fits would change the results.

# estimate e_x
r_learner_e_x = load_model('model_ex')
r_learner_e_x.fit(X_train,W_train,
    batch_size=100,
    epochs=100,
    validation_data=(X_test, W_test),
    callbacks=None # no callbacks: early stopping is not used
)

# get e_x predictions: model_ex outputs logits, so apply a sigmoid and flatten to 1-D
r_probabilities = np.reshape(keras.activations.sigmoid(r_learner_e_x.predict(X_train)),len(X_train,))
r_probas_1 = r_probabilities# probabilities of W=1
r_probas_0 = 1 - r_probabilities # probabilities of W=0

# estimate mu_x
r_learner_mu_x = load_model('model_25')
r_learner_mu_x.fit(X_train,Y_train,
    batch_size=100,
    epochs=100,
    validation_data=(X_test, Y_test),
    callbacks=None # no callbacks: early stopping is not used
)

# compute r-pseudo-outcome (outcome residual / treatment residual) and weights;
# the (n, 1) network output is flattened to (n,) before subtracting
r_learner_pseudo_outcomes = (Y_train - np.reshape(r_learner_mu_x.predict(X_train),(len(X_train),))) / (W_train - r_probas_1)
r_learner_weights = (W_train - r_probas_1)**2

# estimate tau (regress pseudo-outcomes on X, weight by (W-e(x))^2)
r_learner_tau = load_model('model_25')
r_learner_tau.fit(X_train,r_learner_pseudo_outcomes,
    sample_weight=r_learner_weights,
    batch_size=100,
    epochs=100,
    validation_data=None,
    callbacks=None # no callbacks: early stopping is not used
)

# predict tau (shape (n_test, 1); flattened in the MSE cell)
r_tau_hats = r_learner_tau.predict(X_test)

In [None]:
# mean squared error of the R-Learner (NN) estimate against the true effects
np.mean(np.square(np.reshape(r_tau_hats, (len(X_test))) - tau_test)) #47.81

The result above is now reproducible across runs (the global random seed is set at the top of the notebook).

# DR-learner with Neural Network

In [None]:
### DR-Learner (neural-network base learners)
# The statement order matters: every fit draws from the global random state,
# so reordering the fits would change the results.

tic = time.perf_counter()

# TODO: APPLY CROSS-FITTING?
# estimate e_x
dr_learner_e_x = load_model('model_ex')
dr_learner_e_x.fit(X_train, W_train,
    batch_size=100,
    epochs=100,
    validation_data=(X_test, W_test),
    callbacks=None # no callbacks: early stopping is not used
)

# model_ex outputs logits, so apply a sigmoid and flatten to shape (n_train,)
dr_probabilities = np.reshape(keras.activations.sigmoid(dr_learner_e_x.predict(X_train)),len(X_train,))
dr_probas_0 = 1 - dr_probabilities # probabilities of W=0
dr_probas_1 = dr_probabilities # probabilities of W=1

# estimate mu_0
dr_learner_mu_0 = load_model('model_25')
dr_learner_mu_0.fit(X_train_control,Y_train_control,
    batch_size=100,
    epochs=100,
    validation_data=(X_test_control, Y_test_control),
    callbacks=None # no callbacks: early stopping is not used
)

# Flatten the (n_train, 1) network output to (n_train,). Without this, the
# element-wise products with the 1-D W_train below broadcast to an
# (n_train, n_train) matrix and the pseudo-outcome is silently wrong.
dr_learner_mu_0_predictions = np.reshape(dr_learner_mu_0.predict(X_train),(len(X_train),))

# estimate mu_1
dr_learner_mu_1 = load_model('model_25')
dr_learner_mu_1.fit(X_train_treatment,Y_train_treatment,
    batch_size=100,
    epochs=100,
    validation_data=(X_test_treatment, Y_test_treatment),
    callbacks=None # no callbacks: early stopping is not used
)

# flattened for the same reason as the mu_0 predictions
dr_learner_mu_1_predictions = np.reshape(dr_learner_mu_1.predict(X_train),(len(X_train),))

# DR-pseudo-outcomes (every operand is 1-D with one entry per training unit)
mu_w = W_train * dr_learner_mu_1_predictions + (1 - W_train) * dr_learner_mu_0_predictions # this is mu_w for each observation, i.e mu_1 for units in the treatment groups, and mu_0 for units in the control group
dr_pseudo_outcomes = (W_train - dr_probas_1) / (dr_probas_1 * dr_probas_0) * (Y_train - mu_w) + dr_learner_mu_1_predictions - dr_learner_mu_0_predictions

# estimate tau (regress pseudo-outcomes on X) # TODO: USE "Test Set" for this estimation
dr_learner_tau_hat = load_model('model_25')
dr_learner_tau_hat.fit(X_train,dr_pseudo_outcomes,
    batch_size=100,
    epochs=100,
    validation_data=None,
    callbacks=None # no callbacks: early stopping is not used
)

# predict tau (shape (n_test, 1); flattened in the MSE cell)
dr_tau_hat = dr_learner_tau_hat.predict(X_test)

toc = time.perf_counter()

print(f'Time needed for computation: {toc-tic} seconds') # 104 seconds

In [None]:
# mean squared error of the DR-Learner (NN) estimate against the true effects
np.mean(np.square(np.reshape(dr_tau_hat, (len(tau_test),)) - tau_test)) # 8.3514

# RA-learner with Neural Network

In [None]:
### RA-Learner (neural-network base learners)
# The statement order matters: every fit draws from the global random state,
# so reordering the fits would change the results.

# mu_0 (same procedure as for t-learner, maybe can speed up process)
ra_learner_mu0 = load_model('model_25')
ra_learner_mu0.fit(X_train_control,Y_train_control,
    batch_size=100,
    epochs=100,
    validation_data=(X_test_control, Y_test_control),
    callbacks=None # no callbacks: early stopping is not used
)

# get hats: mu_0 predictions for every training unit, flattened from (n, 1) to (n,)
ra_learner_mu0_predictions = np.reshape(ra_learner_mu0.predict(X_train),(len(X_train),))

# mu_1 (same procedure as for t-learner, maybe can speed up process)
ra_learner_mu1 = load_model('model_25')
ra_learner_mu1.fit(X_train_treatment,Y_train_treatment,
    batch_size=100,
    epochs=100,
    validation_data=(X_test_treatment, Y_test_treatment),
    callbacks=None # no callbacks: early stopping is not used
)

# get hats: mu_1 predictions for every training unit, flattened from (n, 1) to (n,)
ra_learner_mu1_predictions = np.reshape(ra_learner_mu1.predict(X_train),(len(X_train),))


# e_x TODO: IS IT NEEDED?
# (the propensity fit is disabled by wrapping it in a string literal; the
# pseudo-outcome below does not use e_x)
"""ra_learner_e_x = load_model('model_ex')
ra_learner_e_x.fit(X_train,W_train,
    batch_size=100,
    epochs=100,
    validation_data=(X_test, W_test),
    callbacks=[callback] # include early stopping
)"""

# ra-pseudo-outcome: treated units: Y - mu_0(x); control units: mu_1(x) - Y
ra_pseudo_outcome = W_train*(Y_train - ra_learner_mu0_predictions) + (1 - W_train)*(ra_learner_mu1_predictions - Y_train)

# tau_hat: regress the pseudo-outcome on X
ra_tau_hat_learner = load_model('model_25')
ra_tau_hat_learner.fit(X_train, ra_pseudo_outcome,
    batch_size=100,
    epochs=100,
    validation_data=None,
    callbacks=None # no callbacks: early stopping is not used
)

# CATE estimate on the test set (shape (n_test, 1); flattened in the MSE cell)
ra_tau_hat = ra_tau_hat_learner.predict(X_test)
ra_tau_hat

In [None]:
# mean squared error of the RA-Learner (NN) estimate against the true effects
np.mean(np.square(np.reshape(ra_tau_hat, (len(tau_test),)) - tau_test)) # 3.397

# PW-learner with Neural Network

In [None]:
### PW-Learner (neural-network base learners)
# The statement order matters: every fit draws from the global random state,
# so reordering the fits would change the results.

# mu_0 TODO: really needed
# (disabled: the fit is wrapped in a string literal; the pseudo-outcome below does not use mu_0)
# pw_learner_mu0 = load_model('model_25')
"""pw_learner_mu0.fit(X_train_control,Y_train_control,
    batch_size=100,
    epochs=100,
    validation_data=(X_test_control, Y_test_control),
    callbacks=None # include early stopping
)"""

# mu_1 TODO: really needed?
# (disabled: the fit is wrapped in a string literal; the pseudo-outcome below does not use mu_1)
#pw_learner_mu1 = load_model('model_25')
"""pw_learner_mu1.fit(X_train_treatment,Y_train_treatment,
    batch_size=100,
    epochs=100,
    validation_data=(X_test_treatment, Y_test_treatment),
    callbacks=None # include early stopping
)"""

# e_x
pw_learner_e_x = load_model('model_ex')
pw_learner_e_x.fit(X_train,W_train,
    batch_size=100,
    epochs=100,
    validation_data=(X_test, W_test),
    callbacks=None # no callbacks: early stopping is not used
)

# model_ex outputs logits, so apply a sigmoid to obtain P(W=1 | x), flattened to 1-D
pw_probabilities = np.reshape(keras.activations.sigmoid(pw_learner_e_x.predict(X_train)),len(X_train,))
pw_probs_1 = pw_probabilities
pw_probs_0 = 1 - pw_probabilities

# pw-pseudo-outcome: (W / e(x) - (1 - W) / (1 - e(x))) * Y
pw_pseudo_outcome = (W_train / pw_probs_1 - (1 - W_train) / pw_probs_0)*Y_train

# tau_hat: regress the pseudo-outcome on X
pw_tau_hat_learner = load_model('model_25')
pw_tau_hat_learner.fit(X_train, pw_pseudo_outcome,
    batch_size=100,
    epochs=100,
    validation_data=None,
    callbacks=None # no callbacks: early stopping is not used
)
# CATE estimate on the test set (shape (n_test, 1); flattened in the MSE cell)
pw_tau_hat = pw_tau_hat_learner.predict(X_test)
pw_tau_hat

In [None]:
# mean squared error of pw_tau_hat against tau_test
np.mean(np.square(np.reshape(pw_tau_hat, len(tau_test)) - tau_test))  # 271.842 TODO: CHECK IF IT REALLY IS CORRECT

# U-learner with Neural Network

In [None]:
### U-Learner
# U-learner with neural-network base learners:
#   1. fit the propensity model e(x) and the outcome model mu(x) on the full training set,
#   2. form the residual ratio (Y - mu(x)) / (W - e(x)),
#   3. regress that ratio on X and predict tau_hat on the test set.

# estimate e_x
u_learner_e_x = load_model('model_ex')
u_learner_e_x.fit(X_train, W_train,
    batch_size=100,
    epochs=100,
    validation_data=(X_test, W_test),  # monitoring only: no callback reads it
    callbacks=None  # TODO: include early stopping
)

# NOTE(review): the other consumers of 'model_ex' in this notebook (PW cell,
# X-/R-/DR-learner classes) pass its output through a sigmoid before using it
# as a probability; the same is done here for consistency. Presumably the
# model emits logits -- confirm against the model definition.
u_probs_1 = np.reshape(
    keras.activations.sigmoid(u_learner_e_x.predict(X_train)),
    (len(X_train),)
)
u_probs_0 = 1 - u_probs_1

# estimate mu_x
u_learner_mu_x = load_model('model_25')
u_learner_mu_x.fit(X_train, Y_train,
    batch_size=100,
    epochs=100,
    validation_data=(X_test, Y_test),  # monitoring only: no callback reads it
    callbacks=None
)

u_learner_mu_x_predictions = np.reshape(u_learner_mu_x.predict(X_train), (len(X_train),))
# compute residuals: outcome residual divided by treatment residual
u_learner_residuals = (Y_train - u_learner_mu_x_predictions) / (W_train - u_probs_1)

# tau_hat - regress residuals on X
u_tau_hat_learner = load_model('model_25')
u_tau_hat_learner.fit(X_train, u_learner_residuals,
    batch_size=100,
    epochs=100,
    validation_data=None,
    callbacks=None
)

u_tau_hats = u_tau_hat_learner.predict(X_test)
u_tau_hats


In [None]:
# mean squared error of u_tau_hats against tau_test
np.mean(np.square(np.reshape(u_tau_hats, len(tau_test)) - tau_test))  # 74893.80 TODO: CHECK IF IT REALLY IS CORRECT!!!

###### TODO: is a stabilised estimator needed here? The estimated probabilities take extreme values.

# Make Classes

In [None]:
class Metalearner:
    """Empty placeholder class; it defines no attributes or methods."""

# Class T Learner

In [None]:
class TLearner():
    """T-learner: estimate the CATE as the difference of two outcome models.

    ``mu0_model`` is fitted on the control units (``w == 0``) and ``mu1_model``
    on the treated units (``w == 1``); ``predict`` returns
    ``mu1_model(x) - mu0_model(x)``.

    Parameters
    ----------
    method : str
        Base learner used for both outcome models: ``'rf'``
        (RandomForestRegressor), ``'lasso'`` (LassoCV on degree-3 polynomial
        features) or ``'nn'`` (Keras model loaded from 'model_25').

    Raises
    ------
    NotImplementedError
        If ``method`` is none of the names above.
    """

    def __init__(self, method):
        self.method = method

        if method == 'rf':
            self.mu0_model = RandomForestRegressor(n_estimators=1000, max_depth=100, random_state=0)
            self.mu1_model = RandomForestRegressor(n_estimators=1000, max_depth=100, random_state=0)
        elif method == 'lasso':
            self.mu0_model = LassoCV(cv=10, tol=1e-2, random_state=0)
            self.mu1_model = LassoCV(cv=10, tol=1e-2, random_state=0)
            # fitted once in fit(); predict() only applies the fitted transform
            self.poly = PolynomialFeatures(degree=3, interaction_only=False, include_bias=False)
        elif method == 'nn':
            self.mu0_model = load_model('model_25')
            self.mu1_model = load_model('model_25')
        else:
            raise NotImplementedError('Base learner method not specified')

    def fit(self, x, y, w):
        """Fit both outcome models on their treatment arm.

        Parameters
        ----------
        x : array supporting boolean-mask row indexing (e.g. a NumPy array)
            Covariates, one row per unit.
        y : array
            Observed outcomes, aligned with the rows of ``x``.
        w : array
            Treatment indicator (0 = control, 1 = treated), aligned with ``x``.

        Raises
        ------
        NotImplementedError
            If ``self.method`` is not a supported base learner.
        """
        if self.method == 'rf':
            features, banner, fit_kwargs = x, "Fitting random forest for ", {}
        elif self.method == 'lasso':
            features, banner, fit_kwargs = self.poly.fit_transform(x), "Fitting lasso for ", {}
        elif self.method == 'nn':
            features, banner = x, "Training neural network for "
            # No validation_data: the learner only uses what the caller passes
            # in, never notebook-level test arrays. With callbacks=None a
            # validation set would not influence the fitted weights anyway.
            fit_kwargs = dict(batch_size=100, epochs=100, callbacks=None, verbose=0)
        else:
            raise NotImplementedError('Base learner method not specified in fit')

        control, treated = (w == 0), (w == 1)

        # 1: train mu_0 on the control units
        print(banner + "mu_0")
        self.mu0_model.fit(features[control], y[control], **fit_kwargs)

        # 2: train mu_1 on the treated units
        print(banner + "mu_1")
        self.mu1_model.fit(features[treated], y[treated], **fit_kwargs)

    def predict(self, x):
        """Return the CATE estimates ``mu_1(x) - mu_0(x)`` as a 1-D array of length ``len(x)``.

        Raises
        ------
        NotImplementedError
            If ``self.method`` is not a supported base learner.
        """
        if self.method == 'rf':
            features, predict_kwargs = x, {}
        elif self.method == 'lasso':
            # reuse the transformer fitted in fit() instead of re-fitting it here
            features, predict_kwargs = self.poly.transform(x), {}
        elif self.method == 'nn':
            features, predict_kwargs = x, dict(verbose=0)
        else:
            raise NotImplementedError('Base learner method not specified in predict')

        mu0_hats = self.mu0_model.predict(features, **predict_kwargs)
        mu1_hats = self.mu1_model.predict(features, **predict_kwargs)
        # flatten: Keras models return a column, the sklearn models a 1-D array
        predictions = np.reshape(mu1_hats - mu0_hats, (len(x),))
        return predictions

In [None]:
# Fit a T-learner and report the test-set mean squared error of its CATE estimates.
# Note: despite its name, `t_nn` uses whichever base learner is passed below.
t_nn = TLearner('lasso')
t_nn.fit(x=X_train, y=Y_train, w=W_train)
predictions = t_nn.predict(x=X_test)
np.mean(np.square(predictions - tau_test))
# recorded MSE per base learner:
# rf: 10.091322530886687
# lasso: 5.583461099392904
# nn: 3.1867804239471273

# Class S Learner

In [None]:
class SLearner():
    """S-learner: one outcome model on the covariates plus the treatment indicator.

    ``mux_model`` is fitted on ``[x, w]``; ``predict`` evaluates it on
    ``[x, 1]`` and ``[x, 0]`` and returns the difference.

    Parameters
    ----------
    method : str
        Base learner: ``'rf'`` (RandomForestRegressor), ``'lasso'`` (LassoCV on
        degree-3 polynomial features) or ``'nn'`` (Keras model loaded from
        'model_26').

    Raises
    ------
    NotImplementedError
        If ``method`` is none of the names above.
    """

    def __init__(self, method):
        self.method = method

        if method == 'rf':
            self.mux_model = RandomForestRegressor(n_estimators=1000, max_depth=100, random_state=0)
        elif method == 'lasso':
            self.mux_model = LassoCV(cv=10, tol=1e-2, random_state=0)
            # fitted once in fit(); predict() only applies the fitted transform
            self.poly = PolynomialFeatures(degree=3, interaction_only=False, include_bias=False)
        elif method == 'nn':
            self.mux_model = load_model('model_26')
        else:
            raise NotImplementedError('Base learner method not specified')

    def fit(self, x, y, w):
        """Fit the outcome model on the covariates with ``w`` appended as the last column.

        Parameters
        ----------
        x : 2-D array
            Covariates, one row per unit.
        y : array
            Observed outcomes, aligned with the rows of ``x``.
        w : array
            Treatment indicator, aligned with the rows of ``x``.

        Raises
        ------
        NotImplementedError
            If ``self.method`` is not a supported base learner.
        """
        # treatment indicator becomes the last feature column
        x_w = np.concatenate((x, np.reshape(w, (len(w), 1))), axis=1)

        if self.method == 'rf':
            print("Fitting random forest for mu_x")
            self.mux_model.fit(x_w, y)

        elif self.method == 'lasso':
            # polynomial expansion of [x, w]
            x_poly_train = self.poly.fit_transform(x_w)

            print("Fitting lasso for mu_x")
            self.mux_model.fit(x_poly_train, y)

        elif self.method == 'nn':
            print("Training neural network for mu_x")
            self.mux_model.fit(x_w, y,
                               batch_size=100,
                               epochs=100,
                               callbacks=None,  # TODO: include early stopping
                               verbose=0)

        else:
            raise NotImplementedError('Base learner method not specified in fit')

    def predict(self, x, w=None):
        """Return the CATE estimates ``mu(x, 1) - mu(x, 0)`` as a 1-D array.

        Parameters
        ----------
        x : 2-D array
            Covariates, one row per unit.
        w : ignored
            Accepted only for backward compatibility; the prediction sets the
            treatment column to 0 and 1 itself.

        Raises
        ------
        NotImplementedError
            If ``self.method`` is not a supported base learner.
        """
        # counterfactual design matrices: everybody untreated / everybody treated
        x_0 = np.concatenate((x, np.zeros((len(x), 1))), axis=1)
        x_1 = np.concatenate((x, np.ones((len(x), 1))), axis=1)

        if self.method == 'rf':
            predict_kwargs = {}
        elif self.method == 'lasso':
            # reuse the transformer fitted in fit() instead of re-fitting it here
            x_0, x_1 = self.poly.transform(x_0), self.poly.transform(x_1)
            predict_kwargs = {}
        elif self.method == 'nn':
            predict_kwargs = dict(verbose=0)
        else:
            raise NotImplementedError('Base learner method not specified in predict')

        mu0_hats = self.mux_model.predict(x_0, **predict_kwargs)
        mu1_hats = self.mux_model.predict(x_1, **predict_kwargs)
        # flatten: Keras models return a column, the sklearn models a 1-D array
        predictions = np.reshape(mu1_hats - mu0_hats, (len(x),))
        return predictions

In [None]:
# Fit an S-learner and report the test-set mean squared error of its CATE estimates.
# Note: despite its name, `s_nn` uses whichever base learner is passed below.
s_nn = SLearner(method='lasso')
s_nn.fit(x=X_train, y=Y_train, w=W_train)
predictions = s_nn.predict(x=X_test, w=W_test)
np.mean(np.square(predictions - tau_test))
# recorded MSE per base learner:
# rf: 18.134009488483855
# lasso: 5.559126710289806
# nn: 1.987529792077956

# Class X Learner

In [None]:
class XLearner():
    """X-learner for CATE estimation.

    Steps performed by ``fit``:
      1. fit ``mu0_model`` on the control units and impute treatment effects
         for the treated units as ``y - mu_0(x)``;
      2. fit ``mu1_model`` on the treated units and impute treatment effects
         for the control units as ``mu_1(x) - y``;
      3. fit ``tau0_model`` / ``tau1_model`` on the imputed effects of the
         control / treated units;
      4. fit the propensity model ``ex_model`` on all units.

    ``predict`` combines the two effect models as
    ``e(x) * tau_0(x) + (1 - e(x)) * tau_1(x)``.

    Parameters
    ----------
    method : str
        Base learner: ``'rf'`` (random forests), ``'lasso'`` (LassoCV /
        L1 LogisticRegressionCV on degree-3 polynomial features) or ``'nn'``
        (Keras models loaded from 'model_25' and 'model_ex').

    Raises
    ------
    NotImplementedError
        If ``method`` is none of the names above.
    """

    def __init__(self, method):
        self.method = method

        if method == 'rf':
            self.mu0_model = RandomForestRegressor(n_estimators=100, max_depth=100, random_state=0)
            self.mu1_model = RandomForestRegressor(n_estimators=100, max_depth=100, random_state=0)
            self.ex_model = RandomForestClassifier(max_depth=100, random_state=0)
            self.tau0_model = RandomForestRegressor(n_estimators=100, max_depth=100, random_state=0)
            self.tau1_model = RandomForestRegressor(n_estimators=100, max_depth=100, random_state=0)

        elif method == 'lasso':
            self.mu0_model = LassoCV(cv=10, tol=1, random_state=0)
            self.mu1_model = LassoCV(cv=10, tol=1, random_state=0)
            self.ex_model = LogisticRegressionCV(cv=KFold(10), penalty='l1', solver='saga', tol=1, random_state=0)
            self.tau0_model = LassoCV(cv=10, tol=1, random_state=0)
            self.tau1_model = LassoCV(cv=10, tol=1, random_state=0)
            # fitted once in fit(); predict() only applies the fitted transform
            self.poly = PolynomialFeatures(degree=3, interaction_only=False, include_bias=False)

        elif method == 'nn':
            self.mu0_model = load_model('model_25')
            self.mu1_model = load_model('model_25')
            self.ex_model = load_model('model_ex')
            self.tau0_model = load_model('model_25')
            self.tau1_model = load_model('model_25')
        else:
            raise NotImplementedError('Base learner method not specified')

    def fit(self, x, y, w):
        """Fit the outcome, imputed-effect and propensity models.

        Parameters
        ----------
        x : array supporting boolean-mask row indexing (e.g. a NumPy array)
            Covariates, one row per unit.
        y : array
            Observed outcomes, aligned with the rows of ``x``.
        w : array
            Treatment indicator (0 = control, 1 = treated), aligned with ``x``.

        Raises
        ------
        NotImplementedError
            If ``self.method`` is not a supported base learner.
        """
        if self.method == 'rf':
            features, banner = x, "Fitting random forest for "
            fit_kwargs, predict_kwargs = {}, {}
        elif self.method == 'lasso':
            features, banner = self.poly.fit_transform(x), "Fitting lasso for "
            fit_kwargs, predict_kwargs = {}, {}
        elif self.method == 'nn':
            features, banner = x, "Training neural network for "
            fit_kwargs = dict(batch_size=100, epochs=100, callbacks=None, verbose=0)
            predict_kwargs = dict(verbose=0)
        else:
            raise NotImplementedError('Base learner method not specified in fit')

        control, treated = (w == 0), (w == 1)
        x_control, x_treated = features[control], features[treated]
        y_control, y_treated = y[control], y[treated]

        # 1: train mu_0 on controls, impute effects for the treated units
        print(banner + "mu_0")
        self.mu0_model.fit(x_control, y_control, **fit_kwargs)
        # reshape flattens the (n, 1) Keras output; it is a no-op for sklearn
        imputed_1 = y_treated - np.reshape(
            self.mu0_model.predict(x_treated, **predict_kwargs), (len(x_treated),))

        # 2: train mu_1 on treated, impute effects for the control units
        print(banner + "mu_1")
        self.mu1_model.fit(x_treated, y_treated, **fit_kwargs)
        imputed_0 = np.reshape(
            self.mu1_model.predict(x_control, **predict_kwargs), (len(x_control),)) - y_control

        # 3: train tau_0 on the imputed effects of the control units
        print(banner + "tau_0")
        self.tau0_model.fit(x_control, imputed_0, **fit_kwargs)

        # 4: train tau_1 on the imputed effects of the treated units
        print(banner + "tau_1")
        self.tau1_model.fit(x_treated, imputed_1, **fit_kwargs)

        # 5: train the propensity model on all units
        print(banner + "e_x")
        self.ex_model.fit(features, w, **fit_kwargs)

    def predict(self, x):
        """Return ``e(x) * tau_0(x) + (1 - e(x)) * tau_1(x)`` as a 1-D array of length ``len(x)``.

        Raises
        ------
        NotImplementedError
            If ``self.method`` is not a supported base learner.
        """
        if self.method == 'rf':
            features, predict_kwargs = x, {}
        elif self.method == 'lasso':
            # reuse the transformer fitted in fit() instead of re-fitting it here
            features, predict_kwargs = self.poly.transform(x), {}
        elif self.method == 'nn':
            features, predict_kwargs = x, dict(verbose=0)
        else:
            raise NotImplementedError('Base learner method not specified in predict')

        # 1: calculate hats of tau_0 and tau_1
        tau_0_hats = np.reshape(self.tau0_model.predict(features, **predict_kwargs), (len(x),))
        tau_1_hats = np.reshape(self.tau1_model.predict(features, **predict_kwargs), (len(x),))

        # 2: probabilities of treatment
        if self.method == 'nn':
            # the Keras propensity model output is squashed with a sigmoid
            logit = self.ex_model.predict(features, **predict_kwargs)
            probs = np.reshape(keras.activations.sigmoid(logit), (len(logit),))
        else:
            probs = self.ex_model.predict_proba(features)[:, 1]

        # 3: final predictions
        predictions = probs * tau_0_hats + (1 - probs) * tau_1_hats
        return predictions

In [None]:
# Fit an X-learner and report the test-set mean squared error of its CATE estimates.
# Note: despite its name, `x_rf` uses whichever base learner is passed below.
x_rf = XLearner(method='nn')
x_rf.fit(x=X_train, y=Y_train, w=W_train)
predictions = x_rf.predict(x=X_test)
np.mean(np.square(predictions - tau_test))
# rf: # 3.1369636408859614 --> same (good)
# lasso: # nn: 7.667219448077926 --> same (good)
# nn: 3.161416602361538 --> same (good)

# R-Learner

In [None]:
class RLearner():
    """R-learner for CATE estimation.

    ``fit`` estimates the outcome model ``mux_model`` and the propensity model
    ``ex_model`` on all units, forms the pseudo-outcome
    ``(y - mu(x)) / (w - e(x))`` with weights ``(w - e(x))**2`` and fits
    ``tau_model`` on it by weighted regression. ``predict`` returns the
    predictions of ``tau_model``.

    Parameters
    ----------
    method : str
        Base learner: ``'rf'`` (random forests), ``'lasso'`` (LassoCV /
        L1 LogisticRegressionCV on degree-3 polynomial features) or ``'nn'``
        (Keras models loaded from 'model_25' and 'model_ex').

    Raises
    ------
    NotImplementedError
        If ``method`` is none of the names above.
    """

    def __init__(self, method):
        self.method = method

        if method == 'rf':
            self.mux_model = RandomForestRegressor(n_estimators=100, max_depth=100, random_state=0)
            self.ex_model = RandomForestClassifier(max_depth=100, random_state=0)
            self.tau_model = RandomForestRegressor(n_estimators=100, max_depth=100, random_state=0)

        elif method == 'lasso':
            self.mux_model = LassoCV(cv=10, tol=1, random_state=0)
            self.ex_model = LogisticRegressionCV(cv=KFold(10), penalty='l1', solver='saga', tol=1, random_state=0)
            self.tau_model = LassoCV(cv=10, tol=1, random_state=0)
            # fitted once in fit(); predict() only applies the fitted transform
            self.poly = PolynomialFeatures(degree=3, interaction_only=False, include_bias=False)

        elif method == 'nn':
            self.mux_model = load_model('model_25')
            self.ex_model = load_model('model_ex')
            self.tau_model = load_model('model_25')

        else:
            raise NotImplementedError('Base learner method not specified or typo')

    def fit(self, x, y, w):
        """Fit the nuisance models and the weighted effect model.

        Parameters
        ----------
        x : array
            Covariates, one row per unit.
        y : array
            Observed outcomes, aligned with the rows of ``x``.
        w : array
            Treatment indicator, aligned with the rows of ``x``.

        Raises
        ------
        NotImplementedError
            If ``self.method`` is not a supported base learner.
        """
        if self.method == 'rf':
            features, banner = x, 'Fitting random forest for '
            fit_kwargs, predict_kwargs = {}, {}
        elif self.method == 'lasso':
            features, banner = self.poly.fit_transform(x), 'Fitting lasso for '
            fit_kwargs, predict_kwargs = {}, {}
        elif self.method == 'nn':
            features, banner = x, 'Training NN for '
            fit_kwargs = dict(batch_size=100, epochs=100, callbacks=None, verbose=0)
            predict_kwargs = dict(verbose=0)
        else:
            raise NotImplementedError('Base learner method not specified in fit')

        # 1: fit mu_x
        print(banner + 'mu_x')
        self.mux_model.fit(features, y, **fit_kwargs)

        # 2: fit ex
        print(banner + 'e_x')
        self.ex_model.fit(features, w, **fit_kwargs)

        # 3: calculate pseudo_outcomes & weights
        if self.method == 'nn':
            # the Keras propensity model output is squashed with a sigmoid
            probs = np.reshape(
                keras.activations.sigmoid(self.ex_model.predict(features, **predict_kwargs)),
                (len(x),))
        else:
            probs = self.ex_model.predict_proba(features)[:, 1]

        # reshape flattens the (n, 1) Keras output; it is a no-op for sklearn
        mu_hats = np.reshape(self.mux_model.predict(features, **predict_kwargs), (len(x),))
        outcome_residual = np.asarray(y - mu_hats)
        treatment_residual = np.asarray(w - probs)

        # Where w == e(x) the weight (w - e(x))**2 is exactly 0, so the sample
        # contributes nothing to the weighted loss whatever its pseudo-outcome
        # is. Use 0 there instead of dividing by zero, which would hand
        # inf/NaN targets to the final regression.
        nonzero = treatment_residual != 0
        safe_denominator = np.where(nonzero, treatment_residual, 1.0)
        pseudo_outcomes = np.where(nonzero, outcome_residual / safe_denominator, 0.0)
        weights = treatment_residual ** 2

        # 4: fit tau
        print(banner + 'tau_x')
        self.tau_model.fit(features, pseudo_outcomes, sample_weight=weights, **fit_kwargs)

    def predict(self, x):
        """Return the CATE estimates of ``tau_model`` as a 1-D array of length ``len(x)``.

        Raises
        ------
        NotImplementedError
            If ``self.method`` is not a supported base learner.
        """
        if self.method == 'rf':
            features, predict_kwargs = x, {}
        elif self.method == 'lasso':
            # reuse the transformer fitted in fit() instead of re-fitting it here
            features, predict_kwargs = self.poly.transform(x), {}
        elif self.method == 'nn':
            features, predict_kwargs = x, dict(verbose=0)
        else:
            raise NotImplementedError('Base learner method not specified in predict')

        predictions = np.reshape(self.tau_model.predict(features, **predict_kwargs), (len(x),))
        return predictions

In [None]:
# Fit an R-learner and report the test-set mean squared error of its CATE estimates.
# Note: despite its name, `r_rf` uses whichever base learner is passed below.
r_rf = RLearner(method='nn')
r_rf.fit(x=X_train, y=Y_train, w=W_train)
predictions = r_rf.predict(x=X_test)
np.mean(np.square(predictions - tau_test))
# recorded MSE per base learner:
# rf: 17.722925118749608
# lasso: 5.50038865455844
# nn: 47.81939839016621

# Class DR-Learner

In [None]:
class DRLearner():
    """DR-learner (doubly robust) for CATE estimation.

    ``fit`` estimates the outcome models ``mu0_model`` (control units) and
    ``mu1_model`` (treated units) and the propensity model ``ex_model`` (all
    units), builds the pseudo-outcome

        (w - e(x)) / (e(x) * (1 - e(x))) * (y - mu_w(x)) + mu_1(x) - mu_0(x)

    with ``mu_w(x) = w * mu_1(x) + (1 - w) * mu_0(x)``, and regresses it on the
    covariates with ``tau_model``. ``predict`` returns the predictions of
    ``tau_model``.

    Parameters
    ----------
    method : str
        Base learner: ``'rf'`` (random forests), ``'lasso'`` (LassoCV /
        L1 LogisticRegressionCV on degree-3 polynomial features) or ``'nn'``
        (Keras models loaded from 'model_25' and 'model_ex').

    Raises
    ------
    NotImplementedError
        If ``method`` is none of the names above.
    """

    def __init__(self, method):
        self.method = method
        if method == 'rf':
            self.mu0_model = RandomForestRegressor(n_estimators=100, max_depth=100, random_state=0)
            self.mu1_model = RandomForestRegressor(n_estimators=100, max_depth=100, random_state=0)
            self.ex_model = RandomForestClassifier(n_estimators=100, max_depth=100, random_state=0)
            self.tau_model = RandomForestRegressor(n_estimators=100, max_depth=100, random_state=0)

        elif method == 'lasso':
            self.mu0_model = LassoCV(cv=10, tol=1, random_state=0)
            self.mu1_model = LassoCV(cv=10, tol=1, random_state=0)
            self.ex_model = LogisticRegressionCV(cv=KFold(10), penalty='l1', solver='saga', tol=1, random_state=0)
            self.tau_model = LassoCV(cv=10, tol=1, random_state=0)
            # fitted once in fit(); predict() only applies the fitted transform
            self.poly = PolynomialFeatures(degree=3, interaction_only=False, include_bias=False)

        elif method == 'nn':
            self.mu0_model = load_model('model_25')
            self.mu1_model = load_model('model_25')
            self.ex_model = load_model('model_ex')
            self.tau_model = load_model('model_25')

        else:
            raise NotImplementedError('Base learner method not specified or typo')

    def fit(self, x, y, w):
        """Fit the nuisance models and the final effect model.

        Parameters
        ----------
        x : array supporting boolean-mask row indexing (e.g. a NumPy array)
            Covariates, one row per unit.
        y : array
            Observed outcomes, aligned with the rows of ``x``.
        w : array
            Treatment indicator (0 = control, 1 = treated), aligned with ``x``.

        Raises
        ------
        NotImplementedError
            If ``self.method`` is not a supported base learner.
        """
        if self.method == 'rf':
            features, banner = x, 'Fitting random forest for '
            fit_kwargs, predict_kwargs = {}, {}
        elif self.method == 'lasso':
            features, banner = self.poly.fit_transform(x), 'Fitting lasso for '
            fit_kwargs, predict_kwargs = {}, {}
        elif self.method == 'nn':
            features, banner = x, 'Training NN for '
            fit_kwargs = dict(batch_size=100, epochs=100, callbacks=None, verbose=0)
            predict_kwargs = dict(verbose=0)
        else:
            raise NotImplementedError('Base learner method not specified in fit')

        control, treated = (w == 0), (w == 1)
        n_obs = len(x)

        # 1: fit mu_0 on the control units
        print(banner + 'mu_0')
        self.mu0_model.fit(features[control], y[control], **fit_kwargs)

        # 2: fit mu_1 on the treated units
        print(banner + 'mu_1')
        self.mu1_model.fit(features[treated], y[treated], **fit_kwargs)

        # 3: fit ex on all units
        print(banner + 'e_x')
        self.ex_model.fit(features, w, **fit_kwargs)

        if self.method == 'rf':
            # P(W=0 | x) is read from its own predict_proba column (not 1 - probs)
            class_probs = self.ex_model.predict_proba(features)
            probs, neg_prob = class_probs[:, 1], class_probs[:, 0]
        elif self.method == 'lasso':
            probs = self.ex_model.predict_proba(features)[:, 1]
            neg_prob = 1 - probs
        else:
            # the Keras propensity model output is squashed with a sigmoid
            probs = np.reshape(
                keras.activations.sigmoid(self.ex_model.predict(features, **predict_kwargs)),
                (n_obs,))
            neg_prob = 1 - probs

        # calculate pseudo_outcomes
        # Flatten the outcome predictions first: Keras returns (n, 1) columns,
        # and combining those with the 1-D w / y / probs would broadcast the
        # pseudo-outcome to an (n, n) matrix.
        mu0_hats = np.reshape(self.mu0_model.predict(features, **predict_kwargs), (n_obs,))
        mu1_hats = np.reshape(self.mu1_model.predict(features, **predict_kwargs), (n_obs,))
        mu_w = w * mu1_hats + (1 - w) * mu0_hats
        pseudo_outcomes = (w - probs) / (probs * neg_prob) * (y - mu_w) + mu1_hats - mu0_hats

        # 4: fit tau
        print(banner + 'tau_x')
        self.tau_model.fit(features, pseudo_outcomes, **fit_kwargs)

    def predict(self, x):
        """Return the CATE estimates of ``tau_model`` as a 1-D array of length ``len(x)``.

        Raises
        ------
        NotImplementedError
            If ``self.method`` is not a supported base learner.
        """
        if self.method == 'rf':
            features, predict_kwargs = x, {}
        elif self.method == 'lasso':
            # reuse the transformer fitted in fit() instead of re-fitting it here
            features, predict_kwargs = self.poly.transform(x), {}
        elif self.method == 'nn':
            features, predict_kwargs = x, dict(verbose=0)
        else:
            raise NotImplementedError('Base learner method not specified in predict')

        predictions = np.reshape(self.tau_model.predict(features, **predict_kwargs), (len(x),))
        return predictions

In [None]:
# Fit a DR-learner and report the test-set mean squared error of its CATE estimates.
dr_rf = DRLearner(method='rf')
dr_rf.fit(x=X_train, y=Y_train, w=W_train)
predictions = dr_rf.predict(x=X_test)
np.mean(np.square(predictions - tau_test))
# rf: 5.385491721300538 # why different??? ---> because if you take 1 - probs its not exactly the same as taking the [:,0] column!!
# lasso: 6.252082321980517
# nn: 8.35142898943478

# TODO: check this — use the `[:,0]` column of the predicted probabilities instead of `(1 - probs)` to be more exact.

In [None]:
class RALearner:
    """Skeleton of an RA-learner; none of the steps are implemented yet."""

    def __init__(self):
        """Create the learner; no state is initialised."""
        return None

    def fit(self):
        """Not implemented; returns None.

        Planned steps:
          1. fit mu_0
          2. fit mu_1
          3. calculate pseudo_outcomes
          4. fit tau
        """
        return None

    def predict(self):
        """Not implemented; returns None. Planned output: tau_hat."""
        return None

In [None]:
class PWLearner:
    """Skeleton of a PW-learner; none of the steps are implemented yet."""

    def __init__(self):
        """Create the learner; no state is initialised."""
        return None

    def fit(self):
        """Not implemented; returns None.

        Planned steps:
          1. fit ex
          2. calculate pseudo_outcomes
          3. fit tau
        """
        return None

    def predict(self):
        """Not implemented; returns None. Planned output: tau_hat."""
        return None

In [None]:
class ULearner:
    """Skeleton of a U-learner; none of the steps are implemented yet."""

    def __init__(self):
        """Create the learner; no state is initialised."""
        return None

    def fit(self):
        """Not implemented; returns None.

        Planned steps:
          1. fit mu_x
          2. fit ex
          3. calculate residuals
          4. fit tau
        """
        return None

    def predict(self):
        """Not implemented; returns None. Planned output: tau_hat."""
        return None