In [None]:
# import packages
import numpy as np
from numpy import random

import pandas as pd

from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
# from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegressionCV
from sklearn.linear_model import LassoCV
from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import StratifiedKFold

import time

from sklearn.model_selection import KFold

import tensorflow as tf
from tensorflow import keras
from keras import layers
from keras.saving import load_model

# from abc import ABC, abstractmethod

from default_parameters import *
from keras import regularizers


In [None]:
# make GPU available for keras
# https://medium.com/mlearning-ai/install-tensorflow-on-mac-m1-m2-with-gpu-support-c404c6cfb580

In [None]:
# set float64 as standard
tf.keras.backend.set_floatx('float64')

In [None]:
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))
# Why 0 GPUs

In [None]:
# set global seed
tf.keras.utils.set_random_seed(8953)

# Data preprocessing

In [None]:
# Load the example dataset: the CSV has no header row and its first column is used as the row index.
# NOTE(review): absolute, user-specific path - the notebook cannot be re-run on another machine as is;
# consider a path relative to a configurable data directory.
data = pd.read_csv('/Users/arberimbibaj/dataset_example_indicatorCATE.csv', header=None, index_col=[0])
# work with a plain NumPy array from here on
data = data.to_numpy()

In [None]:
N = len(data)
d = len(data[0, :]) - 3

In [None]:
# train test split
# `random` is numpy.random (see the import cell): shuffle() permutes the rows of `data` IN PLACE,
# so re-running this cell reshuffles the already shuffled array and yields a different split.
random.shuffle(data)
# first 700 rows -> training set, all remaining rows -> test set
# NOTE(review): 700 is hard-coded; other cells assume the test set then has exactly 300 rows.
training, test = data[:700, :], data[700:, :]

In [None]:
# Column layout of `training` / `test` as used by this notebook:
#   column 0       -> outcome Y
#   columns 1..25  -> covariates X
#   column 26      -> treatment indicator W
#   column 27      -> tau (the treatment effect the estimates are scored against)
_Y_COL, _W_COL, _TAU_COL = 0, 26, 27
_X_COLS = slice(1, 26)

# W_train and W_test (treatment indicator of every row)
W_train = training[:, _W_COL]
W_test = test[:, _W_COL]

# rows of the training and test set, split by treatment status
training_control, training_treatment = training[W_train == 0], training[W_train == 1]
test_control, test_treatment = test[W_test == 0], test[W_test == 1]

# outcomes: full sets and per treatment arm
Y_train, Y_test = training[:, _Y_COL], test[:, _Y_COL]
Y_train_control, Y_train_treatment = training_control[:, _Y_COL], training_treatment[:, _Y_COL]
Y_test_control, Y_test_treatment = test_control[:, _Y_COL], test_treatment[:, _Y_COL]

# covariates: full sets and per treatment arm
X_train, X_test = training[:, _X_COLS], test[:, _X_COLS]
X_train_control, X_train_treatment = training_control[:, _X_COLS], training_treatment[:, _X_COLS]
X_test_control, X_test_treatment = test_control[:, _X_COLS], test_treatment[:, _X_COLS]

# tau on the test set: all rows and per treatment arm
tau_test = test[:, _TAU_COL]
tau_test_control = test_control[:, _TAU_COL]
tau_test_treatment = test_treatment[:, _TAU_COL]

In [None]:
# set training and test features for the S-Learner (it views W as no different from other X's)
# columns 1..26 = covariates followed by the treatment indicator W
X_W_train = training[:, 1:27]
X_W_test = test[:, 1:27]
# Counterfactual test designs: the test covariates with W forced to 0 and to 1 for every row.
# The appended column is sized from the test set itself instead of a hard-coded 300,
# so the cell keeps working when the train/test split changes.
X_test_0 = np.concatenate((test[:, 1:26], np.zeros((len(test), 1))), axis=1)
X_test_1 = np.concatenate((test[:, 1:26], np.ones((len(test), 1))), axis=1)

In [None]:
X_train

In [None]:
W_train

In [None]:
from sklearn.ensemble import RandomForestClassifier
import pandas as pd

In [None]:
rf = RandomForestClassifier(n_estimators=1000, max_depth=7, max_features=0.3)
rf.fit(X_train, W_train)
probs = rf.predict_proba(X_train)
probs

In [None]:
probs = pd.DataFrame(probs[:, 0])
probs.describe()

In [None]:
model1 = clone_nn_regression(nn_sequential)

In [None]:
model1.fit(X_train, Y_train, epochs=3)

In [None]:
model2 = clone_nn_regression(nn_sequential)

In [None]:
model2.fit(X_train, Y_train, epochs=3)

In [None]:
X_train

In [None]:
model3 = clone_nn_regression(nn_sequential)

In [None]:
model3.fit(X_train, Y_train, epochs=3)

# Random Forest

T-Learner

In [None]:
# T-Learner with random forests: one outcome model per treatment arm,
# CATE estimate = difference of the two predictions on the test covariates.

# mu_0: outcome forest of the control units
t_learner_mu0 = RandomForestRegressor(max_depth=100, random_state=0).fit(X_train_control, Y_train_control)
t_mu_0_hat = t_learner_mu0.predict(X_test)

# mu_1: outcome forest of the treated units
t_learner_mu1 = RandomForestRegressor(max_depth=100, random_state=0).fit(X_train_treatment, Y_train_treatment)
t_mu_1_hat = t_learner_mu1.predict(X_test)

# tau_hat = mu_1_hat - mu_0_hat
t_tau_hat = t_mu_1_hat - t_mu_0_hat
t_tau_hat

In [None]:
# mean squared error
((t_tau_hat - tau_test) ** 2).mean()

S-Learner

In [None]:
### S-Learner

In [None]:
X_test_0

In [None]:
X_test_1

In [None]:
Y_train

In [None]:
# S-Learner with a random forest: a single outcome model on (X, W);
# the CATE estimate is the prediction with W set to 1 minus the prediction with W set to 0.

# mu_x: one forest on covariates plus treatment indicator
s_learner = RandomForestRegressor(max_depth=100, random_state=0).fit(X_W_train, Y_train)

# mu_0_hat / mu_1_hat: predictions on the two counterfactual test designs
s_mu_0_hat = s_learner.predict(X_test_0)
s_mu_1_hat = s_learner.predict(X_test_1)

# tau_hat
s_tau_hat = s_mu_1_hat - s_mu_0_hat
s_tau_hat


In [None]:
# mean squared error
((s_tau_hat - tau_test) ** 2).mean()

X-Learner

In [None]:
### X-Learner
# Stage 1: one outcome forest per treatment arm.
# Stage 2: regress the imputed effects on X within each arm.
# Final:   blend the two stage-2 predictions with the estimated treatment probabilities.

# mu_0 (same procedure as for t-learner, maybe can speed up process)
x_learner_mu0 = RandomForestRegressor(max_depth=100, random_state=0)
x_learner_mu0.fit(X_train_control, Y_train_control)

# mu_1 (same procedure as for t-learner, maybe can speed up process)
x_learner_mu1 = RandomForestRegressor(max_depth=100, random_state=0)
x_learner_mu1.fit(X_train_treatment, Y_train_treatment)

# compute imputed treatment effect D_0 and D_1
# d_0: predicted treated outcome minus observed outcome, for the control units
imputed_0 = x_learner_mu1.predict(X_train_control) - Y_train_control

# d_1: observed outcome minus predicted control outcome, for the treated units
imputed_1 = Y_train_treatment - x_learner_mu0.predict(X_train_treatment)

# regress imputed on X
# tau_hat_0
x_tau_0_hat = RandomForestRegressor(max_depth=100, random_state=0)
x_tau_0_hat.fit(X_train_control, imputed_0)

# tau_hat_1
x_tau_1_hat = RandomForestRegressor(max_depth=100, random_state=0)
x_tau_1_hat.fit(X_train_treatment, imputed_1)

# estimate e_x to use as g_x
g_x_hat = RandomForestClassifier(max_depth=100, random_state=0)
g_x_hat.fit(X_train, W_train)
probabilities = g_x_hat.predict_proba(X_test)
probas_1 = probabilities[:, 1]  # second predict_proba column (used as P(W=1))
probas_0 = probabilities[:, 0]  # first predict_proba column (used as P(W=0))

# final estimator of tau: tau_hat_0 weighted by P(W=1), tau_hat_1 weighted by P(W=0)
x_tau_hat = probas_1 * x_tau_0_hat.predict(X_test) + probas_0 * x_tau_1_hat.predict(X_test)
x_tau_hat


In [None]:
### See g_x hat
g_x_hat.predict_proba(X_test)

In [None]:
# mean squared error (much lower here!)
((x_tau_hat - tau_test) ** 2).mean()

In [None]:
ind = np.random.choice(len(X_train), int(len(X_train) / 2), replace=False)

In [None]:
lol = np.array((1, 2, 3, 4, 5, 6, 7, 8, 9, 10))
lol

In [None]:
train_ind = np.random.choice(10, int(10 / 2), replace=False)
train_ind

In [None]:
ind = np.zeros(len(X_train), dtype=bool)

In [None]:
ind[train_ind] = 1

In [None]:
ind

In [None]:
X_train[ind]

In [None]:
~ind

In [None]:
# `ind` has one entry per training row while `lol` has only 10 elements; indexing with the
# full mask raises IndexError, so the mask is truncated to the length of `lol`.
lol[ind[:len(lol)]]

In [None]:
# complement of the selection above; the mask is truncated to the length of `lol`
# because `ind` has one entry per training row
lol[~ind[:len(lol)]]

In [None]:
ind

In [None]:
~ind

In [None]:
X_train

In [None]:
X_train[ind]

R-Learner

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
### R-Learner
# Propensities are cross-fitted on two random halves of the training set; the outcome model
# and the final weighted regression are fitted on the full training set.

# split for cross-fitting
# boolean mask: True for a random half of the training rows (drawn without replacement)
index = np.zeros(len(X_train), dtype=bool)
train_ind = np.random.choice(len(X_train), int(len(X_train) / 2), replace=False)
index[train_ind] = 1

# estimate e_x
# model 1 is fitted on the masked half, model 2 on the other half
r_learner_e_x_1 = RandomForestClassifier(max_depth=100, random_state=0)
r_learner_e_x_2 = RandomForestClassifier(max_depth=100, random_state=0)
r_learner_e_x_1.fit(X_train[index], W_train[index])
r_learner_e_x_2.fit(X_train[~index], W_train[~index])

# get e_x predictions
# each row is predicted by the model that was NOT fitted on it
r_probas_1 = np.zeros(len(X_train), )
r_probas_1[index] = r_learner_e_x_2.predict_proba(X_train[index])[:, 1]
r_probas_1[~index] = r_learner_e_x_1.predict_proba(X_train[~index])[:, 1]

#r_probas_0 = r_probas[:, 0]  # probabilities of W=0
#r_probas_1 = r_probas[:, 1]  # probabilities of W=1

# estimate mu_x
# NOTE(review): unlike e_x, mu_x is fitted and predicted on the same rows (no cross-fitting)
r_learner_mu_x = RandomForestRegressor(max_depth=100, random_state=0)
r_learner_mu_x.fit(X_train, Y_train)

# compute r-pseudo-outcome and weights
# NOTE(review): the denominator is zero whenever a predicted probability equals W exactly;
# the pseudo-outcome is then infinite - consider clipping the propensities
r_learner_pseudo_outcomes = (Y_train - r_learner_mu_x.predict(X_train)) / (W_train - r_probas_1)
r_learner_weights = (W_train - r_probas_1) ** 2

# estimate tau (regress pseudo-outcomes on X, weight by (W-e(x))^2)
r_learner_tau = RandomForestRegressor(max_depth=100, random_state=0)
r_learner_tau.fit(X_train, r_learner_pseudo_outcomes, sample_weight=r_learner_weights)

# predict tau
r_tau_hats = r_learner_tau.predict(X_test)
r_tau_hats

In [None]:
# cross-fitted propensities of the rows in the first half
# (`r_probas` is only created by the Lasso R-learner further down, so it does not exist
# here when the notebook is run top to bottom; the random-forest cell defines `r_probas_1`)
r_probas_1[index]

In [None]:
# inspect the test-set propensities of one of the two cross-fitted forests
# (`r_learner_e_x` is only created by the Lasso R-learner further down; the random-forest
# cell defines `r_learner_e_x_1` and `r_learner_e_x_2`)
r_learner_e_x_1.predict_proba(X_test)

In [None]:
((r_tau_hats - tau_test) ** 2).mean()

DR-Learner

In [None]:
### DR-Learner
# Builds doubly-robust pseudo-outcomes from a propensity forest and two per-arm outcome
# forests, then regresses them on X. All nuisance models are fitted and evaluated on the
# same training rows (no cross-fitting yet, see TODO).

# TODO: APPLY CROSS-FITTING?
# estimate e_x
dr_learner_e_x = RandomForestClassifier(n_estimators=100, max_depth=100, random_state=0)
dr_learner_e_x.fit(X_train, W_train)

dr_probas = dr_learner_e_x.predict_proba(X_train)
dr_probas_0 = dr_probas[:, 0]  # probabilities of W=0
dr_probas_1 = dr_probas[:, 1]  # probabilities of W=1

# estimate mu_0
dr_learner_mu_0 = RandomForestRegressor(n_estimators=100, max_depth=100, random_state=0)
dr_learner_mu_0.fit(X_train_control, Y_train_control)

# estimate mu_1
dr_learner_mu_1 = RandomForestRegressor(n_estimators=100, max_depth=100, random_state=0)
dr_learner_mu_1.fit(X_train_treatment, Y_train_treatment)

# DR-pseudo-outcomes
# NOTE(review): dr_probas_1 * dr_probas_0 is zero whenever a predicted probability is exactly
# 0 or 1, which makes the pseudo-outcome infinite - consider clipping the propensities
mu_w = W_train * dr_learner_mu_1.predict(X_train) + (1 - W_train) * dr_learner_mu_0.predict(
    X_train)  # this is mu_w for each observation, i.e. mu_1 for units in the treatment groups, and mu_0 for units in the control group
dr_pseudo_outcomes = (W_train - dr_probas_1) / (dr_probas_1 * dr_probas_0) * (Y_train - mu_w) + dr_learner_mu_1.predict(
    X_train) - dr_learner_mu_0.predict(X_train)

# estimate tau (regress pseudo-outcomes on X) # TODO: USE "Test Set" for this estimation
dr_learner_tau_hat = RandomForestRegressor(n_estimators=100, max_depth=100, random_state=0)
dr_learner_tau_hat.fit(X_train, dr_pseudo_outcomes)

# predict tau
dr_tau_hat = dr_learner_tau_hat.predict(X_test)
dr_tau_hat

In [None]:
((dr_tau_hat - tau_test) ** 2).mean()

RA-Learner

In [None]:
### RA-Learner
# Regression-adjusted pseudo-outcomes from two per-arm outcome forests, regressed on X.

# mu_0 (same procedure as for t-learner, maybe can speed up process)
ra_learner_mu0 = RandomForestRegressor(max_depth=100, random_state=0)
ra_learner_mu0.fit(X_train_control, Y_train_control)

# mu_1 (same procedure as for t-learner, maybe can speed up process)
ra_learner_mu1 = RandomForestRegressor(max_depth=100, random_state=0)
ra_learner_mu1.fit(X_train_treatment, Y_train_treatment)

# e_x
# NOTE(review): this propensity model is fitted but not used anywhere in this cell
ra_learner_e_x = RandomForestClassifier(max_depth=100, random_state=0)
ra_learner_e_x.fit(X_train, W_train)

# ra-pseudo-outcome
# treated units: Y - mu_0_hat(X); control units: mu_1_hat(X) - Y
ra_pseudo_outcome = W_train * (Y_train - ra_learner_mu0.predict(X_train)) + (1 - W_train) * (
        ra_learner_mu1.predict(X_train) - Y_train)

# tau_hat
ra_tau_hat_learner = RandomForestRegressor(max_depth=100, random_state=0)
ra_tau_hat_learner.fit(X_train, ra_pseudo_outcome)
ra_tau_hat = ra_tau_hat_learner.predict(X_test)
ra_tau_hat

In [None]:
# mean squared error
((ra_tau_hat - tau_test) ** 2).mean()

PW-Learner

In [None]:
# NOTE: TRIED TO INCLUDE CROSS-FITTING!
### PW-Learner
# Propensity-weighted pseudo-outcomes with cross-fitted propensities, regressed on X.

# mu_0 (same procedure as for t-learner, maybe can speed up process)
# NOTE(review): the four outcome forests below are not used in the pseudo-outcome of this cell,
# and the "_1"/"_2" variants are fitted on identical data (no split is applied to them)
pw_learner_mu0_1 = RandomForestRegressor(max_depth=100, random_state=0)
pw_learner_mu0_2 = RandomForestRegressor(max_depth=100, random_state=0)
pw_learner_mu0_1.fit(X_train_control, Y_train_control)
pw_learner_mu0_2.fit(X_train_control, Y_train_control)

# mu_1 (same procedure as for t-learner, maybe can speed up process)
pw_learner_mu1_1 = RandomForestRegressor(max_depth=100, random_state=0)
pw_learner_mu1_2 = RandomForestRegressor(max_depth=100, random_state=0)
pw_learner_mu1_1.fit(X_train_treatment, Y_train_treatment)
pw_learner_mu1_2.fit(X_train_treatment, Y_train_treatment)

# e_x
# split for cross-fitting
# boolean mask: True for a random half of the training rows (drawn without replacement)
index = np.zeros(len(X_train), dtype=bool)
train_ind = np.random.choice(len(X_train), int(len(X_train) / 2), replace=False)
index[train_ind] = 1

# model 1 is fitted on the masked half, model 2 on the other half
pw_learner_e_x_1 = RandomForestClassifier(max_depth=100, random_state=0)
pw_learner_e_x_2 = RandomForestClassifier(max_depth=100, random_state=0)
pw_learner_e_x_1.fit(X_train[index], W_train[index])
pw_learner_e_x_2.fit(X_train[~index], W_train[~index])

# each row gets its propensity from the model that was NOT fitted on it
pw_probas_1 = np.zeros(len(X_train), )
pw_probas_1[index] = pw_learner_e_x_2.predict_proba(X_train[index])[:, 1]
pw_probas_1[~index] = pw_learner_e_x_1.predict_proba(X_train[~index])[:, 1]

# pw-pseudo-outcome
# NOTE(review): divides by e(x) and 1 - e(x); a predicted probability of exactly 0 or 1
# makes the pseudo-outcome infinite - consider clipping the propensities
pw_pseudo_outcome = (W_train / pw_probas_1 - (1 - W_train) / (1 - pw_probas_1)) * Y_train

# tau_hat
pw_tau_hat_learner = RandomForestRegressor(max_depth=100, random_state=0)
pw_tau_hat_learner.fit(X_train, pw_pseudo_outcome)
pw_tau_hat = pw_tau_hat_learner.predict(X_test)
pw_tau_hat

In [None]:
# mean squared error
((pw_tau_hat - tau_test) ** 2).mean()

U-Learner

In [None]:
### U-Learner
# Regresses (Y - mu_hat(X)) / (W - e_hat(X)) on X; both nuisance models are fitted and
# evaluated on the same training rows.

# estimate e_x
u_learner_e_x = RandomForestClassifier(max_depth=100, random_state=0)
u_learner_e_x.fit(X_train, W_train)

# estimate mu_x
u_learner_mu_x = RandomForestRegressor(max_depth=100, random_state=0)
u_learner_mu_x.fit(X_train, Y_train)

# compute residuals
# NOTE(review): the denominator is zero whenever the predicted probability equals W exactly
u_learner_residuals = (Y_train - u_learner_mu_x.predict(X_train)) / (
        W_train - u_learner_e_x.predict_proba(X_train)[:, 1])

# tau_hat - regress residuals on X
u_tau_hat_learner = RandomForestRegressor(max_depth=100, random_state=0)
u_tau_hat_learner.fit(X_train, u_learner_residuals)

u_tau_hats = u_tau_hat_learner.predict(X_test)
u_tau_hats


In [None]:
# mean squared error
((u_tau_hats - tau_test) ** 2).mean()

In [None]:
pd.DataFrame(u_learner_e_x.predict_proba(X_train)[:, 1]).describe()

In [None]:
u_learner_residuals

# Lasso (or L1-penalized logistic regression for the propensity score)

# Preprocess for lasso

In [None]:
# Degree-3 polynomial expansion (all powers and interactions, no bias column) of the
# training and test covariates, used as design matrices by the Lasso-based learners.
poly_train = PolynomialFeatures(degree=3, interaction_only=False, include_bias=False)
X_poly_train = poly_train.fit_transform(X_train)
# NOTE(review): a second transformer is fitted on the test set; poly_train.transform(X_test)
# would reuse the transformer fitted on the training data
poly_test = PolynomialFeatures(degree=3, interaction_only=False, include_bias=False)
X_poly_test = poly_test.fit_transform(X_test)
X_poly_train

In [None]:
# compute polynomial features for treatment and control groups in training set
poly_train_treatment = PolynomialFeatures(degree=3, interaction_only=False, include_bias=False)
X_poly_train_treatment = poly_train_treatment.fit_transform(X_train_treatment)
poly_train_control = PolynomialFeatures(degree=3, interaction_only=False, include_bias=False)
# use the control transformer here: previously the treatment transformer was re-fitted on the
# control rows, which left poly_train_control unfitted
X_poly_train_control = poly_train_control.fit_transform(X_train_control)
# polynomial features of the full test set
poly_test = PolynomialFeatures(degree=3, interaction_only=False, include_bias=False)
X_poly_test = poly_test.fit_transform(X_test)

In [None]:
# compute polynomial features of (X, W) for the S-learner: training design and the two
# counterfactual test designs (W forced to 0 / to 1)
# All three designs must use the same degree, otherwise the model fitted on X_W_poly_train
# cannot predict on X_poly_test_0 / X_poly_test_1 (different number of columns).
# Previously the training design was built with the degree-3 `poly_train_treatment`.
# NOTE(review): degree 4 is what this cell declares for all three transformers - confirm that
# degree 4 (and not 3) is intended, the expansion is very wide.
xw_poly_train = PolynomialFeatures(degree=4, interaction_only=False, include_bias=False)
X_W_poly_train = xw_poly_train.fit_transform(X_W_train)

xw_poly_test_0 = PolynomialFeatures(degree=4, interaction_only=False, include_bias=False)
X_poly_test_0 = xw_poly_test_0.fit_transform(X_test_0)

xw_poly_test_1 = PolynomialFeatures(degree=4, interaction_only=False, include_bias=False)
X_poly_test_1 = xw_poly_test_1.fit_transform(X_test_1)

T-learner (Lasso)

In [None]:
# T-Learner with cross-validated Lasso on the polynomial features: one outcome model per
# treatment arm, CATE estimate = difference of the two predictions on the test set.

# mu_0: Lasso fitted on the control units
t_learner_mu0 = LassoCV(cv=10, tol=1e-2, random_state=0).fit(X_poly_train_control, Y_train_control)
t_mu_0_hat = t_learner_mu0.predict(X_poly_test)

# mu_1: Lasso fitted on the treated units
t_learner_mu1 = LassoCV(cv=10, tol=1e-2, random_state=0).fit(X_poly_train_treatment, Y_train_treatment)
t_mu_1_hat = t_learner_mu1.predict(X_poly_test)

# tau_hat = mu_1_hat - mu_0_hat
t_tau_hat = t_mu_1_hat - t_mu_0_hat
t_tau_hat

In [None]:
((t_tau_hat - tau_test) ** 2).mean()

S-learner (Lasso)

In [None]:
# S-learner (example with Lasso)
# A single cross-validated Lasso on the polynomial features of (X, W); the CATE estimate is the
# prediction with W set to 1 minus the prediction with W set to 0. The whole cell is timed.
tic = time.perf_counter()
# mu_x
s_learner = LassoCV(cv=10, tol=1e-2, random_state=0)
s_learner.fit(X_W_poly_train, Y_train)

# mu_0_hat
s_mu_0_hat = s_learner.predict(X_poly_test_0)

# mu_1_hat
s_mu_1_hat = s_learner.predict(X_poly_test_1)

# tau_hat
s_tau_hat = s_mu_1_hat - s_mu_0_hat
toc = time.perf_counter()
print(f'Time for computation: {toc - tic}')

In [None]:
((s_tau_hat - tau_test) ** 2).mean()

X-learner (Lasso (or l1-penalty))

In [None]:
### X-Learner
# Lasso version: per-arm outcome models, per-arm regressions of the imputed effects, and an
# L1-penalized logistic regression for the weighting function. The whole cell is timed.

tic = time.perf_counter()

# mu_0 (same procedure as for t-learner, maybe can speed up process)
x_learner_mu0 = LassoCV(cv=10, tol=1, random_state=0)
x_learner_mu0.fit(X_poly_train_control, Y_train_control)

# mu_1 (same procedure as for t-learner, maybe can speed up process)
x_learner_mu1 = LassoCV(cv=10, tol=1, random_state=0)
x_learner_mu1.fit(X_poly_train_treatment, Y_train_treatment)

# compute imputed treatment effect D_0 and D_1
# d_0: predicted treated outcome minus observed outcome, for the control units
imputed_0 = x_learner_mu1.predict(X_poly_train_control) - Y_train_control

# d_1: observed outcome minus predicted control outcome, for the treated units
imputed_1 = Y_train_treatment - x_learner_mu0.predict(X_poly_train_treatment)

# regress imputed on X
# tau_hat_0
x_tau_0_hat = LassoCV(cv=10, tol=1, random_state=0)
x_tau_0_hat.fit(X_poly_train_control, imputed_0)

# tau_hat_1
x_tau_1_hat = LassoCV(cv=10, tol=1, random_state=0)
x_tau_1_hat.fit(X_poly_train_treatment, imputed_1)

# estimate e_x to use as g_x
g_x_hat = LogisticRegressionCV(cv=KFold(10), penalty='l1', solver='saga', tol=1, random_state=0)
g_x_hat.fit(X_poly_train, W_train)
probabilities = g_x_hat.predict_proba(X_poly_test)
probas_1 = probabilities[:, 1]  # second predict_proba column (used as P(W=1))
probas_0 = probabilities[:, 0]  # first predict_proba column (used as P(W=0))

# final estimator of tau: tau_hat_0 weighted by P(W=1), tau_hat_1 weighted by P(W=0)
x_tau_hat = probas_1 * x_tau_0_hat.predict(X_poly_test) + probas_0 * x_tau_1_hat.predict(X_poly_test)

toc = time.perf_counter()

print(f'Time for computation: {toc - tic}')  # 127 seconds

In [None]:
((x_tau_hat - tau_test) ** 2).mean()

R-learner (Lasso (or l1-penalty))

In [None]:
### R-Learner
# Lasso version. Unlike the random-forest R-learner above, the propensity model here is
# fitted and evaluated on the same training rows (no cross-fitting). The whole cell is timed.

tic = time.perf_counter()

# estimate e_x
r_learner_e_x = LogisticRegressionCV(cv=KFold(10), penalty='l1', solver='saga', tol=1, random_state=0)
r_learner_e_x.fit(X_poly_train, W_train)

# get e_x predictions
r_probas = r_learner_e_x.predict_proba(X_poly_train)
r_probas_0 = r_probas[:, 0]  # probabilities of W=0
r_probas_1 = r_probas[:, 1]  # probabilities of W=1

# estimate mu_x
r_learner_mu_x = LassoCV(cv=10, tol=1, random_state=0)
r_learner_mu_x.fit(X_poly_train, Y_train)

# compute r-pseudo-outcome and weights
r_learner_pseudo_outcomes = (Y_train - r_learner_mu_x.predict(X_poly_train)) / (W_train - r_probas_1)
r_learner_weights = (W_train - r_probas_1) ** 2

# estimate tau (regress pseudo-outcomes on X, weight by (W-e(x))^2)
r_learner_tau = LassoCV(cv=10, tol=1, random_state=0)
r_learner_tau.fit(X_poly_train, r_learner_pseudo_outcomes, sample_weight=r_learner_weights)

# predict tau
r_tau_hats = r_learner_tau.predict(X_poly_test)
r_tau_hats

toc = time.perf_counter()

print(f'Time for computation: {toc - tic} seconds')  # 98 seconds

In [None]:
((r_tau_hats - tau_test) ** 2).mean()

DR-learner (Lasso (l1-penalty))

In [None]:
### DR-Learner
# Lasso version: doubly-robust pseudo-outcomes from an L1-penalized logistic propensity model
# and two per-arm Lasso outcome models, regressed on the polynomial features.
# All nuisance models are fitted and evaluated on the same training rows. The cell is timed.

tic = time.perf_counter()

# TODO: APPLY CROSS-FITTING?
# estimate e_x
dr_learner_e_x = LogisticRegressionCV(cv=KFold(10), penalty='l1', solver='saga', tol=1, random_state=0)
dr_learner_e_x.fit(X_poly_train, W_train)

dr_probas = dr_learner_e_x.predict_proba(X_poly_train)
dr_probas_0 = dr_probas[:, 0]  # probabilities of W=0
dr_probas_1 = dr_probas[:, 1]  # probabilities of W=1

# estimate mu_0
dr_learner_mu_0 = LassoCV(cv=10, tol=1, random_state=0)
dr_learner_mu_0.fit(X_poly_train_control, Y_train_control)

# estimate mu_1
dr_learner_mu_1 = LassoCV(cv=10, tol=1, random_state=0)
dr_learner_mu_1.fit(X_poly_train_treatment, Y_train_treatment)

# DR-pseudo-outcomes
mu_w = W_train * dr_learner_mu_1.predict(X_poly_train) + (1 - W_train) * dr_learner_mu_0.predict(
    X_poly_train)  # this is mu_w for each observation, i.e. mu_1 for units in the treatment groups, and mu_0 for units in the control group
dr_pseudo_outcomes = (W_train - dr_probas_1) / (dr_probas_1 * dr_probas_0) * (Y_train - mu_w) + dr_learner_mu_1.predict(
    X_poly_train) - dr_learner_mu_0.predict(X_poly_train)

# estimate tau (regress pseudo-outcomes on X) # TODO: USE "Test Set" for this estimation
dr_learner_tau_hat = LassoCV(cv=10, tol=1, random_state=0)
dr_learner_tau_hat.fit(X_poly_train, dr_pseudo_outcomes)

# predict tau
dr_tau_hat = dr_learner_tau_hat.predict(X_poly_test)

toc = time.perf_counter()

print(f'Time needed for computation: {toc - tic} seconds')  # 104 seconds

In [None]:
((dr_tau_hat - tau_test) ** 2).mean()

RA-learner (Lasso)

In [None]:
### RA-Learner
# Lasso version: regression-adjusted pseudo-outcomes from two per-arm Lasso outcome models,
# regressed on the polynomial features. The whole cell is timed.

tic = time.perf_counter()

# mu_0 (same procedure as for t-learner, maybe can speed up process)
ra_learner_mu0 = LassoCV(cv=10, tol=1, random_state=0)
ra_learner_mu0.fit(X_poly_train_control, Y_train_control)

# mu_1 (same procedure as for t-learner, maybe can speed up process)
ra_learner_mu1 = LassoCV(cv=10, tol=1, random_state=0)
ra_learner_mu1.fit(X_poly_train_treatment, Y_train_treatment)

# e_x
# NOTE(review): this propensity model is fitted (and timed) but not used anywhere in this cell
ra_learner_e_x = LogisticRegressionCV(cv=KFold(10), penalty='l1', solver='saga', tol=1, random_state=0)
ra_learner_e_x.fit(X_poly_train, W_train)

# ra-pseudo-outcome
# treated units: Y - mu_0_hat(X); control units: mu_1_hat(X) - Y
ra_pseudo_outcome = W_train * (Y_train - ra_learner_mu0.predict(X_poly_train)) + (1 - W_train) * (
        ra_learner_mu1.predict(X_poly_train) - Y_train)

# tau_hat
ra_tau_hat_learner = LassoCV(cv=10, tol=1, random_state=0)
ra_tau_hat_learner.fit(X_poly_train, ra_pseudo_outcome)
ra_tau_hat = ra_tau_hat_learner.predict(X_poly_test)

toc = time.perf_counter()

print(f'Time for computation: {toc - tic} seconds.')  # 121 seconds

In [None]:
((ra_tau_hat - tau_test) ** 2).mean()

PW-learner (Lasso)

In [None]:
### PW-Learner
# Lasso version: propensity-weighted pseudo-outcomes from an L1-penalized logistic regression
# (fitted and evaluated on the same training rows), regressed on the polynomial features.
# The whole cell is timed.

tic = time.perf_counter()

# mu_0 (same procedure as for t-learner, maybe can speed up process)
# NOTE(review): the two outcome models below are fitted (and timed) but are not used in the
# pseudo-outcome of this cell
pw_learner_mu0 = LassoCV(cv=10, tol=1, random_state=0)
pw_learner_mu0.fit(X_poly_train_control, Y_train_control)

# mu_1 (same procedure as for t-learner, maybe can speed up process)
pw_learner_mu1 = LassoCV(cv=10, tol=1, random_state=0)
pw_learner_mu1.fit(X_poly_train_treatment, Y_train_treatment)

# e_x
pw_learner_e_x = LogisticRegressionCV(cv=KFold(10), penalty='l1', solver='saga', tol=1, random_state=0)
pw_learner_e_x.fit(X_poly_train, W_train)

# pw-pseudo-outcome: (W / P(W=1) - (1 - W) / P(W=0)) * Y
pw_pseudo_outcome = (W_train / pw_learner_e_x.predict_proba(X_poly_train)[:, 1] - (1 - W_train) / (
    pw_learner_e_x.predict_proba(X_poly_train)[:, 0])) * Y_train

# tau_hat
pw_tau_hat_learner = LassoCV(cv=10, tol=1, random_state=0)
pw_tau_hat_learner.fit(X_poly_train, pw_pseudo_outcome)
pw_tau_hat = pw_tau_hat_learner.predict(X_poly_test)

toc = time.perf_counter()

print(f'Time for computation: {toc - tic} seconds.')  # 117 seconds

In [None]:
((pw_tau_hat - tau_test) ** 2).mean()

U-learner (Lasso)

In [None]:
### U-Learner
# Lasso version: regresses (Y - mu_hat(X)) / (W - e_hat(X)) on the polynomial features; both
# nuisance models are fitted and evaluated on the same training rows. The cell is timed.

tic = time.perf_counter()

# estimate e_x
u_learner_e_x = LogisticRegressionCV(cv=KFold(10), penalty='l1', solver='saga', tol=1, random_state=0)
u_learner_e_x.fit(X_poly_train, W_train)

# estimate mu_x
u_learner_mu_x = LassoCV(cv=10, tol=1, random_state=0)
u_learner_mu_x.fit(X_poly_train, Y_train)

# compute residuals
u_learner_residuals = (Y_train - u_learner_mu_x.predict(X_poly_train)) / (
        W_train - u_learner_e_x.predict_proba(X_poly_train)[:, 1])

# tau_hat - regress residuals on X
u_tau_hat_learner = LassoCV(cv=10, tol=1, random_state=0)
u_tau_hat_learner.fit(X_poly_train, u_learner_residuals)

u_tau_hats = u_tau_hat_learner.predict(X_poly_test)

toc = time.perf_counter()

print(f'Time for computation: {toc - tic} seconds.')  # 98 seconds


In [None]:
((u_tau_hats - tau_test) ** 2).mean()

# Neural Network


T-Learner (NN)

In [None]:
# T-Learner (example with neural networks)
# One outcome network per treatment arm, each started from the saved model 'model_25';
# CATE estimate = difference of the two predictions on the test covariates.
# Each network is validated on the test units of its own arm only (as in the X-, DR- and
# RA-learner cells): the outcomes of the other arm are not observations of the function
# that the network is fitted to.

tic = time.perf_counter()

# mu_0
t_learner_mu0 = load_model('model_25')
print('Training mu0')
t_learner_mu0.fit(X_train_control, Y_train_control,
                  batch_size=100,
                  epochs=100,
                  validation_data=(X_test_control, Y_test_control),
                  callbacks=None  # include early stopping
                  )
t_mu_0_hat = t_learner_mu0.predict(X_test)

# mu_1
t_learner_mu1 = load_model('model_25')
print('Training mu1')
t_learner_mu1.fit(X_train_treatment, Y_train_treatment,
                  batch_size=100,
                  epochs=100,
                  validation_data=(X_test_treatment, Y_test_treatment),
                  callbacks=None  # include early stopping
                  )
t_mu_1_hat = t_learner_mu1.predict(X_test)

# Prediction = mu_1 - mu_0
t_tau_hat = t_mu_1_hat - t_mu_0_hat

toc = time.perf_counter()

print(f'Time for computation: {toc - tic} seconds.')  # 3 seconds


In [None]:
# mean squared error; the network output is flattened to the length of tau_test
# (instead of a hard-coded 300) so the cell does not depend on the split size
((np.reshape(t_tau_hat, (len(tau_test),)) - tau_test) ** 2).mean()  # 3.18

S-learner (NN)

In [None]:
# S-learner (example with neural networks)
# A single network, started from the saved model 'model_26', on covariates plus treatment
# indicator; the CATE estimate is the prediction with W set to 1 minus that with W set to 0.

# mu_x
s_learner = load_model('model_26')
s_learner.fit(X_W_train, Y_train,
              batch_size=100,
              epochs=100,
              validation_data=(X_W_test, Y_test),
              callbacks=None  # include early stopping
              )

# mu_0_hat
s_mu_0_hat = s_learner.predict(X_test_0)

# mu_1_hat
s_mu_1_hat = s_learner.predict(X_test_1)

# tau_hat
s_tau_hat = s_mu_1_hat - s_mu_0_hat


In [None]:
# mean squared error; the network output is flattened to the length of tau_test
# (instead of a hard-coded 300) so the cell does not depend on the split size
((np.reshape(s_tau_hat, (len(tau_test),)) - tau_test) ** 2).mean()  # 1.98

X-learner (NN)

In [None]:
### X-Learner
# Neural-network version. Every Keras prediction is reshaped to a 1-D array before it is
# combined with the 1-D outcome / treatment arrays.

# mu_0 (same procedure as for t-learner, maybe can speed up process)
x_learner_mu0 = load_model('model_25')
x_learner_mu0.fit(X_train_control, Y_train_control,
                  batch_size=100,
                  epochs=100,
                  validation_data=(X_test_control, Y_test_control),
                  callbacks=None  # include early stopping
                  )

# d_1: observed outcome minus predicted control outcome, for the treated units
imputed_1 = Y_train_treatment - np.reshape(x_learner_mu0.predict(X_train_treatment), (len(Y_train_treatment),))

# mu_1 (same procedure as for t-learner, maybe can speed up process)
x_learner_mu1 = load_model('model_25')
x_learner_mu1.fit(X_train_treatment, Y_train_treatment,
                  batch_size=100,
                  epochs=100,
                  validation_data=(X_test_treatment, Y_test_treatment),
                  callbacks=None  # include early stopping
                  )

# d_0: predicted treated outcome minus observed outcome, for the control units
imputed_0 = np.reshape(x_learner_mu1.predict(X_train_control), (len(Y_train_control),)) - Y_train_control

# regress imputed on X

# tau_hat_1
# NOTE(review): the validation targets here are the tau values of the test set
x_tau_1_hat = load_model('model_25')
x_tau_1_hat.fit(X_train_treatment, imputed_1,
                batch_size=100,
                epochs=100,
                validation_data=(X_test_treatment, tau_test_treatment),
                callbacks=None  # include early stopping
                )

x_tau_1_hat_predicts = np.reshape(x_tau_1_hat.predict(X_test), (len(X_test),))

# tau_hat_0
x_tau_0_hat = load_model('model_25')
x_tau_0_hat.fit(X_train_control, imputed_0,
                batch_size=100,
                epochs=100,
                validation_data=(X_test_control, tau_test_control),
                callbacks=None  # include early stopping
                )

x_tau_0_hat_predicts = np.reshape(x_tau_0_hat.predict(X_test), (len(X_test),))

# estimate e_x to use as g_x
g_x_hat = load_model('model_ex')
g_x_hat.fit(X_train, W_train,
            batch_size=100,
            epochs=100,
            validation_data=(X_test, W_test),
            callbacks=None  # include early stopping
            )
x_probabilities = g_x_hat.predict(X_test)
# a sigmoid is applied to the raw model output - presumably 'model_ex' returns logits; TODO confirm
x_probs_1 = np.reshape(keras.activations.sigmoid(x_probabilities), (len(x_probabilities, )))
x_probs_0 = 1 - x_probs_1

# final estimator of tau: tau_hat_0 weighted by P(W=1), tau_hat_1 weighted by P(W=0)
x_tau_hat = x_probs_1 * x_tau_0_hat_predicts + x_probs_0 * x_tau_1_hat_predicts

In [None]:
# mean squared error; the estimate is flattened to the length of tau_test
# (instead of a hard-coded 300) so the cell does not depend on the split size
((np.reshape(x_tau_hat, (len(tau_test),)) - tau_test) ** 2).mean()  # 3.1614 with smoothing of 0.5

R-learner (NN)

In [None]:
### R-Learner
# Neural-network version; propensity and outcome networks are fitted and evaluated on the
# same training rows (no cross-fitting).

# estimate e_x
r_learner_e_x = load_model('model_ex')
r_learner_e_x.fit(X_train, W_train,
                  batch_size=100,
                  epochs=100,
                  validation_data=(X_test, W_test),
                  callbacks=None  # include early stopping
                  )

# get e_x predictions
# a sigmoid is applied to the raw model output - presumably 'model_ex' returns logits; TODO confirm
r_probabilities = np.reshape(keras.activations.sigmoid(r_learner_e_x.predict(X_train)), len(X_train, ))
r_probas_1 = r_probabilities  # probabilities of W=1
r_probas_0 = 1 - r_probabilities  # probabilities of W=0

# estimate mu_x
r_learner_mu_x = load_model('model_25')
r_learner_mu_x.fit(X_train, Y_train,
                   batch_size=100,
                   epochs=100,
                   validation_data=(X_test, Y_test),
                   callbacks=None  # include early stopping
                   )

# compute r-pseudo-outcome and weights
# the prediction is flattened to 1-D so the division stays element-wise
r_learner_pseudo_outcomes = (Y_train - np.reshape(r_learner_mu_x.predict(X_train), (len(X_train),))) / (
        W_train - r_probas_1)
r_learner_weights = (W_train - r_probas_1) ** 2

# estimate tau (regress pseudo-outcomes on X, weight by (W-e(x))^2)
r_learner_tau = load_model('model_25')
r_learner_tau.fit(X_train, r_learner_pseudo_outcomes,
                  sample_weight=r_learner_weights,
                  batch_size=100,
                  epochs=100,
                  validation_data=None,
                  callbacks=None  # include early stopping
                  )

# predict tau
r_tau_hats = r_learner_tau.predict(X_test)

In [None]:
((np.reshape(r_tau_hats, (len(X_test))) - tau_test) ** 2).mean()  #47.81

DR-learner (NN)

In [None]:
### DR-Learner
# Neural-network version: doubly-robust pseudo-outcomes from a propensity network and two
# per-arm outcome networks, regressed on X. All nuisance models are fitted and evaluated on
# the same training rows (no cross-fitting yet, see TODO). The whole cell is timed.

tic = time.perf_counter()

# TODO: APPLY CROSS-FITTING?
# estimate e_x
dr_learner_e_x = load_model('model_ex')
dr_learner_e_x.fit(X_train, W_train,
                   batch_size=100,
                   epochs=100,
                   validation_data=(X_test, W_test),
                   callbacks=None  # include early stopping
                   )

# a sigmoid is applied to the raw model output - presumably 'model_ex' returns logits; TODO confirm
dr_probabilities = np.reshape(keras.activations.sigmoid(dr_learner_e_x.predict(X_train)), len(X_train, ))
dr_probas_0 = 1 - dr_probabilities  # probabilities of W=0
dr_probas_1 = dr_probabilities  # probabilities of W=1

# estimate mu_0
dr_learner_mu_0 = load_model('model_25')
dr_learner_mu_0.fit(X_train_control, Y_train_control,
                    batch_size=100,
                    epochs=100,
                    validation_data=(X_test_control, Y_test_control),
                    callbacks=None  # include early stopping
                    )

# Flatten the Keras output to 1-D. Without the reshape the (n, 1) predictions broadcast
# against the (n,) arrays W_train / Y_train below and turn the pseudo-outcomes into an
# (n, n) matrix instead of one value per training unit.
dr_learner_mu_0_predictions = np.reshape(dr_learner_mu_0.predict(X_train), (len(X_train),))

# estimate mu_1
dr_learner_mu_1 = load_model('model_25')
dr_learner_mu_1.fit(X_train_treatment, Y_train_treatment,
                    batch_size=100,
                    epochs=100,
                    validation_data=(X_test_treatment, Y_test_treatment),
                    callbacks=None  # include early stopping
                    )

# flattened for the same reason as the mu_0 predictions
dr_learner_mu_1_predictions = np.reshape(dr_learner_mu_1.predict(X_train), (len(X_train),))

# DR-pseudo-outcomes
mu_w = W_train * dr_learner_mu_1_predictions + (
        1 - W_train) * dr_learner_mu_0_predictions  # this is mu_w for each observation, i.e. mu_1 for units in the treatment groups, and mu_0 for units in the control group
dr_pseudo_outcomes = (W_train - dr_probas_1) / (dr_probas_1 * dr_probas_0) * (
        Y_train - mu_w) + dr_learner_mu_1_predictions - dr_learner_mu_0_predictions

# estimate tau (regress pseudo-outcomes on X) # TODO: USE "Test Set" for this estimation
dr_learner_tau_hat = load_model('model_25')
dr_learner_tau_hat.fit(X_train, dr_pseudo_outcomes,
                       batch_size=100,
                       epochs=100,
                       validation_data=None,
                       callbacks=None  # include early stopping
                       )

# predict tau
dr_tau_hat = dr_learner_tau_hat.predict(X_test)

toc = time.perf_counter()

print(f'Time needed for computation: {toc - tic} seconds')  # 104 seconds

In [None]:
((np.reshape(dr_tau_hat, (len(tau_test),)) - tau_test) ** 2).mean()  # 8.3514

RA-learner (NN)

In [None]:
### RA-Learner
# Neural-network version: regression-adjusted pseudo-outcomes from two per-arm outcome
# networks, regressed on X. Predictions are flattened to 1-D before they are combined with
# the 1-D outcome / treatment arrays.

# mu_0 (same procedure as for t-learner, maybe can speed up process)
ra_learner_mu0 = load_model('model_25')
ra_learner_mu0.fit(X_train_control, Y_train_control,
                   batch_size=100,
                   epochs=100,
                   validation_data=(X_test_control, Y_test_control),
                   callbacks=None  # include early stopping
                   )

# get hats
ra_learner_mu0_predictions = np.reshape(ra_learner_mu0.predict(X_train), (len(X_train),))

# mu_1 (same procedure as for t-learner, maybe can speed up process)
ra_learner_mu1 = load_model('model_25')
ra_learner_mu1.fit(X_train_treatment, Y_train_treatment,
                   batch_size=100,
                   epochs=100,
                   validation_data=(X_test_treatment, Y_test_treatment),
                   callbacks=None  # include early stopping
                   )

# get hats
ra_learner_mu1_predictions = np.reshape(ra_learner_mu1.predict(X_train), (len(X_train),))

# e_x TODO: IS IT NEEDED?
# (disabled: the propensity fit is wrapped in a string literal and therefore never executed)
"""ra_learner_e_x = load_model('model_ex')
ra_learner_e_x.fit(X_train,W_train,
    batch_size=100,
    epochs=100,
    validation_data=(X_test, W_test),
    callbacks=[callback] # include early stopping
)"""

# ra-pseudo-outcome
# treated units: Y - mu_0_hat(X); control units: mu_1_hat(X) - Y
ra_pseudo_outcome = W_train * (Y_train - ra_learner_mu0_predictions) + (1 - W_train) * (
        ra_learner_mu1_predictions - Y_train)

# tau_hat
ra_tau_hat_learner = load_model('model_25')
ra_tau_hat_learner.fit(X_train, ra_pseudo_outcome,
                       batch_size=100,
                       epochs=100,
                       validation_data=None,
                       callbacks=None  # include early stopping
                       )

ra_tau_hat = ra_tau_hat_learner.predict(X_test)
ra_tau_hat

In [None]:
((np.reshape(ra_tau_hat, (len(tau_test),)) - tau_test) ** 2).mean()  # 3.397

PW-learner (NN)

In [None]:
### PW-Learner
# Neural-network version: propensity-weighted pseudo-outcomes from a propensity network
# (fitted and evaluated on the same training rows), regressed on X.

# e_x
pw_learner_e_x = load_model('model_ex')
pw_learner_e_x.fit(X_train, W_train,
                   batch_size=100,
                   epochs=100,
                   validation_data=(X_test, W_test),
                   callbacks=None  # include early stopping
                   )

# a sigmoid is applied to the raw model output - presumably 'model_ex' returns logits; TODO confirm
pw_probabilities = np.reshape(keras.activations.sigmoid(pw_learner_e_x.predict(X_train)), len(X_train, ))
pw_probs_1 = pw_probabilities
pw_probs_0 = 1 - pw_probabilities

# pw-pseudo-outcome: (W / P(W=1) - (1 - W) / P(W=0)) * Y
# NOTE(review): probabilities close to 0 or 1 make this pseudo-outcome very large
pw_pseudo_outcome = (W_train / pw_probs_1 - (1 - W_train) / pw_probs_0) * Y_train

# tau_hat
pw_tau_hat_learner = load_model('model_25')
pw_tau_hat_learner.fit(X_train, pw_pseudo_outcome,
                       batch_size=100,
                       epochs=100,
                       validation_data=None,
                       callbacks=None  # include early stopping
                       )
pw_tau_hat = pw_tau_hat_learner.predict(X_test)
pw_tau_hat

In [None]:
((np.reshape(pw_tau_hat, (len(tau_test),)) - tau_test) ** 2).mean()  # 271.842 TODO: CHECK IF IT REALLY IS CORRECT

In [None]:
# see why so bad
pw_probabilities

U-learner (NN)

In [None]:
### U-Learner
# Neural-network version: regresses (Y - mu_hat(X)) / (W - e_hat(X)) on X.
# NOTE(review): `callback` is not defined in any cell shown above - presumably it comes from
# `from default_parameters import *`; verify, otherwise this cell raises NameError.
# estimate e_x

u_learner_e_x = load_model('model_ex')
u_learner_e_x.fit(X_train, W_train,
                  batch_size=100,
                  epochs=1000,
                  validation_split=0.3,
                  callbacks=callback  # include early stopping
                  )

# a sigmoid is applied to the raw model output - presumably 'model_ex' returns logits; TODO confirm
u_probs_1 = np.reshape(keras.activations.sigmoid(u_learner_e_x.predict(X_train)), (len(X_train),))

# estimate mu_x
u_learner_mu_x = load_model('model_25')
u_learner_mu_x.fit(X_train, Y_train,
                   batch_size=100,
                   epochs=100,
                   validation_data=None,
                   callbacks=None
                   )

# flattened to 1-D so the division below stays element-wise
u_learner_mu_x_predictions = np.reshape(u_learner_mu_x.predict(X_train), (len(X_train),))
# compute residuals
# NOTE(review): W - e_hat(X) close to zero makes the residuals very large
u_learner_residuals = (Y_train - u_learner_mu_x_predictions) / (W_train - u_probs_1)

# tau_hat - regress residuals on X
u_tau_hat_learner = load_model('model_25')
u_tau_hat_learner.fit(X_train, u_learner_residuals,
                      batch_size=100,
                      epochs=1000,
                      callbacks=callback,
                      validation_split=0.3
                      )

u_tau_hats = u_tau_hat_learner.predict(X_test)
u_tau_hats


In [None]:
((np.reshape(u_tau_hats, (len(tau_test),)) - tau_test) ** 2).mean()  # 51.102 TODO: CHECK IF IT REALLY IS CORRECT!!!

In [None]:
u_learner_residuals

In [None]:
# NOTE: pandas is already imported as `pd` in the notebook's first cell, so this
# re-import is redundant.
import pandas as pd

# Summary statistics of the estimated treatment probabilities from the U-learner cell.
pd.DataFrame(u_probs_1).describe()


# Make Classes

In [None]:
class Metalearner:
    """Empty placeholder class; it defines no attributes or methods."""

# Class T Learner

In [None]:
class TLearner:
    """T-learner: fits one outcome model per treatment arm.

    The treatment effect is predicted as ``mu_1(x) - mu_0(x)``, where ``mu_1`` is
    fitted on the rows with ``w == 1`` and ``mu_0`` on the rows with ``w == 0``.

    Parameters
    ----------
    method : str
        Base learner used for both outcome models: ``'rf'``
        (``RandomForestRegressor``), ``'lasso'`` (``LassoCV`` on degree-3
        polynomial features) or ``'nn'`` (model loaded via
        ``load_model('model_25')``).

    Raises
    ------
    NotImplementedError
        If ``method`` is none of the values above.
    """

    def __init__(self, method):
        self.method = method

        if method == 'rf':
            self.mu0_model = RandomForestRegressor(n_estimators=1000, max_depth=99, random_state=0, max_features=0.66)
            self.mu1_model = RandomForestRegressor(n_estimators=1000, max_depth=99, random_state=0, max_features=0.66)
        elif method == 'lasso':
            self.mu0_model = LassoCV(cv=10, tol=1e-2, random_state=0)
            self.mu1_model = LassoCV(cv=10, tol=1e-2, random_state=0)
            # Feature expansion shared by both arms; fitted once in fit().
            self.poly = PolynomialFeatures(degree=3, interaction_only=False, include_bias=False)
        elif method == 'nn':
            self.mu0_model = load_model('model_25')
            self.mu1_model = load_model('model_25')
        else:
            raise NotImplementedError('Base learner method not specified')

    def fit(self,
            x, y, w):
        """Fit ``mu_0`` on the control rows and ``mu_1`` on the treated rows.

        Parameters
        ----------
        x : covariates, selected row-wise with the masks ``w == 0`` / ``w == 1``.
        y : outcomes, aligned with ``x``.
        w : treatment indicator, compared against 0 and 1.

        Raises
        ------
        NotImplementedError
            If ``self.method`` is not a supported base learner.
        """
        if self.method == 'rf':
            # 1: train mu_0
            print("Fitting random forest for mu_0")
            self.mu0_model.fit(x[w == 0], y[w == 0])

            # 2: train mu_1
            print("Fitting random forest for mu_1")
            self.mu1_model.fit(x[w == 1], y[w == 1])

        elif self.method == 'lasso':
            # The polynomial expansion is fitted here and only applied in predict().
            x_poly_train = self.poly.fit_transform(x)

            # 1: train mu_0
            print("Fitting lasso for mu_0")
            self.mu0_model.fit(x_poly_train[w == 0], y[w == 0])

            # 2: train mu_1
            print("Fitting lasso for mu_1")
            self.mu1_model.fit(x_poly_train[w == 1], y[w == 1])

        elif self.method == 'nn':
            x = tf.convert_to_tensor(x)
            y = tf.convert_to_tensor(y)
            w = tf.convert_to_tensor(w)

            # `callback` is a notebook-level name defined outside this class.
            # 1: train mu_0
            print("Training neural network for mu_0")
            self.mu0_model.fit(x[w == 0], y[w == 0],
                               batch_size=100,
                               epochs=10000,
                               callbacks=callback,
                               validation_split=0.3,
                               verbose=0
                               )

            # 2: train mu_1
            print("Training neural network for mu_1")
            self.mu1_model.fit(x[w == 1], y[w == 1],
                               batch_size=100,
                               epochs=10000,
                               callbacks=callback,
                               validation_split=0.3,
                               verbose=0
                               )

        else:
            raise NotImplementedError('Base learner method not specified in fit')

    def predict(self,
                x):
        """Return ``mu_1(x) - mu_0(x)`` for every row of ``x``.

        Raises
        ------
        NotImplementedError
            If ``self.method`` is not a supported base learner.
        """
        if self.method == 'rf':
            mu0_hats = self.mu0_model.predict(x)
            mu1_hats = self.mu1_model.predict(x)
            predictions = mu1_hats - mu0_hats

        elif self.method == 'lasso':
            # Apply the expansion fitted during fit(); do not re-fit it on new data.
            x_poly_test = self.poly.transform(x)

            mu0_hats = self.mu0_model.predict(x_poly_test)
            mu1_hats = self.mu1_model.predict(x_poly_test)
            predictions = mu1_hats - mu0_hats

        elif self.method == 'nn':
            x = tf.convert_to_tensor(x)
            mu0_hats = self.mu0_model(x)
            mu1_hats = self.mu1_model(x)
            # Flatten the model output to one value per input row.
            predictions = np.reshape(mu1_hats - mu0_hats, (len(x),))

        else:
            raise NotImplementedError('Base learner method not specified in predict')
        return predictions

In [None]:
# Fit the T-learner with the NN base learner on the training data and report the
# mean squared error of its predictions against tau_test.
t_nn = TLearner(method='nn')
t_nn.fit(X_train, Y_train, W_train)
predictions = t_nn.predict(X_test)
((predictions - tau_test) ** 2).mean()
# Previously recorded results per base learner:
# rf: 10.091322530886687 / 7.359776707186481 (max_features=0.66)
# lasso: 5.583461099392904
# nn: 3.1867804239471273

In [None]:
# Shuffled, seeded stratified splitter with CF_FOLDS folds. XLearner.fit (nn branch)
# reads this notebook-level name, so this cell must run before that fit is called.
stratified = StratifiedKFold(n_splits=CF_FOLDS, shuffle=True, random_state=0)

In [None]:
# Demo of the fold loop: the split is stratified on W_train; each fold loads a fresh
# 'model_25', fits it on the fold's training rows and prints predictions for the
# held-out rows.
for (train_index, test_index) in stratified.split(X_train, W_train):
    temp_model = load_model('model_25')
    print('New Fold')
    temp_model.fit(X_train[train_index], Y_train[train_index], epochs=30)
    print(temp_model.predict(X_train[test_index]))

# Class S Learner

In [None]:
class SLearner:
    """S-learner: one outcome model on the covariates plus the treatment column.

    ``fit`` appends ``w`` as an extra last column of ``x``. ``predict`` evaluates the
    fitted model with that column set to 1 and to 0 and returns the difference.

    Parameters
    ----------
    method : str
        Base learner: ``'rf'`` (``RandomForestRegressor``), ``'lasso'`` (``LassoCV``
        on degree-3 polynomial features) or ``'nn'`` (model loaded via
        ``load_model('model_26')``).

    Raises
    ------
    NotImplementedError
        If ``method`` is none of the values above.
    """

    def __init__(self, method):
        self.method = method

        if method == 'rf':
            self.mux_model = RandomForestRegressor(n_estimators=2000, max_depth=100, random_state=0, max_features=0.66)
        elif method == 'lasso':
            self.mux_model = LassoCV(cv=10, tol=1e-2, random_state=0)
            # Feature expansion over [x, w]; fitted once in fit().
            self.poly = PolynomialFeatures(degree=3, interaction_only=False, include_bias=False)
        elif method == 'nn':
            self.mux_model = load_model('model_26')
        else:
            raise NotImplementedError('Base learner method not specified')

    def fit(self,
            x, y, w):
        """Fit the single outcome model on ``[x, w]``.

        Parameters
        ----------
        x : 2-D covariate array.
        y : outcomes, aligned with ``x``.
        w : treatment indicator with ``len(w)`` entries; appended as the last column.

        Raises
        ------
        NotImplementedError
            If ``self.method`` is not a supported base learner.
        """
        # Treatment indicator becomes the last feature column.
        x_w = np.concatenate((x, np.reshape(w, (len(w), 1))), axis=1)

        if self.method == 'rf':
            # 1: train mu_x
            print("Fitting random forest for mu_x")
            self.mux_model.fit(x_w, y)

        elif self.method == 'lasso':
            # The polynomial expansion is fitted here and only applied in predict().
            x_poly_train = self.poly.fit_transform(x_w)

            # 1: train mu_x
            print("Fitting lasso for mu_x")
            self.mux_model.fit(x_poly_train, y)

        elif self.method == 'nn':
            x_w = tf.convert_to_tensor(x_w)
            y = tf.convert_to_tensor(y)

            # 1: train mu_x (fixed number of epochs, no callbacks)
            print("Training neural network for mu_x")
            self.mux_model.fit(x_w, y,
                               batch_size=100,
                               epochs=100,
                               callbacks=None,
                               verbose=0
                               )

        else:
            raise NotImplementedError('Base learner method not specified in fit')

    def predict(self,
                x):
        """Return ``mu(x, w=1) - mu(x, w=0)`` for every row of ``x``.

        Raises
        ------
        NotImplementedError
            If ``self.method`` is not a supported base learner.
        """
        # Two copies of x: treatment column forced to 0 and to 1.
        x_0 = np.concatenate((x, np.zeros((len(x), 1))), axis=1)
        x_1 = np.concatenate((x, np.ones((len(x), 1))), axis=1)

        if self.method == 'rf':
            mu0_hats = self.mux_model.predict(x_0)
            mu1_hats = self.mux_model.predict(x_1)
            predictions = mu1_hats - mu0_hats

        elif self.method == 'lasso':
            # Apply the expansion fitted during fit(); do not re-fit it on new data.
            x_poly_0 = self.poly.transform(x_0)
            x_poly_1 = self.poly.transform(x_1)

            mu0_hats = self.mux_model.predict(x_poly_0)
            mu1_hats = self.mux_model.predict(x_poly_1)
            predictions = mu1_hats - mu0_hats

        elif self.method == 'nn':
            x_0 = tf.convert_to_tensor(x_0)
            x_1 = tf.convert_to_tensor(x_1)
            mu0_hats = self.mux_model(x_0)
            mu1_hats = self.mux_model(x_1)
            # Flatten the model output to one value per input row.
            predictions = np.reshape(mu1_hats - mu0_hats, (len(x),))

        else:
            raise NotImplementedError('Base learner method not specified in predict')
        return predictions

In [None]:
# Fit the S-learner with the NN base learner on the training data and report the
# mean squared error of its predictions against tau_test.
s_nn = SLearner('nn')
s_nn.fit(X_train, Y_train, W_train)
predictions = s_nn.predict(X_test)
((predictions - tau_test) ** 2).mean()
# Previously recorded results per base learner:
# rf: 18.134009488483855
# lasso: 5.559126710289806
# nn: 1.987529792077956

# Class X Learner

In [None]:
class XLearner:
    """X-learner: imputed treatment effects per arm, blended by the treatment probability.

    ``fit`` builds imputed effects ``y - mu_0(x)`` on treated rows and ``mu_1(x) - y``
    on control rows, regresses them on ``x`` (``tau_1`` / ``tau_0``) and fits a model
    ``e(x)`` of ``w`` given ``x``. ``predict`` returns
    ``e(x) * tau_0(x) + (1 - e(x)) * tau_1(x)``.

    Parameters
    ----------
    method : str
        Base learner: ``'rf'``, ``'lasso'`` (degree-3 polynomial features) or ``'nn'``.
        For ``'nn'`` the outcome models are cross-fitted with the notebook-level
        ``stratified`` splitter, so ``self.mu0_model`` / ``self.mu1_model`` are
        loaded but not trained.

    Raises
    ------
    NotImplementedError
        If ``method`` is none of the values above.
    """

    def __init__(self, method):
        self.method = method

        if method == 'rf':
            self.mu0_model = RandomForestRegressor(n_estimators=100, max_depth=100, random_state=0)
            self.mu1_model = RandomForestRegressor(n_estimators=100, max_depth=100, random_state=0)
            self.ex_model = RandomForestClassifier(max_depth=100, random_state=0)
            self.tau0_model = RandomForestRegressor(n_estimators=100, max_depth=100, random_state=0)
            self.tau1_model = RandomForestRegressor(n_estimators=100, max_depth=100, random_state=0)

        elif method == 'lasso':
            self.mu0_model = LassoCV(cv=10, tol=1, random_state=0)
            self.mu1_model = LassoCV(cv=10, tol=1, random_state=0)
            self.ex_model = LogisticRegressionCV(cv=KFold(10), penalty='l1', solver='saga', tol=1, random_state=0)
            self.tau0_model = LassoCV(cv=10, tol=1, random_state=0)
            self.tau1_model = LassoCV(cv=10, tol=1, random_state=0)
            self.poly = PolynomialFeatures(degree=3, interaction_only=False, include_bias=False)

        elif method == 'nn':
            self.mu0_model = load_model('model_25')
            self.mu1_model = load_model('model_25')
            self.ex_model = load_model('model_ex')
            self.tau0_model = load_model('model_25')
            self.tau1_model = load_model('model_25')
        else:
            raise NotImplementedError('Base learner method not specified')

    @staticmethod
    def _cross_fit_nn_outcome(x, y, w, train_arm):
        """Out-of-fold outcome predictions for the arm opposite to ``train_arm``.

        For every fold of the notebook-level ``stratified`` splitter a fresh
        ``'model_25'`` is fitted on the training rows with ``w == train_arm`` and
        used to predict the held-out rows of the other arm.

        Parameters
        ----------
        x, y, w : numpy arrays with one row / entry per unit.
        train_arm : int
            Treatment value (0 or 1) whose rows are used for fitting.

        Returns
        -------
        numpy.ndarray
            Length ``len(x)``; filled for rows with ``w == 1 - train_arm``, NaN elsewhere.
        """
        other_arm = 1 - train_arm
        predictions = np.full(len(x), np.nan)

        for train_index, test_index in stratified.split(x, w):
            fold_model = load_model('model_25')

            # Integer row positions keep every write aimed at `predictions` itself;
            # chained mask indexing (a[m1][m2] = v) would only modify a temporary copy.
            fit_rows = train_index[w[train_index] == train_arm]
            predict_rows = test_index[w[test_index] == other_arm]

            fold_model.fit(tf.convert_to_tensor(x[fit_rows]), tf.convert_to_tensor(y[fit_rows]),
                           epochs=100,
                           callbacks=None,
                           verbose=0
                           )
            if len(predict_rows) > 0:
                fold_hats = fold_model(tf.convert_to_tensor(x[predict_rows]))
                predictions[predict_rows] = np.reshape(fold_hats, (len(predict_rows),))

        return predictions

    def fit(self,
            x, y, w):
        """Fit the outcome, imputed-effect and treatment models.

        Parameters
        ----------
        x : covariates, selected row-wise with the masks ``w == 0`` / ``w == 1``.
        y : outcomes, aligned with ``x``.
        w : treatment indicator, compared against 0 and 1.

        Raises
        ------
        NotImplementedError
            If ``self.method`` is not a supported base learner.
        """
        if self.method == 'rf':
            # 1: train mu_0 and get imputed_1
            print("Fitting random forest for mu_0")
            self.mu0_model.fit(x[w == 0], y[w == 0])
            imputed_1 = y[w == 1] - self.mu0_model.predict(x[w == 1])

            # 2: train mu_1 and get imputed_0
            print("Fitting random forest for mu_1")
            self.mu1_model.fit(x[w == 1], y[w == 1])
            imputed_0 = self.mu1_model.predict(x[w == 0]) - y[w == 0]

            # 3: train tau_0
            print("Fitting random forest for tau_0")
            self.tau0_model.fit(x[w == 0], imputed_0)

            # 4: train tau_1
            print("Fitting random forest for tau_1")
            self.tau1_model.fit(x[w == 1], imputed_1)

            # 5: train e_x
            print("Fitting random forest for e_x")
            self.ex_model.fit(x, w)

        elif self.method == 'lasso':
            # The polynomial expansion is fitted here and only applied in predict().
            x_poly_train = self.poly.fit_transform(x)

            # 1: train mu_0 and get imputed_1
            print("Fitting lasso for mu_0")
            self.mu0_model.fit(x_poly_train[w == 0], y[w == 0])
            imputed_1 = y[w == 1] - self.mu0_model.predict(x_poly_train[w == 1])

            # 2: train mu_1 and get imputed_0
            print("Fitting lasso for mu_1")
            self.mu1_model.fit(x_poly_train[w == 1], y[w == 1])
            imputed_0 = self.mu1_model.predict(x_poly_train[w == 0]) - y[w == 0]

            # 3: train tau_0
            print("Fitting lasso for tau_0")
            self.tau0_model.fit(x_poly_train[w == 0], imputed_0)

            # 4: train tau_1
            print("Fitting lasso for tau_1")
            self.tau1_model.fit(x_poly_train[w == 1], imputed_1)

            # 5: train e_x
            print("Fitting lasso for e_x")
            self.ex_model.fit(x_poly_train, w)

        elif self.method == 'nn':
            # Index bookkeeping is done on numpy arrays; tensors are created right
            # before they are handed to a model.
            x_np = np.asarray(x)
            y_np = np.asarray(y)
            w_np = np.asarray(w)
            control = w_np == 0
            treated = w_np == 1

            # 1: cross-fit mu_0 and impute effects for the treated rows
            print("Training neural network for mu_0")
            mu0_out_of_fold = self._cross_fit_nn_outcome(x_np, y_np, w_np, train_arm=0)
            imputed_1 = y_np[treated] - mu0_out_of_fold[treated]

            # 2: cross-fit mu_1 and impute effects for the control rows
            print("Training neural network for mu_1")
            mu1_out_of_fold = self._cross_fit_nn_outcome(x_np, y_np, w_np, train_arm=1)
            imputed_0 = mu1_out_of_fold[control] - y_np[control]

            # 3: train tau_0
            print("Training neural network for tau_0")
            self.tau0_model.fit(tf.convert_to_tensor(x_np[control]), tf.convert_to_tensor(imputed_0),
                                batch_size=100,
                                epochs=100,
                                callbacks=None,
                                verbose=0
                                )

            # 4: train tau_1
            print("Training neural network for tau_1")
            self.tau1_model.fit(tf.convert_to_tensor(x_np[treated]), tf.convert_to_tensor(imputed_1),
                                batch_size=100,
                                epochs=100,
                                callbacks=None,
                                verbose=0
                                )

            # 5: train e_x
            print("Training neural network for e_x")
            self.ex_model.fit(tf.convert_to_tensor(x_np), tf.convert_to_tensor(w_np),
                              batch_size=100,
                              epochs=100,
                              callbacks=None,
                              verbose=0
                              )

        else:
            raise NotImplementedError('Base learner method not specified in fit')

    def predict(self,
                x):
        """Return ``e(x) * tau_0(x) + (1 - e(x)) * tau_1(x)`` for every row of ``x``.

        Raises
        ------
        NotImplementedError
            If ``self.method`` is not a supported base learner.
        """
        if self.method == 'rf':
            tau_0_hats = self.tau0_model.predict(x)
            tau_1_hats = self.tau1_model.predict(x)
            probs = self.ex_model.predict_proba(x)[:, 1]

        elif self.method == 'lasso':
            # Apply the expansion fitted during fit(); do not re-fit it on new data.
            x_poly_test = self.poly.transform(x)

            tau_0_hats = self.tau0_model.predict(x_poly_test)
            tau_1_hats = self.tau1_model.predict(x_poly_test)
            probs = self.ex_model.predict_proba(x_poly_test)[:, 1]

        elif self.method == 'nn':
            x = tf.convert_to_tensor(x)
            tau_0_hats = np.reshape(self.tau0_model(x), (len(x),))
            tau_1_hats = np.reshape(self.tau1_model(x), (len(x),))
            # The e_x model output is treated as a logit and squashed with a sigmoid.
            logit = self.ex_model(x)
            probs = np.reshape(keras.activations.sigmoid(logit), (len(logit),))

        else:
            raise NotImplementedError('Base learner method not specified in predict')

        predictions = probs * tau_0_hats + (1 - probs) * tau_1_hats
        return predictions

In [None]:
# Fit the X-learner and report the mean squared error of its predictions against
# tau_test. NOTE: despite the variable name `x_rf`, the NN base learner is used here.
x_rf = XLearner('nn')
x_rf.fit(X_train, Y_train, W_train)
predictions = x_rf.predict(X_test)
((predictions - tau_test) ** 2).mean()
# Previously recorded results per base learner:
# rf: # 3.1369636408859614 --> same (good)
# lasso: # nn: 7.667219448077926 --> same (good)
# nn: 3.161416602361538 --> same (good)

In [None]:
# Scratch cell reproducing the fold bookkeeping from XLearner.fit (nn branch).
# Nothing is fitted and imputed_1 is never written here; after the loop the
# x_/w_/y_ variables and temp_model hold the values of the LAST fold only.
imputed_1 = np.empty(len(X_train), )

for train_index, test_index in stratified.split(X_train, W_train):
    # Boolean mask that is True on this fold's held-out rows.
    index = np.zeros(len(X_train), dtype=bool)
    index[test_index] = 1

    temp_model = load_model('model_25')

    x_train = X_train[~index]
    x_test = X_train[index]
    w_train = W_train[~index]
    w_test = W_train[index]
    y_train = Y_train[~index]
    y_test = Y_train[index]

In [None]:
# Scratch: output of the (unfitted here) last-fold model on that fold's held-out treated rows.
temp_model(x_test[w_test == 1])

In [None]:
# Scratch: held-out control rows of the last fold from the scratch loop above.
x_test[w_test == 0]

In [None]:
# Scratch: a single mask shared across folds; every row that appears in any fold's
# test indices is set to True.
index = np.zeros(len(X_train), dtype=bool)
for train_index, test_index in stratified.split(X_train, W_train):
    index[test_index] = 1

In [None]:
# Scratch: display the accumulated test-fold mask.
index

In [None]:
# NOTE: identical to the earlier cell that defines `stratified`; re-running it just
# rebinds the same splitter configuration.
stratified = StratifiedKFold(n_splits=CF_FOLDS, shuffle=True, random_state=0)

In [None]:
# NOTE: duplicate of the earlier fold-loop demo cell: each fold loads a fresh
# 'model_25', fits it on the fold's training rows and prints predictions for the
# held-out rows.
for (train_index, test_index) in stratified.split(X_train, W_train):
    temp_model = load_model('model_25')
    print('New Fold')
    temp_model.fit(X_train[train_index], Y_train[train_index], epochs=30)
    print(temp_model.predict(X_train[test_index]))

In [None]:
# Scratch: zero-length array created through TensorFlow's NumPy-style API.
tf.experimental.numpy.empty(0)

# Class R-Learner

In [None]:
class RLearner:
    """R-learner: weighted regression of residualised outcomes on the covariates.

    ``fit`` estimates ``mu(x)`` (outcome given ``x``) and ``e(x)`` (treatment given
    ``x``), forms pseudo-outcomes ``(y - mu(x)) / (w - e(x))`` with weights
    ``(w - e(x)) ** 2`` and fits ``tau`` on them. ``predict`` returns ``tau(x)``.

    Parameters
    ----------
    method : str
        Base learner: ``'rf'``, ``'lasso'`` (degree-3 polynomial features) or ``'nn'``.

    Raises
    ------
    NotImplementedError
        If ``method`` is none of the values above.
    """

    def __init__(self, method):
        self.method = method

        if method == 'rf':
            self.mux_model = RandomForestRegressor(n_estimators=100, max_depth=100, random_state=0)
            self.ex_model = RandomForestClassifier(max_depth=100, random_state=0)
            self.tau_model = RandomForestRegressor(n_estimators=100, max_depth=100, random_state=0)

        elif method == 'lasso':
            self.mux_model = LassoCV(cv=10, tol=1, random_state=0)
            self.ex_model = LogisticRegressionCV(cv=KFold(10), penalty='l1', solver='saga', tol=1, random_state=0)
            self.tau_model = LassoCV(cv=10, tol=1, random_state=0)
            self.poly = PolynomialFeatures(degree=3, interaction_only=False, include_bias=False)

        elif method == 'nn':
            self.mux_model = load_model('model_25')
            self.ex_model = load_model('model_ex')
            self.tau_model = load_model('model_25')

        else:
            raise NotImplementedError('Base learner method not specified or typo')

    @staticmethod
    def _residualize(y, mu_hats, w, probs):
        """Return ``(pseudo_outcomes, weights)`` for the final weighted regression.

        ``pseudo_outcomes = (y - mu_hats) / (w - probs)`` and
        ``weights = (w - probs) ** 2``. Where ``w - probs`` is exactly zero the
        weight is zero, so the pseudo-outcome is set to 0 instead of inf/NaN;
        every other entry is computed as the plain ratio.
        """
        outcome_residual = np.asarray(y, dtype=float) - np.asarray(mu_hats, dtype=float)
        treatment_residual = np.asarray(w, dtype=float) - np.asarray(probs, dtype=float)
        pseudo_outcomes = np.divide(outcome_residual, treatment_residual,
                                    out=np.zeros_like(outcome_residual),
                                    where=treatment_residual != 0)
        weights = treatment_residual ** 2
        return pseudo_outcomes, weights

    def fit(self, x, y, w):
        """Fit ``mu``, ``e`` and then ``tau`` on the weighted pseudo-outcomes.

        Parameters
        ----------
        x : covariates.
        y : outcomes, aligned with ``x``.
        w : treatment indicator, aligned with ``x``.

        Raises
        ------
        NotImplementedError
            If ``self.method`` is not a supported base learner.
        """
        if self.method == 'rf':
            # 1: fit mu_x
            print('Fitting random forest for mu_x')
            self.mux_model.fit(x, y)

            # 2: fit ex
            print('Fitting random forest for e_x')
            self.ex_model.fit(x, w)

            # 3: calculate pseudo_outcomes & weights (in-sample)
            probs = self.ex_model.predict_proba(x)[:, 1]
            pseudo_outcomes, weights = self._residualize(y, self.mux_model.predict(x), w, probs)

            # 4: fit tau
            print('Fitting random forest for tau_x')
            self.tau_model.fit(x, pseudo_outcomes, sample_weight=weights)

        elif self.method == 'lasso':
            # The polynomial expansion is fitted here and only applied in predict().
            x_poly_train = self.poly.fit_transform(x)

            # 1: fit mu_x
            print('Fitting lasso for mu_x')
            self.mux_model.fit(x_poly_train, y)

            # 2: fit ex
            print('Fitting lasso for e_x')
            self.ex_model.fit(x_poly_train, w)

            # 3: calculate pseudo_outcomes & weights (in-sample)
            probs = self.ex_model.predict_proba(x_poly_train)[:, 1]
            pseudo_outcomes, weights = self._residualize(y, self.mux_model.predict(x_poly_train), w, probs)

            # 4: fit tau
            print('Fitting lasso for tau_x')
            self.tau_model.fit(x_poly_train, pseudo_outcomes, sample_weight=weights)

        elif self.method == 'nn':
            # to tensor
            x = tf.convert_to_tensor(x)
            y = tf.convert_to_tensor(y)
            w = tf.convert_to_tensor(w)

            # `callback` is a notebook-level name defined outside this class.
            # 1: fit mu_x
            print('Training NN for mu_x')
            self.mux_model.fit(x, y,
                               batch_size=100,
                               epochs=400,
                               callbacks=callback,
                               validation_split=0.3,
                               verbose=0
                               )
            # 2: fit ex
            print('Training NN for e_x')
            self.ex_model.fit(x, w,
                              batch_size=100,
                              epochs=400,
                              callbacks=callback,
                              validation_split=0.3,
                              verbose=0
                              )

            # 3: calculate pseudo_outcomes & weights; the e_x output is treated as a
            # logit and both model outputs are flattened to one value per row.
            probs = np.reshape(keras.activations.sigmoid(self.ex_model(x)), (len(x),))
            mu_hats = np.reshape(self.mux_model(x), (len(x),))
            pseudo_outcomes, weights = self._residualize(y, mu_hats, w, probs)

            # 4: fit tau
            print('Training NN for tau_x')
            self.tau_model.fit(x, tf.convert_to_tensor(pseudo_outcomes),
                               sample_weight=tf.convert_to_tensor(weights),
                               batch_size=100,
                               epochs=100,
                               callbacks=callback,
                               validation_split=0.3,
                               verbose=0
                               )

        else:
            raise NotImplementedError('Base learner method not specified in fit')

    def predict(self, x):
        """Return ``tau(x)`` for every row of ``x``.

        Raises
        ------
        NotImplementedError
            If ``self.method`` is not a supported base learner.
        """
        if self.method == 'rf':
            predictions = self.tau_model.predict(x)

        elif self.method == 'lasso':
            # Apply the expansion fitted during fit(); do not re-fit it on new data.
            x_poly_test = self.poly.transform(x)
            predictions = self.tau_model.predict(x_poly_test)

        elif self.method == 'nn':
            # to tensor
            x = tf.convert_to_tensor(x)
            predictions = np.reshape(self.tau_model(x), (len(x),))

        else:
            raise NotImplementedError('Base learner method not specified in predict')

        return predictions

In [None]:
# Fit the R-learner and report the mean squared error of its predictions against
# tau_test. NOTE: despite the variable name `r_rf`, the NN base learner is used here.
r_rf = RLearner('nn')
r_rf.fit(X_train, Y_train, W_train)
predictions = r_rf.predict(X_test)
((predictions - tau_test) ** 2).mean()
# Previously recorded results per base learner:
# rf: 17.722925118749608
# lasso: 5.50038865455844
# nn: 47.81939839016621

# Class DR-Learner

In [None]:
class DRLearner:
    """DR-learner: regression of doubly-robust pseudo-outcomes on the covariates.

    ``fit`` estimates ``mu_0``, ``mu_1`` and ``e`` and builds

        (w - e(x)) / (e(x) * (1 - e(x))) * (y - mu_w(x)) + mu_1(x) - mu_0(x)

    with ``mu_w = w * mu_1 + (1 - w) * mu_0``; ``tau`` is then fitted on these
    pseudo-outcomes. ``predict`` returns ``tau(x)``.

    Parameters
    ----------
    method : str
        Base learner: ``'rf'``, ``'lasso'`` (degree-3 polynomial features) or ``'nn'``.

    Raises
    ------
    NotImplementedError
        If ``method`` is none of the values above.
    """

    def __init__(self, method):
        self.method = method
        if method == 'rf':
            self.mu0_model = RandomForestRegressor(n_estimators=100, max_depth=100, random_state=0)
            self.mu1_model = RandomForestRegressor(n_estimators=100, max_depth=100, random_state=0)
            self.ex_model = RandomForestClassifier(n_estimators=100, max_depth=100, random_state=0)
            self.tau_model = RandomForestRegressor(n_estimators=100, max_depth=100, random_state=0)

        elif method == 'lasso':
            self.mu0_model = LassoCV(cv=10, tol=1, random_state=0)
            self.mu1_model = LassoCV(cv=10, tol=1, random_state=0)
            self.ex_model = LogisticRegressionCV(cv=KFold(10), penalty='l1', solver='saga', tol=1, random_state=0)
            self.tau_model = LassoCV(cv=10, tol=1, random_state=0)
            self.poly = PolynomialFeatures(degree=3, interaction_only=False, include_bias=False)

        elif method == 'nn':
            self.mu0_model = load_model('model_25')
            self.mu1_model = load_model('model_25')
            self.ex_model = load_model('model_ex')
            self.tau_model = load_model('model_25')

        else:
            raise NotImplementedError('Base learner method not specified or typo')

    def fit(self, x, y, w):
        """Fit ``mu_0``, ``mu_1``, ``e`` and then ``tau`` on the pseudo-outcomes.

        Parameters
        ----------
        x : covariates, selected row-wise with the masks ``w == 0`` / ``w == 1``.
        y : outcomes, aligned with ``x``.
        w : treatment indicator, compared against 0 and 1.

        Raises
        ------
        NotImplementedError
            If ``self.method`` is not a supported base learner.
        """
        # NOTE(review): in every branch the pseudo-outcome is undefined where the
        # estimated probability is exactly 0 or 1 (division by zero).
        if self.method == 'rf':
            # 1: fit mu_0
            print('Fitting random forest for mu_0')
            self.mu0_model.fit(x[w == 0], y[w == 0])

            # 2: fit mu_1
            print('Fitting random forest for mu_1')
            self.mu1_model.fit(x[w == 1], y[w == 1])

            # 3: fit ex; both class columns come from a single predict_proba call
            print('Fitting random forest for e_x')
            self.ex_model.fit(x, w)
            class_probs = self.ex_model.predict_proba(x)
            probs = class_probs[:, 1]
            neg_prob = class_probs[:, 0]

            # calculate pseudo_outcomes (each model is evaluated once)
            mu_0_hats = self.mu0_model.predict(x)
            mu_1_hats = self.mu1_model.predict(x)
            mu_w = w * mu_1_hats + (1 - w) * mu_0_hats
            pseudo_outcomes = (w - probs) / (probs * neg_prob) * (y - mu_w) + mu_1_hats - mu_0_hats

            # 4 fit tau
            print('Fitting random forest for tau_x')
            self.tau_model.fit(x, pseudo_outcomes)

        elif self.method == 'lasso':
            # The polynomial expansion is fitted here and only applied in predict().
            x_poly_train = self.poly.fit_transform(x)

            # 1: fit mu_0
            print('Fitting lasso for mu_0')
            self.mu0_model.fit(x_poly_train[w == 0], y[w == 0])

            # 2: fit mu_1
            print('Fitting lasso for mu_1')
            self.mu1_model.fit(x_poly_train[w == 1], y[w == 1])

            # 3: fit ex
            print('Fitting lasso for e_x')
            self.ex_model.fit(x_poly_train, w)
            probs = self.ex_model.predict_proba(x_poly_train)[:, 1]

            # calculate pseudo_outcomes (each model is evaluated once)
            mu_0_hats = self.mu0_model.predict(x_poly_train)
            mu_1_hats = self.mu1_model.predict(x_poly_train)
            mu_w = w * mu_1_hats + (1 - w) * mu_0_hats
            pseudo_outcomes = (w - probs) / (probs * (1 - probs)) * (y - mu_w) + mu_1_hats - mu_0_hats

            # 4 fit tau
            print('Fitting lasso for tau_x')
            self.tau_model.fit(x_poly_train, pseudo_outcomes)

        elif self.method == 'nn':
            # to tensor
            x = tf.convert_to_tensor(x)
            y = tf.convert_to_tensor(y)
            w = tf.convert_to_tensor(w)
            n_rows = len(x)

            # 1: fit mu_0
            print('Training NN for mu_0')
            self.mu0_model.fit(x[w == 0], y[w == 0],
                               batch_size=100,
                               epochs=100,
                               callbacks=None,
                               verbose=0
                               )

            # 2: fit mu_1
            print('Training NN for mu_1')
            self.mu1_model.fit(x[w == 1], y[w == 1],
                               batch_size=100,
                               epochs=100,
                               callbacks=None,
                               verbose=0
                               )

            # 3: fit ex
            print('Training NN for e_x')
            self.ex_model.fit(x, w,
                              batch_size=100,
                              epochs=100,
                              callbacks=None,
                              verbose=0
                              )

            # The e_x output is treated as a logit and squashed with a sigmoid.
            probs = tf.reshape(keras.activations.sigmoid(self.ex_model(x)), (n_rows,))

            # Flatten the model outputs to one value per row before combining them
            # with the 1-D tensors w, y and probs; a column-shaped output would
            # otherwise broadcast the pseudo-outcomes to an (n, n) matrix.
            mu_0_hats = tf.reshape(self.mu0_model(x), (n_rows,))
            mu_1_hats = tf.reshape(self.mu1_model(x), (n_rows,))

            # calculate pseudo_outcomes
            mu_w = w * mu_1_hats + (1 - w) * mu_0_hats
            pseudo_outcomes = (w - probs) / (probs * (1 - probs)) * (y - mu_w) + mu_1_hats - mu_0_hats

            # 4 fit tau
            print('Training NN for tau_x')
            self.tau_model.fit(x, pseudo_outcomes,
                               batch_size=100,
                               epochs=100,
                               validation_data=None,
                               callbacks=None,
                               verbose=0
                               )

        else:
            raise NotImplementedError('Base learner method not specified in fit')

    def predict(self, x):
        """Return ``tau(x)`` for every row of ``x``.

        Raises
        ------
        NotImplementedError
            If ``self.method`` is not a supported base learner.
        """
        if self.method == 'rf':
            predictions = self.tau_model.predict(x)

        elif self.method == 'lasso':
            # Apply the expansion fitted during fit(); do not re-fit it on new data.
            x_poly_test = self.poly.transform(x)
            predictions = self.tau_model.predict(x_poly_test)

        elif self.method == 'nn':
            # to tensor
            x = tf.convert_to_tensor(x)
            # predict
            predictions = np.reshape(self.tau_model(x), (len(x),))

        else:
            raise NotImplementedError('Base learner method not specified in predict')

        return predictions

In [None]:
# Fit the DR-learner and report the mean squared error of its predictions against
# tau_test. NOTE: despite the variable name `dr_rf`, the NN base learner is used here.
dr_rf = DRLearner('nn')
dr_rf.fit(X_train, Y_train, W_train)
predictions = dr_rf.predict(X_test)
((predictions - tau_test) ** 2).mean()
# Previously recorded results per base learner:
# rf: 5.385491721300538 # why different??? ---> because if you take 1 - probs it is not exactly the same as taking the [:,0] column!!
# lasso: 6.252082321980517
# nn: 8.35142898943478

# CHECK THIS: CHANGE (1 - PROBS) TO [:,0] TO BE MORE EXACT!!!

# Class RA-Learner

In [None]:
class RALearner:
    """Regression-adjustment (RA) learner for heterogeneous treatment effects.

    Two outcome models are fitted separately on the control (``w == 0``) and
    treated (``w == 1``) samples. Their predictions stand in for the
    unobserved potential outcome of every unit, giving the pseudo-outcome
    ``w * (y - mu0(x)) + (1 - w) * (mu1(x) - y)``, on which ``tau_model`` is
    finally regressed.

    Parameters
    ----------
    method : str
        Base learner: ``'rf'`` (random forests), ``'lasso'`` (``LassoCV`` on
        degree-3 polynomial features) or ``'nn'`` (models loaded from
        ``'model_25'``).

    Raises
    ------
    NotImplementedError
        If ``method`` is none of the supported values.
    """

    def __init__(self, method):
        self.method = method
        if method == 'rf':
            self.mu0_model = RandomForestRegressor(n_estimators=100, max_depth=100, random_state=0)
            self.mu1_model = RandomForestRegressor(n_estimators=100, max_depth=100, random_state=0)
            self.tau_model = RandomForestRegressor(n_estimators=100, max_depth=100, random_state=0)

        elif method == 'lasso':
            self.mu0_model = LassoCV(cv=10, tol=1, random_state=0)
            self.mu1_model = LassoCV(cv=10, tol=1, random_state=0)
            self.tau_model = LassoCV(cv=10, tol=1, random_state=0)
            # shared feature expansion: fitted in fit(), reused in predict()
            self.poly = PolynomialFeatures(degree=3, interaction_only=False, include_bias=False)

        elif method == 'nn':
            self.mu0_model = load_model('model_25')
            self.mu1_model = load_model('model_25')
            self.tau_model = load_model('model_25')

        else:
            raise NotImplementedError('Base learner method not specified or typo')

    @staticmethod
    def _pseudo_outcomes(y, w, mu0_hat, mu1_hat):
        """Return the RA pseudo-outcome ``w * (y - mu0_hat) + (1 - w) * (mu1_hat - y)``."""
        return w * (y - mu0_hat) + (1 - w) * (mu1_hat - y)

    def fit(self, x, y, w):
        """Fit both outcome models and then the treatment-effect model.

        Parameters
        ----------
        x : array-like
            Feature matrix.
        y : array-like
            Observed outcomes, one per row of ``x``.
        w : array-like
            Treatment indicator; rows with ``w == 0`` train ``mu0_model`` and
            rows with ``w == 1`` train ``mu1_model``.
        """
        if self.method == 'rf':
            # 1: fit mu_0
            print('Fitting random forest for mu_0')
            self.mu0_model.fit(x[w == 0], y[w == 0])

            # 2: fit mu_1
            print('Fitting random forest for mu_1')
            self.mu1_model.fit(x[w == 1], y[w == 1])

            # 3: pseudo-outcomes from the imputed counterfactuals
            pseudo_outcomes = self._pseudo_outcomes(
                y, w, self.mu0_model.predict(x), self.mu1_model.predict(x))

            # 4: fit tau
            print('Fitting random forest for tau_x')
            self.tau_model.fit(x, pseudo_outcomes)

        elif self.method == 'lasso':
            x_poly_train = self.poly.fit_transform(x)

            # 1: fit mu_0
            print('Fitting lasso for mu_0')
            self.mu0_model.fit(x_poly_train[w == 0], y[w == 0])

            # 2: fit mu_1
            print('Fitting lasso for mu_1')
            self.mu1_model.fit(x_poly_train[w == 1], y[w == 1])

            # 3: pseudo-outcomes from the imputed counterfactuals
            pseudo_outcomes = self._pseudo_outcomes(
                y, w, self.mu0_model.predict(x_poly_train), self.mu1_model.predict(x_poly_train))

            # 4: fit tau
            print('Fitting lasso for tau_x')
            self.tau_model.fit(x_poly_train, pseudo_outcomes)

        elif self.method == 'nn':
            # to tensor
            x = tf.convert_to_tensor(x)
            y = tf.convert_to_tensor(y)
            w = tf.convert_to_tensor(w)

            # 1: fit mu_0
            print('Training NN for mu_0')
            self.mu0_model.fit(x[w == 0], y[w == 0],
                               batch_size=100,
                               epochs=100,
                               callbacks=None,
                               verbose=0
                               )

            # 2: fit mu_1
            print('Training NN for mu_1')
            self.mu1_model.fit(x[w == 1], y[w == 1],
                               batch_size=100,
                               epochs=100,
                               callbacks=None,
                               verbose=0
                               )

            # 3: pseudo-outcomes; network outputs are flattened to (n_samples,)
            mu0_predictions = np.reshape(self.mu0_model(x), (len(x),))
            mu1_predictions = np.reshape(self.mu1_model(x), (len(x),))
            pseudo_outcomes = self._pseudo_outcomes(y, w, mu0_predictions, mu1_predictions)

            # 4: fit tau
            print('Training NN for tau_x')
            self.tau_model.fit(x, pseudo_outcomes,
                               batch_size=100,
                               epochs=100,
                               validation_data=None,
                               callbacks=None,
                               verbose=0
                               )

    def predict(self, x):
        """Predict treatment effects for ``x`` with the fitted ``tau_model``.

        Returns the predictions of ``tau_model``; for ``'nn'`` they are
        reshaped to a 1-D array of length ``len(x)``.

        Raises
        ------
        NotImplementedError
            If ``self.method`` is not a supported base learner.
        """
        if self.method == 'rf':
            predictions = self.tau_model.predict(x)

        elif self.method == 'lasso':
            # Reuse the expansion fitted in fit(); never re-fit preprocessing on
            # prediction data.
            x_poly_test = self.poly.transform(x)
            predictions = self.tau_model.predict(x_poly_test)

        elif self.method == 'nn':
            # to tensor
            x = tf.convert_to_tensor(x)
            # predict
            predictions = np.reshape(self.tau_model(x), (len(x),))

        else:
            raise NotImplementedError('Base learner method not specified in predict')

        return predictions

In [None]:
# Fit the RA-learner on the training split and display the mean squared error of
# its test-set predictions against tau_test.
# NOTE: the variable is called ra_rf, but the learner is built with the 'nn' base method.
ra_rf = RALearner('nn')
ra_rf.fit(X_train, Y_train, W_train)
predictions = ra_rf.predict(X_test)
((predictions - tau_test) ** 2).mean()
# Recorded results per base learner:
# rf: 5.355494017645751
# lasso: 8.283890654355236
# nn: 3.3973654461530867

# Two further recorded values (the run they belong to is not stated):
# 3.353439795888397
# 3.3534397958883955

# Class PW-Learner

In [None]:
class PWLearner:
    """Propensity-weighting (PW) learner for heterogeneous treatment effects.

    A propensity model estimates ``e(x) = P(w = 1 | x)``; the
    inverse-propensity-weighted pseudo-outcome
    ``(w / e(x) - (1 - w) / (1 - e(x))) * y`` is then regressed on ``x`` by
    ``tau_model``.

    Parameters
    ----------
    method : str
        Base learner: ``'rf'``, ``'lasso'`` (L1 logistic regression / LassoCV
        on degree-3 polynomial features) or ``'nn'`` (two propensity networks
        loaded from ``'model_ex'`` plus a ``'model_25'`` network for tau).

    Raises
    ------
    NotImplementedError
        If ``method`` is none of the supported values.
    """

    def __init__(self, method):
        self.method = method
        if method == 'rf':
            self.ex_model = RandomForestClassifier(n_estimators=100, max_depth=100, random_state=0)
            self.tau_model = RandomForestRegressor(n_estimators=100, max_depth=100, random_state=0)

        elif method == 'lasso':
            self.ex_model = LogisticRegressionCV(cv=KFold(10), penalty='l1', solver='saga', tol=1, random_state=0)
            self.tau_model = LassoCV(cv=10, tol=1, random_state=0)
            # shared feature expansion: fitted in fit(), reused in predict()
            self.poly = PolynomialFeatures(degree=3, interaction_only=False, include_bias=False)

        elif method == 'nn':
            # one propensity network per half of the cross-fitting split
            self.ex_1_model = load_model('model_ex')
            self.ex_2_model = load_model('model_ex')
            self.tau_model = load_model('model_25')

        else:
            raise NotImplementedError('Base learner method not specified or typo')

    @staticmethod
    def _pseudo_outcomes(y, w, probs, counter_probs):
        """Return the IPW pseudo-outcome ``(w / probs - (1 - w) / counter_probs) * y``."""
        return (w / probs - (1 - w) / counter_probs) * y

    def fit(self, x, y, w):
        """Fit the propensity model(s) and then the treatment-effect model.

        Parameters
        ----------
        x : array-like
            Feature matrix.
        y : array-like
            Observed outcomes, one per row of ``x``.
        w : array-like
            Binary treatment indicator, one per row of ``x``.
        """
        if self.method == 'rf':
            # 1: fit e_x
            print('Fitting random forest for e_x')
            self.ex_model.fit(x, w)
            # one predict_proba call; column 1 is P(w=1|x), column 0 is P(w=0|x)
            class_probs = self.ex_model.predict_proba(x)
            probs = class_probs[:, 1]
            counter_probs = class_probs[:, 0]

            # 2: pseudo-outcomes
            pseudo_outcomes = self._pseudo_outcomes(y, w, probs, counter_probs)

            # 3: fit tau
            print('Fitting random forest for tau_x')
            self.tau_model.fit(x, pseudo_outcomes)

        elif self.method == 'lasso':
            x_poly_train = self.poly.fit_transform(x)

            # 1: fit e_x
            print('Fitting lasso for e_x')
            self.ex_model.fit(x_poly_train, w)

            class_probs = self.ex_model.predict_proba(x_poly_train)
            probs = class_probs[:, 1]
            counter_probs = class_probs[:, 0]

            # 2: pseudo-outcomes
            pseudo_outcomes = self._pseudo_outcomes(y, w, probs, counter_probs)

            # 3: fit tau
            print('Fitting lasso for tau_x')
            self.tau_model.fit(x_poly_train, pseudo_outcomes)

        elif self.method == 'nn':
            x = tf.convert_to_tensor(x)
            y = tf.convert_to_tensor(y)
            w = tf.convert_to_tensor(w)

            # Random 50/50 split for two-fold cross-fitting of the propensity score.
            index = np.zeros(len(x), dtype=bool)
            train_ind = np.random.choice(len(x), int(len(x) / 2), replace=False)
            index[train_ind] = 1

            probs = np.zeros(len(x), )

            # 1: fit e_x, one network per half
            print('Training NN for e_x')

            self.ex_1_model.fit(x[index], w[index],
                                batch_size=100,
                                epochs=50,
                                callbacks=None,
                                verbose=0
                                )

            self.ex_2_model.fit(x[~index], w[~index],
                                batch_size=100,
                                epochs=50,
                                callbacks=None,
                                verbose=0
                                )

            # Each network scores only the half it was NOT trained on, so every
            # propensity is out-of-sample. (Scoring the training half, as before,
            # is in-sample prediction and defeats the purpose of the split.)
            probs[~index] = tf.squeeze(keras.activations.sigmoid(self.ex_1_model(x[~index])))
            probs[index] = tf.squeeze(keras.activations.sigmoid(self.ex_2_model(x[index])))

            counter_probs = 1 - probs

            # 2: pseudo-outcomes
            pseudo_outcomes = self._pseudo_outcomes(y, w, probs, counter_probs)

            # 3: fit tau
            print('Training NN for tau_x')
            self.tau_model.fit(x, pseudo_outcomes,
                               batch_size=100,
                               epochs=50,
                               validation_data=None,
                               callbacks=None,
                               verbose=0
                               )

    def predict(self, x):
        """Predict treatment effects for ``x`` with the fitted ``tau_model``.

        Returns the predictions of ``tau_model``; for ``'nn'`` the network
        output is converted to a numpy array with singleton axes removed.

        Raises
        ------
        NotImplementedError
            If ``self.method`` is not a supported base learner.
        """
        if self.method == 'rf':
            predictions = self.tau_model.predict(x)

        elif self.method == 'lasso':
            # Reuse the expansion fitted in fit(); never re-fit preprocessing on
            # prediction data.
            x_poly_test = self.poly.transform(x)
            predictions = self.tau_model.predict(x_poly_test)

        elif self.method == 'nn':
            x = tf.convert_to_tensor(x)
            predictions = np.array(self.tau_model(x)).squeeze()

        else:
            raise NotImplementedError('Base learner method not specified in predict')

        return predictions

In [None]:
# Fit the PW-learner on the training split and display the mean squared error of
# its test-set predictions against tau_test.
# NOTE: the variable is called pw_rf, but the learner is built with the 'nn' base method.
pw_rf = PWLearner('nn')
pw_rf.fit(X_train, Y_train, W_train)
predictions = pw_rf.predict(X_test)
((predictions - tau_test) ** 2).mean()
# Recorded results per base learner:
# rf: 30.529802728890644
# lasso: 16.03059004204301
# nn: 271.8425349295992

# One further recorded value (the run it belongs to is not stated):
# 238.41087366274616

# Class U-Learner

In [None]:
class ULearner:
    """U-learner for heterogeneous treatment effects.

    Nuisance estimates ``mu(x) = E[y | x]`` and ``e(x) = P(w = 1 | x)`` are
    computed (optionally with stratified cross-fitting, controlled by the
    module-level ``CF_FOLDS``), and ``tau_model`` is regressed on the
    transformed outcome ``(y - mu(x)) / (w - e(x))``.

    Parameters
    ----------
    method : str
        Base learner: ``'rf'``, ``'lasso'`` or ``'nn'``.

    Raises
    ------
    NotImplementedError
        If ``method`` is none of the supported values.
    """

    def __init__(self, method):
        self.method = method

        if method == 'rf':
            self.tau_model = RandomForestRegressor(n_estimators=100, max_depth=100, random_state=0)

        elif method == 'lasso':
            self.tau_model = LassoCV(cv=10, tol=1, random_state=0)

        elif method == 'nn':
            self.tau_model = load_model('model_25')

        else:
            raise NotImplementedError('Base learner method not specified or typo')

    def compute_hats_rf(self, x_train, y_train, w_train, x_test):
        """Fit random-forest nuisance models on the train part; return ``(mux_hat, probs)`` for ``x_test``."""
        # 1: fit mu_x
        print('Fitting random forest for mu_x')
        temp_mux = RandomForestRegressor(n_estimators=100, max_depth=100, random_state=0)
        temp_mux.fit(x_train, y_train)
        # 2: fit ex
        temp_ex = RandomForestClassifier(n_estimators=100, max_depth=100, random_state=0)
        temp_ex.fit(x_train, w_train)
        # nuisance predictions on the evaluation part
        probs = temp_ex.predict_proba(x_test)[:, 1]
        mux_hat = temp_mux.predict(x_test)
        return mux_hat, probs

    def compute_hats_lasso(self, x_train, y_train, w_train, x_test):
        """Fit lasso / L1-logistic nuisance models on the train part; return ``(mux_hat, probs)`` for ``x_test``."""
        # poly transformation: fit on the train part, only apply to the test part
        transformer = PolynomialFeatures(degree=3, interaction_only=False, include_bias=False)
        x_poly_train = transformer.fit_transform(x_train)
        x_poly_test = transformer.transform(x_test)

        # 1: fit mu_x
        print('Fitting lasso for mu_x')
        temp_mux = LassoCV(cv=10, tol=1, random_state=0)
        temp_mux.fit(x_poly_train, y_train)
        # 2: fit ex
        temp_ex = LogisticRegressionCV(cv=KFold(10), penalty='l1', solver='saga', tol=1, random_state=0)
        temp_ex.fit(x_poly_train, w_train)
        # nuisance predictions on the evaluation part
        probs = temp_ex.predict_proba(x_poly_test)[:, 1]
        mux_hat = temp_mux.predict(x_poly_test)
        return mux_hat, probs

    def compute_hats_nn(self, x_train, y_train, w_train, x_test):
        """Train fresh nuisance networks on the train part; return ``(mux_hat, probs)`` for ``x_test``.

        ``self`` was missing from the original signature, so the bound call
        ``self.compute_hats_nn(...)`` in ``fit`` raised ``TypeError``.
        """
        # to tensor
        x_train = tf.convert_to_tensor(x_train)
        y_train = tf.convert_to_tensor(y_train)
        w_train = tf.convert_to_tensor(w_train)
        # 1: fit mu_x
        print('Training NN for mu_x')
        temp_mux = load_model('model_25')
        temp_mux.fit(x_train, y_train, batch_size=100, epochs=50, callbacks=None, verbose=0)
        # 2: fit ex
        temp_ex = load_model('model_ex')
        temp_ex.fit(x_train, w_train, batch_size=100, epochs=50, callbacks=None, verbose=0)
        # nuisance predictions; the propensity network output is passed through a sigmoid
        probs = tf.squeeze(keras.activations.sigmoid(temp_ex(x_test)))
        mux_hat = tf.squeeze(temp_mux(x_test))
        return mux_hat, probs

    def _nuisance_hats(self, compute_hats, x, y, w):
        """Return ``(mux_hat, probs)`` for every row of ``x`` using ``compute_hats``.

        With ``CF_FOLDS == 1`` the nuisance models are fitted and evaluated on
        the full sample. Otherwise a stratified K-fold split (stratified on
        ``w``) is used: for each fold the models are trained on the K-1
        training folds and evaluated on the held-out fold, so every row gets
        exactly one out-of-fold prediction.
        """
        if CF_FOLDS == 1:
            return compute_hats(x, y, w, x)

        n_samples = len(x)  # was hard-coded to 700
        mux_hat = np.zeros(n_samples)
        probs = np.zeros(n_samples)
        stratified = StratifiedKFold(n_splits=CF_FOLDS, shuffle=True, random_state=0)
        for train_index, test_index in stratified.split(x, w):
            print('Fitting Classifier')
            mux_hat[test_index], probs[test_index] = compute_hats(
                x[train_index], y[train_index], w[train_index], x[test_index])
        return mux_hat, probs

    def fit(self, x, y, w):
        """Estimate the nuisance functions and fit ``tau_model`` on the U-learner outcome.

        Parameters
        ----------
        x : array-like
            Feature matrix (indexable by integer arrays).
        y : array-like
            Observed outcomes, one per row of ``x``.
        w : array-like
            Binary treatment indicator, one per row of ``x``.
        """
        if self.method == 'rf':
            mux_hat, probs = self._nuisance_hats(self.compute_hats_rf, x, y, w)
            residuals = (y - mux_hat) / (w - probs)
            # fit tau
            print('Fitting random forest for tau_x')
            self.tau_model.fit(x, residuals)

        elif self.method == 'lasso':
            mux_hat, probs = self._nuisance_hats(self.compute_hats_lasso, x, y, w)
            residuals = (y - mux_hat) / (w - probs)
            # fit tau
            print('Fitting lasso for tau_x')
            transformer = PolynomialFeatures(degree=3, interaction_only=False, include_bias=False)
            x_poly = transformer.fit_transform(x)
            self.tau_model.fit(x_poly, residuals)

        elif self.method == 'nn':
            mux_hat, probs = self._nuisance_hats(self.compute_hats_nn, x, y, w)
            residuals = (y - mux_hat) / (w - probs)
            # fit tau
            print('Training NN for tau_x')
            self.tau_model.fit(x, residuals, batch_size=100, epochs=50, callbacks=None, verbose=0)

    def predict(self, x):
        """Predict treatment effects for ``x`` with the fitted ``tau_model``.

        Returns the predictions of ``tau_model``; for ``'nn'`` they are
        reshaped to a 1-D array of length ``len(x)``.

        Raises
        ------
        NotImplementedError
            If ``self.method`` is not a supported base learner.
        """
        if self.method == 'rf':
            predictions = self.tau_model.predict(x)

        elif self.method == 'lasso':
            # The class keeps no fitted transformer, so a fresh expansion with
            # the same settings as in fit() is built here.
            transformer = PolynomialFeatures(degree=3, interaction_only=False, include_bias=False)
            x_poly_test = transformer.fit_transform(x)
            predictions = self.tau_model.predict(x_poly_test)

        elif self.method == 'nn':
            # to tensor
            x = tf.convert_to_tensor(x)
            # predict
            predictions = np.reshape(self.tau_model(x), (len(x),))

        else:
            raise NotImplementedError('Base learner method not specified in predict')

        return predictions

In [None]:
# Fit the U-learner on the training split and display the error of its test-set
# predictions against tau_test.
# NOTE: this cell takes np.sqrt of the mean squared error (RMSE), whereas the
# other learners' cells display the plain MSE. The variable is called u_rf, but
# the learner is built with the 'nn' base method.
u_rf = ULearner('nn')
u_rf.fit(X_train, Y_train, W_train)
predictions = u_rf.predict(X_test)
np.sqrt((((predictions - tau_test) ** 2).mean()))
# Recorded results per base learner:
# rf: 30.921286420155806
# lasso: 7.6762472449663495
# nn: 51.10236987028663

# Two further recorded values (the run they belong to is not stated):
# 31.033895213307428
# 31.033895213307698


# Results are much better with cross-fitting.

In [None]:
# Number of cross-fitting folds read by ULearner.fit; with a value of 1 the
# nuisance models are fitted and evaluated on the same data.
# NOTE(review): this assignment sits below the cell that first calls
# ULearner.fit — confirm CF_FOLDS is also provided earlier (e.g. by
# default_parameters), otherwise a fresh Run All fails there.
CF_FOLDS = 2

Try stratified folds (`StratifiedKFold`) for cross-fitting

In [None]:
# TODO: THATS IT!!!!!
w_hats = np.zeros(700)

stratified = StratifiedKFold(n_splits=4, shuffle=True, random_state=0)
for train_index, test_index in stratified.split(X_train, W_train):
    index = np.zeros(700, dtype=bool)
    index[test_index] = 1
    print('Fitting Classifier')
    temp_model = RandomForestClassifier(n_estimators=100, max_depth=100, random_state=0)
    temp_model.fit(X_train[index], W_train[index])
    w_hats[~index] = temp_model.predict_proba(X_train[~index])[:, 1]

In [None]:
# Debug cell: print the fold mask, its complement and the tensor version of the
# mask for each of the 4 stratified splits.
stratified = StratifiedKFold(n_splits=4, shuffle=True, random_state=0)
for train_index, test_index in stratified.split(X_train, W_train):
    index = np.zeros(700, dtype=bool)
    index[test_index] = 1
    print(index)
    print(~index)
    print(tf.convert_to_tensor(index))

In [None]:
# Inspection cells: display w_hats (twice) and the last fold mask, then reset
# the mask to all False.
w_hats

In [None]:
w_hats

In [None]:
index

In [None]:
index = np.zeros(700, dtype=bool)

# END

In [None]:
# Scratch experiment: train a freshly loaded 'model_ex' network on an all-zero
# treatment vector and look at its sigmoid-transformed test predictions.
w = np.zeros((700,))

In [None]:
lol = load_model('model_ex')

In [None]:
# NOTE(review): `callback` is not defined in the visible part of the notebook —
# confirm it exists before this cell runs.
lol.fit(X_train, w, batch_size=100, epochs=400, callbacks=callback, validation_split=0.3, verbose=1)

In [None]:
keras.activations.sigmoid(lol.predict(X_test))

Try out some stuff

In [None]:
# Scratch cells: inspect the training data and compare the in-sample and
# test-set outputs of a 1000-tree random-forest propensity classifier.
W_train

In [None]:
X_train

In [None]:
classifier = RandomForestClassifier(n_estimators=1000, max_features=0.3, random_state=0)

In [None]:
classifier.fit(X_train, W_train)

In [None]:
classifier.predict_proba(X_test)

In [None]:
# in-sample class probabilities (same data the classifier was fitted on)
classifier.predict_proba(X_train)

In [None]:
# Cross-fitting will probably solve the problem.

In [None]:
classifier.predict(X_train)

In [None]:
W_train

In [None]:
np.ones(100, dtype=bool)

In [None]:
import torch

In [None]:
# Scratch cells: boolean-mask negation with torch, and hasattr checks on
# scikit-learn estimators.
pred_mask = torch.zeros(100, dtype=bool)
pred_mask

In [None]:
pred_mask

In [None]:
~pred_mask

In [None]:
rf = RandomForestRegressor()

In [None]:
hasattr(rf, 'predict')

In [None]:
rf_class = RandomForestClassifier()

In [None]:
hasattr(rf_class, 'train')

In [None]:
lasso = LassoCV()

In [None]:
hasattr(lasso, 'predict_proba')

MyModel

In [None]:
# Re-apply the global seed (same value as at the top of the notebook) before
# building the models below.
tf.keras.utils.set_random_seed(8953)

In [None]:
class MyModel(keras.Model):
    """Fully connected regression network built from six ``Dense`` layers.

    The stack is fixed: three hidden layers with ``n_units_1`` units, two
    hidden layers with ``n_units_2`` units, and one linear output unit. All
    layers share the same kernel regularizer; the hidden layers share
    ``activation``.

    Note: ``input_dimension``, ``n_layers_1`` and ``n_layers_2`` are stored on
    the instance but are not read when the layers are created, so changing
    them does not change the architecture.
    """

    def __init__(
            self,
            input_dimension=FEATURE_DIMENSION,
            n_layers_1=3,
            n_layers_2=2,
            n_units_1=N_UNITS_FIRST_PART,
            n_units_2=N_UNITS_SECOND_PART,
            activation=NON_LINEARITY,
            regularizer=regularizers.L2(1e-4)):
        super().__init__()

        # hyper-parameters kept on the instance
        self.input_dimension = input_dimension
        self.n_layers_1 = n_layers_1
        self.n_layers_2 = n_layers_2
        self.n_units_1 = n_units_1
        self.n_units_2 = n_units_2
        self.activation = activation
        self.regularizer = regularizer

        def hidden_layer(width, layer_name):
            # hidden block: shared activation and regularizer, caller picks the width
            return layers.Dense(units=width, activation=self.activation, name=layer_name,
                                kernel_regularizer=self.regularizer)

        # first part: three layers of n_units_1
        self.dense1 = hidden_layer(self.n_units_1, "layer1")
        self.dense2 = hidden_layer(self.n_units_1, "layer2")
        self.dense3 = hidden_layer(self.n_units_1, "layer3")
        # second part: two layers of n_units_2
        self.dense4 = hidden_layer(self.n_units_2, "layer4")
        self.dense5 = hidden_layer(self.n_units_2, "layer5")
        # linear output head
        self.dense6 = layers.Dense(units=1, activation='linear', name="layer6",
                                   kernel_regularizer=self.regularizer)

    def call(self, inputs):
        """Run ``inputs`` through ``dense1`` … ``dense6`` in order and return the result."""
        hidden_state = inputs
        for dense in (self.dense1, self.dense2, self.dense3,
                      self.dense4, self.dense5, self.dense6):
            hidden_state = dense(hidden_state)
        return hidden_state


In [None]:
# Build MyModel with its default hyper-parameters, compile it for MSE
# regression and run a short 4-epoch fit as a smoke test.
model = MyModel()

In [None]:
model.compile(optimizer=keras.optimizers.legacy.Adam(learning_rate=LEARNING_RATE),  # Optimizer
              # Loss function to minimize
              loss=keras.losses.MeanSquaredError(),
              # List of metrics to monitor
              metrics=[keras.metrics.MeanSquaredError()],
              # weighted metrics
              weighted_metrics=[]
              )

In [None]:
model.fit(X_train, Y_train, epochs=4)

TRY

In [None]:
# 25 input features; 3 layers with 200 units (relu activation), 2 layers with 100 units (relu activation),
# 1 output layer (linear activation)
model1 = keras.Sequential([
    keras.Input(shape=(25,)),
    layers.Dense(units=200, activation="relu", name="layer1"),
    layers.Dense(units=200, activation="relu", name="layer2"),
    layers.Dense(units=200, activation="relu", name="layer3"),
    layers.Dense(units=100, activation="relu", name="layer4"),
    layers.Dense(units=100, activation="relu", name="layer5"),
    layers.Dense(units=1, activation="linear", name="layer6"),

], name="model_25")

# compile the model for MSE regression
model1.compile(
    optimizer=keras.optimizers.legacy.Adam(learning_rate=LEARNING_RATE),  # Optimizer
    # Loss function to minimize
    loss=keras.losses.MeanSquaredError(),
    # List of metrics to monitor
    metrics=[keras.metrics.MeanSquaredError()],
    # weighted metrics
    weighted_metrics=[]
)


In [None]:
# Scratch cells: clone model1 with tf.keras.models.clone_model, then compile
# and fit the clone separately from the original.
model1

In [None]:
model1.summary()

In [None]:
model2 = tf.keras.models.clone_model(model1)

In [None]:
model2.summary()

In [None]:
model1.fit(X_train, Y_train, epochs=10)

In [None]:
# the clone is compiled here with the same settings used for model1
model2.compile(
    optimizer=keras.optimizers.legacy.Adam(learning_rate=LEARNING_RATE),  # Optimizer
    # Loss function to minimize
    loss=keras.losses.MeanSquaredError(),
    # List of metrics to monitor
    metrics=[keras.metrics.MeanSquaredError()],
    # weighted metrics
    weighted_metrics=[]
)

In [None]:
model2.fit(X_train, Y_train, epochs=10)

In [None]:
def clone_model_regression(model):
    """Clone ``model`` and compile the copy for mean-squared-error regression.

    Parameters
    ----------
    model
        Keras model passed to ``tf.keras.models.clone_model``.

    Returns
    -------
    The compiled clone (legacy Adam optimizer with ``LEARNING_RATE``, MSE loss,
    MSE metric, no weighted metrics).
    """
    replica = tf.keras.models.clone_model(model)

    compile_settings = dict(
        optimizer=keras.optimizers.legacy.Adam(learning_rate=LEARNING_RATE),
        loss=keras.losses.MeanSquaredError(),        # objective to minimise
        metrics=[keras.metrics.MeanSquaredError()],  # monitored during training
        weighted_metrics=[],
    )
    replica.compile(**compile_settings)
    return replica

In [None]:
# Smoke test of clone_model_regression: clone model1 and fit the copy for 10 epochs.
cloned_model = clone_model_regression(model1)

In [None]:
cloned_model.fit(X_train, Y_train, epochs=10)

In [None]:
def clone_model_classification(model):
    """Clone ``model`` and compile the copy for binary classification.

    The previous body was a copy of ``clone_model_regression`` and compiled the
    clone with a mean-squared-error loss. Elsewhere in this notebook the
    outputs of the propensity networks are passed through
    ``keras.activations.sigmoid`` after prediction, i.e. they are treated as
    logits, so the clone is compiled with binary cross-entropy on logits.

    Parameters
    ----------
    model
        Keras model passed to ``tf.keras.models.clone_model``; its final layer
        is expected to emit one unactivated (linear) value per sample.

    Returns
    -------
    The compiled clone (legacy Adam optimizer with ``LEARNING_RATE``).
    """
    cloned_model = tf.keras.models.clone_model(model)
    cloned_model.compile(
        optimizer=keras.optimizers.legacy.Adam(learning_rate=LEARNING_RATE),
        # Loss function to minimize: cross-entropy computed directly on logits
        loss=keras.losses.BinaryCrossentropy(from_logits=True),
        # List of metrics to monitor
        metrics=[keras.metrics.BinaryCrossentropy(from_logits=True)],
        # weighted metrics
        weighted_metrics=[]
    )
    return cloned_model

In [None]:
from HelperFuctions import *

In [None]:
# Uncompiled template network for 25 input features: 3 relu layers of 200 units,
# 2 relu layers of 100 units, 1 linear output unit.
model_nn = keras.Sequential([
    keras.Input(shape=(25,)),
    layers.Dense(units=200, activation="relu", name="layer1"),
    layers.Dense(units=200, activation="relu", name="layer2"),
    layers.Dense(units=200, activation="relu", name="layer3"),
    layers.Dense(units=100, activation="relu", name="layer4"),
    layers.Dense(units=100, activation="relu", name="layer5"),
    layers.Dense(units=1, activation="linear", name="layer6"),
], name="model_sequential")

In [None]:
# Same architecture with 26 inputs instead of 25.
model_nn_1 = keras.Sequential([
    keras.Input(shape=(26,)),
    layers.Dense(units=200, activation="relu", name="layer1"),
    layers.Dense(units=200, activation="relu", name="layer2"),
    layers.Dense(units=200, activation="relu", name="layer3"),
    layers.Dense(units=100, activation="relu", name="layer4"),
    layers.Dense(units=100, activation="relu", name="layer5"),
    layers.Dense(units=1, activation="linear", name="layer6"),
], name="model_sequential_1")

In [None]:
# compiled copies of the 25-input template for the outcome and propensity models
model_mu = clone_model_regression(model_nn)

In [None]:
model_ex = clone_model_classification(model_nn)

In [None]:
model_mu.fit(X_train, Y_train, epochs=10)

In [None]:
from NeuralNetworks import *

In [None]:
from HelperFuctions import *
from NeuralNetworks import *

In [None]:
# Scratch cells using clone_nn_regression and nn_sequential, which are not
# defined in the visible notebook — presumably provided by the star-imports of
# HelperFuctions / NeuralNetworks; TODO confirm.
# NOTE: model1 and model2 are rebound here and no longer refer to the
# Sequential models defined earlier.
model1 = clone_nn_regression(nn_sequential)

In [None]:
# fit on the first 20 feature columns only
model1.fit(X_train[:, 0:20], Y_train, epochs=10)

In [None]:
model2 = clone_nn_regression(nn_sequential)

In [None]:
model2.fit(X_train[:, 0:20], Y_train, epochs=10)

In [None]:
model1 = clone_nn_regression(nn_sequential)