In [81]:
import copy

# import packages
import numpy as np
from numpy import random

import pandas as pd

from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
# from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegressionCV
from sklearn.linear_model import LassoCV
from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import StratifiedKFold

import time

from sklearn.model_selection import KFold

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.saving import load_model

# from abc import ABC, abstractmethod

from DefaultParameters import *
from tensorflow.keras import regularizers


In [None]:
# make GPU available for keras
# https://medium.com/mlearning-ai/install-tensorflow-on-mac-m1-m2-with-gpu-support-c404c6cfb580

In [2]:
# set float64 as standard
tf.keras.backend.set_floatx('float64')

In [3]:
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))
# Why 0 GPUs

Num GPUs Available:  0


In [4]:
# set global seed
tf.keras.utils.set_random_seed(8953)

In [5]:
callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10, start_from_epoch=50)

# Data preprocessing

In [6]:
data = pd.read_csv('/Users/arberimbibaj/dataset_example_indicatorCATE.csv', header=None, index_col=[0])
data = data.to_numpy()

In [7]:
N = len(data)
d = len(data[0, :]) - 3

In [8]:
# train test split
random.shuffle(data)
training, test = data[:700, :], data[700:, :]

In [9]:
# slice dataset by treatment status
training_control = training[training[:, 26] == 0]
training_treatment = training[training[:, 26] == 1]

# slice test set by treatment status
test_control = test[test[:, 26] == 0]
test_treatment = test[test[:, 26] == 1]

# Y_train by treatment status
Y_train_control = training_control[:, 0]
Y_train_treatment = training_treatment[:, 0]

# Y_test by treatment status
Y_test_control = test_control[:, 0]
Y_test_treatment = test_treatment[:, 0]

# X_train by treatment status
X_train_control = training_control[:, 1:26]
X_train_treatment = training_treatment[:, 1:26]

# X_test by treatment status
X_test_control = test_control[:, 1:26]
X_test_treatment = test_treatment[:, 1:26]

# X and Y test
X_test = test[:, 1:26]
Y_test = test[:, 0]

# X_train and Y_train (no split by treatment status)
X_train = training[:, 1:26]
Y_train = training[:, 0]

# W_train and W_test
W_train = training[:, 26]
W_test = test[:, 26]

# tau_test
tau_test = test[:, 27]
tau_test_control = test_control[:, 27]
tau_test_treatment = test_treatment[:, 27]

In [10]:
# set training and test features for the S-Learner (it views W as no different from other X's)
X_W_train = training[:, 1:27]
X_W_test = test[:, 1:27]
X_test_0 = np.concatenate((test[:, 1:26], np.zeros((300, 1))), axis=1)
X_test_1 = np.concatenate((test[:, 1:26], np.ones((300, 1))), axis=1)

In [11]:
X_train

array([[-0.417996, -0.258928,  0.439047, ...,  0.18676 , -0.512326,
         0.334241],
       [-0.755952, -0.264981,  0.466614, ..., -0.259811, -0.531267,
        -0.302281],
       [-0.169104,  0.508556, -0.68986 , ..., -0.861022, -0.363696,
        -0.031585],
       ...,
       [ 0.711108,  0.216507, -0.645339, ...,  0.037564,  0.564271,
         0.761275],
       [-0.16064 ,  0.623831, -0.381564, ...,  1.014533, -0.082782,
         0.045286],
       [ 0.443016, -0.63676 ,  0.513939, ..., -0.686854,  1.083426,
        -1.154605]])

# Random Forest

T-Learner

In [None]:
# T-Learner (example with Random Forest)

# mu_0
t_learner_mu0 = RandomForestRegressor(max_depth=100, random_state=0)
t_learner_mu0.fit(X_train_control, Y_train_control)
t_mu_0_hat = t_learner_mu0.predict(X_test)

# mu_1
t_learner_mu1 = RandomForestRegressor(max_depth=100, random_state=0)
t_learner_mu1.fit(X_train_treatment, Y_train_treatment)
t_mu_1_hat = t_learner_mu1.predict(X_test)
# Prediction = mu_1 - mu_0
t_tau_hat = t_mu_1_hat - t_mu_0_hat
t_tau_hat

In [None]:
# mean squared error
((t_tau_hat - tau_test) ** 2).mean()

S-Learner

In [None]:
### S-Learner

In [None]:
X_test_0

In [None]:
X_test_1

In [None]:
Y_train

In [None]:
# S-learner (example with Random Forest)

# mu_x
s_learner = RandomForestRegressor(max_depth=100, random_state=0)
s_learner.fit(X_W_train, Y_train)

# mu_0_hat
s_mu_0_hat = s_learner.predict(X_test_0)

# mu_1_hat
s_mu_1_hat = s_learner.predict(X_test_1)

# tau_hat
s_tau_hat = s_mu_1_hat - s_mu_0_hat
s_tau_hat


In [None]:
# mean squared error
((s_tau_hat - tau_test) ** 2).mean()

X-Learner

In [None]:
### X-Learner

# mu_0 (same procedure as for t-learner, maybe can speed up process)
x_learner_mu0 = RandomForestRegressor(max_depth=100, random_state=0)
x_learner_mu0.fit(X_train_control, Y_train_control)

# mu_1 (same procedure as for t-learner, maybe can speed up process)
x_learner_mu1 = RandomForestRegressor(max_depth=100, random_state=0)
x_learner_mu1.fit(X_train_treatment, Y_train_treatment)

# compute imputed treatment effect D_0 and D_1
# d_0
imputed_0 = x_learner_mu1.predict(X_train_control) - Y_train_control

# d_1
imputed_1 = Y_train_treatment - x_learner_mu0.predict(X_train_treatment)

# regress imputed on X
# tau_hat_0
x_tau_0_hat = RandomForestRegressor(max_depth=100, random_state=0)
x_tau_0_hat.fit(X_train_control, imputed_0)

# tau_hat_1
x_tau_1_hat = RandomForestRegressor(max_depth=100, random_state=0)
x_tau_1_hat.fit(X_train_treatment, imputed_1)

# estimate e_x to use as g_x
g_x_hat = RandomForestClassifier(max_depth=100, random_state=0)
g_x_hat.fit(X_train, W_train)
probabilities = g_x_hat.predict_proba(X_test)
probas_1 = probabilities[:, 1]
probas_0 = probabilities[:, 0]

# final estimator of tau
x_tau_hat = probas_1 * x_tau_0_hat.predict(X_test) + probas_0 * x_tau_1_hat.predict(X_test)
x_tau_hat


In [None]:
### See g_x hat
g_x_hat.predict_proba(X_test)

In [None]:
# mean squared error (much lower here!)
((x_tau_hat - tau_test) ** 2).mean()

In [29]:
ind = np.random.choice(len(X_train), int(len(X_train) / 2), replace=False)

In [30]:
lol = np.array((1, 2, 3, 4, 5, 6, 7, 8, 9, 10))
lol

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [35]:
train_ind = np.random.choice(10, int(10 / 2), replace=False)
train_ind

array([9, 0, 3, 4, 6])

In [33]:
ind = np.zeros(len(X_train), dtype=bool)

array([False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False,

In [37]:
ind[train_ind] = 1

In [38]:
ind

array([ True, False, False,  True,  True, False,  True, False, False,
        True, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False,

In [47]:
X_train[ind]

array([[-0.417996, -0.258928,  0.439047,  0.114545,  0.202294,  0.232629,
        -0.583582, -0.430972,  0.429963, -0.382148,  0.27617 ,  1.023249,
         0.386307,  0.635114, -0.90664 , -1.018844,  0.542753, -0.759871,
         0.600371, -0.063606,  0.325453,  0.172237,  0.18676 , -0.512326,
         0.334241],
       [ 0.40638 , -0.095792, -0.058921,  0.07669 ,  0.997712, -0.455101,
         0.485145,  0.179103, -0.196513, -0.026966, -0.797527,  0.993493,
        -0.46944 , -0.262035, -1.769498, -0.164981, -0.351169,  0.332267,
         0.339417,  0.437438, -0.058943, -0.305766, -0.738398, -0.531641,
         0.466363],
       [-0.232977,  0.555448, -0.406086,  0.522266,  0.459783,  0.232973,
        -0.043957, -0.447066, -0.490697,  0.567011, -0.004792,  1.000241,
         0.182318, -0.14904 , -0.944393,  0.254892,  0.225088,  0.525903,
        -1.276382, -0.506376,  0.482294,  0.422674,  0.318297, -0.981335,
         0.915256],
       [ 0.468949, -0.445755,  0.392202, -0.835586, 

In [41]:
~ind

array([False,  True,  True, False, False,  True, False,  True,  True,
       False,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,

In [14]:
lol[ind]

array([7, 4, 5, 1, 6])

In [19]:
lol[~ind]

array([ 4,  7,  6, 10,  5])

In [None]:
ind

In [None]:
~ind

In [None]:
X_train

In [None]:
X_train[ind]

R-Learner

In [None]:
from sklearn.model_selection import train_test_split

In [52]:
### R-Learner
# split for cross-fitting
index = np.zeros(len(X_train), dtype=bool)
train_ind = np.random.choice(len(X_train), int(len(X_train) / 2), replace=False)
index[train_ind] = 1

# estimate e_x
r_learner_e_x_1 = RandomForestClassifier(max_depth=100, random_state=0)
r_learner_e_x_2 = RandomForestClassifier(max_depth=100, random_state=0)
r_learner_e_x_1.fit(X_train[index], W_train[index])
r_learner_e_x_2.fit(X_train[~index], W_train[~index])

# get e_x predictions
r_probas_1 = np.zeros(len(X_train), )
r_probas_1[index] = r_learner_e_x_2.predict_proba(X_train[index])[:, 1]
r_probas_1[~index] = r_learner_e_x_1.predict_proba(X_train[~index])[:, 1]

#r_probas_0 = r_probas[:, 0]  # probabilities of W=0
#r_probas_1 = r_probas[:, 1]  # probabilities of W=1

# estimate mu_x
r_learner_mu_x = RandomForestRegressor(max_depth=100, random_state=0)
r_learner_mu_x.fit(X_train, Y_train)

# compute r-pseudo-outcome and weights
r_learner_pseudo_outcomes = (Y_train - r_learner_mu_x.predict(X_train)) / (W_train - r_probas_1)
r_learner_weights = (W_train - r_probas_1) ** 2

# estimate tau (regress pseudo-outcomes on X, weight by (W-e(x))^2)
r_learner_tau = RandomForestRegressor(max_depth=100, random_state=0)
r_learner_tau.fit(X_train, r_learner_pseudo_outcomes, sample_weight=r_learner_weights)

# predict tau
r_tau_hats = r_learner_tau.predict(X_test)
r_tau_hats

array([ 1.72631505e+00,  3.64147650e+00,  2.33421522e-01,  2.00015216e-01,
        3.57105421e+00, -1.98894219e+00,  4.66861967e+00, -1.18776198e+00,
       -6.72853315e+00,  1.73509282e+00,  1.02315829e+00,  2.66358520e+00,
        9.21810805e-01, -2.96307293e-01,  8.25681868e-01,  1.12836343e+00,
        2.91106564e+00,  2.11549719e+00, -5.07782777e-01, -7.78415001e-01,
        8.94690617e-01,  1.15673857e+00,  2.40012237e+00,  2.19039593e+00,
        5.58543465e+00,  2.04966289e-01,  1.20332015e+00,  5.60686242e-01,
       -2.70382222e-01,  6.08551160e+00, -8.98444513e-01, -6.06574142e-01,
        2.67656515e-01,  1.18291908e+00,  4.71558254e+00,  1.33624346e+00,
        3.39665587e+00,  1.13418876e+00,  4.24833897e-01,  5.11750884e-01,
       -2.74338417e-01, -5.78563498e-01,  1.79635905e+00,  3.79477200e+00,
        2.51367836e+00,  4.55615915e+00,  9.10703451e-01, -1.68875723e+00,
        2.12238465e+00,  9.43687899e-01,  2.44106823e+00,  3.60689674e-01,
        3.15639296e+00,  

In [51]:
r_probas[index]

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0.

In [None]:
r_learner_e_x.predict_proba(X_test)

In [53]:
((r_tau_hats - tau_test) ** 2).mean()

13.714901338146138

DR-Learner

In [54]:
### DR-Learner

# TODO: APPLY CROSS-FITTING?
# estimate e_x
dr_learner_e_x = RandomForestClassifier(n_estimators=100, max_depth=100, random_state=0)
dr_learner_e_x.fit(X_train, W_train)

dr_probas = dr_learner_e_x.predict_proba(X_train)
dr_probas_0 = dr_probas[:, 0]  # probabilities of W=0
dr_probas_1 = dr_probas[:, 1]  # probabilities of W=1

# estimate mu_0
dr_learner_mu_0 = RandomForestRegressor(n_estimators=100, max_depth=100, random_state=0)
dr_learner_mu_0.fit(X_train_control, Y_train_control)

# estimate mu_1
dr_learner_mu_1 = RandomForestRegressor(n_estimators=100, max_depth=100, random_state=0)
dr_learner_mu_1.fit(X_train_treatment, Y_train_treatment)

# DR-pseudo-outcomes
mu_w = W_train * dr_learner_mu_1.predict(X_train) + (1 - W_train) * dr_learner_mu_0.predict(
    X_train)  # this is mu_w for each observation, i.e mu_1 for units in the treatment groups, and mu_0 for units in the control group
dr_pseudo_outcomes = (W_train - dr_probas_1) / (dr_probas_1 * dr_probas_0) * (Y_train - mu_w) + dr_learner_mu_1.predict(
    X_train) - dr_learner_mu_0.predict(X_train)

# estimate tau (regress pseudo-outcomes on X) # TODO: USE "Test Set" for this estimation
dr_learner_tau_hat = RandomForestRegressor(n_estimators=100, max_depth=100, random_state=0)
dr_learner_tau_hat.fit(X_train, dr_pseudo_outcomes)

# predict tau
dr_tau_hat = dr_learner_tau_hat.predict(X_test)
dr_tau_hat

array([ 0.8469803 ,  8.06422269,  0.74636315,  3.46938656,  9.09528554,
       -5.41001852, 10.61120148, -1.23555462, -3.1667259 ,  7.34959732,
        0.37375337,  6.51004978,  3.90636817,  1.71642186, -0.96798373,
        7.79938902,  2.94713074,  8.28080415, -3.57924635, -1.4321085 ,
        1.94462515,  2.0366692 ,  9.11873476,  7.36213599,  8.75639886,
        7.51676077, -0.50598641, -2.63898769,  0.12765948,  8.79954641,
        0.59651539,  0.33033395,  0.48555466,  1.57145183, 11.22184961,
        2.4211609 ,  7.90121481,  1.88173152,  1.56996062,  0.61934771,
        1.21434896,  0.132631  ,  3.83851582,  5.25933493,  8.10620533,
       10.94689164,  0.56418235, -4.02550191,  0.95639876,  1.19122792,
        7.08042072,  1.22571084,  8.62864371,  7.22893055,  7.80239003,
        8.80002186,  1.75150115, -2.06467558,  7.62897479,  1.65769294,
        8.39736954,  3.99587509, -2.05487766, -4.95670679,  8.01155259,
        7.58633606,  5.82328155, -2.86389204,  9.23833371,  7.13

In [55]:
((dr_tau_hat - tau_test) ** 2).mean()

5.371503418814479

RA-Learner

In [56]:
### RA-Learner

# mu_0 (same procedure as for t-learner, maybe can speed up process)
ra_learner_mu0 = RandomForestRegressor(max_depth=100, random_state=0)
ra_learner_mu0.fit(X_train_control, Y_train_control)

# mu_1 (same procedure as for t-learner, maybe can speed up process)
ra_learner_mu1 = RandomForestRegressor(max_depth=100, random_state=0)
ra_learner_mu1.fit(X_train_treatment, Y_train_treatment)

# e_x
ra_learner_e_x = RandomForestClassifier(max_depth=100, random_state=0)
ra_learner_e_x.fit(X_train, W_train)

# ra-pseudo-outcome
ra_pseudo_outcome = W_train * (Y_train - ra_learner_mu0.predict(X_train)) + (1 - W_train) * (
        ra_learner_mu1.predict(X_train) - Y_train)

# tau_hat
ra_tau_hat_learner = RandomForestRegressor(max_depth=100, random_state=0)
ra_tau_hat_learner.fit(X_train, ra_pseudo_outcome)
ra_tau_hat = ra_tau_hat_learner.predict(X_test)
ra_tau_hat

array([ 0.91490677,  8.26301955,  0.97106317,  3.5528225 ,  8.67564974,
       -4.68236172, 10.6688935 , -1.17413595, -2.63692004,  7.44641532,
        0.2918939 ,  6.43137586,  3.73575211,  2.59984884, -0.86203239,
        7.66755618,  2.51321221,  8.82989392, -4.41247407, -0.60076672,
        2.64336168,  1.99061936,  8.96845941,  7.7862815 ,  8.21040191,
        7.55267284, -1.14354599, -1.6653916 ,  1.25465375,  8.52964283,
       -0.46274891, -0.08576136,  0.63671972,  1.27520691, 11.51410645,
        2.00179904,  8.06064817,  0.79332432,  1.88817362,  0.45527881,
        1.06693595, -0.11435547,  2.77432839,  4.61230113,  8.27338701,
       10.53841517,  0.61909964, -4.58885619, -0.24858074,  1.83739071,
        7.30205278,  0.52086065,  9.27698339,  7.95976077,  7.9325154 ,
        8.00236788,  2.13039965, -2.13439482,  7.9438079 ,  1.6070221 ,
        7.90002368,  2.89252177, -2.14836991, -4.90215694,  8.36905704,
        7.70217215,  5.54715161, -1.83924182,  9.59718485,  6.96

In [57]:
# mean squared error
((ra_tau_hat - tau_test) ** 2).mean()

5.355494017645751

PW-Learner

In [66]:
# ACHTUNG: TRIED TO INCLUDE CROSS-FITTING!
### PW-Learner
# mu_0 (same procedure as for t-learner, maybe can speed up process)
pw_learner_mu0_1 = RandomForestRegressor(max_depth=100, random_state=0)
pw_learner_mu0_2 = RandomForestRegressor(max_depth=100, random_state=0)
pw_learner_mu0_1.fit(X_train_control, Y_train_control)
pw_learner_mu0_2.fit(X_train_control, Y_train_control)

# mu_1 (same procedure as for t-learner, maybe can speed up process)
pw_learner_mu1_1 = RandomForestRegressor(max_depth=100, random_state=0)
pw_learner_mu1_2 = RandomForestRegressor(max_depth=100, random_state=0)
pw_learner_mu1_1.fit(X_train_treatment, Y_train_treatment)
pw_learner_mu1_2.fit(X_train_treatment, Y_train_treatment)

# e_x
# split for cross-fitting
index = np.zeros(len(X_train), dtype=bool)
train_ind = np.random.choice(len(X_train), int(len(X_train) / 2), replace=False)
index[train_ind] = 1

pw_learner_e_x_1 = RandomForestClassifier(max_depth=100, random_state=0)
pw_learner_e_x_2 = RandomForestClassifier(max_depth=100, random_state=0)
pw_learner_e_x_1.fit(X_train[index], W_train[index])
pw_learner_e_x_2.fit(X_train[~index], W_train[~index])

pw_probas_1 = np.zeros(len(X_train), )
pw_probas_1[index] = pw_learner_e_x_2.predict_proba(X_train[index])[:, 1]
pw_probas_1[~index] = pw_learner_e_x_1.predict_proba(X_train[~index])[:, 1]

# ra-pseudo-outcome
pw_pseudo_outcome = (W_train / pw_probas_1 - (1 - W_train) / (1 - pw_probas_1)) * Y_train

# tau_hat
pw_tau_hat_learner = RandomForestRegressor(max_depth=100, random_state=0)
pw_tau_hat_learner.fit(X_train, pw_pseudo_outcome)
pw_tau_hat = pw_tau_hat_learner.predict(X_test)
pw_tau_hat

array([ 1.69335751e+00,  5.51435777e+00,  6.49317088e-01,  1.38362166e+00,
        6.18713897e+00, -7.55770863e+00,  2.65565760e+01, -2.88704592e+00,
       -1.46108193e+01,  8.69275972e+00, -7.73108024e+00,  5.79727300e+00,
       -3.34877109e+00,  9.08688721e+00, -6.88004021e+00,  1.33255598e+01,
        5.65555888e+00,  7.26496292e+00,  5.59304347e+00, -5.16473555e+00,
        4.53800163e+00, -2.70582499e-01,  1.54480767e+01,  8.60462234e+00,
        7.97849246e+00,  2.72901141e+00, -3.33299618e+00, -1.05528872e+01,
        7.24175812e-01,  6.29505882e+00, -2.00675881e+00,  2.92123016e+00,
       -3.33797351e+00,  5.04550345e+00, -1.81037455e+00,  4.78954307e-01,
        1.46588815e+01,  3.02715507e+00,  2.93444155e+00, -1.53125973e+00,
       -6.05593583e+00, -5.40691703e+00,  2.93908936e+00,  9.35490041e+00,
        4.68076226e+00,  7.51259325e+01, -4.81573023e+00, -2.51313844e+01,
        5.96131991e-01,  1.81198608e+00,  1.07100715e+01, -1.31322532e+01,
        1.06334147e+01,  

In [67]:
# mean squared error
((pw_tau_hat - tau_test) ** 2).mean()

94.43780635665614

U-Learner

In [60]:
### U-Learner
# estimate e_x
u_learner_e_x = RandomForestClassifier(max_depth=100, random_state=0)
u_learner_e_x.fit(X_train, W_train)

# estimate mu_x
u_learner_mu_x = RandomForestRegressor(max_depth=100, random_state=0)
u_learner_mu_x.fit(X_train, Y_train)

# compute residuals
u_learner_residuals = (Y_train - u_learner_mu_x.predict(X_train)) / (
        W_train - u_learner_e_x.predict_proba(X_train)[:, 1])

# tau_hat - regress residuals on X
u_tau_hat_learner = RandomForestRegressor(max_depth=100, random_state=0)
u_tau_hat_learner.fit(X_train, u_learner_residuals)

u_tau_hats = u_tau_hat_learner.predict(X_test)
u_tau_hats


array([ 2.85250904e+00,  1.00547688e+01, -4.92574582e-01,  1.91452713e+00,
        7.92620400e+00, -1.16463139e+01,  1.31921917e+01, -2.11709957e+00,
       -7.16250459e+00,  6.00462123e+00,  2.72981362e+00,  7.66322441e+00,
        2.45976887e+00,  1.18104536e+00,  9.14107486e-01,  6.16511795e+00,
        1.08497420e+01,  1.01644602e+01, -2.16899051e+00, -1.81475606e+00,
        1.34111458e+00,  3.41936913e+00,  1.04168301e+01,  7.57884238e+00,
        9.50578740e+00,  2.09883177e+00,  1.69555973e+00,  2.79744550e+00,
        4.16811513e-03,  1.50075375e+01, -4.28915305e+00, -8.77925502e-01,
       -2.78016855e+00,  3.32467561e+00,  1.06499625e+01,  3.00485772e+00,
        8.31998895e+00,  4.89600153e+00,  6.81894459e-02,  2.19437557e-02,
       -1.49448971e+00, -1.36872242e+00,  4.83187150e+00,  9.35912173e+00,
        6.47748569e+00,  1.37123644e+01,  4.08149426e+00, -2.59808756e+00,
        3.33981472e+00,  1.33195860e+00,  7.00985818e+00,  2.88766820e+00,
        1.12951611e+01,  

In [61]:
# mean squared error
((u_tau_hats - tau_test) ** 2).mean()

30.921286420155806

In [62]:
pd.DataFrame(u_learner_e_x.predict_proba(X_train)[:, 1]).describe()

Unnamed: 0,0
count,700.0
mean,0.500714
std,0.327341
min,0.01
25%,0.18
50%,0.635
75%,0.83
max,0.96


In [63]:
u_learner_residuals

array([-7.53570215e+00,  4.98723100e+00,  1.27991161e+00, -9.53500778e+00,
        9.61888400e+00,  1.06705232e+01,  9.07106653e+00,  5.32108474e+00,
        1.25167032e+01,  2.77165694e+00, -7.52731027e+00,  3.07867939e+00,
       -6.34744625e-01, -1.10509041e+01,  6.39476036e+00, -5.51503950e+00,
        8.12170286e-01, -8.40279786e+00,  1.28387357e+01,  2.72450608e+00,
        2.12960700e+00,  6.32594490e+00,  2.78297020e+01,  1.20707445e+01,
        1.25844545e+01,  1.54171109e+01, -8.19509583e+00,  1.49761160e+01,
       -2.62166063e+00, -1.10321900e+00,  1.81592264e+01,  4.08048964e+00,
       -1.03011748e+01,  1.84751838e+00,  1.15022312e+00,  4.26358376e+00,
       -9.46753062e-01,  3.23614317e+00,  1.25883081e+01,  1.92386464e+01,
       -8.86369172e+00,  4.26094588e-01, -1.44925474e-01, -5.89680377e+00,
       -1.57057073e+00,  8.44735270e+00,  2.92545955e+00,  5.66846345e+00,
       -4.83778690e+00,  5.80575091e+00,  1.35073998e+01,  6.02018300e+00,
        1.32363581e+01,  

# Lasso (or L1-loss for logistic regression)

# Preprocess for lasso

In [None]:
poly_train = PolynomialFeatures(degree=3, interaction_only=False, include_bias=False)
X_poly_train = poly_train.fit_transform(X_train)
poly_test = PolynomialFeatures(degree=3, interaction_only=False, include_bias=False)
X_poly_test = poly_test.fit_transform(X_test)
X_poly_train

In [None]:
# compute polynomial features for treatment and control groups in training set
poly_train_treatment = PolynomialFeatures(degree=3, interaction_only=False, include_bias=False)
X_poly_train_treatment = poly_train_treatment.fit_transform(X_train_treatment)
poly_train_control = PolynomialFeatures(degree=3, interaction_only=False, include_bias=False)
X_poly_train_control = poly_train_treatment.fit_transform(X_train_control)
poly_test = PolynomialFeatures(degree=3, interaction_only=False, include_bias=False)
X_poly_test = poly_test.fit_transform(X_test)

In [None]:
# compute polynomial features for treatment and control groups in training set
xw_poly_train = PolynomialFeatures(degree=4, interaction_only=False, include_bias=False)
X_W_poly_train = poly_train_treatment.fit_transform(X_W_train)

xw_poly_test_0 = PolynomialFeatures(degree=4, interaction_only=False, include_bias=False)
X_poly_test_0 = xw_poly_test_0.fit_transform(X_test_0)

xw_poly_test_1 = PolynomialFeatures(degree=4, interaction_only=False, include_bias=False)
X_poly_test_1 = xw_poly_test_0.fit_transform(X_test_1)

T-learner (Lasso)

In [None]:
# T-Learner (example with Lasso)

# mu_0
t_learner_mu0 = LassoCV(cv=10, tol=1e-2, random_state=0)
t_learner_mu0.fit(X_poly_train_control, Y_train_control)
t_mu_0_hat = t_learner_mu0.predict(X_poly_test)

# mu_1
t_learner_mu1 = LassoCV(cv=10, tol=1e-2, random_state=0)
t_learner_mu1.fit(X_poly_train_treatment, Y_train_treatment)
t_mu_1_hat = t_learner_mu1.predict(X_poly_test)

# Prediction = mu_1 - mu_0
t_tau_hat = t_mu_1_hat - t_mu_0_hat
t_tau_hat

In [None]:
((t_tau_hat - tau_test) ** 2).mean()

S-learner (Lasso)

In [None]:
# S-learner (example with Random Forest)
tic = time.perf_counter()
# mu_x
s_learner = LassoCV(cv=10, tol=1e-2, random_state=0)
s_learner.fit(X_W_poly_train, Y_train)

# mu_0_hat
s_mu_0_hat = s_learner.predict(X_poly_test_0)

# mu_1_hat
s_mu_1_hat = s_learner.predict(X_poly_test_1)

# tau_hat
s_tau_hat = s_mu_1_hat - s_mu_0_hat
toc = time.perf_counter()
print(f'Time for computation: {toc - tic}')

In [None]:
((s_tau_hat - tau_test) ** 2).mean()

X-learner (Lasso (or l1-penalty))

In [None]:
### X-Learner

tic = time.perf_counter()

# mu_0 (same procedure as for t-learner, maybe can speed up process)
x_learner_mu0 = LassoCV(cv=10, tol=1, random_state=0)
x_learner_mu0.fit(X_poly_train_control, Y_train_control)

# mu_1 (same procedure as for t-learner, maybe can speed up process)
x_learner_mu1 = LassoCV(cv=10, tol=1, random_state=0)
x_learner_mu1.fit(X_poly_train_treatment, Y_train_treatment)

# compute imputed treatment effect D_0 and D_1
# d_0
imputed_0 = x_learner_mu1.predict(X_poly_train_control) - Y_train_control

# d_1
imputed_1 = Y_train_treatment - x_learner_mu0.predict(X_poly_train_treatment)

# regress imputed on X
# tau_hat_0
x_tau_0_hat = LassoCV(cv=10, tol=1, random_state=0)
x_tau_0_hat.fit(X_poly_train_control, imputed_0)

# tau_hat_1
x_tau_1_hat = LassoCV(cv=10, tol=1, random_state=0)
x_tau_1_hat.fit(X_poly_train_treatment, imputed_1)

# estimate e_x to use as g_x
g_x_hat = LogisticRegressionCV(cv=KFold(10), penalty='l1', solver='saga', tol=1, random_state=0)
g_x_hat.fit(X_poly_train, W_train)
probabilities = g_x_hat.predict_proba(X_poly_test)
probas_1 = probabilities[:, 1]
probas_0 = probabilities[:, 0]

# final estimator of tau
x_tau_hat = probas_1 * x_tau_0_hat.predict(X_poly_test) + probas_0 * x_tau_1_hat.predict(X_poly_test)

toc = time.perf_counter()

print(f'Time for computation: {toc - tic}')  # 127 seconds

In [None]:
((x_tau_hat - tau_test) ** 2).mean()

R-learner (Lasso (or l1-penalty))

In [None]:
### R-Learner

tic = time.perf_counter()

# estimate e_x
r_learner_e_x = LogisticRegressionCV(cv=KFold(10), penalty='l1', solver='saga', tol=1, random_state=0)
r_learner_e_x.fit(X_poly_train, W_train)

# get e_x predictions
r_probas = r_learner_e_x.predict_proba(X_poly_train)
r_probas_0 = r_probas[:, 0]  # probabilities of W=0
r_probas_1 = r_probas[:, 1]  # probabilities of W=1

# estimate mu_x
r_learner_mu_x = LassoCV(cv=10, tol=1, random_state=0)
r_learner_mu_x.fit(X_poly_train, Y_train)

# compute r-pseudo-outcome and weights
r_learner_pseudo_outcomes = (Y_train - r_learner_mu_x.predict(X_poly_train)) / (W_train - r_probas_1)
r_learner_weights = (W_train - r_probas_1) ** 2

# estimate tau (regress pseudo-outcomes on X, weight by (W-e(x))^2)
r_learner_tau = LassoCV(cv=10, tol=1, random_state=0)
r_learner_tau.fit(X_poly_train, r_learner_pseudo_outcomes, sample_weight=r_learner_weights)

# predict tau
r_tau_hats = r_learner_tau.predict(X_poly_test)
r_tau_hats

toc = time.perf_counter()

print(f'Time for computation: {toc - tic} seconds')  # 98 seconds

In [None]:
((r_tau_hats - tau_test) ** 2).mean()

DR-learner (Lasso (l1-penalty))

In [None]:
### DR-Learner

tic = time.perf_counter()

# TODO: APPLY CROSS-FITTING?
# estimate e_x
dr_learner_e_x = LogisticRegressionCV(cv=KFold(10), penalty='l1', solver='saga', tol=1, random_state=0)
dr_learner_e_x.fit(X_poly_train, W_train)

dr_probas = dr_learner_e_x.predict_proba(X_poly_train)
dr_probas_0 = dr_probas[:, 0]  # probabilities of W=0
dr_probas_1 = dr_probas[:, 1]  # probabilities of W=1

# estimate mu_0
dr_learner_mu_0 = LassoCV(cv=10, tol=1, random_state=0)
dr_learner_mu_0.fit(X_poly_train_control, Y_train_control)

# estimate mu_1
dr_learner_mu_1 = LassoCV(cv=10, tol=1, random_state=0)
dr_learner_mu_1.fit(X_poly_train_treatment, Y_train_treatment)

# DR-pseudo-outcomes
mu_w = W_train * dr_learner_mu_1.predict(X_poly_train) + (1 - W_train) * dr_learner_mu_0.predict(
    X_poly_train)  # this is mu_w for each observation, i.e mu_1 for units in the treatment groups, and mu_0 for units in the control group
dr_pseudo_outcomes = (W_train - dr_probas_1) / (dr_probas_1 * dr_probas_0) * (Y_train - mu_w) + dr_learner_mu_1.predict(
    X_poly_train) - dr_learner_mu_0.predict(X_poly_train)

# estimate tau (regress pseudo-outcomes on X) # TODO: USE "Test Set" for this estimation
dr_learner_tau_hat = LassoCV(cv=10, tol=1, random_state=0)
dr_learner_tau_hat.fit(X_poly_train, dr_pseudo_outcomes)

# predict tau
dr_tau_hat = dr_learner_tau_hat.predict(X_poly_test)

toc = time.perf_counter()

print(f'Time needed for computation: {toc - tic} seconds')  # 104 seconds

In [None]:
((dr_tau_hat - tau_test) ** 2).mean()

RA-learner (Lasso)

In [None]:
### RA-Learner

tic = time.perf_counter()

# mu_0 (same procedure as for t-learner, maybe can speed up process)
ra_learner_mu0 = LassoCV(cv=10, tol=1, random_state=0)
ra_learner_mu0.fit(X_poly_train_control, Y_train_control)

# mu_1 (same procedure as for t-learner, maybe can speed up process)
ra_learner_mu1 = LassoCV(cv=10, tol=1, random_state=0)
ra_learner_mu1.fit(X_poly_train_treatment, Y_train_treatment)

# e_x
ra_learner_e_x = LogisticRegressionCV(cv=KFold(10), penalty='l1', solver='saga', tol=1, random_state=0)
ra_learner_e_x.fit(X_poly_train, W_train)

# ra-pseudo-outcome
ra_pseudo_outcome = W_train * (Y_train - ra_learner_mu0.predict(X_poly_train)) + (1 - W_train) * (
        ra_learner_mu1.predict(X_poly_train) - Y_train)

# tau_hat
ra_tau_hat_learner = LassoCV(cv=10, tol=1, random_state=0)
ra_tau_hat_learner.fit(X_poly_train, ra_pseudo_outcome)
ra_tau_hat = ra_tau_hat_learner.predict(X_poly_test)

toc = time.perf_counter()

print(f'Time for computation: {toc - tic} seconds.')  # 121 seconds

In [None]:
((ra_tau_hat - tau_test) ** 2).mean()

PW-learner (Lasso)

In [None]:
### PW-Learner

tic = time.perf_counter()

# mu_0 (same procedure as for t-learner, maybe can speed up process)
pw_learner_mu0 = LassoCV(cv=10, tol=1, random_state=0)
pw_learner_mu0.fit(X_poly_train_control, Y_train_control)

# mu_1 (same procedure as for t-learner, maybe can speed up process)
pw_learner_mu1 = LassoCV(cv=10, tol=1, random_state=0)
pw_learner_mu1.fit(X_poly_train_treatment, Y_train_treatment)

# e_x
pw_learner_e_x = LogisticRegressionCV(cv=KFold(10), penalty='l1', solver='saga', tol=1, random_state=0)
pw_learner_e_x.fit(X_poly_train, W_train)

# ra-pseudo-outcome
pw_pseudo_outcome = (W_train / pw_learner_e_x.predict_proba(X_poly_train)[:, 1] - (1 - W_train) / (
    pw_learner_e_x.predict_proba(X_poly_train)[:, 0])) * Y_train

# tau_hat
pw_tau_hat_learner = LassoCV(cv=10, tol=1, random_state=0)
pw_tau_hat_learner.fit(X_poly_train, pw_pseudo_outcome)
pw_tau_hat = pw_tau_hat_learner.predict(X_poly_test)

toc = time.perf_counter()

print(f'Time for computation: {toc - tic} seconds.')  # 117 seconds

In [None]:
((pw_tau_hat - tau_test) ** 2).mean()

U-learner (Lasso)

In [None]:
### U-Learner

tic = time.perf_counter()

# estimate e_x
u_learner_e_x = LogisticRegressionCV(cv=KFold(10), penalty='l1', solver='saga', tol=1, random_state=0)
u_learner_e_x.fit(X_poly_train, W_train)

# estimate mu_x
u_learner_mu_x = LassoCV(cv=10, tol=1, random_state=0)
u_learner_mu_x.fit(X_poly_train, Y_train)

# compute residuals
u_learner_residuals = (Y_train - u_learner_mu_x.predict(X_poly_train)) / (
        W_train - u_learner_e_x.predict_proba(X_poly_train)[:, 1])

# tau_hat - regress residuals on X
u_tau_hat_learner = LassoCV(cv=10, tol=1, random_state=0)
u_tau_hat_learner.fit(X_poly_train, u_learner_residuals)

u_tau_hats = u_tau_hat_learner.predict(X_poly_test)

toc = time.perf_counter()

print(f'Time for computation: {toc - tic} seconds.')  # 98 seconds


In [None]:
((u_tau_hats - tau_test) ** 2).mean()

# Neural Network


T-Learner (NN)

In [None]:
# T-Learner (example with Random Forest)

tic = time.perf_counter()

# mu_0
t_learner_mu0 = load_model('model_25')
print('Training mu0')
t_learner_mu0.fit(X_train_control, Y_train_control,
                  batch_size=100,
                  epochs=100,
                  validation_data=(X_test, Y_test),
                  callbacks=None  # include early stopping
                  )
t_mu_0_hat = t_learner_mu0.predict(X_test)

# mu_1
t_learner_mu1 = load_model('model_25')
print('Training mu1')
t_learner_mu1.fit(X_train_treatment, Y_train_treatment,
                  batch_size=100,
                  epochs=100,
                  validation_data=(X_test, Y_test),
                  callbacks=None  # include early stopping
                  )
t_mu_1_hat = t_learner_mu1.predict(X_test)

# Prediction = mu_1 - mu_0
t_tau_hat = t_mu_1_hat - t_mu_0_hat

toc = time.perf_counter()

print(f'Time for computation: {toc - tic} seconds.')  # 3 seconds


In [None]:
((np.reshape(t_tau_hat, (300,)) - tau_test) ** 2).mean()  # 3.18

S-learner (NN)

In [None]:
# S-learner (example with Random Forest)

# mu_x
s_learner = load_model('model_26')
s_learner.fit(X_W_train, Y_train,
              batch_size=100,
              epochs=100,
              validation_data=(X_W_test, Y_test),
              callbacks=None  # include early stopping
              )

# mu_0_hat
s_mu_0_hat = s_learner.predict(X_test_0)

# mu_1_hat
s_mu_1_hat = s_learner.predict(X_test_1)

# tau_hat
s_tau_hat = s_mu_1_hat - s_mu_0_hat


In [None]:
((np.reshape(s_tau_hat, (300,)) - tau_test) ** 2).mean()  # 1.98

X-learner (NN)

In [None]:
### X-Learner

# mu_0 (same procedure as for t-learner, maybe can speed up process)
x_learner_mu0 = load_model('model_25')
x_learner_mu0.fit(X_train_control, Y_train_control,
                  batch_size=100,
                  epochs=100,
                  validation_data=(X_test_control, Y_test_control),
                  callbacks=None  # include early stopping
                  )

# d_1
imputed_1 = Y_train_treatment - np.reshape(x_learner_mu0.predict(X_train_treatment), (len(Y_train_treatment),))

# mu_1 (same procedure as for t-learner, maybe can speed up process)
x_learner_mu1 = load_model('model_25')
x_learner_mu1.fit(X_train_treatment, Y_train_treatment,
                  batch_size=100,
                  epochs=100,
                  validation_data=(X_test_treatment, Y_test_treatment),
                  callbacks=None  # include early stopping
                  )

# d_0
imputed_0 = np.reshape(x_learner_mu1.predict(X_train_control), (len(Y_train_control),)) - Y_train_control

# regress imputed on X

# tau_hat_1
x_tau_1_hat = load_model('model_25')
x_tau_1_hat.fit(X_train_treatment, imputed_1,
                batch_size=100,
                epochs=100,
                validation_data=(X_test_treatment, tau_test_treatment),
                callbacks=None  # include early stopping
                )

x_tau_1_hat_predicts = np.reshape(x_tau_1_hat.predict(X_test), (len(X_test),))

# tau_hat_0
x_tau_0_hat = load_model('model_25')
x_tau_0_hat.fit(X_train_control, imputed_0,
                batch_size=100,
                epochs=100,
                validation_data=(X_test_control, tau_test_control),
                callbacks=None  # include early stopping
                )

x_tau_0_hat_predicts = np.reshape(x_tau_0_hat.predict(X_test), (len(X_test),))

# estimate e_x to use as g_x
g_x_hat = load_model('model_ex')
g_x_hat.fit(X_train, W_train,
            batch_size=100,
            epochs=100,
            validation_data=(X_test, W_test),
            callbacks=None  # include early stopping
            )
x_probabilities = g_x_hat.predict(X_test)
x_probs_1 = np.reshape(keras.activations.sigmoid(x_probabilities), (len(x_probabilities, )))
x_probs_0 = 1 - x_probs_1

# final estimator of tau
x_tau_hat = x_probs_1 * x_tau_0_hat_predicts + x_probs_0 * x_tau_1_hat_predicts

In [None]:
((np.reshape(x_tau_hat, (300,)) - tau_test) ** 2).mean()  # 3.1614 with smoothing of 0.5

R-learner (NN)

In [None]:
### R-Learner

# estimate e_x
r_learner_e_x = load_model('model_ex')
r_learner_e_x.fit(X_train, W_train,
                  batch_size=100,
                  epochs=100,
                  validation_data=(X_test, W_test),
                  callbacks=None  # include early stopping
                  )

# get e_x predictions
r_probabilities = np.reshape(keras.activations.sigmoid(r_learner_e_x.predict(X_train)), len(X_train, ))
r_probas_1 = r_probabilities  # probabilities of W=1
r_probas_0 = 1 - r_probabilities  # probabilities of W=0

# estimate mu_x
r_learner_mu_x = load_model('model_25')
r_learner_mu_x.fit(X_train, Y_train,
                   batch_size=100,
                   epochs=100,
                   validation_data=(X_test, Y_test),
                   callbacks=None  # include early stopping
                   )

# compute r-pseudo-outcome and weights
r_learner_pseudo_outcomes = (Y_train - np.reshape(r_learner_mu_x.predict(X_train), (len(X_train),))) / (
        W_train - r_probas_1)
r_learner_weights = (W_train - r_probas_1) ** 2

# estimate tau (regress pseudo-outcomes on X, weight by (W-e(x))^2)
r_learner_tau = load_model('model_25')
r_learner_tau.fit(X_train, r_learner_pseudo_outcomes,
                  sample_weight=r_learner_weights,
                  batch_size=100,
                  epochs=100,
                  validation_data=None,
                  callbacks=None  # include early stopping
                  )

# predict tau
r_tau_hats = r_learner_tau.predict(X_test)

In [None]:
((np.reshape(r_tau_hats, (len(X_test))) - tau_test) ** 2).mean()  #47.81

DR-learner (NN)

In [None]:
### DR-Learner

tic = time.perf_counter()

# TODO: APPLY CROSS-FITTING?
# estimate e_x
dr_learner_e_x = load_model('model_ex')
dr_learner_e_x.fit(X_train, W_train,
                   batch_size=100,
                   epochs=100,
                   validation_data=(X_test, W_test),
                   callbacks=None  # include early stopping
                   )

dr_probabilities = np.reshape(keras.activations.sigmoid(dr_learner_e_x.predict(X_train)), len(X_train, ))
dr_probas_0 = 1 - dr_probabilities  # probabilities of W=0
dr_probas_1 = dr_probabilities  # probabilities of W=1

# estimate mu_0
dr_learner_mu_0 = load_model('model_25')
dr_learner_mu_0.fit(X_train_control, Y_train_control,
                    batch_size=100,
                    epochs=100,
                    validation_data=(X_test_control, Y_test_control),
                    callbacks=None  # include early stopping
                    )

dr_learner_mu_0_predictions = dr_learner_mu_0.predict(X_train)

# estimate mu_1
dr_learner_mu_1 = load_model('model_25')
dr_learner_mu_1.fit(X_train_treatment, Y_train_treatment,
                    batch_size=100,
                    epochs=100,
                    validation_data=(X_test_treatment, Y_test_treatment),
                    callbacks=None  # include early stopping
                    )

dr_learner_mu_1_predictions = dr_learner_mu_1.predict(X_train)

# DR-pseudo-outcomes
mu_w = W_train * dr_learner_mu_1_predictions + (
        1 - W_train) * dr_learner_mu_0_predictions  # this is mu_w for each observation, i.e mu_1 for units in the treatment groups, and mu_0 for units in the control group
dr_pseudo_outcomes = (W_train - dr_probas_1) / (dr_probas_1 * dr_probas_0) * (
        Y_train - mu_w) + dr_learner_mu_1_predictions - dr_learner_mu_0_predictions

# estimate tau (regress pseudo-outcomes on X) # TODO: USE "Test Set" for this estimation
dr_learner_tau_hat = load_model('model_25')
dr_learner_tau_hat.fit(X_train, dr_pseudo_outcomes,
                       batch_size=100,
                       epochs=100,
                       validation_data=None,
                       callbacks=None  # include early stopping
                       )

# predict tau
dr_tau_hat = dr_learner_tau_hat.predict(X_test)

toc = time.perf_counter()

print(f'Time needed for computation: {toc - tic} seconds')  # 104 seconds

In [None]:
((np.reshape(dr_tau_hat, (len(tau_test),)) - tau_test) ** 2).mean()  # 8.3514

RA-learner (NN)

In [None]:
### RA-Learner

# mu_0 (same procedure as for t-learner, maybe can speed up process)
ra_learner_mu0 = load_model('model_25')
ra_learner_mu0.fit(X_train_control, Y_train_control,
                   batch_size=100,
                   epochs=100,
                   validation_data=(X_test_control, Y_test_control),
                   callbacks=None  # include early stopping
                   )

# get hats
ra_learner_mu0_predictions = np.reshape(ra_learner_mu0.predict(X_train), (len(X_train),))

# mu_1 (same procedure as for t-learner, maybe can speed up process)
ra_learner_mu1 = load_model('model_25')
ra_learner_mu1.fit(X_train_treatment, Y_train_treatment,
                   batch_size=100,
                   epochs=100,
                   validation_data=(X_test_treatment, Y_test_treatment),
                   callbacks=None  # include early stopping
                   )

# get hats
ra_learner_mu1_predictions = np.reshape(ra_learner_mu1.predict(X_train), (len(X_train),))

# e_x TODO: IS IT NEEDED?
"""ra_learner_e_x = load_model('model_ex')
ra_learner_e_x.fit(X_train,W_train,
    batch_size=100,
    epochs=100,
    validation_data=(X_test, W_test),
    callbacks=[callback] # include early stopping
)"""

# ra-pseudo-outcome
ra_pseudo_outcome = W_train * (Y_train - ra_learner_mu0_predictions) + (1 - W_train) * (
        ra_learner_mu1_predictions - Y_train)

# tau_hat
ra_tau_hat_learner = load_model('model_25')
ra_tau_hat_learner.fit(X_train, ra_pseudo_outcome,
                       batch_size=100,
                       epochs=100,
                       validation_data=None,
                       callbacks=None  # include early stopping
                       )

ra_tau_hat = ra_tau_hat_learner.predict(X_test)
ra_tau_hat

In [None]:
((np.reshape(ra_tau_hat, (len(tau_test),)) - tau_test) ** 2).mean()  # 3.397

PW-learner (NN)

In [None]:
### PW-Learner

# e_x
pw_learner_e_x = load_model('model_ex')
pw_learner_e_x.fit(X_train, W_train,
                   batch_size=100,
                   epochs=100,
                   validation_data=(X_test, W_test),
                   callbacks=None  # include early stopping
                   )

pw_probabilities = np.reshape(keras.activations.sigmoid(pw_learner_e_x.predict(X_train)), len(X_train, ))
pw_probs_1 = pw_probabilities
pw_probs_0 = 1 - pw_probabilities

# ra-pseudo-outcome
pw_pseudo_outcome = (W_train / pw_probs_1 - (1 - W_train) / pw_probs_0) * Y_train

# tau_hat
pw_tau_hat_learner = load_model('model_25')
pw_tau_hat_learner.fit(X_train, pw_pseudo_outcome,
                       batch_size=100,
                       epochs=100,
                       validation_data=None,
                       callbacks=None  # include early stopping
                       )
pw_tau_hat = pw_tau_hat_learner.predict(X_test)
pw_tau_hat

In [None]:
((np.reshape(pw_tau_hat, (len(tau_test),)) - tau_test) ** 2).mean()  # 271.842 TODO: CHECK IF IT REALLY IS CORRECT

In [None]:
# see why so bad
pw_probabilities

U-learner (NN)

In [None]:
### U-Learner
# estimate e_x

u_learner_e_x = load_model('model_ex')
u_learner_e_x.fit(X_train, W_train,
                  batch_size=100,
                  epochs=1000,
                  validation_split=0.3,
                  callbacks=callback  # include early stopping
                  )

u_probs_1 = np.reshape(keras.activations.sigmoid(u_learner_e_x.predict(X_train)), (len(X_train),))

# estimate mu_x
u_learner_mu_x = load_model('model_25')
u_learner_mu_x.fit(X_train, Y_train,
                   batch_size=100,
                   epochs=100,
                   validation_data=None,
                   callbacks=None
                   )

u_learner_mu_x_predictions = np.reshape(u_learner_mu_x.predict(X_train), (len(X_train),))
# compute residuals
u_learner_residuals = (Y_train - u_learner_mu_x_predictions) / (W_train - u_probs_1)

# tau_hat - regress residuals on X
u_tau_hat_learner = load_model('model_25')
u_tau_hat_learner.fit(X_train, u_learner_residuals,
                      batch_size=100,
                      epochs=1000,
                      callbacks=callback,
                      validation_split=0.3
                      )

u_tau_hats = u_tau_hat_learner.predict(X_test)
u_tau_hats


In [None]:
((np.reshape(u_tau_hats, (len(tau_test),)) - tau_test) ** 2).mean()  # 51.102 TODO: CHECK IF IT REALLY IS CORRECT!!!

In [None]:
u_learner_residuals

In [None]:
import pandas as pd

pd.DataFrame(u_probs_1).describe()


# Make Classes

In [None]:
class Metalearner:
    pass

# Class T Learner

In [None]:
class TLearner:  # TODO: comment what is what.
    def __init__(self, method):  # TODO: or maybe not give base_learners but method, i.e. : 'lasso', 'rf' or 'nn'
        self.method = method

        if method == 'rf':
            self.mu0_model = RandomForestRegressor(n_estimators=1000, max_depth=99, random_state=0, max_features=0.66)
            self.mu1_model = RandomForestRegressor(n_estimators=1000, max_depth=99, random_state=0, max_features=0.66)
        elif method == 'lasso':
            self.mu0_model = LassoCV(cv=10, tol=1e-2, random_state=0)
            self.mu1_model = LassoCV(cv=10, tol=1e-2, random_state=0)
            self.poly = PolynomialFeatures(degree=3, interaction_only=False, include_bias=False)
        elif method == 'nn':
            self.mu0_model = load_model('model_25')
            self.mu1_model = load_model('model_25')
        else:
            raise NotImplementedError('Base learner method not specified')

    def fit(self,
            x, y, w):  # TODO: training process
        if self.method == 'rf':
            # 1: train mu_0
            print("Fitting random forest for mu_0")
            self.mu0_model.fit(x[w == 0], y[w == 0])

            # 2: train mu_1
            print("Fitting random forest for mu_1")
            self.mu1_model.fit(x[w == 1], y[w == 1])

        elif self.method == 'lasso':
            # make polynomial features
            x_poly_train = self.poly.fit_transform(x)

            # 1: train mu_0
            print("Fitting lasso for mu_0")
            self.mu0_model.fit(x_poly_train[w == 0], y[w == 0])

            # 2: train mu_1
            print("Fitting lasso for mu_1")
            self.mu1_model.fit(x_poly_train[w == 1], y[w == 1])

        elif self.method == 'nn':
            x = tf.convert_to_tensor(x)
            y = tf.convert_to_tensor(y)
            w = tf.convert_to_tensor(w)

            # 1: train mu_0
            print("Training neural network for mu_0")
            self.mu0_model.fit(x[w == 0], y[w == 0],
                               batch_size=100,
                               epochs=10000,
                               callbacks=callback,
                               validation_split=0.3,
                               verbose=0
                               )

            # 2: train mu_1
            print("Training neural network for mu_1")
            self.mu1_model.fit(x[w == 1], y[w == 1],
                               batch_size=100,
                               epochs=10000,
                               callbacks=callback,  # include early stopping
                               validation_split=0.3,
                               verbose=0
                               )

        else:
            raise NotImplementedError('Base learner method not specified in fit')

    def predict(self,
                x):  # TODO:
        if self.method == 'rf':
            # 1: calculate hats of mu_1 & mu_0
            mu0_hats = self.mu0_model.predict(x)
            mu1_hats = self.mu1_model.predict(x)
            predictions = mu1_hats - mu0_hats

        elif self.method == 'lasso':
            # make polynomial features
            x_poly_test = self.poly.fit_transform(x)

            # 1: calculate hats of mu_1 & mu_0
            mu0_hats = self.mu0_model.predict(x_poly_test)
            mu1_hats = self.mu1_model.predict(x_poly_test)
            predictions = mu1_hats - mu0_hats

        elif self.method == 'nn':
            x = tf.convert_to_tensor(x)
            mu0_hats = self.mu0_model(x)
            mu1_hats = self.mu1_model(x)
            predictions = np.reshape(mu1_hats - mu0_hats, (len(x),))

        else:
            raise NotImplementedError('Base learner method not specified in predict')
        return predictions

In [None]:
t_nn = TLearner(method='nn')
t_nn.fit(X_train, Y_train, W_train)
predictions = t_nn.predict(X_test)
((predictions - tau_test) ** 2).mean()
# rf: 10.091322530886687 / 7.359776707186481 (max_features=0.66)
# lasso: 5.583461099392904
# nn: 3.1867804239471273

In [14]:
stratified = StratifiedKFold(n_splits=CF_FOLDS, shuffle=True, random_state=0)

In [None]:
for (train_index, test_index) in stratified.split(X_train, W_train):
    temp_model = load_model('model_25')
    print('New Fold')
    temp_model.fit(X_train[train_index], Y_train[train_index], epochs=30)
    print(temp_model.predict(X_train[test_index]))

# Class S Learner

In [None]:
class SLearner:  # TODO: comment what is what.
    def __init__(self, method):  # TODO: or maybe not give base_learners but method, i.e. : 'lasso', 'rf' or 'nn'
        self.method = method

        if method == 'rf':
            self.mux_model = RandomForestRegressor(n_estimators=2000, max_depth=100, random_state=0, max_features=0.66)
        elif method == 'lasso':
            self.mux_model = LassoCV(cv=10, tol=1e-2, random_state=0)
            self.poly = PolynomialFeatures(degree=3, interaction_only=False, include_bias=False)
        elif method == 'nn':
            self.mux_model = load_model('model_26')
        else:
            raise NotImplementedError('Base learner method not specified')

    def fit(self,
            x, y, w):  # TODO: training process
        x_w = np.concatenate((x, np.reshape(w, (len(w), 1))), axis=1)

        if self.method == 'rf':
            # 1: train mu_x
            print("Fitting random forest for mu_x")
            self.mux_model.fit(x_w, y)

        elif self.method == 'lasso':
            # make polynomial features
            x_poly_train = self.poly.fit_transform(x_w)

            # 1: train mu_x
            print("Fitting lasso for mu_x")
            self.mux_model.fit(x_poly_train, y)


        elif self.method == 'nn':
            x_w = tf.convert_to_tensor(x_w)
            y = tf.convert_to_tensor(y)

            # 1: train mu_x
            print("Training neural network for mu_x")
            self.mux_model.fit(x_w, y,
                               batch_size=100,
                               epochs=100,
                               callbacks=None,  # include early stopping
                               verbose=0
                               )

        else:
            raise NotImplementedError('Base learner method not specified in fit')

    def predict(self,
                x):  # TODO:
        x_0 = np.concatenate((x, np.zeros((len(x), 1))), axis=1)
        x_1 = np.concatenate((x, np.ones((len(x), 1))), axis=1)

        if self.method == 'rf':
            # 1: calculate hats of mu_x with X and W=1 or W=0
            mu0_hats = self.mux_model.predict(x_0)
            mu1_hats = self.mux_model.predict(x_1)
            predictions = mu1_hats - mu0_hats

        elif self.method == 'lasso':
            # make polynomial features
            x_poly_0 = self.poly.fit_transform(x_0)
            x_poly_1 = self.poly.fit_transform(x_1)

            # 1: calculate hats of mu_x with X and W=1 or W=0
            mu0_hats = self.mux_model.predict(x_poly_0)
            mu1_hats = self.mux_model.predict(x_poly_1)
            predictions = mu1_hats - mu0_hats

        elif self.method == 'nn':
            x_0 = tf.convert_to_tensor(x_0)
            x_1 = tf.convert_to_tensor(x_1)
            # 1: calculate hats of mu_x with X and W=1 or W=0
            mu0_hats = self.mux_model(x_0)
            mu1_hats = self.mux_model(x_1)
            predictions = np.reshape(mu1_hats - mu0_hats, (len(x),))

        else:
            raise NotImplementedError('Base learner method not specified in predict')
        return predictions

In [None]:
s_nn = SLearner('nn')
s_nn.fit(X_train, Y_train, W_train)
predictions = s_nn.predict(X_test)
((predictions - tau_test) ** 2).mean()
# rf: 18.134009488483855
# lasso: 5.559126710289806
# nn: 1.987529792077956

# Class X Learner

In [26]:
class XLearner:  # TODO: comment what is what.
    def __init__(self, method):  # TODO: or maybe not give base_learners but method, i.e. : 'lasso', 'rf' or 'nn'
        self.method = method

        if method == 'rf':
            self.mu0_model = RandomForestRegressor(n_estimators=100, max_depth=100, random_state=0)
            self.mu1_model = RandomForestRegressor(n_estimators=100, max_depth=100, random_state=0)
            self.ex_model = RandomForestClassifier(max_depth=100, random_state=0)
            self.tau0_model = RandomForestRegressor(n_estimators=100, max_depth=100, random_state=0)
            self.tau1_model = RandomForestRegressor(n_estimators=100, max_depth=100, random_state=0)

        elif method == 'lasso':
            self.mu0_model = LassoCV(cv=10, tol=1, random_state=0)
            self.mu1_model = LassoCV(cv=10, tol=1, random_state=0)
            self.ex_model = LogisticRegressionCV(cv=KFold(10), penalty='l1', solver='saga', tol=1, random_state=0)
            self.tau0_model = LassoCV(cv=10, tol=1, random_state=0)
            self.tau1_model = LassoCV(cv=10, tol=1, random_state=0)
            self.poly = PolynomialFeatures(degree=3, interaction_only=False, include_bias=False)

        elif method == 'nn':
            self.mu0_model = load_model('model_25')
            self.mu1_model = load_model('model_25')
            self.ex_model = load_model('model_ex')
            self.tau0_model = load_model('model_25')
            self.tau1_model = load_model('model_25')
        else:
            raise NotImplementedError('Base learner method not specified')

    def fit(self,
            x, y, w):  # TODO: training process
        if self.method == 'rf':
            # 1: train mu_0 and get imputed_1
            print("Fitting random forest for mu_0")
            self.mu0_model.fit(x[w == 0], y[w == 0])
            imputed_1 = y[w == 1] - self.mu0_model.predict(x[w == 1])

            # 2: train mu_1 and get imputed_0
            print("Fitting random forest for mu_1")
            self.mu1_model.fit(x[w == 1], y[w == 1])
            imputed_0 = self.mu1_model.predict(x[w == 0]) - y[w == 0]

            # 3: train tau_0
            print("Fitting random forest for tau_0")
            self.tau0_model.fit(x[w == 0], imputed_0)

            # 4: train tau_1
            print("Fitting random forest for tau_1")
            self.tau1_model.fit(x[w == 1], imputed_1)

            # 5: train e_x
            print("Fitting random forest for e_x")
            self.ex_model.fit(x, w)

        elif self.method == 'lasso':
            # make polynomial features
            x_poly_train = self.poly.fit_transform(x)

            # 1: train mu_0 and get imputed_1
            print("Fitting lasso for mu_0")
            self.mu0_model.fit(x_poly_train[w == 0], y[w == 0])
            imputed_1 = y[w == 1] - self.mu0_model.predict(x_poly_train[w == 1])

            # 2: train mu_1 and get imputed_0
            print("Fitting lasso for mu_1")
            self.mu1_model.fit(x_poly_train[w == 1], y[w == 1])
            imputed_0 = self.mu1_model.predict(x_poly_train[w == 0]) - y[w == 0]

            # 3: train tau_0
            print("Fitting random forest for tau_0")
            self.tau0_model.fit(x_poly_train[w == 0], imputed_0)

            # 4: train tau_1
            print("Fitting random forest for tau_1")
            self.tau1_model.fit(x_poly_train[w == 1], imputed_1)

            # 5: train e_x
            print("Fitting random forest for e_x")
            self.ex_model.fit(x_poly_train, w)

        elif self.method == 'nn':
            # to tensor
            x = tf.convert_to_tensor(x)
            y = tf.convert_to_tensor(y)
            w = tf.convert_to_tensor(w)

            imputed_0 = np.empty(len(x), )
            imputed_1 = np.empty(len(x), )

            # 1: train mu_0
            print("Training neural network for mu_0")
            for train_index, test_index in stratified.split(x, w):
                index = np.zeros(len(x), dtype=bool)
                index[test_index] = 1

                temp_model = load_model('model_25')

                x_train = x[~index]
                x_test = x[index]
                w_train = w[~index]
                w_test = w[index]
                y_train = y[~index]
                y_test = y[index]

                temp_model.fit(x_train[w_train == 0], y_train[w_train == 0],
                               epochs=100,
                               callbacks=None,
                               verbose=0
                               )
                imputed_1[index][w_test == 1] = y_test[w_test == 1] - np.reshape(temp_model(x_test[w_test == 1]),
                                                                                 (len(x_test[w_test == 1]),))

            imputed_1 = tf.convert_to_tensor(imputed_1)

            """self.mu0_model.fit(x[w == 0], y[w == 0],
                               batch_size=100,
                               epochs=100,
                               callbacks=None,  # include early stopping
                               verbose=0
                               )
            imputed_1 = y[w == 1] - np.reshape(self.mu0_model(x[w == 1]), (len(x[w == 1]),))"""

            # 2: train mu_1
            print("Training neural network for mu_1")

            for train_index, test_index in stratified.split(x, w):
                index = np.zeros(len(x), dtype=bool)
                index[test_index] = 1

                temp_model = load_model('model_25')
                x_train = x[~index]
                x_test = x[index]
                w_train = w[~index]
                w_test = w[index]
                y_train = y[~index]

                temp_model.fit(x_train[w_train == 1], y_train[w_train == 1],
                               epochs=100,
                               callbacks=None,
                               verbose=0
                               )
                imputed_0[index][w_test == 0] = np.array(temp_model(x_test[w_test == 0])).squeeze() - y_test[
                    w_test == 0]

            imputed_0 = tf.convert_to_tensor(imputed_0)

            """self.mu1_model.fit(x[w == 1], y[w == 1],
                               batch_size=100,
                               epochs=100,
                               callbacks=None,  # include early stopping
                               verbose=0
                               )
            imputed_0 = np.reshape(self.mu1_model(x[w == 0]), (len(x[w == 0]),)) - y[w == 0]"""

            # 3: train tau_0
            print("Fitting random forest for tau_0")
            self.tau0_model.fit(x[w == 0], imputed_0[w == 0],
                                batch_size=100,
                                epochs=100,
                                callbacks=None,  # include early stopping
                                verbose=0
                                )

            # 4: train tau_1
            print("Fitting random forest for tau_1")
            self.tau1_model.fit(x[w == 1], imputed_1[w == 1],
                                batch_size=100,
                                epochs=100,
                                callbacks=None,  # include early stopping
                                verbose=0
                                )

            # 5: train e_x
            print("Fitting random forest for e_x")
            self.ex_model.fit(x, w,
                              batch_size=100,
                              epochs=100,
                              callbacks=None,  # include early stopping
                              verbose=0
                              )

        else:
            raise NotImplementedError('Base learner method not specified in fit')

    def predict(self,
                x):  # TODO:
        if self.method == 'rf':
            # 1: calculate hats of tau_0 and tau_1
            tau_0_hats = self.tau0_model.predict(x)
            tau_1_hats = self.tau1_model.predict(x)
            # 2: probabilities
            probs = self.ex_model.predict_proba(x)[:, 1]
            # 3: final predictions

        elif self.method == 'lasso':
            # make polynomial features
            x_poly_test = self.poly.fit_transform(x)

            # 1: calculate hats of tau_0 and tau_1
            tau_0_hats = self.tau0_model.predict(x_poly_test)
            tau_1_hats = self.tau1_model.predict(x_poly_test)
            probs = self.ex_model.predict_proba(x_poly_test)[:, 1]

        elif self.method == 'nn':
            x = tf.convert_to_tensor(x)
            # 1: calculate hats of tau_0 and tau_1
            tau_0_hats = np.reshape(self.tau0_model(x), (len(x),))
            tau_1_hats = np.reshape(self.tau1_model(x), (len(x),))
            # 2: probabilities
            logit = self.ex_model(x)
            probs = np.reshape(keras.activations.sigmoid(logit), (len(logit, )))
            # 3: final predictions

        else:
            raise NotImplementedError('Base learner method not specified in predict')

        predictions = probs * tau_0_hats + (1 - probs) * tau_1_hats
        return predictions

In [27]:
x_rf = XLearner('nn')
x_rf.fit(X_train, Y_train, W_train)
predictions = x_rf.predict(X_test)
((predictions - tau_test) ** 2).mean()
# rf: # 3.1369636408859614 --> same (good)
# lasso: # nn: 7.667219448077926 --> same (good)
# nn: 3.161416602361538 --> same (good)

Training neural network for mu_0
Training neural network for mu_1
Fitting random forest for tau_0
Fitting random forest for tau_1
Fitting random forest for e_x


nan

array([nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
       nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
       nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
       nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
       nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
       nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
       nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
       nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
       nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
       nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
       nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
       nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
       nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
       nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, na

In [None]:
imputed_1 = np.empty(len(X_train), )

for train_index, test_index in stratified.split(X_train, W_train):
    index = np.zeros(len(X_train), dtype=bool)
    index[test_index] = 1

    temp_model = load_model('model_25')

    x_train = X_train[~index]
    x_test = X_train[index]
    w_train = W_train[~index]
    w_test = W_train[index]
    y_train = Y_train[~index]
    y_test = Y_train[index]

In [None]:
temp_model(x_test[w_test == 1])

In [None]:
x_test[w_test == 0]

In [None]:
index = np.zeros(len(X_train), dtype=bool)
for train_index, test_index in stratified.split(X_train, W_train):
    index[test_index] = 1

In [None]:
index

In [None]:
stratified = StratifiedKFold(n_splits=CF_FOLDS, shuffle=True, random_state=0)

In [None]:
for (train_index, test_index) in stratified.split(X_train, W_train):
    temp_model = load_model('model_25')
    print('New Fold')
    temp_model.fit(X_train[train_index], Y_train[train_index], epochs=30)
    print(temp_model.predict(X_train[test_index]))

In [None]:
tf.experimental.numpy.empty(0)

# R-Learner

In [None]:
class RLearner:
    def __init__(self, method):
        self.method = method

        if method == 'rf':
            self.mux_model = RandomForestRegressor(n_estimators=100, max_depth=100, random_state=0)
            self.ex_model = RandomForestClassifier(max_depth=100, random_state=0)
            self.tau_model = RandomForestRegressor(n_estimators=100, max_depth=100, random_state=0)

        elif method == 'lasso':
            self.mux_model = LassoCV(cv=10, tol=1, random_state=0)
            self.ex_model = LogisticRegressionCV(cv=KFold(10), penalty='l1', solver='saga', tol=1, random_state=0)
            self.tau_model = LassoCV(cv=10, tol=1, random_state=0)
            self.poly = PolynomialFeatures(degree=3, interaction_only=False, include_bias=False)

        elif method == 'nn':
            self.mux_model = load_model('model_25')
            self.ex_model = load_model('model_ex')
            self.tau_model = load_model('model_25')

        else:
            raise NotImplementedError('Base learner method not specified or typo')

    def fit(self, x, y, w):

        if self.method == 'rf':
            # 1: fit mu_x
            print('Fitting random forest for mu_x')
            self.mux_model.fit(x, y)

            print('Fitting random forest for e_x')
            # 2: fit ex
            self.ex_model.fit(x, w)

            # 3: calculate pseudo_outcomes & weights
            probs = self.ex_model.predict_proba(x)[:, 1]
            pseudo_outcomes = (y - self.mux_model.predict(x)) / (w - probs)
            weights = (w - probs) ** 2

            print('Fitting random forest for tau_x')
            # 4: fit tau
            self.tau_model.fit(x, pseudo_outcomes, sample_weight=weights)

        elif self.method == 'lasso':
            x_poly_train = self.poly.fit_transform(x)

            # 1: fit mu_x
            print('Fitting lasso for mu_x')
            self.mux_model.fit(x_poly_train, y)

            # 2: fit ex
            print('Fitting lasso for e_x')
            self.ex_model.fit(x_poly_train, w)

            # 3: calculate pseudo_outcomes & weights
            probs = self.ex_model.predict_proba(x_poly_train)[:, 1]
            pseudo_outcomes = (y - self.mux_model.predict(x_poly_train)) / (w - probs)
            weights = (w - probs) ** 2

            # 4: fit tau
            print('Fitting lasso for tau_x')
            self.tau_model.fit(x_poly_train, pseudo_outcomes, sample_weight=weights)

        elif self.method == 'nn':
            # to tensor
            x = tf.convert_to_tensor(x)
            y = tf.convert_to_tensor(y)
            w = tf.convert_to_tensor(w)

            # 1: fit mu_x
            print('Training NN for mu_x')
            self.mux_model.fit(x, y,
                               batch_size=100,
                               epochs=400,
                               callbacks=callback,
                               validation_split=0.3,
                               verbose=0
                               )
            # 2: fit ex
            print('Training NN for e_x')
            self.ex_model.fit(x, w,
                              batch_size=100,
                              epochs=400,
                              callbacks=callback,
                              validation_split=0.3,
                              verbose=0
                              )

            # 3: calculate pseudo_outcomes & weights
            probs = np.reshape(keras.activations.sigmoid(self.ex_model(x)), len(x, ))
            pseudo_outcomes = (y - np.reshape(self.mux_model(x), (len(x),))) / (w - probs)
            weights = (w - probs) ** 2

            # 4: fit tau
            print('Training NN for tau_x')
            self.tau_model.fit(x, pseudo_outcomes,
                               sample_weight=weights,
                               batch_size=100,
                               epochs=100,
                               callbacks=callback,
                               validation_split=0.3,
                               verbose=0
                               )

        else:
            raise NotImplementedError('Base learner method not specified in fit')

    def predict(self, x):

        if self.method == 'rf':
            predictions = self.tau_model.predict(x)

        elif self.method == 'lasso':
            x_poly_test = self.poly.fit_transform(x)
            predictions = self.tau_model.predict(x_poly_test)

        elif self.method == 'nn':
            # to tensor
            x = tf.convert_to_tensor(x)
            predictions = np.reshape(self.tau_model(x), (len(x),))

        else:
            raise NotImplementedError('Base learner method not specified in predict')

        return predictions

In [None]:
r_rf = RLearner('nn')
r_rf.fit(X_train, Y_train, W_train)
predictions = r_rf.predict(X_test)
((predictions - tau_test) ** 2).mean()
# rf: 17.722925118749608
# lasso: 5.50038865455844
# nn: 47.81939839016621

# Class DR-Learner

In [None]:
class DRLearner:
    def __init__(self, method):
        self.method = method
        if method == 'rf':
            self.mu0_model = RandomForestRegressor(n_estimators=100, max_depth=100, random_state=0)
            self.mu1_model = RandomForestRegressor(n_estimators=100, max_depth=100, random_state=0)
            self.ex_model = RandomForestClassifier(n_estimators=100, max_depth=100, random_state=0)
            self.tau_model = RandomForestRegressor(n_estimators=100, max_depth=100, random_state=0)

        elif method == 'lasso':
            self.mu0_model = LassoCV(cv=10, tol=1, random_state=0)
            self.mu1_model = LassoCV(cv=10, tol=1, random_state=0)
            self.ex_model = LogisticRegressionCV(cv=KFold(10), penalty='l1', solver='saga', tol=1, random_state=0)
            self.tau_model = LassoCV(cv=10, tol=1, random_state=0)
            self.poly = PolynomialFeatures(degree=3, interaction_only=False, include_bias=False)

        elif method == 'nn':
            self.mu0_model = load_model('model_25')
            self.mu1_model = load_model('model_25')
            self.ex_model = load_model('model_ex')
            self.tau_model = load_model('model_25')

        else:
            raise NotImplementedError('Base learner method not specified or typo')

    def fit(self, x, y, w):

        if self.method == 'rf':
            # 1: fit mu_0
            print('Fitting random forest for mu_0')
            self.mu0_model.fit(x[w == 0], y[w == 0])

            # 2: fit mu_1
            print('Fitting random forest for mu_1')
            self.mu1_model.fit(x[w == 1], y[w == 1])

            # 3: fit ex
            print('Fitting random forest for e_x')
            self.ex_model.fit(x, w)
            probs = self.ex_model.predict_proba(x)[:, 1]
            neg_prob = self.ex_model.predict_proba(x)[:, 0]

            # calculate pseudo_outcomes
            mu_w = w * self.mu1_model.predict(x) + (1 - w) * self.mu0_model.predict(x)
            pseudo_outcomes = (w - probs) / (probs * neg_prob) * (y - mu_w) + self.mu1_model.predict(
                x) - self.mu0_model.predict(x)

            # 4 fit tau
            print('Fitting random forest for tau_x')
            self.tau_model.fit(x, pseudo_outcomes)

        elif self.method == 'lasso':
            x_poly_train = self.poly.fit_transform(x)

            # 1: fit mu_0
            print('Fitting lasso for mu_0')
            self.mu0_model.fit(x_poly_train[w == 0], y[w == 0])

            # 2: fit mu_1
            print('Fitting lasso for mu_1')
            self.mu1_model.fit(x_poly_train[w == 1], y[w == 1])

            # 3: fit ex
            print('Fitting lasso for e_x')
            self.ex_model.fit(x_poly_train, w)
            probs = self.ex_model.predict_proba(x_poly_train)[:, 1]

            # calculate pseudo_outcomes
            mu_w = w * self.mu1_model.predict(x_poly_train) + (1 - w) * self.mu0_model.predict(x_poly_train)
            pseudo_outcomes = (w - probs) / (probs * (1 - probs)) * (y - mu_w) + self.mu1_model.predict(
                x_poly_train) - self.mu0_model.predict(x_poly_train)

            # 4 fit tau
            print('Fitting lasso for tau_x')
            self.tau_model.fit(x_poly_train, pseudo_outcomes)

        elif self.method == 'nn':
            # to tensor
            x = tf.convert_to_tensor(x)
            y = tf.convert_to_tensor(y)
            w = tf.convert_to_tensor(w)

            # 1: fit mu_0
            print('Training NN for mu_0')
            self.mu0_model.fit(x[w == 0], y[w == 0],
                               batch_size=100,
                               epochs=100,
                               callbacks=None,
                               verbose=0
                               )

            # 2: fit mu_1
            print('Training NN for mu_1')
            self.mu1_model.fit(x[w == 1], y[w == 1],
                               batch_size=100,
                               epochs=100,
                               callbacks=None,
                               verbose=0
                               )

            # 3: fit ex
            print('Training NN for e_x')
            self.ex_model.fit(x, w,
                              batch_size=100,
                              epochs=100,
                              callbacks=None,
                              verbose=0
                              )

            probs = tf.reshape(keras.activations.sigmoid(self.ex_model(x)), len(x, ))

            # calculate pseudo_outcomes
            mu_0_hats = self.mu0_model(x)
            mu_1_hats = self.mu1_model(x)

            mu_w = w * mu_1_hats + (1 - w) * mu_0_hats
            pseudo_outcomes = (w - probs) / (probs * (1 - probs)) * (y - mu_w) + mu_1_hats - mu_0_hats

            # 4 fit tau
            print('Training NN for tau_x')
            self.tau_model.fit(x, pseudo_outcomes,
                               batch_size=100,
                               epochs=100,
                               validation_data=None,
                               callbacks=None,
                               verbose=0
                               )

    def predict(self, x):

        if self.method == 'rf':
            predictions = self.tau_model.predict(x)


        elif self.method == 'lasso':
            x_poly_test = self.poly.fit_transform(x)
            predictions = self.tau_model.predict(x_poly_test)

        elif self.method == 'nn':
            # to tensor
            x = tf.convert_to_tensor(x)
            # predict
            predictions = np.reshape(self.tau_model(x), (len(x),))

        else:
            raise NotImplementedError('Base learner method not specified in predict')

        return predictions

In [None]:
dr_rf = DRLearner('nn')
dr_rf.fit(X_train, Y_train, W_train)
predictions = dr_rf.predict(X_test)
((predictions - tau_test) ** 2).mean()
# rf: 5.385491721300538 # why different??? ---> because if you take 1 - probs its not exactly the same as taking the [:,0] column!!
# lasso: 6.252082321980517
# nn: 8.35142898943478

# CHECK THIS: CHANGE (1 - PROBS) TO [:,0] TO BE MORE EXACT!!!

# Class RA-Learner

In [None]:
class RALearner:
    def __init__(self, method):
        self.method = method
        if method == 'rf':
            self.mu0_model = RandomForestRegressor(n_estimators=100, max_depth=100, random_state=0)
            self.mu1_model = RandomForestRegressor(n_estimators=100, max_depth=100, random_state=0)
            self.tau_model = RandomForestRegressor(n_estimators=100, max_depth=100, random_state=0)

        elif method == 'lasso':
            self.mu0_model = LassoCV(cv=10, tol=1, random_state=0)
            self.mu1_model = LassoCV(cv=10, tol=1, random_state=0)
            self.tau_model = LassoCV(cv=10, tol=1, random_state=0)
            self.poly = PolynomialFeatures(degree=3, interaction_only=False, include_bias=False)

        elif method == 'nn':
            self.mu0_model = load_model('model_25')
            self.mu1_model = load_model('model_25')
            self.tau_model = load_model('model_25')

        else:
            raise NotImplementedError('Base learner method not specified or typo')

    def fit(self, x, y, w):
        if self.method == 'rf':
            # 1: fit mu_0
            print('Fitting random forest for mu_0')
            self.mu0_model.fit(x[w == 0], y[w == 0])

            # 2: fit mu_1
            print('Fitting random forest for mu_1')
            self.mu1_model.fit(x[w == 1], y[w == 1])

            # calculate pseudo_outcomes
            pseudo_outcomes = w * (y - self.mu0_model.predict(x)) + (1 - w) * (self.mu1_model.predict(x) - y)

            # 4 fit tau
            print('Fitting random forest for tau_x')
            self.tau_model.fit(x, pseudo_outcomes)

        elif self.method == 'lasso':
            x_poly_train = self.poly.fit_transform(x)

            # 1: fit mu_0
            print('Fitting lasso for mu_0')
            self.mu0_model.fit(x_poly_train[w == 0], y[w == 0])

            # 2: fit mu_1
            print('Fitting lasso for mu_1')
            self.mu1_model.fit(x_poly_train[w == 1], y[w == 1])

            # calculate pseudo_outcomes
            pseudo_outcomes = w * (y - self.mu0_model.predict(x_poly_train)) + (1 - w) * (
                    self.mu1_model.predict(x_poly_train) - y)

            # 4 fit tau
            print('Fitting lasso for tau_x')
            self.tau_model.fit(x_poly_train, pseudo_outcomes)

        elif self.method == 'nn':
            # to tensor
            x = tf.convert_to_tensor(x)
            y = tf.convert_to_tensor(y)
            w = tf.convert_to_tensor(w)

            # 1: fit mu_0
            print('Training NN for mu_0')
            self.mu0_model.fit(x[w == 0], y[w == 0],
                               batch_size=100,
                               epochs=100,
                               callbacks=None,
                               verbose=0
                               )

            # 2: fit mu_1
            print('Training NN for mu_1')
            self.mu1_model.fit(x[w == 1], y[w == 1],
                               batch_size=100,
                               epochs=100,
                               callbacks=None,
                               verbose=0
                               )

            # calculate pseudo_outcomes
            mu0_predictions = np.reshape(self.mu0_model(x), (len(x),))
            mu1_predictions = np.reshape(self.mu1_model(x), (len(x),))

            pseudo_outcomes = w * (y - mu0_predictions) + (1 - w) * (mu1_predictions - y)

            # 4 fit tau
            print('Training NN for tau_x')
            self.tau_model.fit(x, pseudo_outcomes,
                               batch_size=100,
                               epochs=100,
                               validation_data=None,
                               callbacks=None,
                               verbose=0
                               )

    def predict(self, x):
        if self.method == 'rf':
            predictions = self.tau_model.predict(x)


        elif self.method == 'lasso':
            x_poly_test = self.poly.fit_transform(x)
            predictions = self.tau_model.predict(x_poly_test)

        elif self.method == 'nn':
            # to tensor
            x = tf.convert_to_tensor(x)
            # predict
            predictions = np.reshape(self.tau_model(x), (len(x),))

        else:
            raise NotImplementedError('Base learner method not specified in predict')

        return predictions

In [None]:
ra_rf = RALearner('nn')
ra_rf.fit(X_train, Y_train, W_train)
predictions = ra_rf.predict(X_test)
((predictions - tau_test) ** 2).mean()
# rf: 5.355494017645751
# lasso: 8.283890654355236
# nn: 3.3973654461530867

# 3.353439795888397
# 3.3534397958883955

# Class PW-Learner

In [66]:
class PWLearner:
    def __init__(self, method):
        self.method = method
        if method == 'rf':
            self.ex_model = RandomForestClassifier(n_estimators=100, max_depth=100, random_state=0)
            self.tau_model = RandomForestRegressor(n_estimators=100, max_depth=100, random_state=0)

        elif method == 'lasso':
            self.ex_model = LogisticRegressionCV(cv=KFold(10), penalty='l1', solver='saga', tol=1, random_state=0)
            self.tau_model = LassoCV(cv=10, tol=1, random_state=0)
            self.poly = PolynomialFeatures(degree=3, interaction_only=False, include_bias=False)

        elif method == 'nn':
            self.ex_1_model = load_model('model_ex')
            self.ex_2_model = load_model('model_ex')
            self.tau_model = load_model('model_25')

        else:
            raise NotImplementedError('Base learner method not specified or typo')

    def fit(self, x, y, w):

        if self.method == 'rf':
            # 3: fit ex
            print('Fitting random forest for e_x')
            self.ex_model.fit(x, w)
            probs = self.ex_model.predict_proba(x)[:, 1]
            counter_probs = self.ex_model.predict_proba(x)[:, 0]

            # calculate pseudo_outcomes
            pseudo_outcomes = (w / probs - (1 - w) / counter_probs) * y

            # 4 fit tau
            print('Fitting random forest for tau_x')
            self.tau_model.fit(x, pseudo_outcomes)

        elif self.method == 'lasso':
            x_poly_train = self.poly.fit_transform(x)

            # 3: fit ex
            print('Fitting lasso for e_x')
            self.ex_model.fit(x_poly_train, w)

            probs = self.ex_model.predict_proba(x_poly_train)[:, 1]
            counter_probs = self.ex_model.predict_proba(x_poly_train)[:, 0]

            # calculate pseudo_outcomes
            pseudo_outcomes = (w / probs - (1 - w) / counter_probs) * y

            # 4 fit tau
            print('Fitting lasso for tau_x')
            self.tau_model.fit(x_poly_train, pseudo_outcomes)

        elif self.method == 'nn':
            x = tf.convert_to_tensor(x)
            y = tf.convert_to_tensor(y)
            w = tf.convert_to_tensor(w)

            # cross-fitting
            # split for cross-fitting
            index = np.zeros(len(x), dtype=bool)
            train_ind = np.random.choice(len(x), int(len(x) / 2), replace=False)
            index[train_ind] = 1

            probs = np.zeros(len(x), )

            # 3: fit ex
            print('Training NN for e_x')

            self.ex_1_model.fit(x[index], w[index],
                                batch_size=100,
                                epochs=50,
                                callbacks=None,
                                verbose=0
                                )

            self.ex_2_model.fit(x[~index], w[~index],
                                batch_size=100,
                                epochs=50,
                                callbacks=None,
                                verbose=0
                                )

            probs[index] = tf.squeeze(keras.activations.sigmoid(self.ex_1_model(x[index])))
            probs[~index] = tf.squeeze(keras.activations.sigmoid(self.ex_2_model(x[~index])))

            # probs = tf.squeeze(keras.activations.sigmoid(self.ex_model(x)))
            counter_probs = 1 - probs

            # calculate pseudo_outcomes
            pseudo_outcomes = (w / probs - (1 - w) / counter_probs) * y

            # 4 fit tau
            print('Training NN for tau_x')
            self.tau_model.fit(x, pseudo_outcomes,
                               batch_size=100,
                               epochs=50,
                               validation_data=None,
                               callbacks=None,
                               verbose=0
                               )

    def predict(self, x):
        if self.method == 'rf':
            predictions = self.tau_model.predict(x)

        elif self.method == 'lasso':
            x_poly_test = self.poly.fit_transform(x)
            predictions = self.tau_model.predict(x_poly_test)

        elif self.method == 'nn':
            x = tf.convert_to_tensor(x)
            predictions = np.array(self.tau_model(x)).squeeze()

        else:
            raise NotImplementedError('Base learner method not specified in predict')

        return predictions

In [69]:
pw_rf = PWLearner('lasso')
pw_rf.fit(X_train, Y_train, W_train)
predictions = pw_rf.predict(X_test)
((predictions - tau_test) ** 2).mean()
# rf: 30.529802728890644
# lasso: 16.03059004204301
# nn: 271.8425349295992

# 238.41087366274616

Fitting lasso for e_x
Fitting lasso for tau_x


16.03059004204301

# Class U-Learner

In [86]:
class ULearner:
    def __init__(self, method):
        self.method = method
        if method == 'rf':
            self.mux_model = RandomForestRegressor(n_estimators=100, max_depth=100, random_state=0)
            self.ex_model = RandomForestClassifier(n_estimators=100, max_depth=100, random_state=0)
            self.tau_model = RandomForestRegressor(n_estimators=100, max_depth=100, random_state=0)

        elif method == 'lasso':
            self.mux_model = LassoCV(cv=10, tol=1, random_state=0)
            self.ex_model = LogisticRegressionCV(cv=KFold(10), penalty='l1', solver='saga', tol=1, random_state=0)
            self.tau_model = LassoCV(cv=10, tol=1, random_state=0)
            self.poly = PolynomialFeatures(degree=3, interaction_only=False, include_bias=False)

        elif method == 'nn':
            self.mux_1_model = load_model('model_25')
            self.mux_2_model = load_model('model_25')
            self.ex_1_model = load_model('model_ex')
            self.ex_2_model = load_model('model_ex')
            self.tau_model = load_model('model_25')

        else:
            raise NotImplementedError('Base learner method not specified or typo')

    def compute_hats(self, x_train, y_train, w_train, x_test):
        # 1: fit mu_x
        print('Fitting random forest for mu_x')
        temp_mux = RandomForestRegressor(n_estimators=100, max_depth=100, random_state=0)
        temp_mux.fit(x_train, y_train)
        # 2: fit ex
        temp_ex = RandomForestClassifier(n_estimators=100, max_depth=100, random_state=0)
        temp_ex.fit(x_train, w_train)
        # residuals
        probs = temp_ex.predict_proba(x_test)[:, 1]
        mux_hat = temp_mux.predict(x_test)
        return mux_hat, probs


    def fit(self, x, y, w):

        if self.method == 'rf':
            if CF_FOLDS == 1:
                mux_hat, probs = self.compute_hats(x, y, w, x)

            else:
                # initialize
                mux_hat = np.zeros(700)
                probs = np.zeros(700)
                # cross-fitting
                stratified = StratifiedKFold(n_splits=2, shuffle=True, random_state=0)
                for train_index, test_index in stratified.split(x, w):
                    index = np.zeros(700, dtype=bool)
                    index[test_index] = 1
                    print('Fitting Classifier')
                    """
                    temp_ex = RandomForestClassifier(n_estimators=100, max_depth=100, random_state=0)
                    temp_ex.fit(x[index], w[index])
                    temp_mux = RandomForestRegressor(n_estimators=100, max_depth=100, random_state=0)
                    temp_mux.fit(x[index], y[index])
                    probs[~index] = temp_ex.predict_proba(x[~index])[:, 1]
                    mux_hat[~index] = temp_mux.predict(x[~index])
                    """
                    mux_hat[~index], probs[~index] =  self.compute_hats(x[index], y[index], w[index], x[~index] )


            """
            # 1: fit mu_x
            print('Fitting random forest for mu_x')
            temp_mux = RandomForestRegressor(n_estimators=100, max_depth=100, random_state=0)
            temp_mux.fit(x, y)
            # 2: fit ex
            temp_ex = RandomForestClassifier(n_estimators=100, max_depth=100, random_state=0)
            temp_ex.fit(x, w)
            # residuals
            probs = temp_ex.predict_proba(x)[:, 1]
            mux_hat = temp_mux.predict(x)
            """
            residuals = (y - mux_hat) / (w - probs)
            # 3: fit tau
            print('Fitting random forest for tau_x')
            self.tau_model.fit(x, residuals)

            """
            # 2: fit mu_x
            print('Fitting random forest for mu_x')
            self.mux_model.fit(x, y)

            # 3: fit ex
            print('Fitting random forest for e_x')
            self.ex_model.fit(x, w)
            probs = self.ex_model.predict_proba(x)[:, 1]

            # calculate residuals
            residuals = (y - self.mux_model.predict(x)) / (w - probs)

            # 4 fit tau
            print('Fitting random forest for tau_x')
            self.tau_model.fit(x, residuals)
            """

        elif self.method == 'lasso':
            x_poly_train = self.poly.fit_transform(x)

            # 2: fit mu_x
            print('Fitting lasso for mu_x')
            self.mux_model.fit(x_poly_train, y)

            # 3: fit ex
            print('Fitting lasso for e_x')
            self.ex_model.fit(x_poly_train, w)
            probs = self.ex_model.predict_proba(x_poly_train)[:, 1]

            # calculate pseudo_outcomes
            residuals = (y - self.mux_model.predict(x_poly_train)) / (w - probs)

            # 4 fit tau
            print('Fitting lasso for tau_x')
            self.tau_model.fit(x_poly_train, residuals)

        elif self.method == 'nn':
            # to tensor
            x = tf.convert_to_tensor(x)
            y = tf.convert_to_tensor(y)
            w = tf.convert_to_tensor(w)

            # cross-fitting
            index = np.zeros(len(x), dtype=bool)
            train_ind = np.random.choice(len(x), int(len(x) / 2), replace=False)
            index[train_ind] = 1

            probs = np.zeros(len(x), )
            mu_x_predictions = np.zeros(len(x), )

            # 1: fit mu_x
            print('Training NN for mu_x')
            self.mux_1_model.fit(x[index], y[index],
                                 batch_size=100,
                                 epochs=50,
                                 callbacks=None,
                                 verbose=0
                                 )

            self.mux_2_model.fit(x[~index], y[~index],
                                 batch_size=100,
                                 epochs=50,
                                 callbacks=None,
                                 verbose=0
                                 )

            # 3: fit ex
            print('Training NN for e_x')
            self.ex_1_model.fit(x[index], w[index],
                                batch_size=100,
                                epochs=50,
                                callbacks=None,
                                verbose=0
                                )

            self.ex_2_model.fit(x[~index], w[~index],
                                batch_size=100,
                                epochs=50,
                                callbacks=None,
                                verbose=0
                                )

            probs[~index] = tf.reshape(keras.activations.sigmoid(self.ex_2_model(x[~index])), len(x[~index], ))
            probs[index] = tf.reshape(keras.activations.sigmoid(self.ex_1_model(x[index])), len(x[index], ))

            mu_x_predictions[index] = tf.reshape(self.mux_1_model(x[index]), (len(x[index]),))
            mu_x_predictions[~index] = tf.reshape(self.mux_2_model(x[~index]), (len(x[~index]),))

            """

            probs = tf.reshape(keras.activations.sigmoid(self.ex_model(x)), len(x, ))

            # calculate pseudo_outcomes
            mu_x_predictions = tf.reshape(self.mux_model(x), (len(x),))

            """
            residuals = (y - mu_x_predictions) / (w - probs)

            # 4 fit tau
            print('Training NN for tau_x')
            self.tau_model.fit(x, residuals,
                               batch_size=100,
                               epochs=50,
                               validation_data=None,
                               callbacks=None,
                               verbose=0
                               )

    def predict(self, x):

        if self.method == 'rf':
            predictions = self.tau_model.predict(x)

        elif self.method == 'lasso':
            x_poly_test = self.poly.fit_transform(x)
            predictions = self.tau_model.predict(x_poly_test)

        elif self.method == 'nn':
            # to tensor
            x = tf.convert_to_tensor(x)
            # predict
            predictions = np.reshape(self.tau_model(x), (len(x),))

        else:
            raise NotImplementedError('Base learner method not specified in predict')

        return predictions

In [93]:
u_rf = ULearner('nn')
u_rf.fit(X_train, Y_train, W_train)
predictions = u_rf.predict(X_test)
np.sqrt((((predictions - tau_test) ** 2).mean()))
# rf: 30.921286420155806
# lasso: 7.6762472449663495
# nn: 51.10236987028663

# 31.033895213307428
# 31.033895213307698

Training NN for mu_x
Training NN for e_x
Training NN for tau_x


2.086929293352275

In [88]:
CF_FOLDS = 2

try stratifiedfolds

In [55]:
# TODO: THATS IT!!!!!
w_hats = np.zeros(700)

stratified = StratifiedKFold(n_splits=4, shuffle=True, random_state=0)
for train_index, test_index in stratified.split(X_train, W_train):
    index = np.zeros(700, dtype=bool)
    index[test_index] = 1
    print('Fitting Classifier')
    temp_model = RandomForestClassifier(n_estimators=100, max_depth=100, random_state=0)
    temp_model.fit(X_train[index], W_train[index])
    w_hats[~index] = temp_model.predict_proba(X_train[~index])[:, 1]

Fitting Classifier
Fitting Classifier
Fitting Classifier
Fitting Classifier


In [58]:
stratified = StratifiedKFold(n_splits=4, shuffle=True, random_state=0)
for train_index, test_index in stratified.split(X_train, W_train):
    index = np.zeros(700, dtype=bool)
    index[test_index] = 1
    print(index)
    print(~index)
    print(tf.convert_to_tensor(index))

[ True  True  True False  True  True False  True  True False False False
 False False False False False False False False False  True False False
 False False False False  True  True False False  True False False False
 False False False False False False False  True  True  True False False
  True  True  True False False False False False  True False False False
 False  True False False False False  True  True  True False  True False
 False False False False False  True False  True False False False False
 False  True False  True False False False False False False False False
 False False False False  True False False False False  True  True  True
 False False False  True  True  True  True False False  True False False
 False False False  True False False False  True False False  True  True
  True False False False False False False False False False  True False
 False False  True False False False  True False False False False False
  True  True False False False False False  True Fa

In [56]:
w_hats

array([0.51, 0.32, 0.79, 0.46, 0.74, 0.38, 0.52, 0.54, 0.36, 0.58, 0.63,
       0.29, 0.51, 0.37, 0.57, 0.21, 0.74, 0.4 , 0.55, 0.56, 0.77, 0.5 ,
       0.31, 0.38, 0.6 , 0.14, 0.35, 0.43, 0.43, 0.62, 0.45, 0.72, 0.54,
       0.46, 0.68, 0.52, 0.57, 0.39, 0.76, 0.38, 0.54, 0.37, 0.49, 0.46,
       0.44, 0.28, 0.46, 0.51, 0.31, 0.56, 0.49, 0.54, 0.41, 0.53, 0.41,
       0.48, 0.81, 0.32, 0.48, 0.41, 0.37, 0.35, 0.74, 0.38, 0.47, 0.64,
       0.5 , 0.61, 0.74, 0.73, 0.54, 0.26, 0.38, 0.59, 0.36, 0.51, 0.61,
       0.48, 0.48, 0.39, 0.53, 0.56, 0.36, 0.29, 0.69, 0.47, 0.54, 0.38,
       0.35, 0.54, 0.33, 0.4 , 0.29, 0.48, 0.56, 0.33, 0.58, 0.29, 0.31,
       0.36, 0.24, 0.44, 0.51, 0.64, 0.26, 0.61, 0.43, 0.55, 0.28, 0.63,
       0.51, 0.43, 0.27, 0.64, 0.59, 0.53, 0.64, 0.34, 0.52, 0.63, 0.26,
       0.64, 0.43, 0.52, 0.48, 0.73, 0.47, 0.61, 0.54, 0.5 , 0.35, 0.53,
       0.37, 0.53, 0.34, 0.11, 0.36, 0.41, 0.74, 0.58, 0.37, 0.37, 0.41,
       0.41, 0.55, 0.3 , 0.36, 0.32, 0.55, 0.61, 0.

In [51]:
w_hats

array([0.56, 0.46, 0.68, 0.7 , 0.71, 0.31, 0.75, 0.54, 0.43, 0.43, 0.4 ,
       0.69, 0.32, 0.53, 0.57, 0.26, 0.59, 0.35, 0.64, 0.45, 0.56, 0.71,
       0.64, 0.3 , 0.47, 0.44, 0.32, 0.48, 0.46, 0.68, 0.54, 0.56, 0.43,
       0.53, 0.35, 0.46, 0.52, 0.58, 0.32, 0.36, 0.31, 0.41, 0.76, 0.39,
       0.51, 0.37, 0.62, 0.74, 0.53, 0.58, 0.41, 0.32, 0.61, 0.58, 0.44,
       0.22, 0.73, 0.68, 0.29, 0.89, 0.32, 0.6 , 0.3 , 0.35, 0.34, 0.76,
       0.74, 0.68, 0.54, 0.65, 0.49, 0.39, 0.55, 0.36, 0.37, 0.44, 0.47,
       0.6 , 0.41, 0.62, 0.42, 0.41, 0.43, 0.41, 0.39, 0.36, 0.33, 0.51,
       0.48, 0.36, 0.33, 0.55, 0.61, 0.43, 0.48, 0.53, 0.56, 0.47, 0.58,
       0.38, 0.25, 0.6 , 0.43, 0.35, 0.45, 0.81, 0.67, 0.68, 0.32, 0.34,
       0.76, 0.41, 0.28, 0.62, 0.39, 0.46, 0.74, 0.42, 0.51, 0.82, 0.28,
       0.69, 0.27, 0.41, 0.58, 0.55, 0.48, 0.56, 0.47, 0.61, 0.26, 0.59,
       0.57, 0.55, 0.39, 0.49, 0.54, 0.48, 0.54, 0.45, 0.56, 0.52, 0.78,
       0.45, 0.7 , 0.46, 0.29, 0.49, 0.43, 0.53, 0.

In [42]:
index

array([False, False, False,  True, False, False,  True, False, False,
       False,  True,  True,  True, False,  True, False,  True, False,
        True,  True,  True, False,  True, False,  True,  True, False,
       False, False, False, False, False, False,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True, False, False,
       False,  True,  True, False, False, False,  True,  True, False,
        True,  True, False,  True,  True,  True,  True, False,  True,
        True, False, False, False, False, False, False, False, False,
        True, False, False,  True,  True, False,  True, False,  True,
        True, False,  True,  True, False,  True, False, False, False,
        True, False,  True,  True,  True,  True,  True,  True,  True,
       False, False,  True,  True, False,  True, False, False, False,
        True,  True,  True, False, False, False, False, False, False,
       False,  True, False,  True, False,  True, False,  True,  True,
       False, False,

In [29]:
index = np.zeros(700, dtype=bool)

False

# END

In [None]:
w = np.zeros((700,))

In [None]:
lol = load_model('model_ex')

In [None]:
lol.fit(X_train, w, batch_size=100, epochs=400, callbacks=callback, validation_split=0.3, verbose=1)

In [None]:
keras.activations.sigmoid(lol.predict(X_test))

Try out some stuff

In [None]:
W_train

In [None]:
X_train

In [None]:
classifier = RandomForestClassifier(n_estimators=1000, max_features=0.3, random_state=0)

In [None]:
classifier.fit(X_train, W_train)

In [None]:
classifier.predict_proba(X_test)

In [None]:
classifier.predict_proba(X_train)

In [None]:
# CROSS-FITTING WILL PROBABLY SOLVE THE PROBLEM!!

In [None]:
classifier.predict(X_train)

In [None]:
W_train

In [None]:
np.ones(100, dtype=bool)

In [None]:
import torch

In [None]:
pred_mask = torch.zeros(100, dtype=bool)
pred_mask

In [None]:
pred_mask

In [None]:
~pred_mask

In [None]:
rf = RandomForestRegressor()

In [None]:
hasattr(rf, 'predict')

In [None]:
rf_class = RandomForestClassifier()

In [None]:
hasattr(rf_class, 'train')

In [None]:
lasso = LassoCV()

In [None]:
hasattr(lasso, 'predict_proba')

MyModel

In [None]:
tf.keras.utils.set_random_seed(8953)

In [None]:
class MyModel(keras.Model):
    def __init__(
            self,
            input_dimension=FEATURE_DIMENSION,
            n_layers_1=3,
            n_layers_2=2,
            n_units_1=N_UNITS_FIRST_PART,
            n_units_2=N_UNITS_SECOND_PART,
            activation=NON_LINEARITY,
            regularizer=regularizers.L2(1e-4)):
        super().__init__()
        self.input_dimension = input_dimension
        self.n_layers_1 = n_layers_1
        self.n_layers_2 = n_layers_2
        self.n_units_1 = n_units_1
        self.n_units_2 = n_units_2
        self.activation = activation
        self.regularizer = regularizer

        self.dense1 = layers.Dense(units=self.n_units_1, activation=self.activation, name="layer1",
                                   kernel_regularizer=self.regularizer)
        self.dense2 = layers.Dense(units=self.n_units_1, activation=self.activation, name="layer2",
                                   kernel_regularizer=self.regularizer)
        self.dense3 = layers.Dense(units=self.n_units_1, activation=self.activation, name="layer3",
                                   kernel_regularizer=self.regularizer)
        self.dense4 = layers.Dense(units=self.n_units_2, activation=self.activation, name="layer4",
                                   kernel_regularizer=self.regularizer)
        self.dense5 = layers.Dense(units=self.n_units_2, activation=self.activation, name="layer5",
                                   kernel_regularizer=self.regularizer)
        self.dense6 = layers.Dense(units=1, activation='linear', name="layer6",
                                   kernel_regularizer=self.regularizer)

    def call(self, inputs):
        x = self.dense1(inputs)
        x = self.dense2(x)
        x = self.dense3(x)
        x = self.dense4(x)
        x = self.dense5(x)
        x = self.dense6(x)
        return x


In [None]:
model = MyModel()

In [None]:
model.compile(optimizer=keras.optimizers.legacy.Adam(learning_rate=LEARNING_RATE),  # Optimizer
              # Loss function to minimize
              loss=keras.losses.MeanSquaredError(),
              # List of metrics to monitor
              metrics=[keras.metrics.MeanSquaredError()],
              # weighted metrics
              weighted_metrics=[]
              )

In [None]:
model.fit(X_train, Y_train, epochs=4)

TRY

In [None]:
# 3 layers with 200 units (elu activation), 2 layers with 100 units (elu activations), 1 output layer (linear
# activation)
model1 = keras.Sequential([
    keras.Input(shape=(25,)),
    layers.Dense(units=200, activation="relu", name="layer1"),
    layers.Dense(units=200, activation="relu", name="layer2"),
    layers.Dense(units=200, activation="relu", name="layer3"),
    layers.Dense(units=100, activation="relu", name="layer4"),
    layers.Dense(units=100, activation="relu", name="layer5"),
    layers.Dense(units=1, activation="linear", name="layer6"),

], name="model_25")

# compile the model
model1.compile(
    optimizer=keras.optimizers.legacy.Adam(learning_rate=LEARNING_RATE),  # Optimizer
    # Loss function to minimize
    loss=keras.losses.MeanSquaredError(),
    # List of metrics to monitor
    metrics=[keras.metrics.MeanSquaredError()],
    # weighted metrics
    weighted_metrics=[]
)


In [None]:
model1

In [None]:
model1.summary()

In [None]:
model2 = tf.keras.models.clone_model(model1)

In [None]:
model2.summary()

In [None]:
model1.fit(X_train, Y_train, epochs=10)

In [None]:
model2.compile(
    optimizer=keras.optimizers.legacy.Adam(learning_rate=LEARNING_RATE),  # Optimizer
    # Loss function to minimize
    loss=keras.losses.MeanSquaredError(),
    # List of metrics to monitor
    metrics=[keras.metrics.MeanSquaredError()],
    # weighted metrics
    weighted_metrics=[]
)

In [None]:
model2.fit(X_train, Y_train, epochs=10)

In [None]:
def clone_model_regression(model):
    cloned_model = tf.keras.models.clone_model(model)
    cloned_model.compile(
        optimizer=keras.optimizers.legacy.Adam(learning_rate=LEARNING_RATE),
        # Loss function to minimize
        loss=keras.losses.MeanSquaredError(),
        # List of metrics to monitor
        metrics=[keras.metrics.MeanSquaredError()],
        # weighted metrics
        weighted_metrics=[]
    )
    return cloned_model

In [None]:
cloned_model = clone_model_regression(model1)

In [None]:
cloned_model.fit(X_train, Y_train, epochs=10)

In [None]:
def clone_model_classification(model):
    cloned_model = tf.keras.models.clone_model(model)
    cloned_model.compile(
        optimizer=keras.optimizers.legacy.Adam(learning_rate=LEARNING_RATE),
        # Loss function to minimize
        loss=keras.losses.MeanSquaredError(),
        # List of metrics to monitor
        metrics=[keras.metrics.MeanSquaredError()],
        # weighted metrics
        weighted_metrics=[]
    )
    return cloned_model

In [None]:
from HelperFuctions import *

In [None]:
model_nn = keras.Sequential([
    keras.Input(shape=(25,)),
    layers.Dense(units=200, activation="relu", name="layer1"),
    layers.Dense(units=200, activation="relu", name="layer2"),
    layers.Dense(units=200, activation="relu", name="layer3"),
    layers.Dense(units=100, activation="relu", name="layer4"),
    layers.Dense(units=100, activation="relu", name="layer5"),
    layers.Dense(units=1, activation="linear", name="layer6"),
], name="model_sequential")

In [None]:
model_nn_1 = keras.Sequential([
    keras.Input(shape=(26,)),
    layers.Dense(units=200, activation="relu", name="layer1"),
    layers.Dense(units=200, activation="relu", name="layer2"),
    layers.Dense(units=200, activation="relu", name="layer3"),
    layers.Dense(units=100, activation="relu", name="layer4"),
    layers.Dense(units=100, activation="relu", name="layer5"),
    layers.Dense(units=1, activation="linear", name="layer6"),
], name="model_sequential_1")

In [None]:
model_mu = clone_model_regression(model_nn)

In [None]:
model_ex = clone_model_classification(model_nn)

In [None]:
model_mu.fit(X_train, Y_train, epochs=10)

In [None]:
from NeuralNetworks import *