# Finding the difference between the S-learner and the T-learner

In [1]:
from sklearn.tree import DecisionTreeRegressor
from numpy.random import normal, uniform, seed
from numpy import exp, mean
from pandas import DataFrame

# Simulation settings shared by the cells below.
n = 20          # number of simulated observations
sd = 1          # scale of the normal draws used for z and for the noise in y
max_depth = 5   # depth limit passed to every DecisionTreeRegressor

# Coefficients of the data-generating process:
#   b_z_x - slope of z inside the logistic probability that x == 1
#   b_z_y - coefficient of z in the outcome y
#   b_x_y - coefficient of x in the outcome y
b_z_x, b_z_y, b_x_y = 5, 5, 2

# Seed NumPy's global generator so the random draws are repeatable.
seed(54321)

# 1. Generate the data

In [2]:
# Confounder z: n independent normal draws with scale sd.
z = normal(size=n, scale=sd)

# Treatment x: 0/1 integers obtained by comparing a uniform draw with the
# logistic function of b_z_x * z, so larger z makes x == 1 more likely.
treatment_prob = 1 / (1 + exp(-b_z_x * z))
x = (uniform(size=n) < treatment_prob).astype(int)

# Outcome y: linear in z and x plus independent normal noise with scale sd.
outcome_noise = normal(size=n, scale=sd)
y = b_z_y * z + b_x_y * x + outcome_noise

# Collect the three simulated variables into one frame for the cells below.
df = DataFrame(dict(z=z, x=x, y=y))

# 2. Calculate the difference in mean outcome y between the x=1 and x=0 groups

In [3]:
# Unadjusted contrast: mean of y over rows with x == 1 minus mean of y over
# rows with x == 0. No use is made of z here.
treated_y = df.loc[df["x"] == 1, "y"]
control_y = df.loc[df["x"] == 0, "y"]
treated_y.mean() - control_y.mean()

7.2363405261057805

# 3. S-learner

In [4]:
# S-learner: a single regression tree is fit on the treatment x and the
# covariate z together. The effect is then read off by predicting every row
# twice: once with x forced to 0 and once with x forced to 1.
X = df[["x", "z"]]
y = df["y"]
model = DecisionTreeRegressor(max_depth=max_depth)
model.fit(X, y)

# Copies of the data in which only the treatment column is overwritten;
# z and y keep their observed values.
df_do_0 = df.assign(x=0)
df_do_1 = df.assign(x=1)

predictions_0 = model.predict(df_do_0[["x", "z"]])
predictions_1 = model.predict(df_do_1[["x", "z"]])

# Row-wise difference between the two predictions; its average is printed
# as the ATE, followed by the individual differences.
effect_per_row = predictions_1 - predictions_0
print("ATE")
print(mean(effect_per_row))
print(effect_per_row)

ATE
0.0
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]

# 4. T-learner

In [5]:
# T-learner: fit one regression tree per treatment arm, predict both outcomes
# for every row with the arm-specific models, and average the difference.
#
# Each arm's model is given the covariate z only. Inside an arm the treatment
# x takes a single value (the arm is selected by it), so x carries no
# information for that model, and keeping it as a feature would mean scoring
# rows whose x value the model never saw while fitting.
covariates = ["z"]

df_0 = df.query("x==0")
df_1 = df.query("x==1")

# A tree cannot be fit on zero rows; fail early with a readable message.
assert not df_0.empty and not df_1.empty, "both treatment arms need at least one row"

# Outcome model for the x == 0 arm.
model_0 = DecisionTreeRegressor(max_depth=max_depth)
X_0 = df_0[covariates]
y_0 = df_0["y"]
model_0.fit(X_0, y_0)

# Outcome model for the x == 1 arm.
model_1 = DecisionTreeRegressor(max_depth=max_depth)
X_1 = df_1[covariates]
y_1 = df_1["y"]
model_1.fit(X_1, y_1)

# Score every row (from both arms) with each arm's model.
predictions_0 = model_0.predict(df[covariates])
predictions_1 = model_1.predict(df[covariates])

# Average of the row-wise differences is printed as the ATE, followed by the
# individual differences.
print("ATE")
print(mean(predictions_1 - predictions_0))
print(predictions_1 - predictions_0)

ATE
4.278751389023496
[ 3.24930881  2.61665433  3.56572722  6.09852166 15.18519343  3.74718771
  2.01039955  3.06142779  7.02389793  3.33348004  5.85945538  2.01039955
  1.93609629  2.26595439  2.61665433  2.61665433  3.24930881  5.80432186
  2.61665433  6.70773005]