In [None]:
!pip install econml

# EconML Tutorial: Meta-learners and Generalized Random Forest

In [2]:
# Load libraries
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression, LogisticRegression
from econml.metalearners import SLearner, TLearner, XLearner
from econml.grf import CausalForest

In [3]:
# Load the experiment dataset used to fit every CATE model in this notebook.
# The cells below read an 'id' column, the observed 'outcome' and the
# treatment indicator 'treat' from it; all remaining columns are covariates.
datos = pd.read_csv("experimento-rfm.csv")

In [5]:
# Role definition: observed outcome, treatment indicator and covariate matrix.
y, T = datos['outcome'], datos['treat']
# Everything except the identifier, the outcome and the treatment is a
# covariate; categorical covariates are one-hot encoded without their first level.
X = pd.get_dummies(
    datos.drop(columns=['id', 'outcome', 'treat']),
    drop_first=True,
)

In [6]:
# Load the new dataset on which the fitted models are applied. Besides the
# covariates it carries both potential outcomes per row ('Y0' and 'Y1'),
# which is what lets the treatment decisions be scored later on.
nuevos = pd.read_csv("experimento-rfm-newdata.csv")

In [8]:
# Role definition for the new data.
# Keep the identifier and the potential outcomes (Y0, Y1) for scoring decisions.
nuevos_potential_outcomes = nuevos.drop(['recency','frequency','monetary','channel'],axis=1)
# Covariates of the new data, aligned to the training design matrix X.
# Encoding the two files independently (each with drop_first=True) only yields
# matching columns when both contain exactly the same category levels;
# otherwise the dummy columns can differ in number, order or meaning, and the
# mismatch may go unnoticed if the estimators match features by position
# (NOTE(review): confirm how econml handles DataFrame column names).
# Encoding every level and reindexing on X.columns keeps exactly the columns
# used for training, in the same order; a level that was not a training dummy
# ends up as all zeros, i.e. it is treated as the training baseline.
X2 = nuevos.drop(['id','Y0','Y1'],axis=1)
X2 = pd.get_dummies(X2).reindex(columns=X.columns, fill_value=0)

In [9]:
# Model 01: S-Learner with a linear-regression base learner.
# NOTE(review): the efecto1 values shown further down are identical for every
# row, presumably because a purely additive linear model gives the treatment
# a single coefficient -- confirm before reading it as a heterogeneous effect.
base_learner = LinearRegression()
est1 = SLearner(overall_model=base_learner)
est1.fit(y, T, X=X)
efecto1 = est1.effect(X2)

In [11]:
# Model 02: T-Learner with a linear-regression base learner.
# Fitted on the experiment data, then used to estimate the CATE of every row
# of the new data.
base_learner = LinearRegression()
est2 = TLearner(models=base_learner)
est2.fit(y, T, X=X)
efecto2 = est2.effect(X2)

In [12]:
# Model 03: X-Learner with a linear-regression base learner.
# NOTE(review): in the rows displayed further down, efecto3 coincides with
# the T-Learner estimate (efecto2) -- worth pointing out to readers.
base_learner = LinearRegression()
est3 = XLearner(models=base_learner)
est3.fit(y, T, X=X)
efecto3 = est3.effect(X2)

In [13]:
# Model 04: S-Learner with a random-forest base learner (500 trees).
# random_state fixes the forest's internal randomness so the estimates are
# reproducible after a kernel restart; the seed is the same one passed to
# CausalForest in this notebook.
est4 = SLearner(overall_model=RandomForestRegressor(n_estimators=500, random_state=1234))
est4.fit(y, T, X=X)
efecto4 = est4.effect(X2)

In [14]:
# Model 05: T-Learner with a random-forest base learner (500 trees).
# random_state fixes the forest's internal randomness so the estimates are
# reproducible after a kernel restart; the seed is the same one passed to
# CausalForest in this notebook.
est5 = TLearner(models=RandomForestRegressor(n_estimators=500, random_state=1234))
est5.fit(y, T, X=X)
efecto5 = est5.effect(X2)

In [15]:
# Model 06: X-Learner with a random-forest base learner (500 trees).
# random_state fixes the forest's internal randomness so the estimates are
# reproducible after a kernel restart; the seed is the same one passed to
# CausalForest in this notebook.
est6 = XLearner(models=RandomForestRegressor(n_estimators=500, random_state=1234))
est6.fit(y, T, X=X)
efecto6 = est6.effect(X2)

In [16]:
# Model 07: Generalized Random Forest (CausalForest from econml.grf).
# Its calls differ from the meta-learner cells in two ways:
#   * fit receives the arguments in the order (X, T, y);
#   * the CATE estimates come from predict(), not effect().
est7 = CausalForest(random_state=1234)
est7.fit(X, T, y)
# Flatten the predictions to 1-D so they can be stored as a DataFrame column.
grf_predictions = est7.predict(X2)
efecto7 = grf_predictions.ravel()

In [17]:
# Gather the seven CATE estimates side by side, one column per model
# (efecto1 ... efecto7, numbered like the models).
efectos = pd.DataFrame({
    f'efecto{k}': cate
    for k, cate in enumerate(
        (efecto1, efecto2, efecto3, efecto4, efecto5, efecto6, efecto7),
        start=1,
    )
})

In [20]:
# Show the CATE estimates for the rows of the new data, one column per model.
efectos

Unnamed: 0,efecto1,efecto2,efecto3,efecto4,efecto5,efecto6,efecto7
0,14.287108,31.937770,31.937770,67.861242,70.481527,58.425125,52.497374
1,14.287108,-4.636770,-4.636770,19.499963,19.823260,11.017813,-6.891448
2,14.287108,-4.583262,-4.583262,-72.670078,-34.251847,-38.588727,-25.875260
3,14.287108,-14.564371,-14.564371,-36.318180,-57.177053,-40.479790,-34.932156
4,14.287108,47.715880,47.715880,48.436954,34.318051,34.339764,39.163785
...,...,...,...,...,...,...,...
79995,14.287108,6.887999,6.887999,-32.054606,-39.779716,-37.675757,-16.189181
79996,14.287108,-14.165316,-14.165316,-58.471612,-71.520585,-46.784765,-18.161320
79997,14.287108,52.711184,52.711184,44.461761,49.860828,54.557139,53.823518
79998,14.287108,12.519486,12.519486,2.748258,4.598546,12.547138,7.783462


In [21]:
# Decisions: a row is assigned to treatment (1) when the model's predicted
# CATE is strictly positive, and left untreated (0) otherwise.
df_binary = pd.DataFrame({
    f'dec{k}': (cate > 0).astype(int)
    for k, cate in enumerate(
        (efecto1, efecto2, efecto3, efecto4, efecto5, efecto6, efecto7),
        start=1,
    )
})

In [22]:
# Show the resulting 0/1 treatment decisions (1 = treat), one column per model.
df_binary

Unnamed: 0,dec1,dec2,dec3,dec4,dec5,dec6,dec7
0,1,1,1,1,1,1,1
1,1,0,0,1,1,1,0
2,1,0,0,0,0,0,0
3,1,0,0,0,0,0,0
4,1,1,1,1,1,1,1
...,...,...,...,...,...,...,...
79995,1,1,1,0,0,0,0
79996,1,0,0,0,0,0,0
79997,1,1,1,1,1,1,1
79998,1,1,1,1,1,1,1


In [23]:
# Score decisions: a row assigned to treatment (decision == 1) obtains its
# potential outcome Y1; any other row obtains Y0.
outcome_if_treated = nuevos_potential_outcomes['Y1']
outcome_if_control = nuevos_potential_outcomes['Y0']
df_results = pd.DataFrame({
    f'{col}_outcome': np.where(
        df_binary[col] == 1,
        outcome_if_treated,
        outcome_if_control,
    )
    for col in df_binary.columns
})

In [24]:
# Show the outcome each row obtains under each model's treatment decision.
df_results

Unnamed: 0,dec1_outcome,dec2_outcome,dec3_outcome,dec4_outcome,dec5_outcome,dec6_outcome,dec7_outcome
0,278.402952,278.402952,278.402952,278.402952,278.402952,278.402952,278.402952
1,150.178338,262.740277,262.740277,150.178338,150.178338,150.178338,262.740277
2,105.085218,155.506374,155.506374,155.506374,155.506374,155.506374,155.506374
3,93.463931,107.297260,107.297260,107.297260,107.297260,107.297260,107.297260
4,272.777810,272.777810,272.777810,272.777810,272.777810,272.777810,272.777810
...,...,...,...,...,...,...,...
79995,193.455755,193.455755,193.455755,224.971206,224.971206,224.971206,224.971206
79996,182.784641,302.352708,302.352708,302.352708,302.352708,302.352708,302.352708
79997,238.195534,238.195534,238.195534,238.195534,238.195534,238.195534,238.195534
79998,114.580145,114.580145,114.580145,114.580145,114.580145,114.580145,114.580145


In [25]:
# Report results: mean outcome obtained under each model's treatment policy.
report_labels = {
    'dec1_outcome': 'Linear Regresion, S-Learner',
    'dec2_outcome': 'Linear Regresion, T-Learner',
    'dec3_outcome': 'Linear Regresion, X-Learner',
    'dec4_outcome': 'Random Forest, S-Learner',
    'dec5_outcome': 'Random Forest, T-Learner',
    'dec6_outcome': 'Random Forest, X-Learner',
    'dec7_outcome': 'Generalized Random Forest',
}
for column, label in report_labels.items():
    print(label, df_results[column].mean())

Linear Regresion, S-Learner 211.1471877503306
Linear Regresion, T-Learner 219.7559443743736
Linear Regresion, X-Learner 219.7559443743736
Random Forest, S-Learner 217.67217336292538
Random Forest, T-Learner 217.51748868657648
Random Forest, X-Learner 219.18570145300063
Generalized Random Forest 220.75630229451434
