In [None]:
!pip install econml

# EconML Tutorial: Meta-learners and Generalized Random Forest

In [None]:
# Load libraries
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression, LogisticRegression
from econml.metalearners import SLearner, TLearner, XLearner
from econml.grf import CausalForest

In [None]:
# Load the experiment dataset used to fit every model below.
# The later cells read the 'id', 'outcome' and 'treat' columns from this frame.
datos = pd.read_csv("experimento-rfm.csv")

In [None]:
# Role definition: outcome (y), treatment (T) and covariates (X)
y = datos['outcome']
T = datos['treat']
# Covariates are every column other than the identifier, outcome and treatment;
# categorical columns are one-hot encoded, dropping the first level of each.
X = pd.get_dummies(
    datos.drop(columns=['id', 'outcome', 'treat']),
    drop_first=True,
)

In [None]:
# Load the new dataset on which the fitted models are evaluated.
# The later cells read the 'Y0' and 'Y1' columns from this frame to score decisions.
nuevos = pd.read_csv("experimento-rfm-newdata.csv")

In [None]:
# Role definition
# Everything except the four covariates is kept aside; 'Y0' and 'Y1' are used
# later to score the treatment decisions.
nuevos_potential_outcomes = nuevos.drop(['recency','frequency','monetary','channel'],axis=1)
X2 = nuevos.drop(['id','Y0','Y1'],axis=1)
X2 = pd.get_dummies(X2,drop_first=True)
# One-hot encoding the two files independently can produce different (or
# differently ordered) dummy columns when a category level is absent from one
# of them. Align X2 to the training columns so each feature lines up
# positionally with what the models were fitted on; dummy columns missing from
# the new data are filled with 0.
X2 = X2.reindex(columns=X.columns, fill_value=0)

In [None]:
# Model 01: S-Learner with Linear Regression as the base algorithm
est1 = SLearner(overall_model=LinearRegression())
est1.fit(Y=y, T=T, X=X)
# Estimated treatment effect for each row of the new data
efecto1 = est1.effect(X=X2)

In [None]:
# Model 02: T-Learner with Linear Regression as the base algorithm
est2 = TLearner(models=LinearRegression())
est2.fit(Y=y, T=T, X=X)
# Estimated treatment effect for each row of the new data
efecto2 = est2.effect(X=X2)

In [None]:
# Model 03: X-Learner with Linear Regression as the base algorithm
est3 = XLearner(models=LinearRegression())
est3.fit(Y=y, T=T, X=X)
# Estimated treatment effect for each row of the new data
efecto3 = est3.effect(X=X2)

In [None]:
# Model 04: SLearner, Base Algorithm = Random Forest
# The forest is seeded (same seed as the CausalForest model) so the estimated
# effects, and the decisions derived from them, are reproducible across runs.
est4 = SLearner(overall_model=RandomForestRegressor(n_estimators=500, random_state=1234))
est4.fit(y, T, X=X)
# Estimated treatment effect for each row of the new data
efecto4=est4.effect(X2)

In [None]:
# Model 05: TLearner, Base Algorithm = Random Forest
# The forest is seeded (same seed as the CausalForest model) so the estimated
# effects, and the decisions derived from them, are reproducible across runs.
est5 = TLearner(models=RandomForestRegressor(n_estimators=500, random_state=1234))
est5.fit(y, T, X=X)
# Estimated treatment effect for each row of the new data
efecto5=est5.effect(X2)

In [None]:
# Model 06: XLearner, Base Algorithm = Random Forest
# The forest is seeded (same seed as the CausalForest model) so the estimated
# effects, and the decisions derived from them, are reproducible across runs.
est6 = XLearner(models=RandomForestRegressor(n_estimators=500, random_state=1234))
est6.fit(y, T, X=X)
# Estimated treatment effect for each row of the new data
efecto6=est6.effect(X2)

In [None]:
# Model 07: Generalized Random Forest (CausalForest from econml.grf)
est7 = CausalForest(random_state=1234)
# Note the different argument order compared with the meta-learners: (X, T, y)
est7.fit(X, T, y)
# Note that this estimator is queried with `predict` instead of `effect`;
# the result is flattened to one dimension.
efecto7 = np.ravel(est7.predict(X2))

In [None]:
# Gather the seven CATE estimates side by side, one column per model
# (columns are named 'efecto1' ... 'efecto7').
efectos = pd.DataFrame({
    f'efecto{k}': estimate
    for k, estimate in enumerate(
        [efecto1, efecto2, efecto3, efecto4, efecto5, efecto6, efecto7],
        start=1,
    )
})

In [None]:
# Pairwise (Pearson) correlation between the models' CATE estimates
correlation_matrix = efectos.corr(method="pearson")
correlation_matrix

In [None]:
# Decisions: an observation is assigned to treatment (1) when its predicted
# CATE is strictly positive, otherwise to control (0). One column per model,
# named 'dec1' ... 'dec7'.
df_binary = pd.DataFrame({
    f'dec{k}': (estimate > 0).astype(int)
    for k, estimate in enumerate(
        [efecto1, efecto2, efecto3, efecto4, efecto5, efecto6, efecto7],
        start=1,
    )
})

In [None]:
# Show the 0/1 treatment decisions, one column per model
df_binary

In [None]:
# Score decisions: for every model, pick Y1 where the decision is 1 and Y0
# where it is 0. Each result column is named '<decision column>_outcome'.
df_results = pd.DataFrame({
    f'{col}_outcome': np.where(
        df_binary[col] == 1,
        nuevos_potential_outcomes['Y1'],
        nuevos_potential_outcomes['Y0'],
    )
    for col in df_binary.columns
})

In [None]:
# Show the outcome selected (Y1 or Y0) for each observation under each model's decisions
df_results

In [None]:
# Report results: mean outcome obtained under each model's decisions,
# printed one model per line in model order.
for label, column in [
    ('Linear Regresion, S-Learner', 'dec1_outcome'),
    ('Linear Regresion, T-Learner', 'dec2_outcome'),
    ('Linear Regresion, X-Learner', 'dec3_outcome'),
    ('Random Forest, S-Learner', 'dec4_outcome'),
    ('Random Forest, T-Learner', 'dec5_outcome'),
    ('Random Forest, X-Learner', 'dec6_outcome'),
    ('Generalized Random Forest', 'dec7_outcome'),
]:
    print(label, df_results[column].mean())