## Econ ML Validation

**Authors:** Evan Flack and Amar Venugopal

**Description:** This notebook provides an example use-case for calibration scores for CATE models. Currently, it implements two scores: a linear regression score (Chernozhukov et al., 2022) and a calibration score (Dwivedi et al., 2020)

In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.base import clone
import joblib

from datasets import fetch_data_generator
from myflaml import auto_reg, auto_clf
# from validation import DRLinear, cal_scorer
# from DRlinear import DRLinear
# from cal_scorer import cal_scorer

In [3]:
# Notebook options
# Set as true if you have already pre-hyper-tuned the nuisance and/or dr learner models
pre_tuned_n = True
pre_tuned_dr = True

### Prep Data

#### Semi-synthetic data on 401k savings

In [4]:
## For semi-synthetic data generation
data = '401k'
semi_synth = False # Whether true outcome y should be replaced by a fake outcome from a known CEF
simple_synth = True # Whether the true CEF of the fake y should be simple or fitted from data
max_depth = 2 # max depth of random forest during for semi-synthetic model fitting
scale = .2 # magnitude of noise in semi-synthetic data

np.random.seed(712)
def simple_true_cef(D, X): # simple CEF of the outcome for semi-synthetic data
    return .5 * np.array(X)[:, 1] * D + np.array(X)[:, 1]


get_data, abtest, true_cef, true_cate = fetch_data_generator(data=data, semi_synth=semi_synth,
                                                             simple_synth=simple_synth,
                                                             scale=scale, true_f=simple_true_cef,
                                                             max_depth=max_depth)
X, D, y, groups = get_data()

In [5]:
# Split into training (X), validation (Xval), and test (Xtest) sub-samples
X, Xval, D, Dval, y, yval = train_test_split(X, D, y, train_size=.6, shuffle=True, random_state=123)
Xval, Xtest, Dval, Dtest, yval, ytest = train_test_split(Xval, Dval, yval, train_size=.5, shuffle=True, random_state=123)

### Tune Nuisance Models

In [6]:
time_budget = 120
groups = None
n_splits = 5
split_type = 'auto'
verbose = 0

In [None]:
# model_reg = auto_reg(X, y, groups=groups, n_splits=n_splits, split_type=split_type,
#                          verbose=verbose, time_budget=time_budget)

In [None]:
# joblib.dump([model_t(), model_reg(), model_reg_zero(), model_reg_one()], 'nuisance.jbl')

In [7]:
# Tune hyper-parameters/find best prediction method (restricted here to xgboost)
if pre_tuned_n:
    mt, mreg, mreg_zero, mreg_one = joblib.load('nuisance.jbl')
    model_t = lambda: clone(mt)
    model_reg = lambda: clone(mreg)
    model_reg_zero = lambda: clone(mreg_zero)
    model_reg_one = lambda: clone(mreg_one)

else:
    model_reg_zero = auto_reg(X[D==0], y[D==0], groups=groups, n_splits=n_splits, split_type=split_type,
                              verbose=verbose, time_budget=time_budget)
    model_reg_one = auto_reg(X[D==1], y[D==1], groups=groups, n_splits=n_splits, split_type=split_type,
                             verbose=verbose, time_budget=time_budget)
    model_t = auto_clf(X, D, groups=groups, n_splits=n_splits, split_type=split_type,
                       verbose=verbose, time_budget=time_budget)

    joblib.dump([model_t(), model_reg_zero(), model_reg_one()], 'nuisance.jbl')

### Get OOS Predictions

In [12]:
model_t().fit(X, D).predict(Xval)

array([0.09591588, 0.68718326, 0.18263313, ..., 0.33677948, 0.18915652,
       0.33222175], dtype=float32)

In [None]:
# cv = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=123)
# splits = list(cv.split(X, D))
#
# n = X.shape[0]
# reg_preds = np.zeros(n)
# reg_zero_preds = np.zeros(n)
# reg_one_preds = np.zeros(n)
# reg_preds_t = np.zeros(n)
# reg_zero_preds_t = np.zeros(n)
# reg_one_preds_t = np.zeros(n)
#
# DX = np.column_stack((D, X))
# for train, test in splits:
#     reg_zero = model_reg_zero().fit(X.iloc[train][D[train]==0], y[train][D[train]==0])
#     reg_one = model_reg_one().fit(X.iloc[train][D[train]==1], y[train][D[train]==1])
#     reg_zero_preds_t[test] = reg_zero.predict(X.iloc[test])
#     reg_one_preds_t[test] = reg_one.predict(X.iloc[test])
#     reg_preds_t[test] = reg_zero_preds_t[test] * (1 - D[test]) + reg_one_preds_t[test] * D[test]
#
# prop_preds = cross_val_predict(model_t(), X, D, cv=splits)

### DR Meta-Learner

In [None]:
# Subset of features used for treatment effect heterogeneity
hetero_feats = ['inc']
Z, Zval, Ztest = X[hetero_feats], Xval[hetero_feats], Xtest[hetero_feats]

In [None]:
# # Calculate DR outcomes
# dr_preds = reg_one_preds_t - reg_zero_preds_t
# dr_preds += (y - reg_preds_t) * (D - prop_preds) / np.clip(prop_preds * (1 - prop_preds), .01, np.inf)

In [None]:
# # Predict DR outcomes using Z
if not pre_tuned_dr:
    model_final_fn = lambda Z, y: auto_reg(Z, y, groups=groups, n_splits=n_splits, split_type=split_type,
                                           verbose=verbose, time_budget=time_budget)
    drlearner_best = model_final_fn(Z, dr_preds)
    joblib.dump(drlearner_best(), 'drlearner.jbl')

drlearner = joblib.load('drlearner.jbl')
# drlearner = drlearner_best.fit(Z, dr_preds)

In [None]:
# # Fit nuisance models using the entire training sample
# reg_zero = model_reg_zero().fit(X[D == 0], y[D == 0])
# reg_one = model_reg_one().fit(X[D == 1], y[D == 1])
# reg_t = model_t().fit(X, D)

In [None]:
np.sort(np.array([0, 3, 2]))

In [None]:
import numpy as np
from statsmodels.api import OLS
from statsmodels.tools import add_constant
from sklearn.model_selection import cross_val_predict, StratifiedKFold

class DRtester:

    def __init__(
        self,
        reg_outcome,
        reg_t
    ):
        self.reg_outcome = reg_outcome
        self.reg_t = reg_t

    # Fits nusisance and CATE
    def fit(
        self,
        reg_cate,
        Xval,
        Dval,
        yval,
        Zval,
        Xtrain = None,
        Dtrain = None,
        ytrain = None,
        Ztrain = None):

        if (Xtrain is not None) & (Dtrain is not None) & (ytrain is not None) & (Ztrain is not None):
            reg_preds_train, prop_preds_train = self.fit_nuisance_cv(Xtrain, Dtrain, ytrain)
            self.dr_train = self.calculate_dr_outcomes(Dtrain, ytrain, reg_preds_train, prop_preds_train)

            reg_preds_val, prop_preds_val = self.fit_nuisance_train(Xtrain, Dtrain, ytrain, Xval, Dval)
            self.dr_val = self.calculate_dr_outcomes(Dval, yval, reg_preds_val, prop_preds_val)

            self.cate_preds_val = self.fit_cate_train(reg_cate, Ztrain, Zval)
            # self.cate_preds_train = self.fit_cate_cv(reg_cate, self.dr_train, Ztrain, Dtrain)

        else:
            reg_preds_val, prop_preds_val = self.fit_nuisance_cv(self.reg_outcome, self.reg_t, Xval, Dval, yval)
            self.dr_train = self.calculate_dr_outcomes(Dtrain, ytrain, reg_preds_val, prop_preds_val)
            self.cate_preds_val =  self.fit_cate_cv(reg_cate, self.dr_val, Zval, Dval)

        return self


    def evaluate_blp(self):
        self.res = OLS(self.dr_val, add_constant(self.cate_preds_val)).fit()

        return self

    # Fits nuisance in train, predicts in validation
    def fit_nuisance_train(self, Xtrain, Dtrain, ytrain, Xval, Dval):

        # Possible treatments (need to allow more than 2)
        tmts = np.sort(np.unique(D))
        n = Xval.shape[0]
        k = len(tmts)
        reg_preds = np.zeros((n, k))
        for i in range(k):
            reg_outcome_fitted = self.reg_outcome().fit(Xtrain[Dtrain == tmts[i]], ytrain[Dtrain == tmts[i]])
            reg_preds[:, i] = reg_outcome_fitted.predict(Xval)

        reg_t_fitted = self.reg_t().fit(Xtrain, Dtrain)
        prop_preds = reg_t_fitted.predict(Xval)

        return reg_preds, prop_preds

    # CV nuisance predictions
    def fit_nuisance_cv(self, X, D, y, n_splits = 5, shuffle = True, random_state = 712):

        cv = StratifiedKFold(n_splits=n_splits, shuffle=shuffle, random_state=random_state)
        splits = list(cv.split(X, D))

        tmts = np.sort(np.unique(D))
        n = X.shape[0]
        k = len(tmts)
        reg_preds = np.zeros((n, k))

        for i in range(k):
            for train, test in splits:
                reg_outcome_fitted = self.reg_outcome().fit(X.iloc[train][D[train] == tmts[i]], y[train][D[train] == tmts[i]])
                reg_preds[test, i] = reg_outcome_fitted.predict(X.iloc[test])

        prop_preds = cross_val_predict(model_t(), X, D, cv=splits)

        return reg_preds, prop_preds

    # Calculates DR outcomes
    def calculate_dr_outcomes(
            self,
            D,
            y,
            reg_preds,
            prop_preds
    ):

        reg_preds_chosen = np.sum(reg_preds * np.column_stack((D, 1 - D)), axis = 1)

        # Calculate doubly-robust outcome
        dr = reg_preds[:, 1] - reg_preds[:, 0]
        # Reiz representation, clip denominator at 0.01
        reisz = (D - prop_preds) / np.clip(prop_preds * (1 - prop_preds), .01, np.inf)
        dr += (y - reg_preds_chosen) * reisz

        return dr

    # Fits CATE in training, predicts in validation
    def fit_cate_train(self, reg_cate, Ztrain, Zval):

        reg_cate_fitted = reg_cate.fit(Ztrain, self.dr_train)
        cate_preds = reg_cate_fitted.predict(Zval)

        return cate_preds

    # CV prediction of CATEs
    def fit_cate_cv(reg_cate, dr, Z, D, shuffle = True, random_state = 712):

        cv = StratifiedKFold(n_splits=n_splits, shuffle=shuffle, random_state=random_state)
        splits = list(cv.split(Z, D))

        n = X.shape[0]
        cate_preds = np.zeros(n)

        for train, test in splits:
            reg_cate_fitted = reg_cate.fit(Z.iloc[train], dr[train])
            cate_preds[test] = reg_cate_fitted.predict(Z.iloc[test])

        return cate_preds

In [12]:
my_dr_test = DRtester(model_reg, model_t)
my_dr_test = my_dr_test.fit(drlearner, Xval, Dval, yval, Zval, X, D, y, Z)
my_dr_test = my_dr_test.evaluate_blp()
print(my_dr_test.res.params)

[597.11154873   0.9053472 ]


[597.11154873   0.9053472 ]


### Linear Regression Validation

In [None]:
my_drlinear = DRLinear(drlearner, reg_zero, reg_one, reg_t)
my_drlinear = my_drlinear.fit(Xval, Dval, yval, Zval)

print('Coefficient on CATE prediction:', round(my_drlinear.params[1], 3))
print('Standard Error:', round(my_drlinear.bse[1], 3))

### Calibration Validation

In [None]:
my_cal_scorer = cal_scorer(drlearner, reg_zero, reg_one, reg_t, 4)
res_cal = my_cal_scorer.score(Xval, Dval, yval, Zval, Ztest)

df_cal = pd.DataFrame({'gate': res_cal.gate, 'g_cate': res_cal.g_cate,
                       'se_gate': res_cal.se_gate})
df_cal['95_err'] = 1.96 * df_cal['se_gate']

In [None]:
df_cal.plot(
    kind='scatter',
    x='g_cate',
    y='gate',
    yerr='95_err',
    title=f"Calibration R^2 = {round(res_cal.r_squared_cal, 3)}"
)