# Estimating Average Treatment Effects with ML

<center>
<img 
  src="../assets/double_ml.png" 
  alt="Confounding Relationships" 
  style="width:300px;height:auto;"
> 

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

# Plot styling: apply the seaborn theme and palette first, then layer the
# matplotlib overrides on top (set_style resets rcParams, so order matters).
sns.set_style("whitegrid")
sns.set_palette('viridis')
plt.rcParams.update({
    'axes.spines.top': False,
    'axes.spines.right': False,
    'font.family': 'monospace',
})

## Double ML
from doubleml import DoubleMLData, DoubleMLPLR
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.calibration import CalibratedClassifierCV

In [None]:
# Read the pickled observational dataset.
# NOTE: unpickling can execute arbitrary code -- only load files you trust.
observational_df = pd.read_pickle('../data/observational_df.pkl')

# Outcome column, and the covariates: every column that is neither the
# outcome nor the treatment indicator.
target_outcome = 'converted'
customer_features = (
    observational_df.columns
    .drop(['converted', 'upsell_marketing'])
    .to_list()
)

print('Customer Features: ', customer_features)

observational_df.head(5)

## Causal Assumptions
All causal models share the following data assumptions

<br>
<br>

<center>
<img 
  src="../assets/causal_assumptions.png" 
  alt="Causal Assumptions" 
  style="width:750px;height:auto;"
> 

<br>
<br>
<br>

## Inverse Propensity Score Weighting

In [None]:
# A propensity score is P(treatment = 1 | customer features), so the model has to
# be trained on the treatment indicator ('upsell_marketing'), NOT on the outcome.
# (Fitting on `target_outcome` would yield conversion probabilities, which the
# later cells would then misuse as treatment probabilities when building weights.)
propensity_logistic = LogisticRegression(random_state=42)
propensity_logistic.fit(
    observational_df[customer_features],
    observational_df['upsell_marketing'])

# predict_proba returns one column per class, ordered as in `classes_`.
# Clip away from 0 and 1 so the inverse weights 1/p stay bounded.
propensity_scores = (
    propensity_logistic
    .predict_proba(observational_df[customer_features])
    .clip(min=0.01, max=0.99)
)

print(f'Columns: {propensity_logistic.classes_}')
propensity_scores[0:5]

In [None]:
def _with_ipw(scored):
    """Return `scored` plus an `ipw` column: 1 / P(the arm the customer was in).

    Customers with upsell_marketing == 0 get 1/prob_t0, those with
    upsell_marketing == 1 get 1/prob_t1; any other value is left as-is by
    `case_when`.
    """
    treatment = scored['upsell_marketing']
    inverse_weights = treatment.case_when([
        (treatment == 0, 1 / scored['prob_t0']),
        (treatment == 1, 1 / scored['prob_t1']),
    ])
    return scored.assign(ipw=inverse_weights)


# Attach both class probabilities to each customer, then derive the weight
weighted_df = (
    observational_df
    .assign(prob_t0=propensity_scores[:, 0], prob_t1=propensity_scores[:, 1])
    .pipe(_with_ipw)
)

weighted_df.head()

In [None]:
# Overlap (positivity) diagnostic: compare the propensity-score distribution of
# treated vs. untreated customers.
fig, ax = plt.subplots(figsize=(10, 6))
ax.set(
    title='Treatment Overlap Check',
    ylabel='Proportion of Customers',
    xlabel='Propensity Score\nP(Upsell Marketing = 1)'
    )

# Bin over the full probability range: values outside `binrange` are silently
# left out of the histogram, and high-propensity customers are exactly the ones
# an overlap check must not hide. 50 bins over (0, 1) gives a bin width of 0.02.
overlap_hist_kwargs = dict(stat='proportion', binrange=(0, 1), bins=50, ax=ax)

sns.histplot(
    weighted_df.query("upsell_marketing==1")["prob_t1"],
    alpha=0.5, label="Upsell Marketing", color='tab:blue',
    **overlap_hist_kwargs)

sns.histplot(
    weighted_df.query("upsell_marketing==0")["prob_t1"],
    alpha=0.3, label="Non-Upsell", color='tab:orange',
    **overlap_hist_kwargs)

# Trailing semicolon suppresses the Legend repr in the cell output
ax.legend();

In [None]:
def _ipw_mean_conversion(arm_df):
    """Inverse-propensity-weighted mean of `converted` within one treatment arm."""
    return np.average(arm_df['converted'], weights=arm_df['ipw'])


# Split customers by the treatment they actually received
received_upsell = weighted_df['upsell_marketing'] == 1
received_no_upsell = weighted_df['upsell_marketing'] == 0
upsell_df = weighted_df[received_upsell]
non_upsell_df = weighted_df[received_no_upsell]

# Weighted estimates of the potential outcomes Y(1) and Y(0)
y_1 = _ipw_mean_conversion(upsell_df)
y_0 = _ipw_mean_conversion(non_upsell_df)

In [None]:
# Report both potential-outcome estimates and their difference (the ATE)
ate_report_lines = [
    f'Y(1) Estimate: {y_1:.2%}',
    f'Y(0) Estimate: {y_0:.2%}',
    f'ATE [Y(1) - Y(0)]: {y_1 - y_0:.2%}',
]
print('\n'.join(ate_report_lines))

## Double Machine Learning Model Families

The `doubleml` package has a number of models that can be used for various causal effect estimation tasks, based on the assumed causal mechanisms present in the observational data.

All available model types are listed in their [model documentation](https://docs.doubleml.org/stable/guide/models.html)

We will use the PLR model, which is the most common model when we have confounding due to customer features. The causal diagram for this model is shown below.

<br>
<br>

<center>
<img 
  src="../assets/plr_model.png" 
  alt="Confounding Relationships" 
  style="width:550px;height:auto;"
> 

### Creating DoubleML Datasets

In [None]:
# Wrap the observational frame for DoubleML, declaring which columns play the
# outcome (y), treatment (d) and covariate (x) roles.
dml_data = DoubleMLData(
    data=observational_df,
    y_col=target_outcome,
    d_cols='upsell_marketing',
    x_cols=customer_features,
    use_other_treat_as_covariate=False,
)

print(dml_data)

### Defining the Various ML Models

<br>
<br>

<center>
<img 
  src="../assets/double_ml_process.png" 
  alt="Double ML Process" 
  style="width:750px;height:auto;"
> 

<br>
<br>
<br>

In [None]:
# Specify the model components
## Seed NumPy's global RNG for reproducibility
np.random.seed(314)

# Learners handed to the PLR model:
#   ml_l -> outcome model, ml_m -> treatment model
outcome_model = LinearRegression()
treatment_model = LinearRegression()

# DML model
dml_model = DoubleMLPLR(
    obj_dml_data=dml_data,
    ml_l=outcome_model,
    ml_m=treatment_model,
    n_folds=5,
)

In [None]:
# Fit the model
# (the trailing semicolon suppresses the notebook echo of fit()'s return value)
dml_model.fit();

In [None]:
# View treatment effect estimates
# Effects, standard errors and CI bounds are shown as percentages;
# the test statistic and p-value as plain decimals.
summary_formats = {
    'coef': '{:.2%}',
    'std err': '{:.2%}',
    't': '{:,.2f}',
    'P>|t|': '{:,.4f}',
    '2.5 %': '{:.2%}',
    '97.5 %': '{:.2%}',
}

dml_model.summary.style.format(summary_formats)

## Closing Remarks

Why hasn't causal ML taken over the world? 

- We have highly efficient methods for a wide range of estimation problems
- State of the art techniques are still relatively new, especially advanced Double ML methods
- It's complicated
    - ML, semi-parametric statistical theory, probabilistic graphical models, matrix calculus, ... 🤯

<br>
<br>
<br>
<center>
<img 
  src="../assets/complicated.png" 
  alt="It's complicated" 
  style="width:300px;height:auto;"
> 

<br>
<br>

## Is AGI Coming to Get Us?

<br>
<br>
<br>

The good news is that since causal ML requires deep understanding of business processes and reasoning about the underlying causal mechanisms, it is safe from the AI takeover

<br>
<br>
<br>
<center>
<img 
  src="../assets/safe_from_agi.png" 
  alt="AGI is here" 
  style="width:300px;height:auto;"
> 

<br>
<br>