# Causal Modelling with DoWhy and EconML

# Imports

In [None]:
import numpy as np
import pandas as pd

from IPython.display import Image, display
import matplotlib.pyplot as plt

import sklearn
from sklearn.linear_model import LassoCV
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

import econml
from econml.dml import LinearDML, CausalForestDML
from econml.cate_interpreter import SingleTreeCateInterpreter

from dowhy import CausalModel

import shap

# Read data

In [None]:
df = pd.read_csv('../data/causal_df.csv')

In [None]:
df.head()

# Creating Causal Graph

In [None]:
# Define estimator inputs
treatment = "ice_cream_sold"  # outcome of interest
outcome = "maximum_feels_like_temperature"  # intervention, or treatment
covariates = ["rainfall", "sunshine_td", "avg_ice_cream_price", "cpih"]  # confounders
features = ["weather_region_cd", "day_of_week", "month"]  # confounders

In [None]:
X = df[covariates]
Y = df[outcome]
T = df[treatment]
W = df[features]

In [None]:
Y_train, Y_test, T_train, T_test, X_train, X_test, W_train, W_test = train_test_split(Y, T, X, W, test_size=.2)

# Estimate Causal Effects

In [None]:
est = CausalForestDML(model_y=RandomForestRegressor(criterion='absolute_error'),
                model_t=RandomForestRegressor(min_samples_leaf=10, criterion='absolute_error'),
                discrete_treatment=False,
                cv=6)
est.fit(Y, T, X=X, W=W
      , cache_values=True)
te_pred = est.effect(X_test)
lb, ub = est.effect_interval(X_test, alpha=0.01)

In [None]:
%matplotlib inline

plt.figure(figsize=(10,6))
plt.scatter(Y_test, te_pred, label='DML default')
plt.ylabel('Treatment Effect')
plt.xlabel('Temperature')
plt.legend()
plt.show()

In [None]:
%matplotlib inline

shap_values = est.shap_values(X)
shap.plots.beeswarm(shap_values['maximum_feels_like_temperature']['ice_cream_sold'])

In [None]:
# Fit a shallow (depth-2) tree interpreter on the estimator's effects over X.
intrp = SingleTreeCateInterpreter(
    max_depth=2,
    include_model_uncertainty=True,
)
intrp.interpret(cate_estimator=est, X=X)