In [None]:
import numpy as np
import pandas as pd
from dowhy import CausalModel
from xgboost import XGBRegressor
from sklearn.linear_model import LassoCV

# Data

In [None]:
# Load the dataset from fpl_df.csv; index_col=0 uses the file's first column as the row index.
data = pd.read_csv('fpl_df.csv', index_col=0)
# Display the loaded frame as the cell output.
data

In [None]:
# List every column name as a NumPy array to see what is available for the analysis.
data.columns.to_numpy()

In [None]:
# Columns used downstream: the treatment (`home`), the outcome (`event_points`)
# and the four covariates later declared as common causes.
analysis_columns = [
    'home',
    'event_points',
    'element_type',
    'creativity',
    'ict_index',
    'corners_and_indirect_freekicks_order',
]

# .copy() yields an independent frame, so later column assignments do not touch `data`.
my_data = data.loc[:, analysis_columns].copy()
my_data.info()

In [None]:
# Inspect the sorted distinct values of corners_and_indirect_freekicks_order
# before its NaNs are filled.
np.sort(my_data['corners_and_indirect_freekicks_order'].unique())

In [None]:
# Replace missing values in corners_and_indirect_freekicks_order with the constant 8.
# NOTE(review): 8 looks like a sentinel placed outside the observed values — confirm
# against the unique values inspected earlier.
order_col = 'corners_and_indirect_freekicks_order'
my_data[order_col] = my_data[order_col].fillna(8)

In [None]:
# Re-check dtypes and non-null counts after the fill.
my_data.info()

# Causal inference

In [None]:
# Covariates declared as common causes of both the treatment and the outcome.
confounders = 'element_type,creativity,ict_index,corners_and_indirect_freekicks_order'.split(',')

# Causal model: `home` is the treatment and `event_points` the outcome.
model = CausalModel(
    data=my_data,
    treatment='home',
    outcome='event_points',
    common_causes=confounders,
)

# Identify the estimand once; every estimator below receives it.
identified_estimand = model.identify_effect()

In [None]:
# Visualize the causal graph held by the dowhy model.
model.view_model()

# Naive estimator

In [None]:
# Mean event_points for each value of `home`, kept as a one-column DataFrame
# indexed by `home`.
home_vs_away_points = (
    my_data
    .groupby('home')['event_points']
    .mean()
    .to_frame()
)
home_vs_away_points

In [None]:
# Unadjusted difference in mean points: the home == 1 group minus the home == 0 group.
mean_points_home = home_vs_away_points.loc[1, 'event_points']
mean_points_away = home_vs_away_points.loc[0, 'event_points']
naive_estimate = mean_points_home - mean_points_away
naive_estimate

On average, players score 0.273221 more points in home games than in away games. This is a raw difference in group means and is not adjusted for any confounders.

# Linear regression

In [None]:
# Backdoor adjustment via linear regression, contrasting treatment value 1
# against control value 0.
linear_regression_kwargs = {
    "method_name": "backdoor.linear_regression",
    "control_value": 0,
    "treatment_value": 1,
}
linear_regression_estimate = model.estimate_effect(
    identified_estimand,
    **linear_regression_kwargs,
)
print(linear_regression_estimate)

# Meta Learner: T-Learner

In [None]:
# T-Learner from EconML, run through dowhy with an XGBRegressor passed as `models`.
# No extra fit-time arguments are supplied.
t_learner_params = {
    "init_params": {'models': XGBRegressor()},
    "fit_params": {},
}
t_learner_estimate = model.estimate_effect(
    identified_estimand,
    method_name="backdoor.econml.metalearners.TLearner",
    confidence_intervals=False,
    method_params=t_learner_params,
)
print(t_learner_estimate)

# Meta Learner: X-Learner

In [None]:
# X-Learner from EconML, run through dowhy with an XGBRegressor passed as `models`.
# No extra fit-time arguments are supplied.
x_learner_params = {
    "init_params": {'models': XGBRegressor()},
    "fit_params": {},
}
x_learner_estimate = model.estimate_effect(
    identified_estimand,
    method_name="backdoor.econml.metalearners.XLearner",
    confidence_intervals=False,
    method_params=x_learner_params,
)
print(x_learner_estimate)

# Double Machine Learner

In [None]:
# Double Machine Learning (EconML DML run through dowhy).
# XGBRegressor models are used for both the outcome and the treatment nuisance
# stages; LassoCV without an intercept is the final-stage model.
# NOTE(review): `home` is used as a 0/1 treatment elsewhere in this notebook —
# consider DML's `discrete_treatment=True` with a classifier for `model_t`.
dml_estimate = model.estimate_effect(
    identified_estimand,
    method_name="backdoor.econml.dml.DML",
    method_params={
        'init_params': {
            'model_y': XGBRegressor(),
            'model_t': XGBRegressor(),
            'model_final': LassoCV(fit_intercept=False),
            # Fix the seed: DML's cross-fitting splits depend on `random_state`,
            # so leaving it unset makes the estimate change between runs.
            'random_state': 0,
        },
        'fit_params': {},
    },
)
print(dml_estimate)

# Propensity Score Matching

In [None]:
# Propensity score matching, targeting the average treatment effect ("ate").
propensity_matching_estimate = model.estimate_effect(
    identified_estimand,
    method_name="backdoor.propensity_score_matching",
    target_units="ate",
    method_params={},
)

matching_value = propensity_matching_estimate.value
print("Causal Estimate is " + str(matching_value))

# Propensity Score Stratification

In [None]:
# Propensity score stratification, targeting the average treatment effect ("ate").
propensity_stratification_estimate = model.estimate_effect(
    identified_estimand,
    method_name="backdoor.propensity_score_stratification",
    target_units="ate",
    method_params={},
)

stratification_value = propensity_stratification_estimate.value
print("Causal Estimate is " + str(stratification_value))

# Inverse Propensity Score weighting

In [None]:
# Inverse propensity score weighting with the "ips_weight" scheme,
# targeting the average treatment effect ("ate").
ipw_params = {"weighting_scheme": "ips_weight"}
causal_estimate_ipw = model.estimate_effect(
    identified_estimand,
    method_name="backdoor.propensity_score_weighting",
    target_units="ate",
    method_params=ipw_params,
)
# Print the full estimate report first, then the point estimate on its own line.
print(causal_estimate_ipw)
print("Causal Estimate is " + str(causal_estimate_ipw.value))

# Estimates

In [None]:
# Side-by-side summary of every estimate computed above, rounded to 3 decimals.
# Dict insertion order fixes the order in which the lines are printed.
estimates = {
    'naive_estimate': naive_estimate,
    'linear_regression_estimate': linear_regression_estimate.value,
    't_learner_estimate': t_learner_estimate.value,
    'x_learner_estimate': x_learner_estimate.value,
    'dml_estimate': dml_estimate.value,
    'propensity_matching_estimate': propensity_matching_estimate.value,
    'propensity_stratification_estimate': propensity_stratification_estimate.value,
    'causal_estimate_ipw': causal_estimate_ipw.value,
}
for label, value in estimates.items():
    print(f'{label}: {np.round(value,3)}')