In [None]:
import numpy as np
import pandas as pd
from dowhy import CausalModel
from xgboost import XGBRegressor
from sklearn.linear_model import LassoCV

# Data

In [None]:
# Read the FPL dataset from disk; the first CSV column becomes the index.
csv_path = 'fpl_df.csv'
data = pd.read_csv(csv_path, index_col=0)
data

In [None]:
# Line plot of the ICT index rank against the row index.
ict_rank_series = data['ict_index_rank']
ict_rank_series.plot.line()

In [None]:
# Show every column name available in the raw frame.
data.columns.to_numpy()

In [None]:
# Columns kept for the analysis; copied so later edits do not touch `data`.
selected_columns = [
    'home',
    'event_points',
    'element_type',
    'influence_rank',
    'creativity_rank',
    'threat_rank',
    'ict_index_rank',
    'corners_and_indirect_freekicks_order',
    'direct_freekicks_order',
]
my_data = data.loc[:, selected_columns].copy()
my_data.info()

In [None]:
# Distinct values of the corner / indirect free-kick order, sorted ascending.
corner_order_values = my_data['corners_and_indirect_freekicks_order'].unique()
np.sort(corner_order_values)

In [None]:
# Distinct values of the direct free-kick order, sorted ascending.
direct_freekick_order_values = my_data['direct_freekicks_order'].unique()
np.sort(direct_freekick_order_values)

In [None]:
# Replace missing values with 8 in both set-piece order columns.
for order_column in ('corners_and_indirect_freekicks_order', 'direct_freekicks_order'):
    my_data[order_column] = my_data[order_column].fillna(8)

In [None]:
# Print the column dtypes and non-null counts of the working frame again.
my_data.info()

In [None]:
# Histogram of the ICT index rank in the working frame.
ict_rank_column = my_data['ict_index_rank']
ict_rank_column.plot.hist()

In [None]:
# Binary treatment flag: 1 when the ICT index rank is strictly below 50, else 0
# (a missing rank compares False and therefore maps to 0).
# A vectorised comparison replaces the row-wise `apply(..., axis=1)`, which
# calls a Python lambda once per row and is much slower on large frames.
my_data['ict_index_rank_treatment'] = (my_data['ict_index_rank'] < 50).astype('int64')
# Number of treated rows.
my_data['ict_index_rank_treatment'].sum()

# Causal inference

In [None]:
# Variables passed to DoWhy as common causes of treatment and outcome.
confounders = [
    'element_type',
    'influence_rank',
    'creativity_rank',
    'threat_rank',
    'corners_and_indirect_freekicks_order',
    'direct_freekicks_order',
]

# Causal model: treatment is the ICT-rank flag, outcome is the event points.
model = CausalModel(
    data=my_data,
    treatment='ict_index_rank_treatment',
    outcome='event_points',
    common_causes=confounders,
)

# Identify the estimand that every estimator below reuses.
identified_estimand = model.identify_effect()

In [None]:
# Render the causal graph held by the DoWhy model.
model.view_model()

# Naive estimator

In [None]:
# Mean event points per treatment group (index: 0 = control, 1 = treated).
points_by_treatment = my_data.groupby('ict_index_rank_treatment')[['event_points']]
naive_data = points_by_treatment.mean()
naive_data

In [None]:
# Naive effect: treated-group mean minus control-group mean of event points.
treated_mean = naive_data.loc[1, 'event_points']
untreated_mean = naive_data.loc[0, 'event_points']
naive_estimate = treated_mean - untreated_mean
naive_estimate

In [None]:
# Naive estimate expressed as a fraction of the control-group mean.
control_mean_points = naive_data.loc[0, 'event_points']
naive_estimate / control_mean_points

On average, players with an ICT index rank below 50 score 1.95 more points than the other players (a 75% increase). This is a naive difference in group means and does not adjust for any confounders.

# Linear regression

In [None]:
# Backdoor adjustment via linear regression, contrasting treatment 1 against 0.
linear_regression_estimate = model.estimate_effect(
    identified_estimand,
    method_name="backdoor.linear_regression",
    treatment_value=1,
    control_value=0,
)
print(linear_regression_estimate)

# Meta learner: T-Learner

In [None]:
# EconML T-Learner; an XGBRegressor is passed as the learner's `models`.
t_learner_params = {
    "init_params": {'models': XGBRegressor()},
    "fit_params": {},
}
t_learner_estimate = model.estimate_effect(
    identified_estimand,
    method_name="backdoor.econml.metalearners.TLearner",
    confidence_intervals=False,
    method_params=t_learner_params,
)
print(t_learner_estimate)

# Meta Learner: X-Learner

In [None]:
# EconML X-Learner; an XGBRegressor is passed as the learner's `models`.
x_learner_params = {
    "init_params": {'models': XGBRegressor()},
    "fit_params": {},
}
x_learner_estimate = model.estimate_effect(
    identified_estimand,
    method_name="backdoor.econml.metalearners.XLearner",
    confidence_intervals=False,
    method_params=x_learner_params,
)
print(x_learner_estimate)

# Double Machine Learner

In [None]:
# EconML Double ML: XGBoost regressors for the outcome and treatment models,
# and a LassoCV without intercept as the final-stage model.
dml_init_params = {
    'model_y': XGBRegressor(),
    'model_t': XGBRegressor(),
    'model_final': LassoCV(fit_intercept=False),
}
dml_estimate = model.estimate_effect(
    identified_estimand,
    method_name="backdoor.econml.dml.DML",
    method_params={'init_params': dml_init_params, 'fit_params': {}},
)
print(dml_estimate)

# Propensity Score Matching

In [None]:
# Average treatment effect estimated by propensity score matching.
propensity_matching_estimate = model.estimate_effect(
    identified_estimand,
    method_name="backdoor.propensity_score_matching",
    target_units="ate",
    method_params={},
)

matching_value_text = str(propensity_matching_estimate.value)
print("Causal Estimate is " + matching_value_text)

# Propensity Score Stratification

In [None]:
# Average treatment effect estimated by propensity score stratification.
propensity_stratification_estimate = model.estimate_effect(
    identified_estimand,
    method_name="backdoor.propensity_score_stratification",
    target_units="ate",
    method_params={},
)

stratification_value_text = str(propensity_stratification_estimate.value)
print("Causal Estimate is " + stratification_value_text)

# Inverse Propensity Score weighting

In [None]:
# Average treatment effect estimated by inverse propensity score weighting.
ipw_params = {"weighting_scheme": "ips_weight"}
causal_estimate_ipw = model.estimate_effect(
    identified_estimand,
    method_name="backdoor.propensity_score_weighting",
    target_units="ate",
    method_params=ipw_params,
)
print(causal_estimate_ipw)
ipw_value_text = str(causal_estimate_ipw.value)
print("Causal Estimate is " + ipw_value_text)

# Estimates

In [None]:
# Collect every point estimate and print them side by side, rounded to 3 decimals.
estimate_summary = {
    'naive_estimate': naive_estimate,
    'linear_regression_estimate': linear_regression_estimate.value,
    't_learner_estimate': t_learner_estimate.value,
    'x_learner_estimate': x_learner_estimate.value,
    'dml_estimate': dml_estimate.value,
    'propensity_matching_estimate': propensity_matching_estimate.value,
    'propensity_stratification_estimate': propensity_stratification_estimate.value,
    'causal_estimate_ipw': causal_estimate_ipw.value,
}
for label, value in estimate_summary.items():
    print(f'{label}: {np.round(value,3)}')

# Refutation

## Random common cause

In [None]:
# Refutation: re-estimate the IPW effect with a random common cause added.
res_random = model.refute_estimate(
    identified_estimand,
    causal_estimate_ipw,
    method_name="random_common_cause",
    show_progress_bar=True,
)
print(res_random)

## Removing a random subset

In [None]:
# Refutation: re-estimate the IPW effect on random 90% subsets of the data.
res_subset = model.refute_estimate(
    identified_estimand,
    causal_estimate_ipw,
    method_name="data_subset_refuter",
    subset_fraction=0.9,
    show_progress_bar=True,
)
print(res_subset)

## Placebo treatment

In [None]:
# Refutation: replace the real treatment with a permuted (placebo) treatment.
# The original cell was a copy of the subset refuter above, so it never ran a
# placebo test and overwrote `res_subset`. The result now lives in its own
# variable, and `res_subset` keeps the subset-refuter result from the cell above.
res_placebo = model.refute_estimate(
    identified_estimand,
    causal_estimate_ipw,
    method_name="placebo_treatment_refuter",
    placebo_type="permute",
    show_progress_bar=True,
)
print(res_placebo)