In [1]:
import numpy as np
import pandas as pd
from dowhy import CausalModel
import graphviz
import warnings
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LassoCV
from sklearn.ensemble import GradientBoostingRegressor
# EconML imports
from econml.dml import LinearDML, CausalForestDML
from econml.cate_interpreter import SingleTreeCateInterpreter, SingleTreePolicyInterpreter
import seaborn as sns
import dowhy.plotter
%matplotlib inline
import matplotlib.pyplot as plt

warnings.filterwarnings('ignore')

# The Directed Acyclic Graph
#
# Each pair is (cause, effect). The order of the pairs is the order in which
# the edges are added to the graph, and therefore the order in which they
# appear in G.source.
DAG_EDGES = [
    ("ta_h", "tr"),
    ("vel_h", "ta_h"),
    ("rh", "ta_h"),
    ("ta_h", "clo"),
    ("ta_h", "thermal_sensation"),
    ("vel_h", "tr"),
    ("rh", "tr"),
    ("tr", "clo"),
    ("tr", "thermal_sensation"),
    ("vel_h", "clo"),
    ("vel_h", "thermal_sensation"),
    ("rh", "clo"),
    ("rh", "thermal_sensation"),
    ("met", "thermal_sensation"),
    ("clo", "thermal_sensation"),
]

G = graphviz.Digraph()
for cause, effect in DAG_EDGES:
    G.edge(cause, effect)

# Render the graph as a PDF inside the DAG/ directory without opening a viewer.
# The returned path is normalised to forward slashes and, being the last
# expression of the cell, is shown as the cell output.
G.format = 'pdf'
G.render(directory='DAG', view=False).replace('\\', '/')



'DAG/Digraph.gv.pdf'

In [2]:
# Load the raw spreadsheet, then keep only the variables that appear in the
# causal graph and discard every row with a missing value in any of them.
CAUSAL_COLUMNS = ["ta_h", "tr", "vel_h", "rh", "met", "clo", "thermal_sensation"]

data = pd.read_excel("data.xlsx")
data_for_causal = data[CAUSAL_COLUMNS].dropna()

In [3]:
# Summary statistics (count, mean, std, min, quartiles, max) of every column kept for the causal analysis.
data_for_causal.describe()

Unnamed: 0,ta_h,tr,vel_h,rh,met,clo,thermal_sensation
count,14861.0,14861.0,14861.0,14861.0,14861.0,14861.0,14861.0
mean,24.481171,24.347124,0.202645,51.515186,1.209126,0.587638,-0.019958
std,3.026901,3.159064,0.250611,14.457996,0.205215,0.291305,1.267191
min,10.1,8.5,0.0,8.9,0.7,0.04,-3.0
25%,22.66,22.48579,0.08,41.195754,1.1,0.39384,-1.0
50%,23.8,23.6,0.138955,53.052908,1.2,0.53,0.0
75%,26.6,26.238518,0.25,62.2,1.2,0.71,1.0
max,34.6,36.3,6.54,89.0,3.1,2.87,3.0


In [4]:
# Flatten the graphviz source onto a single line: every tab and every newline
# becomes one space before the text is handed to the causal model.
dot_graph = G.source.translate(str.maketrans("\t\n", "  "))

# Causal model: effect of air velocity (vel_h) on thermal_sensation.
model = CausalModel(
    data=data_for_causal,
    graph=dot_graph,
    treatment="vel_h",
    outcome="thermal_sensation",
)

# Identify the estimand; identification is allowed to proceed even when the
# effect is flagged as unidentifiable.
identified_estimand = model.identify_effect(proceed_when_unidentifiable=True)
print(identified_estimand)

Estimand type: nonparametric-ate

### Estimand : 1
Estimand name: backdoor
Estimand expression:
   d                        
───────(E[thermalₛₑₙₛₐₜᵢₒₙ])
d[velₕ]                     
Estimand assumption 1, Unconfoundedness: If U→{vel_h} and U→thermal_sensation then P(thermal_sensation|vel_h,,U) = P(thermal_sensation|vel_h,)

### Estimand : 2
Estimand name: iv
No such variable(s) found!

### Estimand : 3
Estimand name: frontdoor
No such variable(s) found!



In [6]:
random_state = 120


def _nuisance_model():
    """Return a fresh gradient-boosting regressor for a first-stage nuisance model."""
    return GradientBoostingRegressor(random_state=random_state, learning_rate=0.0001)


# Constructor arguments forwarded to EconML's DML estimator. model_y and
# model_t get separate regressor instances built with identical settings.
dml_init_params = {
    'model_y': _nuisance_model(),
    'model_t': _nuisance_model(),
    "model_final": LassoCV(fit_intercept=False, random_state=random_state),
    'featurizer': PolynomialFeatures(degree=1, include_bias=True),
    'random_state': random_state,
}

# Estimate the average effect through EconML's double machine learning
# estimator. control_value is not passed, so DoWhy's default applies; the
# treated level is vel_h = 0.5.
dml_estimate = model.estimate_effect(
    identified_estimand,
    method_name="backdoor.econml.dml.DML",
    treatment_value=0.5,
    target_units='ate',
    confidence_intervals=False,
    method_params={"init_params": dml_init_params, "fit_params": {}},
)
print(dml_estimate.value)

0.3702359630578024


In [7]:
def _run_refuter(method_name, **refuter_kwargs):
    """Run one DoWhy refuter against the DML estimate, print it and return it.

    Every refuter is called with the same fixed random_seed of 123; any extra
    keyword arguments are forwarded to model.refute_estimate unchanged.
    """
    refutation = model.refute_estimate(
        identified_estimand,
        dml_estimate,
        method_name=method_name,
        **refuter_kwargs,
        random_seed=123,
    )
    print(refutation)
    return refutation


# Random cause
res_random = _run_refuter("random_common_cause")

# Add Unobserved Common Causes
res_unobserved = _run_refuter(
    "add_unobserved_common_cause",
    confounders_effect_on_treatment="linear",
    confounders_effect_on_outcome="linear",
    effect_strength_on_treatment=0.01,
    effect_strength_on_outcome=0.02,
)

# Placebo Treatment
res_placebo = _run_refuter("placebo_treatment_refuter")

# Data Subsets Validation
res_subset = _run_refuter(
    "data_subset_refuter",
    subset_fraction=0.8,
    num_simulations=10,
)

Refute: Add a random common cause
Estimated effect:0.3702359630578024
New effect:0.3702424455655621
p value:0.29

Refute: Add an Unobserved Common Cause
Estimated effect:0.3702359630578024
New effect:0.3679555136798398

Refute: Use a Placebo Treatment
Estimated effect:0.3702359630578024
New effect:0.0
p value:1.0

Refute: Use a subset of data
Estimated effect:0.3702359630578024
New effect:0.37145071384824596
p value:0.4638543293727386

