In [1]:
import numpy as np
import pandas as pd
from dowhy import CausalModel
import graphviz
import warnings
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LassoCV
from sklearn.ensemble import GradientBoostingRegressor
# EconML imports
from econml.dml import LinearDML, CausalForestDML
from econml.cate_interpreter import SingleTreeCateInterpreter, SingleTreePolicyInterpreter
import seaborn as sns
import dowhy.plotter
%matplotlib inline
import matplotlib.pyplot as plt

# Silence all warnings emitted by the libraries used in this notebook.
warnings.filterwarnings('ignore')

# The Directed Acyclic Graph (DAG) of the assumed causal relations.
# Each entry is a (cause, effect) pair; the order is kept unchanged so the
# generated graph source stays the same.
CAUSAL_EDGES = [
    # Relations among the measured variables and thermal_sensation
    ("ta", "tr"),
    ("vel", "ta"),
    ("rh", "ta"),
    ("ta", "clo"),
    ("ta", "thermal_sensation"),
    ("vel", "tr"),
    ("rh", "tr"),
    ("tr", "clo"),
    ("tr", "thermal_sensation"),
    ("vel", "clo"),
    ("vel", "thermal_sensation"),
    ("rh", "clo"),
    ("rh", "thermal_sensation"),
    ("met", "thermal_sensation"),
    ("clo", "thermal_sensation"),
    # Relations involving thermal_preference (the treatment used later on)
    ("ta", "thermal_preference"),
    ("tr", "thermal_preference"),
    ("vel", "thermal_preference"),
    ("rh", "thermal_preference"),
    ("met", "thermal_preference"),
    ("clo", "thermal_preference"),
    ("thermal_preference", "thermal_sensation"),
]

G = graphviz.Digraph()
for cause, effect in CAUSAL_EDGES:
    G.edge(cause, effect)

# Render the DAG as a PDF inside the "DAG" directory without opening a viewer.
# The returned path is displayed with forward slashes as the cell output.
G.format = 'pdf'
rendered_path = G.render(directory='DAG', view=False)
rendered_path.replace('\\', '/')



'DAG/Digraph.gv.pdf'

In [2]:
# Load the raw spreadsheet.
data = pd.read_excel("data.xlsx")

# Keep only the columns that appear in the causal graph and discard every
# row that has a missing value in any of them.
causal_columns = [
    "ta",
    "tr",
    "vel",
    "rh",
    "met",
    "clo",
    "thermal_sensation",
    "thermal_preference",
]
data_for_causal = data[causal_columns].dropna()

In [3]:
# Work on a copy so data_for_causal keeps the original text labels, then
# encode the preference votes as integers: cooler -> -1, no change -> 0,
# warmer -> 1.
preference_codes = {"cooler": -1, "no change": 0, "warmer": 1}
thermal_preference = data_for_causal.copy()
thermal_preference["thermal_preference"] = (
    thermal_preference["thermal_preference"].replace(preference_codes)
)

In [4]:
# Number of rows for each encoded preference value (-1, 0, 1).
preference_counts = thermal_preference.groupby("thermal_preference").size()
preference_counts

thermal_preference
-1    15308
 0    24288
 1     8617
dtype: int64

In [9]:
# Keep only the rows whose preference code is 1 ("warmer") or 0 ("no change").
wants_warmer = thermal_preference['thermal_preference'] == 1
wants_no_change = thermal_preference['thermal_preference'] == 0

# Take an explicit copy: assigning a column on the bare .loc selection is
# chained assignment (SettingWithCopyWarning) and may not reliably update
# the intended frame.
filtered_data = thermal_preference.loc[wants_warmer | wants_no_change].copy()

# Swap the two codes so that "no change" becomes 1 and "warmer" becomes 0.
filtered_data["thermal_preference"] = filtered_data['thermal_preference'].replace([1,0], [0, 1])

In [10]:
# Number of rows for each value of the recoded binary preference.
filtered_counts = filtered_data.groupby("thermal_preference").size()
filtered_counts

thermal_preference
0     8617
1    24288
dtype: int64

In [11]:
# Flatten the DOT source of the DAG onto a single line by turning every tab
# and newline into a space before handing it to DoWhy.
dag_source = G.source.translate(str.maketrans("\t\n", "  "))

# Build the causal model: thermal_preference is the treatment and
# thermal_sensation is the outcome.
# NOTE(review): the model is fitted on the three-level `thermal_preference`
# frame (-1/0/1); the binary `filtered_data` frame built earlier is not used
# here - confirm this is intended.
model = CausalModel(
    data=thermal_preference,
    graph=dag_source,
    treatment="thermal_preference",
    outcome="thermal_sensation",
)

# Identify the estimand from the graph and show it.
identified_estimand = model.identify_effect(proceed_when_unidentifiable=True)
print(identified_estimand)

Estimand type: nonparametric-ate

### Estimand : 1
Estimand name: backdoor
Estimand expression:
          d                                                     
─────────────────────(E[thermal_sensation|rh,tr,clo,met,ta,vel])
d[thermal_preference]                                           
Estimand assumption 1, Unconfoundedness: If U→{thermal_preference} and U→thermal_sensation then P(thermal_sensation|thermal_preference,rh,tr,clo,met,ta,vel,U) = P(thermal_sensation|thermal_preference,rh,tr,clo,met,ta,vel)

### Estimand : 2
Estimand name: iv
No such variable(s) found!

### Estimand : 3
Estimand name: frontdoor
No such variable(s) found!



In [12]:
# Seed shared by every learner in the double machine learning (DML) estimator.
random_state = 120

# Constructor arguments forwarded to EconML's DML estimator.
# NOTE(review): learning_rate=0.0001 is very small for the two gradient
# boosting nuisance models - confirm this value is intended.
dml_init_params = {
    'model_y': GradientBoostingRegressor(random_state=random_state, learning_rate=0.0001),
    'model_t': GradientBoostingRegressor(random_state=random_state, learning_rate=0.0001),
    "model_final": LassoCV(fit_intercept=False, random_state=random_state),
    'featurizer': PolynomialFeatures(degree=1, include_bias=True),
    'random_state': random_state,
}

# Estimate the average treatment effect (ATE) between treatment value 1 and
# control value 0 through DoWhy's EconML DML wrapper, without confidence
# intervals.
dml_estimate = model.estimate_effect(
    identified_estimand,
    method_name="backdoor.econml.dml.DML",
    control_value=0,
    treatment_value=1,
    target_units='ate',
    confidence_intervals=False,
    method_params={"init_params": dml_init_params, "fit_params": {}},
)
print(dml_estimate.value)

-1.2850501855585097


In [None]:
# Refutation checks for the DML estimate; every check uses random_seed=123.

# Random common cause
res_random = model.refute_estimate(
    identified_estimand,
    dml_estimate,
    method_name="random_common_cause",
    random_seed=123,
)
print(res_random)

# Unobserved common cause with linear effects on both treatment and outcome
unobserved_cause_options = {
    "confounders_effect_on_treatment": "linear",
    "confounders_effect_on_outcome": "linear",
    "effect_strength_on_treatment": 0.01,
    "effect_strength_on_outcome": 0.02,
}
res_unobserved = model.refute_estimate(
    identified_estimand,
    dml_estimate,
    method_name="add_unobserved_common_cause",
    random_seed=123,
    **unobserved_cause_options,
)
print(res_unobserved)

# Placebo treatment
res_placebo = model.refute_estimate(
    identified_estimand,
    dml_estimate,
    method_name="placebo_treatment_refuter",
    random_seed=123,
)
print(res_placebo)

# Data subset validation: 10 simulations with subset_fraction=0.8
res_subset = model.refute_estimate(
    identified_estimand,
    dml_estimate,
    method_name="data_subset_refuter",
    subset_fraction=0.8,
    num_simulations=10,
    random_seed=123,
)
print(res_subset)