In [None]:
import json
import logging
import pickle
from pathlib import Path

import networkx as nx
import pandas as pd
from dowhy import CausalModel

In [None]:
# Root-logger threshold for the whole notebook: records below WARNING are
# dropped unless a library sets an explicit level on its own logger.
LOG_LEVEL = logging.WARNING
logging.basicConfig(level=LOG_LEVEL)

In [2]:
# Both inputs are resolved relative to the project root instead of a
# machine-specific absolute path, so the notebook runs from any checkout.
# NOTE(review): assumes the notebook lives one level below the project root,
# next to graphs/ and data/ -- the same layout the graph path already relied on.
PROJECT_ROOT = Path("..")
GRAPH_PATH = PROJECT_ROOT / "graphs" / "full_causal.gpickle"
DATA_PATH = PROJECT_ROOT / "data" / "eqls_processed.csv"

# SECURITY: pickle.load can execute arbitrary code embedded in the file.
# Only load graph pickles that were produced by this project.
with open(GRAPH_PATH, 'rb') as f:
    causal_graph = pickle.load(f)

data_raw = pd.read_csv(DATA_PATH)

# Keep only the columns that appear as nodes in the causal graph, and fail
# with an explicit message if the CSV lacks any of them.
graph_columns = list(causal_graph.nodes)
missing_columns = [col for col in graph_columns if col not in data_raw.columns]
if missing_columns:
    raise KeyError(
        f"{DATA_PATH} is missing {len(missing_columns)} column(s) required by "
        f"the causal graph: {missing_columns}"
    )

data = data_raw[graph_columns]

In [3]:
# Variable names used as treatment and outcome; both must be columns of
# `data` and nodes of `causal_graph`.
TREATMENT = "Y11_Q57"
OUTCOME = "Y11_MWIndex"

# The graph is handed to DoWhy as DOT text, produced via pydot.
graph_dot = nx.nx_pydot.to_pydot(causal_graph).to_string()

model = CausalModel(data=data, treatment=TREATMENT, outcome=OUTCOME, graph=graph_dot)

In [4]:
# Ask DoWhy which estimands (backdoor / iv / frontdoor) the graph supports
# for the configured treatment and outcome, then show the result.
estimand = model.identify_effect()
print(f"Identified estimand: {estimand}")

Identified estimand: Estimand type: EstimandType.NONPARAMETRIC_ATE

### Estimand : 1
Estimand name: backdoor
Estimand expression:
     d                                                                         ↪
───────────(E[Y_11_MWIndex|Y11_Agecategory,Y11_EmploymentStatus,Y11_HHsize,Y11 ↪
d[Y_11_Q57]                                                                    ↪

↪                                                           
↪ _Q32,Y11_HH2a,Y11_HHstructure,Y11_Country,Y11_Q7,Y11_Q31])
↪                                                           
Estimand assumption 1, Unconfoundedness: If U→{Y11_Q57} and U→Y11_MWIndex then P(Y11_MWIndex|Y11_Q57,Y11_Agecategory,Y11_EmploymentStatus,Y11_HHsize,Y11_Q32,Y11_HH2a,Y11_HHstructure,Y11_Country,Y11_Q7,Y11_Q31,U) = P(Y11_MWIndex|Y11_Q57,Y11_Agecategory,Y11_EmploymentStatus,Y11_HHsize,Y11_Q32,Y11_HH2a,Y11_HHstructure,Y11_Country,Y11_Q7,Y11_Q31)

### Estimand : 2
Estimand name: iv
No such variable(s) found!

### Estimand : 3
Estimand name: fr

In [5]:
# Estimate the effect from the identified estimand via propensity-score matching.
# NOTE(review): the saved output of this cell ends with a solver warning
# ("TOTAL NO. OF ITERATIONS REACHED LIMIT", max_iter=100), i.e. the logistic
# regression fitted during estimation did not converge. Consider scaling the
# covariates or supplying a propensity model with a higher max_iter before
# relying on the estimate.
ESTIMATION_METHOD = "backdoor.propensity_score_matching"
estimate = model.estimate_effect(estimand, method_name=ESTIMATION_METHOD)
print(estimate)

*** Causal Estimate ***

## Identified estimand
Estimand type: EstimandType.NONPARAMETRIC_ATE

### Estimand : 1
Estimand name: backdoor
Estimand expression:
     d                                                                         ↪
───────────(E[Y_11_MWIndex|Y11_Agecategory,Y11_EmploymentStatus,Y11_HHsize,Y11 ↪
d[Y_11_Q57]                                                                    ↪

↪                                                           
↪ _Q32,Y11_HH2a,Y11_HHstructure,Y11_Country,Y11_Q7,Y11_Q31])
↪                                                           
Estimand assumption 1, Unconfoundedness: If U→{Y11_Q57} and U→Y11_MWIndex then P(Y11_MWIndex|Y11_Q57,Y11_Agecategory,Y11_EmploymentStatus,Y11_HHsize,Y11_Q32,Y11_HH2a,Y11_HHstructure,Y11_Country,Y11_Q7,Y11_Q31,U) = P(Y11_MWIndex|Y11_Q57,Y11_Agecategory,Y11_EmploymentStatus,Y11_HHsize,Y11_Q32,Y11_HH2a,Y11_HHstructure,Y11_Country,Y11_Q7,Y11_Q31)

## Realized estimand
b: Y11_MWIndex~Y11_Q57+Y11_Agecategory+Y11_Employ

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=100).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [6]:
# Placebo-treatment refutation: re-estimates the effect using a placebo in
# place of the real treatment. The refuter is stochastic, so a fixed seed is
# passed to make the reported "New effect" and p-value repeatable under
# Restart & Run All.
PLACEBO_SEED = 42
refute_results = model.refute_estimate(
    estimand,
    estimate,
    method_name="placebo_treatment_refuter",
    random_seed=PLACEBO_SEED,
)
print(refute_results)

Refute: Use a Placebo Treatment
Estimated effect:4.270291883246701
New effect:-0.02141314902610384
p value:0.88



In [7]:
# Data-subset refutation: re-estimates the effect on random subsets of the
# data. The subsets are drawn randomly, so a fixed seed is passed to make the
# reported "New effect" and p-value repeatable under Restart & Run All.
# Note: this rebinds `refute_results`, replacing any earlier refutation result.
SUBSET_SEED = 42
refute_results = model.refute_estimate(
    estimand,
    estimate,
    method_name="data_subset_refuter",
    random_seed=SUBSET_SEED,
)
print(refute_results)

Refute: Use a subset of data
Estimated effect:4.270291883246701
New effect:4.241636441525061
p value:0.74

