In [1]:
import pandas as pd
# Load the loans dataset from a CSV file into the DataFrame `data`.
# NOTE(review): the path is relative to the kernel's working directory, so the
# cell only works when the notebook is launched from the folder that contains
# ./Downloads — consider a configurable data directory.
data = pd.read_csv('./Downloads/cleaned_loans_full_schema_loan1_Bool.csv')

In [2]:
from dowhy import CausalModel

In [3]:
# Read the causal-graph definition from the .dot file into a single string.
# The `with` block guarantees the file handle is closed even if read() raises,
# which the previous manual open()/close() pair did not.
with open("./Downloads/Final_t.dot", 'r') as file:
    # Newlines are replaced by spaces so `graph` is one single-line string.
    graph = file.read().replace("\n", " ")

In [4]:
# Print a schema summary of `data`: row count, column names, non-null counts
# and dtypes (used here to check the loaded columns before modelling).
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9167 entries, 0 to 9166
Data columns (total 56 columns):
 #   Column                            Non-Null Count  Dtype  
---  ------                            --------------  -----  
 0   emp_title                         9167 non-null   object 
 1   emp_length                        9167 non-null   bool   
 2   state                             9167 non-null   int64  
 3   homeownership                     9167 non-null   bool   
 4   annual_income                     9167 non-null   bool   
 5   verified_income                   9167 non-null   int64  
 6   debt_to_income                    9167 non-null   float64
 7   annual_income_joint               9167 non-null   float64
 8   verification_income_joint         9167 non-null   int64  
 9   debt_to_income_joint              9167 non-null   float64
 10  delinq_2y                         9167 non-null   int64  
 11  months_since_last_delinq          9167 non-null   int64  
 12  earlie

In [5]:
# Build the DoWhy causal model from the loaded DataFrame and the graph string,
# with `emp_length` as the treatment column and `Is_bad_loan` as the outcome.
model = CausalModel(
    data=data,
    treatment="emp_length",
    outcome="Is_bad_loan",
    graph=graph,
)

In [6]:
# Install a blanket "ignore" filter for Python warnings, then call
# model.view_model() (presumably renders the causal graph — no output is
# captured in this export).
# NOTE(review): the filter is global and is never reset, so warnings raised by
# every later cell in this kernel session are hidden as well.
import warnings
warnings.filterwarnings("ignore")
model.view_model()

In [7]:
# Ask the model to identify the causal estimand for the treatment/outcome pair.
# NOTE(review): proceed_when_unidentifiable=True presumably lets identification
# continue without prompting when unobserved confounding cannot be ruled out —
# confirm against the DoWhy documentation.
identified_estimand = model.identify_effect(proceed_when_unidentifiable=True)

In [8]:
# Show the identified estimand as text (estimand type, name and expression).
print(identified_estimand)

Estimand type: nonparametric-ate

### Estimand : 1
Estimand name: backdoor
Estimand expression:
      d                                                                       
─────────────(Expectation(Is_bad_loan|total_credit_lines,loan_amount,term,acco
d[emp_length]                                                                 

                                                                              
unt_never_delinq_percent,paid_late_fees,accounts_opened_24m,public_record_bank
                                                                              

                                                                              
rupt,homeownership,num_mort_accounts,interest_rate,grade,loan_purpose,open_cre
                                                                              

                                                                              
dit_lines,balance,initial_listing_status,state,earliest_credit_line,num_accoun
                                

In [10]:
# Estimate the effect for the identified estimand with the
# "backdoor.propensity_score_matching" method, significance testing enabled.
# NOTE(review): this binds the shared name `estimate`, which another cell in
# this notebook also assigns; the refuter cells use whichever ran last.
estimate = model.estimate_effect(
    identified_estimand,
    test_significance=True,
    method_name="backdoor.propensity_score_matching",
)

In [11]:
# Print the text report for the current `estimate` (estimand plus estimate).
print(estimate)

*** Causal Estimate ***

## Identified estimand
Estimand type: nonparametric-ate

### Estimand : 1
Estimand name: backdoor
Estimand expression:
      d                                                                       
─────────────(Expectation(Is_bad_loan|installment,num_satisfactory_accounts,gr
d[emp_length]                                                                 

                                                                              
ade,months_since_last_credit_inquiry,num_accounts_120d_past_due,issue_month,pa
                                                                              

                                                                              
id_interest,num_accounts_30d_past_due,initial_listing_status,annual_income,sta
                                                                              

                                                                              
te,earliest_credit_line,interest_rate,loan_purpose,open_credit_

In [12]:
# Refutation check on the current `estimate` using the "random_common_cause"
# method; the printed report compares the estimated effect with the new one.
refuter1 = model.refute_estimate(
    identified_estimand,
    estimate,
    method_name="random_common_cause",
)
print(refuter1)

Refute: Add a Random Common Cause
Estimated effect:-0.00032726082687902256
New effect:-0.0007636085960510527



In [13]:
# Refutation check on the current `estimate` using the
# "placebo_treatment_refuter" method with placebo_type="permute".
# NOTE(review): only 5 simulations are requested, so the reported p-value is
# likely coarse — consider a larger num_simulations.
refuter2 = model.refute_estimate(
    identified_estimand,
    estimate,
    method_name="placebo_treatment_refuter",
    num_simulations=5,
    placebo_type="permute",
)
print(refuter2)

Refute: Use a Placebo Treatment
Estimated effect:-0.00032726082687902256
New effect:0.0008726955383440602
p value:0.2291939543589669



In [14]:
# Refutation check on the current `estimate` using the "data_subset_refuter"
# method (library defaults for all other options).
refuter3 = model.refute_estimate(
    identified_estimand,
    estimate,
    method_name="data_subset_refuter",
)
print(refuter3)

Refute: Use a subset of data
Estimated effect:-0.00032726082687902256
New effect:-0.0008140169075538588
p value:0.39



In [15]:
# Refutation check on the current `estimate` using the "bootstrap_refuter"
# method.
# NOTE(review): only 5 simulations are requested, so the reported p-value is
# likely coarse — consider a larger num_simulations.
refuter4 = model.refute_estimate(
    identified_estimand,
    estimate,
    num_simulations=5,
    method_name="bootstrap_refuter",
)
print(refuter4)

Refute: Bootstrap Sample Dataset
Estimated effect:-0.00032726082687902256
New effect:0.0005017999345478344
p value:0.4432054892217853



In [9]:
# Estimate the effect for the identified estimand with the
# "backdoor.linear_regression" method, significance testing enabled, and
# print the resulting report.
# NOTE(review): this binds the shared name `estimate`, which another cell in
# this notebook also assigns; the refuter cells use whichever ran last.
estimate = model.estimate_effect(
    identified_estimand,
    test_significance=True,
    method_name="backdoor.linear_regression",
)
print(estimate)

*** Causal Estimate ***

## Identified estimand
Estimand type: nonparametric-ate

### Estimand : 1
Estimand name: backdoor
Estimand expression:
      d                                                                       
─────────────(Expectation(Is_bad_loan|total_credit_lines,loan_amount,term,acco
d[emp_length]                                                                 

                                                                              
unt_never_delinq_percent,paid_late_fees,accounts_opened_24m,public_record_bank
                                                                              

                                                                              
rupt,homeownership,num_mort_accounts,interest_rate,grade,loan_purpose,open_cre
                                                                              

                                                                              
dit_lines,balance,initial_listing_status,state,earliest_credit_

In [10]:
# Refutation check on the current `estimate` using the "random_common_cause"
# method; the printed report compares the estimated effect with the new one.
refuter1 = model.refute_estimate(
    identified_estimand,
    estimate,
    method_name="random_common_cause",
)
print(refuter1)

Refute: Add a Random Common Cause
Estimated effect:-0.0005392590083168683
New effect:-0.0005437375326614521



In [11]:
# Refutation check on the current `estimate` using the
# "placebo_treatment_refuter" method with placebo_type="permute".
# NOTE(review): only 5 simulations are requested, so the reported p-value is
# likely coarse — consider a larger num_simulations.
refuter2 = model.refute_estimate(
    identified_estimand,
    estimate,
    method_name="placebo_treatment_refuter",
    num_simulations=5,
    placebo_type="permute",
)
print(refuter2)

Refute: Use a Placebo Treatment
Estimated effect:-0.0005392590083168683
New effect:-0.0010234369440739618
p value:0.15472814811047497



In [12]:
# Refutation check on the current `estimate` using the "data_subset_refuter"
# method (library defaults for all other options).
refuter3 = model.refute_estimate(
    identified_estimand,
    estimate,
    method_name="data_subset_refuter",
)
print(refuter3)

Refute: Use a subset of data
Estimated effect:-0.0005392590083168683
New effect:-0.0005444604263136515
p value:0.48



In [13]:
# Refutation check on the current `estimate` using the "bootstrap_refuter"
# method.
# NOTE(review): only 5 simulations are requested, so the reported p-value is
# likely coarse — consider a larger num_simulations.
refuter4 = model.refute_estimate(
    identified_estimand,
    estimate,
    num_simulations=5,
    method_name="bootstrap_refuter",
)
print(refuter4)

Refute: Bootstrap Sample Dataset
Estimated effect:-0.0005392590083168683
New effect:-0.0011010106858555843
p value:0.3818153220190198

