In [6]:
import pandas as pd
import numpy as np
from scipy.special import expit
import statsmodels.api as sm
import scipy.stats as stats
from adjustment import *
from fcit import fcit

from proximal import *
from regex_predictor import *
from odds_ratio import *
from backdoor import *
from bag_of_words import *

In [8]:
# Load the cohort and build two placeholder proxy predictors (W and Z downstream).
master_data = pd.read_csv('csv_files/master_data.csv')

# Both "predictors" are fair coin flips. Draw them from a dedicated, seeded
# Generator so that Restart & Run All reproduces the same proxies; a local
# Generator also leaves NumPy's legacy global RNG state untouched, so cells
# that call np.random.seed(...) later are unaffected by this change.
# NOTE(review): outputs saved before this seed was introduced came from
# unseeded draws and will differ slightly after a re-run.
PROXY_SEED = 0
proxy_rng = np.random.default_rng(PROXY_SEED)
n_rows = len(master_data)
zero_shot_preds = proxy_rng.binomial(1, 0.5, n_rows)
regex_preds = proxy_rng.binomial(1, 0.5, n_rows)

# For each predictor, report: agreement rate with the 'afib' label, the share
# of positive predictions, and the confusion-matrix summary. A blank line
# separates the two reports (none is printed after the last one).
for position, preds in enumerate((zero_shot_preds, regex_preds)):
    if position:
        print()
    print(np.mean(master_data['afib'] == preds))
    print(np.mean(preds))
    print(create_confusion_matrix(master_data['afib'], preds))

0.4995076567858477
0.4978778309734814
{'tn': 10648, 'fp': 10600, 'fn': 4140, 'tp': 4063, 'sensitivity': 0.49530659514811654, 'specificity': 0.5011295180722891, 'precision': 0.27709200027279546, 'recall': 0.49530659514811654}

0.4989983362194832
0.5015109843468812
{'tn': 10587, 'fp': 10661, 'fn': 4094, 'tp': 4109, 'sensitivity': 0.5009142996464708, 'specificity': 0.4982586596385542, 'precision': 0.2781990521327014, 'recall': 0.5009142996464708}


In [9]:
# Assemble the analysis frame: U is the real 'heart_fail' column, W and Z are
# the two prediction vectors built earlier, and age/gender are carried over
# unchanged from master_data.
# NOTE(review): the predictions were scored against 'afib' earlier in the
# notebook while U here is 'heart_fail' -- confirm this is intended.
proxy_columns = {'W': zero_shot_preds, 'Z': regex_preds}
covariate_columns = {name: master_data[name] for name in ('age', 'gender')}

semi_synthetic_data = pd.DataFrame({
    'U': master_data['heart_fail'],
    **proxy_columns,
    **covariate_columns,
})

In [10]:
# Simulate the treatment A and the outcome Y as functions of U and a
# synthetic covariate C. The coefficient on A in the outcome model (1.3) is
# the effect built into the simulation.
np.random.seed(3)

size = len(semi_synthetic_data)

# Synthetic standard-normal covariate; it enters both the treatment and the
# outcome models below.
C = np.random.normal(0, 1, size)

# Treatment assignment: Bernoulli draw with logistic probability in U and C.
treatment_probability = expit(0.8*semi_synthetic_data['U']+C)
A = np.random.binomial(1, treatment_probability, size)

# Share of treated units.
print(np.mean(A))

# Outcome: standard-normal noise plus linear terms in A, U and C.
outcome_noise = np.random.normal(0, 1, size)
Y = outcome_noise + 1.3*A + 1.4*semi_synthetic_data['U'] + C

# Store the simulated variables next to U, W, Z, age and gender.
semi_synthetic_data['A'] = A
semi_synthetic_data['Y'] = Y
semi_synthetic_data['C'] = C

0.5373331975145156


In [11]:
# Diagnostics on the proxies, using the project's odds_ratio helper
# (called as odds_ratio(var_1, var_2, conditioning_columns, data)).

# U against each proxy, with an empty conditioning list.
for proxy in ('W', 'Z'):
    print(odds_ratio('U', proxy, [], semi_synthetic_data))

# Fraction of rows on which the two proxies agree.
print()
proxies_agree = semi_synthetic_data['W'] == semi_synthetic_data['Z']
print(np.mean(proxies_agree))
print()

# W against Z, conditioning first on U alone and then on U, age and gender.
for conditioning_columns in (['U'], ['U', 'age', 'gender']):
    print(odds_ratio('W', 'Z', conditioning_columns, semi_synthetic_data))

0.9599422733442343
0.9842254960121284

0.5011035278937896

1.0087927085749555
1.0089570997266029


In [12]:
# Estimate the effect of A on Y with the project's proximal helpers, passing
# W and Z as the two proxies and C as the only covariate, then print the
# result of compute_confidence_intervals for the same configuration.
# NOTE(review): how the interval is computed is defined in the project
# module, not in this notebook -- see its source for details.
ace_estimate = proximal_find_ace('A', 'Y', 'W', 'Z', ['C'], semi_synthetic_data)
print(ace_estimate)

ace_interval = compute_confidence_intervals('A', 'Y', 'W', 'Z', ['C'], semi_synthetic_data)
print(ace_interval)

1.5074944213016388
(1.4177832999061781, 1.7466150623656154)
