In [10]:
# Confidence intervals for binary (proportion) samples

import numpy as np
from scipy.stats import norm # Gaussian distribution
from statsmodels.stats.proportion import proportion_confint # binary confint

def proportions_confint_diff_ind(sample1, sample2, alpha = 0.05):
    """Two-sided normal-approximation (Wald) interval for p1 - p2.

    sample1, sample2 -- independent binary (0/1) samples; each proportion
                        is taken as sum(sample) / len(sample).
    alpha            -- significance level; the interval has nominal
                        coverage 1 - alpha.

    Returns a (left_boundary, right_boundary) tuple for the difference
    "proportion of sample1 minus proportion of sample2".
    """

    n1 = len(sample1)
    n2 = len(sample2)
    p1 = float(sum(sample1)) / n1
    p2 = float(sum(sample2)) / n2

    # Standard normal quantile for a two-sided interval.
    z = norm.ppf(1 - alpha / 2)

    # Half-width: z times the standard error of the difference of two
    # independent sample proportions.
    half_width = z * np.sqrt(p1 * (1 - p1)/ n1 + p2 * (1 - p2)/ n2)
    diff = p1 - p2

    return (diff - half_width, diff + half_width)

def get_bootstrap_samples(data, n_samples):
    """Draw bootstrap resamples (sampling with replacement) from ``data``.

    data      -- 1-D sequence of observations; lists/tuples are accepted
                 and converted to a NumPy array (an ndarray is used as-is).
    n_samples -- number of resamples to draw.

    Returns an array with one resample per row; for 1-D input the shape
    is (n_samples, len(data)).

    Indices come from the global ``np.random`` state, so call
    ``np.random.seed`` beforehand for reproducible results.
    """

    # Fancy indexing below needs an ndarray; a plain list would raise TypeError.
    data = np.asarray(data)

    # One row of random positions per resample, each row as long as the data.
    indices = np.random.randint(0, len(data), (n_samples, len(data)))
    return data[indices]

def stat_intervals(stat, alpha):
    """Percentile interval of the values in ``stat``.

    Returns the 100*alpha/2 and 100*(1 - alpha/2) percentiles of ``stat``
    as a two-element array (lower bound first).
    """

    lower_rank = 100 * alpha / 2.
    upper_rank = 100 * (1 - alpha / 2.)
    return np.percentile(stat, [lower_rank, upper_rank])

def get_odds(sample):
    """
    Odds of success in a binary (0/1) sample:
    odds = p / (1 - p), where p = sum(sample) / len(sample).
    """

    n_total = len(sample)
    n_success = np.sum(sample)
    rate = n_success / n_total
    return rate / (1 - rate)

In [11]:
# Binary outcome arrays built from the trial counts:
#   test (aspirin arm):    104 ones out of 11037 observations
#   control (placebo arm): 189 ones out of 11034 observations

test = np.repeat([1, 0], [104, 11037 - 104])
control = np.repeat([1, 0], [189, 11034 - 189])

# Per-arm normal-approximation intervals (left disabled):
# aspirin_interval = proportion_confint(104, 11037, method = 'normal')
# placebo_interval = proportion_confint(189, 11034, method = 'normal')

# Difference in success proportions: control minus test
189 / 11034 - 104 / 11037


0.0077060239760047815

In [12]:
# Odds p / (1 - p) for each arm, then the placebo-to-aspirin odds ratio

aspirin_rate = 104 / 11037
placebo_rate = 189 / 11034

aspirin_odds = aspirin_rate / (1 - aspirin_rate)
placebo_odds = placebo_rate / (1 - placebo_rate)
placebo_odds / aspirin_odds

1.8320539419087138

In [13]:
# Confidence interval (default alpha = 0.05) for the difference in
# proportions between the two independent samples: control minus test

proportions_confint_diff_ind(sample1=control, sample2=test)

(0.0046877506750494392, 0.010724297276960124)

In [14]:
# Percentile-bootstrap interval for the control / test odds ratio

np.random.seed(0)  # fixed seed so the resampled interval is reproducible

# 1000 resamples per arm; the test arm is drawn first, then the control arm,
# so the random stream is consumed in the same order on every run.
test_odds = np.array([get_odds(resample) for resample in get_bootstrap_samples(test, 1000)])
control_odds = np.array([get_odds(resample) for resample in get_bootstrap_samples(control, 1000)])

# Resample-wise odds ratio, summarised by its 2.5th and 97.5th percentiles
odds_ratio = control_odds / test_odds
stat_intervals(odds_ratio, 0.05)

array([ 1.44419465,  2.34321168])