In [1]:
import numpy as np
from scipy.stats import norm

In [2]:
# Standard normal quantile for a two-sided 99.7% level.
# (1 - 0.997) / 2 is the probability left in ONE tail, so this is the
# lower-tail quantile and the displayed value is negative.
norm.ppf((1 - 0.997) / 2)

-2.9677379253417833

In [3]:
# Group sizes and event counts for the two study arms.
# NOTE(review): presumably asp = aspirin arm, pla = placebo arm, and
# *_inf_count = number of participants with the event -- confirm against the data source.
asp_count = 11037
asp_inf_count = 104

pla_count = 11034
pla_inf_count = 189

# Total number of participants, derived from the per-arm sizes.
all_count = asp_count + pla_count

In [4]:
# Difference between the observed event proportions: PLA arm minus ASP arm.
pla_inf_count / pla_count - asp_inf_count / asp_count

0.0077060239760047815

In [5]:
def proportions_confint_diff_ind(sample1, sample2, alpha = 0.05):
    """Normal-approximation confidence interval for p1 - p2 (independent samples).

    Args:
        sample1: sized sequence of 0/1 values for the first group.
        sample2: sized sequence of 0/1 values for the second group.
        alpha: significance level; the interval has coverage 1 - alpha.

    Returns:
        Tuple ``(left_boundary, right_boundary)`` for the difference
        ``mean(sample1) - mean(sample2)``.
    """
    z = norm.ppf(1 - alpha / 2.)

    n1, n2 = len(sample1), len(sample2)
    p1 = float(sum(sample1)) / n1
    p2 = float(sum(sample2)) / n2

    # The half-width is the same on both sides, so compute it once.
    diff = p1 - p2
    margin = z * np.sqrt(p1 * (1 - p1)/ n1 + p2 * (1 - p2)/ n2)

    return (diff - margin, diff + margin)

In [6]:
def proportions_confint_diff_rel(sample1, sample2, alpha = 0.05):
    """Normal-approximation confidence interval for p1 - p2 (paired samples).

    The i-th elements of ``sample1`` and ``sample2`` are treated as one pair.
    Only discordant pairs contribute: ``f`` counts pairs (1, 0) and ``g``
    counts pairs (0, 1).

    Args:
        sample1: iterable of 0/1 values, first measurement of each pair.
        sample2: iterable of 0/1 values, second measurement of each pair.
        alpha: significance level; the interval has coverage 1 - alpha.

    Returns:
        Tuple ``(left_boundary, right_boundary)`` for the difference.

    Raises:
        ValueError: if the samples have different lengths. Pairing them with
            ``zip`` would silently drop the unmatched tail of the longer one.
        ZeroDivisionError: if both samples are empty.
    """
    first = list(sample1)
    second = list(sample2)
    if len(first) != len(second):
        raise ValueError(
            "paired samples must have equal length, got %d and %d"
            % (len(first), len(second))
        )

    z = norm.ppf(1 - alpha / 2.)
    n = len(first)

    # Discordant pair counts.
    f = sum(1 for a, b in zip(first, second) if a == 1 and b == 0)
    g = sum(1 for a, b in zip(first, second) if a == 0 and b == 1)

    # The half-width is the same on both sides, so compute it once.
    diff = float(f - g) / n
    margin = z * np.sqrt(float((f + g)) / n**2 - float((f - g)**2) / n**3)
    return (diff - margin, diff + margin)

In [7]:
# Encode each arm as a 0/1 indicator vector: the first *_inf_count entries
# are 1 and the rest stay 0. Slice assignment replaces the element-wise
# Python loops and leaves no loop variable behind in the namespace.
asp_sample = np.zeros(asp_count)
asp_sample[:asp_inf_count] = 1

pla_sample = np.zeros(pla_count)
pla_sample[:pla_inf_count] = 1

In [8]:
# Interval for p(PLA) - p(ASP) at the function's default alpha = 0.05.
# The argument order fixes the sign of the difference.
proportions_confint_diff_ind(pla_sample, asp_sample)

(0.004687750675049439, 0.010724297276960124)

In [9]:
def odds(sample):
    """Return the odds ``p / (1 - p)``, where ``p`` is the mean of ``sample``.

    Args:
        sample: array-like of 0/1 values (NumPy array, list, tuple, ...).
            For a 2-D array the sum runs over the first axis, so the result
            is one value per column.

    Returns:
        The odds as a NumPy float (or an array for 2-D input). If every
        entry is 1, ``1 - p`` is zero and NumPy yields ``inf`` with a
        RuntimeWarning.

    Raises:
        ZeroDivisionError: if ``sample`` is empty.
    """
    values = np.asarray(sample)
    n = values.shape[0]
    if n == 0:
        raise ZeroDivisionError("odds of an empty sample is undefined")

    # Vectorised sum: the builtin sum() walks the array element by element in
    # Python, which is slow when this is mapped over many bootstrap rows.
    p = values.sum(axis=0) / n
    return p / (1 - p)

In [17]:
# PLA odds relative to ASP odds, written as the reciprocal of ASP / PLA.
res1 = 1 / (odds(asp_sample) / odds(pla_sample))

# Show the raw value first, then the value rounded to 4 decimals, one per line.
print(res1, np.round(res1, 4), sep="\n")

1.832053941908714
1.8321


In [11]:
def get_bootstrap_samples(data, n_samples):
    """Draw ``n_samples`` bootstrap resamples of ``data`` (with replacement).

    Uses the global ``np.random`` state, so call ``np.random.seed`` beforehand
    for reproducible draws.

    Args:
        data: NumPy array to resample; it is indexed with an integer array.
        n_samples: number of resamples to draw.

    Returns:
        Array of shape ``(n_samples, len(data))``; each row is one resample.
    """
    size = len(data)
    # One row of random positions per resample, each as long as the data.
    return data[np.random.randint(0, size, (n_samples, size))]

In [12]:
def stat_intervals(stat, alpha):
    """Percentile interval of ``stat`` with coverage ``1 - alpha``.

    Args:
        stat: array-like of statistic values (e.g. bootstrap estimates).
        alpha: significance level; ``alpha / 2`` is cut from each tail.

    Returns:
        NumPy array ``[lower, upper]`` holding the ``100 * alpha / 2`` and
        ``100 * (1 - alpha / 2)`` percentiles of ``stat``.
    """
    lower_pct = 100 * alpha / 2.
    upper_pct = 100 * (1 - alpha / 2.)
    return np.percentile(stat, [lower_pct, upper_pct])

In [13]:
# Seed the global NumPy RNG so the bootstrap draws are reproducible.
np.random.seed(0)

# Order matters for reproducibility: the ASP resamples are drawn first and
# the PLA resamples second, both from the same global random stream.
asp_odds_scores = [odds(resample) for resample in get_bootstrap_samples(asp_sample, 1000)]
pla_odds_scores = [odds(resample) for resample in get_bootstrap_samples(pla_sample, 1000)]

# The labels previously said "odds repair time", a leftover from another
# analysis; the quantities reported here are the odds in each arm.
print("95% confidence interval for the ASP odds:", stat_intervals(asp_odds_scores, 0.05))
print("95% confidence interval for the PLA odds:", stat_intervals(pla_odds_scores, 0.05))

95% confidence interval for the ASP odds repair time: [0.00757714 0.0113626 ]
95% confidence interval for the PLA odds repair time: [0.01499402 0.01996672]


In [14]:
# Per-resample ratio of PLA odds to ASP odds; the i-th ASP resample is paired
# with the i-th PLA resample.
delta_odds_scores = [
    pla_odds / asp_odds
    for asp_odds, pla_odds in zip(asp_odds_scores, pla_odds_scores)
]
delta_odds_scores

[1.660043787221006,
 1.766030085786543,
 1.9655216665953714,
 2.1045987060674944,
 1.693909786406791,
 1.607534197744018,
 1.795642250792243,
 2.0633574551484997,
 1.5463617042809434,
 1.8730773793048914,
 1.4856279179231433,
 1.6592309424383849,
 1.3974141603721444,
 2.0608281672774598,
 2.2406954441790266,
 1.6687061773888119,
 1.9589114715653702,
 1.7391964611567374,
 1.6438582139500921,
 1.544135109853093,
 1.6329492118967632,
 1.7488079297452395,
 1.7610364724645,
 1.9348165598527811,
 2.0398169969218927,
 1.8454085489968017,
 1.6428592221704703,
 1.8002380206702158,
 1.6353129226607646,
 1.5976071018582463,
 1.7534714804528948,
 2.1528333795397843,
 1.6291815695350307,
 1.878411958530901,
 1.9541102046460177,
 1.6344724960770247,
 1.791114670776913,
 2.0398683344305466,
 1.8118854190242202,
 1.4871643260361795,
 2.0593698022546665,
 1.6090470483390187,
 1.7625702182521414,
 2.0072679624372136,
 1.8898607676897214,
 1.8445037323748967,
 1.7915444327246355,
 1.6389158480293529,
 1.

In [19]:
# Percentile bootstrap interval for the per-resample odds ratio.
lower_bound, upper_bound = stat_intervals(delta_odds_scores, 0.05)

# delta_odds_scores holds PLA odds divided by ASP odds, i.e. a ratio and not
# a difference, so the labels say so. Raw bounds first, then rounded to 4 decimals.
print(f"95% confidence interval for the odds ratio (PLA / ASP): [{lower_bound}, {upper_bound}]")
print(
    "95% confidence interval for the odds ratio (PLA / ASP): "
    f"[{np.round(lower_bound, 4)}, {np.round(upper_bound, 4)}]"
)

95% confidence interval for the difference between odds [1.444194647870839, 2.343211676263322]
95% confidence interval for the difference between odds[1.4442,2.3432]
