In [1]:
import numpy as np
import scipy
from scipy import stats

In [2]:
def calculate_n(var, desired_ci, alpha=0.05):
    """Return the sample size that yields a given margin of error.

    Solves ``desired_ci = z * sqrt(var / n)`` for ``n``, where ``z`` is the
    two-sided standard-normal critical value for significance level ``alpha``.

    Parameters
    ----------
    var : float
        Variance of a single observation (e.g. ``p * (1 - p)``).
    desired_ci : float
        Target half-width of the confidence interval (margin of error).
    alpha : float, optional
        Two-sided significance level; the default 0.05 uses the 0.975
        quantile of the standard normal distribution.

    Returns
    -------
    float
        The required sample size. The value is not rounded.
    """
    # Standard error that the estimate must reach for the margin to be met.
    target_stderr = desired_ci / stats.norm.ppf(1 - (alpha / 2))
    return var / np.square(target_stderr)

In [3]:
def calculate_ci(n, p, alpha=0.05):
    """Return the margin of error of a proportion estimated from ``n`` samples.

    Uses the normal approximation: the critical value for a two-sided interval
    at significance level ``alpha`` times the standard error
    ``sqrt(p * (1 - p) / n)``.

    Parameters
    ----------
    n : int or float
        Number of samples.
    p : float
        Proportion being estimated.
    alpha : float, optional
        Two-sided significance level (default 0.05).

    Returns
    -------
    float
        Half-width of the confidence interval around ``p``.
    """
    critical_value = stats.norm.ppf(1 - (alpha / 2))
    standard_error = np.sqrt(p * (1 - p) / n)
    return critical_value * standard_error

In [4]:
# For each baseline proportion p, print the sample size needed so that the
# margin of error (at calculate_n's default alpha) is 20%, 10% and 5% of p.
# One output row per p, one column per relative tolerance.
for p in [0.2, 0.1, 0.08, 0.059, 0.01, 0.001, 0.0001]:
    print(p, end=' ')
    var = p * (1 - p)   # depends only on p, so computed once per row
    for within in [0.2, 0.1, 0.05]:
        desired_ci = p * within     # margin of error as a fraction `within` of p

        n = calculate_n(var, desired_ci)
        # Round up to whole samples. Unlike int(n)+1, ceil does not overshoot
        # by one when n is already an exact integer.
        print(int(np.ceil(n)), end=' ')
    print()

0.2 385 1537 6147 
0.1 865 3458 13830 
0.08 1105 4418 17671 
0.059 1532 6127 24508 
0.01 9508 38031 152122 
0.001 95941 383762 1535047 
0.0001 960269 3841075 15364299 


In [5]:
# Margin of error obtained with n=6127 samples at p=0.059 (default alpha).
calculate_ci(n=6127, p=0.059)

0.005899904316404775

In [6]:
# Sample size for variance 0.00744 and a margin of error of 0.005 (default alpha).
calculate_n(var=0.00744, desired_ci=0.005)

1143.2181450385715

In [7]:
# Sample sizes for p = 0.1, 0.01, 0.001, 0.0001 when the variance is computed
# from the reduced proportion p * (1 - proportion_captured), while the margin
# of error stays relative to the original p.
proportion_captured = 0.2   # loop-invariant, so set once
alpha = 0.05
for exponent in range(1, 5):    # named `exponent` to avoid shadowing builtin pow
    p = 10 ** -exponent
    desired_ci = p * 0.1      # margin of error is 10 percent of p

    remaining_p = p * (1 - proportion_captured)
    var = remaining_p * (1 - remaining_p)
    # Reuse the shared helper instead of repeating its formula inline.
    n = calculate_n(var, desired_ci, alpha=alpha)
    print(p, proportion_captured, n)

0.1 0.2 2827.3136920308752
0.01 0.2 30485.817201028578
0.001 0.2 307070.85229100555
0.0001 0.2 3072921.203190775
