In [111]:
# reading: http://armsnet.info/journals/arms/Vol_3_No_1_June_2015/3.pdf#:~:text=The%20term%20%E2%80%9Cstatistical%20confidence%E2%80%9D%20has%20been%20defined%20clearly,samples%2C%20that%20an%20interval%20will%20capture%20a%20parameter.

# c_cont is number of conversions in control sample
# c_exp is number of conversions in experiment sample
# n_cont is total number of users in control sample
# n_exp is total number of users in experiment sample
# p_cont is the conversion rate in the control sample (estimated probability of conversions)
# p_exp is the conversion rate in the experiment sample (estimated probability of conversions)
# p_pool is the estimated probability of conversions in the pooled sample
# se_pool is the pooled standard error
# d is the estimated difference of p_exp and p_cont
# null hypothesis: d ~ Normal(0, se_pool^2) with mean of 0 and standard deviation of se_pool
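# z_score of 1.96 corresponds to 95% two-tailed, i.e. 95% of the mass lies in (-z_score, z_score); one-tailed, (-infinity, z_score) holds 97.5%
# z_score of 1.65 corresponds to 90% two-tailed (95% one-tailed)
# z_score of 2.575 corresponds to 99% two-tailed (99.5% one-tailed)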

# cumulative alpha = 1 - (1 - alpha)^k, where alpha is the per-experiment significance level and k is the number of experiments (excluding control)

from math import pi, sqrt, exp, erf as erf_py

In [112]:
def erf(z, n=10000):
    """Approximate the error function erf(z) by numerical integration.

    Evaluates ``2 / sqrt(pi) * integral from 0 to z of exp(-t**2) dt`` with
    the composite Simpson rule. Simpson's rule has fourth-order accuracy, so
    a few thousand sub-intervals already agree with ``math.erf`` to roughly
    ten decimal places for typical z-scores, whereas a left Riemann sum has
    only first-order accuracy and needs millions of steps for ~1e-7.

    Args:
        z: Upper integration limit. May be zero (result 0.0) or negative
            (the step becomes negative, giving the odd-symmetric result).
        n: Number of sub-intervals; must be a positive even integer.

    Returns:
        A float approximating erf(z).

    Raises:
        ValueError: If ``n`` is not a positive even integer.
    """
    if n < 2 or n % 2:
        raise ValueError("n must be a positive even integer")

    step = z / n

    # Interior nodes with an odd index carry Simpson weight 4.
    odd_total = 0.0
    for i in range(1, n, 2):
        t = i * step
        odd_total += exp(-(t * t))

    # Interior nodes with an even index carry Simpson weight 2.
    even_total = 0.0
    for i in range(2, n, 2):
        t = i * step
        even_total += exp(-(t * t))

    # Both endpoints carry weight 1; the integrand equals 1 at t = 0.
    weighted = 1.0 + exp(-(z * z)) + 4.0 * odd_total + 2.0 * even_total
    return 2.0 * weighted * step / (3.0 * sqrt(pi))

def z_score_to_percentile_py(z_score):
    """Return the standard normal CDF at ``z_score`` via the library ``erf``.

    Uses the identity ``Phi(z) = (1 + erf(z / sqrt(2))) / 2`` with the
    standard-library error function (imported here as ``erf_py``).
    """
    scaled = z_score / sqrt(2)
    return 0.5 * (1 + erf_py(scaled))
def z_score_to_percentile(z_score):
    """Return the standard normal CDF at ``z_score`` via this notebook's ``erf``.

    Same identity as ``z_score_to_percentile_py`` --
    ``Phi(z) = (1 + erf(z / sqrt(2))) / 2`` -- but evaluated with the
    numerically integrated ``erf`` defined in this notebook.
    """
    scaled = z_score / sqrt(2)
    return 0.5 * (1 + erf(scaled))

In [113]:
# Observed conversions and visitors per variant; variant a is the baseline
# that variant b is compared against in the cells that follow.
a_conv, a_vis = 100, 1000
b_conv, b_vis = 110, 1000
# Alternative scenario with identical results in both variants:
# a_conv, a_vis = 200, 1000
# b_conv, b_vis = 200, 1000

# Decision thresholds: the required confidence level and the critical |z|
# that the z-score is compared against.
confidence_threshold = 0.95
z_score_confidence_threshold = 1.96

In [114]:
# Conversion rate of each variant: conversions divided by visitors.
a_conv_rate, b_conv_rate = a_conv / a_vis, b_conv / b_vis
[a_conv_rate, b_conv_rate]  # conversion rates

[0.1, 0.11]

In [115]:
# Binomial standard error of each conversion rate: sqrt(p * (1 - p) / n).
a_standard_error, b_standard_error = (
    sqrt(rate * (1 - rate) / visitors)
    for rate, visitors in ((a_conv_rate, a_vis), (b_conv_rate, b_vis))
)
[a_standard_error, b_standard_error]

[0.009486832980505138, 0.009894442884771228]

In [116]:
# Standard error of the difference between the two conversion rates: the
# per-variant standard errors are combined as the square root of the sum of
# their squares.
# NOTE(review): this is the unpooled form, whereas the notes at the top of
# the notebook describe a pooled standard error (se_pool) -- confirm which
# one is intended.
standard_error_diff = sqrt(a_standard_error**2 + b_standard_error**2)

In [117]:
# Observed difference in conversion rate (b minus a) expressed in units of
# the standard error of that difference. Positive when b converts better.
# Raises ZeroDivisionError if standard_error_diff is 0 (both rates at 0 or 1).
z_score = (b_conv_rate - a_conv_rate) / standard_error_diff
z_score # z-score

0.7295190043087874

In [118]:
# (lower, upper) interval around each conversion rate: the rate plus or minus
# the critical z value times that variant's standard error.
a_conf_int, b_conf_int = (
    (
        rate - z_score_confidence_threshold * std_err,
        rate + z_score_confidence_threshold * std_err,
    )
    for rate, std_err in (
        (a_conv_rate, a_standard_error),
        (b_conv_rate, b_standard_error),
    )
)
[a_conf_int, b_conf_int]  # confidence intervals

[(0.08140580735820993, 0.11859419264179008),
 (0.0906068919458484, 0.1293931080541516)]

In [119]:
# Relative change of b's conversion rate with respect to a's (0.10 means b
# converts 10% better than a). Raises ZeroDivisionError if a_conv_rate is 0.
relative_lift = (b_conv_rate - a_conv_rate) / a_conv_rate
relative_lift # improvement in conversion rate

0.09999999999999995

In [120]:
# Two-sided decision: only the magnitude of the z-score matters, so compare
# its absolute value against the critical threshold.
abs_z_score = abs(z_score)
confident = z_score_confidence_threshold < abs_z_score
confident  # Reject null hypothesis?

False

In [121]:
# Two-tailed test: `percentile` is the normal CDF at |z|, `p` doubles the
# upper-tail mass beyond |z|, and `confidence` is its complement.
percentile = z_score_to_percentile(abs_z_score)
p = (1 - percentile) * 2
confidence = 1 - p
[percentile, p, confidence]

[0.7671579104239583, 0.4656841791520834, 0.5343158208479166]