In [263]:
# reading: http://armsnet.info/journals/arms/Vol_3_No_1_June_2015/3.pdf#:~:text=The%20term%20%E2%80%9Cstatistical%20confidence%E2%80%9D%20has%20been%20defined%20clearly,samples%2C%20that%20an%20interval%20will%20capture%20a%20parameter.

# c_cont is number of conversions in control sample
# c_exp is number of conversions in experiment sample
# n_cont is total number of users in control sample
# n_exp is total number of users in experiment sample
# p_cont is the conversion rate in the control sample (estimated probability of conversions)
# p_exp is the conversion rate in the experiment sample (estimated probability of conversions)
# p_pool is the estimated probability of conversions in the pooled sample
# se_pool is the pooled standard error
# d is the estimated difference of p_exp and p_cont
# null hypothesis: d ~ Normal(0, se_pool^2) with mean of 0 and standard deviation of se_pool
# z_score of 1.96 corresponds to 95% from (-infinity, z_score) (one-tailed)
# z_score of 1.65 corresponds to 90% (one-tailed)
# z_score of 2.575 corresponds to 99% (one-tailed)

# cumulative alpha = 1-(1-Alpha)^k where k is number of experiments (excluding control)

from math import pi, sqrt, exp, erf

In [264]:
normal = lambda mu, sd: lambda x: exp(-((x-mu)/sd)**2/2)/(sd*sqrt(2*pi))
def erf_implementation(z):
    n = 1000000
    delta = z/n
    sum = 0
    for i in range(0, n):
        sum += exp(-1*(i*delta)**2)
    return 2*sum*delta/sqrt(pi)

def z_score_to_percentile_py(z_score):
    return (erf(z_score / sqrt(2)) + 1) / 2
def z_score_to_percentile(z_score):
    return (erf_implementation(z_score / sqrt(2)) + 1) / 2
def confidence(pdf, d, mu=0):
    n = 1000000
    delta = abs(d - mu)/n
    sum = 0
    for i in range(1, n+1):
        sum += pdf(mu+i*delta)
    return 2*sum*delta
score = 0.72
[erf(2), z_score_to_percentile(score)]

[0.9953222650189527, 0.7642375350135178]

In [265]:
a_conv = 200
a_vis = 1000
b_conv = 236
b_vis = 1000	
# a_conv = 200
# a_vis = 1000
# b_conv = 200
# b_vis = 1000	
z_score_confidence_threshold = 1.96
confidence_threshold = 0.95

In [266]:
a_conv_rate = a_conv/a_vis
b_conv_rate = b_conv/b_vis
[a_conv_rate, b_conv_rate] # conversion rates

[0.2, 0.236]

In [267]:
a_standard_error = sqrt(a_conv_rate*(1-a_conv_rate)/a_vis)
b_standard_error = sqrt(b_conv_rate*(1-b_conv_rate)/b_vis)
[a_standard_error, b_standard_error]

[0.01264911064067352, 0.013427732496590778]

In [268]:
standard_error_diff = sqrt(a_standard_error**2 + b_standard_error**2)

In [269]:
z_score = (b_conv_rate - a_conv_rate) / standard_error_diff
z_score # z-score

1.951501879161692

In [270]:
a_conf_int = (a_conv_rate - z_score * a_standard_error, a_conv_rate + z_score * a_standard_error)
b_conf_int = (b_conv_rate - z_score * b_standard_error, b_conv_rate + z_score * b_standard_error)
[a_conf_int, b_conf_int] # confidence intervals

[(0.1753152368150015, 0.22468476318499853),
 (0.20979575480002258, 0.2622042451999774)]

In [271]:
relative_lift = (b_conv_rate - a_conv_rate) / a_conv_rate
relative_lift # improvement in conversion rate

0.17999999999999988

In [272]:
abs_z_score = abs(z_score)
confident = abs_z_score > z_score_confidence_threshold
confident # Reject null hypothesis?

False

In [273]:
percentile = z_score_to_percentile(abs_z_score)
p = 2 * (1 - percentile)
confidence = 1 - p
# Confidence as percentile for two-tailed test
[percentile, p, confidence]

[0.9745016448449544, 0.05099671031009123, 0.9490032896899088]