In [1]:
###################################
#
#    Author: Grzegorz Melniczak
#    Date: 2017.09.01
#
###################################

import math
from scipy.stats.distributions import norm

def convertion_test(control_trials, variation_trials, control_conversions, variation_conversions, alternative='two-sided'):
    """Two-sample z-test for the difference between two conversion rates.

    The test statistic is ``(p_v - p_c) / std_error`` where ``p_c`` and
    ``p_v`` are the observed control and variation conversion rates and
    ``std_error`` is computed from the pooled conversion rate of both groups.

    Parameters
    ----------
    control_trials, variation_trials : number
        Number of trials in each group; both must be at least 20.
    control_conversions, variation_conversions : number
        Number of conversions in each group; each must lie between 0 and the
        corresponding number of trials.
    alternative : {'two-sided', 'greater', 'smaller'}
        'greater' yields the upper-tail p-value of the statistic (variation
        rate above control rate), 'smaller' the lower-tail p-value, and
        'two-sided' twice the smaller tail.

    Returns
    -------
    dict or None
        A dict with keys 'test', 'alternative', 'proportion 1' (control rate),
        'proportion 2' (variation rate), 'z-stat' and 'p-value'.
        ``None`` is returned, after printing a message, when an argument is
        invalid. When the pooled rate is exactly 0 or 1 the standard error is
        zero, the statistic is undefined, and both 'z-stat' and 'p-value'
        are NaN.
    """
    if alternative not in ('greater', 'two-sided', 'smaller'):
        print('\'alternative\' should be one of [\'greater\', \'two-sided\', \'smaller\']')
        return None

    if control_trials < 20 or variation_trials < 20:
        print("There must be at least 20 control and variation trials.")
        return None

    # Counts outside [0, trials] would give proportions outside [0, 1] and
    # either a meaningless result or a math domain error in the square root.
    if not (0 <= control_conversions <= control_trials
            and 0 <= variation_conversions <= variation_trials):
        print("Conversions must be between 0 and the number of trials.")
        return None

    p_c = control_conversions / control_trials
    p_v = variation_conversions / variation_trials
    p_total = (control_conversions + variation_conversions) / (control_trials + variation_trials)
    std_error = math.sqrt(p_total*(1-p_total)*(1/control_trials + 1/variation_trials))

    if std_error > 0:
        z_value = (p_v - p_c)/std_error
    else:
        # Pooled rate is 0 or 1: no variance, so the statistic is 0/0.
        z_value = float('nan')

    # The survival function is used for upper tails because 1 - cdf(z)
    # cancels to exactly 0.0 once cdf(z) rounds to 1.0 for large z.
    if alternative == 'greater':
        p_value = norm.sf(z_value)
    elif alternative == 'two-sided':
        p_value = 2*norm.sf(abs(z_value))
    else:  # 'smaller'
        p_value = norm.cdf(z_value)

    return {'test': 'two-sample proportion test',
            'alternative': alternative,
            'proportion 1': p_c,
            'proportion 2': p_v,
            'z-stat': z_value,
            'p-value': float(p_value)}

In [2]:
import itertools
import math
import pandas as pd

# Grid of group sizes and conversion rates (2**-7 .. 2**-1) to sweep over.
n = [30, 50, 100, 1000, 10000]
ps = [2**i for i in range(-7, 0)]

table = []

for (n1, n2) in itertools.product(n, n):
    for (p1, p2) in itertools.product(ps, ps):
        # Conversion counts are rounded up, so every group has at least one conversion.
        c1 = math.ceil(p1*n1)
        c2 = math.ceil(p2*n2)
        # Run each alternative once; the z-stat is the same for all three.
        results = {alt: convertion_test(n1, n2, c1, c2, alternative=alt)
                   for alt in ('two-sided', 'greater', 'smaller')}
        table.append([n1, n2, p1, p2,
            round(results['two-sided']['z-stat'], 3),
            round(results['two-sided']['p-value'], 3),
            round(results['greater']['p-value'], 3),
            round(results['smaller']['p-value'], 3)
        ])

df = pd.DataFrame(data=table, columns=['n1', 'n2', 'prop1', 'prop2', 'z-stat',
                                       'p-val two-sided', 'p-val greater', 'p-val smaller'])
df.sample(30, random_state=123)

Unnamed: 0,n1,n2,prop1,prop2,z-stat,p-val two-sided,p-val greater,p-val smaller
431,50,1000,0.25,0.125,-2.757,0.006,0.997,0.003
203,30,10000,0.015625,0.007812,-1.564,0.118,0.941,0.059
1112,10000,100,0.125,0.5,11.142,0.0,0.0,1.0
645,100,1000,0.015625,0.015625,-0.301,0.764,0.618,0.382
263,50,30,0.03125,0.125,1.534,0.125,0.062,0.938
964,1000,10000,0.125,0.25,8.842,0.0,0.0,1.0
35,30,30,0.25,0.007812,-2.531,0.011,0.994,0.006
155,30,1000,0.015625,0.015625,-0.734,0.463,0.769,0.231
874,1000,100,0.25,0.5,5.352,0.0,0.0,1.0
650,100,1000,0.015625,0.5,9.188,0.0,0.0,1.0
