In [1]:
import math
import numpy as np
import scipy.stats as stats
from scipy.stats import norm
import statsmodels.stats.api as sms
from statsmodels.stats.proportion import proportions_ztest
from tqdm.notebook import tqdm

  import pandas.util.testing as tm


# **Вариант 1**

In [2]:
# Evan's Awesome A/B Tools - https://www.evanmiller.org/ab-testing/sample-size.html

In [3]:
# https://www.evanmiller.org/ab-testing/sample-size.html#!10;80;5;1;0

In [4]:
# Result from the calculator linked above: 14313 (the sample size it reports for these settings)

# **Вариант 2**

In [5]:
# Inputs for the statsmodels power analysis.
baseline_rate = 0.1             # conversion rate of the baseline group
practical_significance = 0.01   # lift added on top of the baseline rate
confidence_level = 0.05         # NOTE: despite the name, this is passed as alpha below
sensitivity = 0.8               # passed as the desired power below

# Effect size between the baseline rate and the rate after the lift.
effect_size = sms.proportion_effectsize(
    baseline_rate,
    baseline_rate + practical_significance,
)

# Solve the normal-approximation power equation for the one unknown left
# unspecified (the number of observations), with equally sized groups (ratio=1).
sample_size = sms.NormalIndPower().solve_power(
    effect_size=effect_size,
    power=sensitivity,
    alpha=confidence_level,
    ratio=1,
)
print(sample_size)

14744.104836925611


# **Вариант 3**

In [6]:
def get_sample_size(p1,p2,alpha=0.05,power=0.8,r=1):
    """Sample sizes for comparing two proportions with unequal allocation.

    Uses a normal-approximation formula with a two-sided critical value
    for ``alpha`` and a one-sided quantile for ``power``.

    Args:
        p1 (float): Proportion in the first group.
        p2 (float): Proportion in the second group.
        alpha (float): Significance level of the two-sided test.
        power (float): Desired statistical power.
        r (float): Allocation ratio; the second returned size is ``r``
            times the first.

    Returns:
        tuple: ``(m, m * r)`` - the unrounded size of the first group and
        ``r`` times that value. The pooled proportion weights ``p2`` by
        ``r``, so the second value presumably belongs to the ``p2`` group.
    """
    # Standard-normal quantiles for the test level and the requested power.
    crit_two_sided = norm.ppf(1 - alpha/2)
    crit_power = norm.ppf(power)

    # Pooled proportion, with p2 weighted by the allocation ratio.
    pooled = (p1 + r*p2)/(r+1)
    effect = np.abs(p1-p2)

    # Spread under the pooled (null) proportion and under the two
    # separate (alternative) proportions.
    null_term = crit_two_sided * np.sqrt((r+1) * pooled * (1 - pooled))
    alt_term = crit_power * np.sqrt(r*p1*(1 - p1) + p2*(1 - p2))

    size_first = (null_term + alt_term)**2 / (r*effect**2)

    return size_first, size_first*r

In [7]:
# 10% vs 11% with the function's defaults (alpha=0.05, power=0.8, r=1).
get_sample_size(p1=0.10, p2=0.11)

(14750.790469044954, 14750.790469044954)

# **Вариант 4**

In [8]:
# Monte-Carlo check: simulate many A/B experiments with true rates of 10% and
# 11% and record how often the two-proportion z-test rejects at the 5% level.
n = 10000                         # number of simulated experiments
group_size = 14313                # observations per group in every experiment
rng = np.random.default_rng(42)   # fixed seed so the estimate is reproducible on re-run

res = []
for _ in tqdm(range(n)):

    # One Bernoulli outcome (0/1) per simulated user in each group.
    p1 = rng.binomial(1, 0.10, group_size)
    p2 = rng.binomial(1, 0.11, group_size)

    cnts = [p1.sum(), p2.sum()]
    nobs = [len(p1), len(p2)]
    # proportions_ztest returns (statistic, p-value); only the p-value is needed.
    pval = proportions_ztest(cnts, nobs)[1]
    res.append(pval <= 0.05)

  0%|          | 0/10000 [00:00<?, ?it/s]

In [9]:
# Share of simulated experiments in which the test rejected (p-value <= 0.05).
np.asarray(res).mean()

0.7922

# **Вариант 5**

In [10]:
def calc_sample_size(alpha, power, p, pct_mde, var='Absolute'):
    """Sample size needed to detect a change in a conversion rate.

    Based on https://www.evanmiller.org/ab-testing/sample-size.html

    Args:
        alpha (float): How often are you willing to accept a Type I error (false positive)?
        power (float): How often do you want to correctly detect a true positive (1-beta)?
        p (float): Base conversion rate
        pct_mde (float): Minimum detectable effect. Taken as an absolute
            difference in conversion rate when ``var`` is 'Absolute'
            (the default); otherwise taken as a fraction of ``p``.
        var (str): Interpretation of ``pct_mde``. 'Absolute' (matched
            case-insensitively) means absolute; any other value means
            relative to ``p``.

    Returns:
        float: The unrounded sample size (presumably per variation, as in
        the referenced calculator).
    """
    # Compare case-insensitively so that e.g. 'absolute' is not silently
    # treated as a relative effect.
    if str(var).strip().lower() == 'absolute':
        delta = pct_mde
    else:
        delta = p * pct_mde

    # Two-sided critical value for alpha and the quantile for the power.
    t_alpha2 = norm.ppf(1.0 - alpha / 2)
    t_beta = norm.ppf(power)

    # sd1 uses the base rate for both groups; sd2 uses the base rate for one
    # group and the shifted rate (p + delta) for the other.
    sd1 = np.sqrt(2 * p * (1.0 - p))
    sd2 = np.sqrt(p * (1.0 - p) + (p + delta) * (1.0 - p - delta))

    root = t_alpha2 * sd1 + t_beta * sd2
    return root * root / (delta * delta)

In [11]:
# 10% base rate with a 0.01 minimum detectable effect in the default mode.
calc_sample_size(alpha=0.05, power=0.8, p=0.1, pct_mde=0.01)

14312.856241916566

# **Вариант 6 (разный размер выборок)**

In [12]:
# 20% vs 25% with allocation ratio r=2: the second returned size is twice the first.
get_sample_size(p1=0.20, p2=0.25, r=2)

(828.557745393677, 1657.115490787354)

In [13]:
# Same 20% vs 25% comparison via statsmodels, with ratio=2 between the groups.
es = sms.proportion_effectsize(0.20, 0.25)
sms.NormalIndPower().solve_power(
    effect_size=es,
    power=0.80,
    alpha=0.05,
    ratio=2,
)

818.9221190753055