<a href="https://colab.research.google.com/github/glebmikha/ab-testing-course/blob/main/04_stat_test_metrics.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Randomness

In [1]:
import pandas as pd
import numpy as np

In [2]:
np.random.binomial(1,0.03,size=10)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [3]:
np.mean([0, 0, 0, 1, 0, 0, 0, 0, 0, 0])

0.1

In [4]:
np.random.binomial(1,0.03,size=100).mean()

0.01

In [5]:
np.random.binomial(1,0.03,size=1000).mean()

0.041

In [6]:
np.random.binomial(1,0.03,size=10).mean()

0.0

In [7]:
a = np.random.binomial(1,0.03,size=1000).mean()
b = np.random.binomial(1,0.05,size=1000).mean()

In [8]:
a, b

(0.034, 0.051)

In [9]:
n = 1000
# Simulate n independent A/B experiments. Each arm receives 1000 Bernoulli
# visitors (3% conversion probability for A, 5% for B) and the observed
# conversion rate of each arm is stored as an (a, b) pair.
result = [
  (
    np.random.binomial(1,0.03,size=1000).mean(),
    np.random.binomial(1,0.05,size=1000).mean(),
  )
  for _ in range(n)
]

In [10]:
df = pd.DataFrame(result,columns=['a','b'])

In [11]:
df

Unnamed: 0,a,b
0,0.030,0.053
1,0.031,0.048
2,0.043,0.058
3,0.025,0.047
4,0.027,0.062
...,...,...
995,0.033,0.050
996,0.027,0.037
997,0.027,0.040
998,0.034,0.038


In [12]:
df[df['a'] > df['b']]

Unnamed: 0,a,b
150,0.039,0.037
429,0.04,0.037
465,0.038,0.037
481,0.037,0.033
547,0.037,0.036
794,0.045,0.033
863,0.039,0.033


# Testing the statistical test

https://glebmikha.github.io/ab-test-calculator-by-gleb-mikhaylov/

In [13]:
a = np.random.binomial(1,0.03,size=1484).mean()
b = np.random.binomial(1,0.05,size=1484).mean()

In [14]:
a, b

(0.028975741239892182, 0.0444743935309973)

In [15]:
a * 1484, b * 1484

(43.0, 66.0)

In [16]:
np.random.binomial(10,0.5)

6

In [17]:
np.random.binomial(1484,0.03)

47

In [18]:
a = np.random.binomial(1484,0.03)
b = np.random.binomial(1484,0.05)

In [19]:
a, b

(48, 74)

In [20]:
from statsmodels.stats.proportion import proportions_ztest

In [21]:
def test(conv_a,conv_b,size_a,size_b,significance=0.05,alternative='two-sided'):
  """Report whether two conversion counts differ significantly.

  Runs a two-sample z-test for proportions (statsmodels
  ``proportions_ztest``) on the two groups and compares the resulting
  p-value with the significance level.

  Args:
    conv_a: number of conversions observed in group A.
    conv_b: number of conversions observed in group B.
    size_a: number of observations in group A.
    size_b: number of observations in group B.
    significance: p-value threshold (alpha); defaults to 0.05.
    alternative: alternative hypothesis forwarded to ``proportions_ztest``
      ('two-sided', 'smaller' or 'larger'). Defaults to 'two-sided', the
      value that was previously hard-coded, so existing calls behave the same.

  Returns:
    The boolean result of ``p_value < significance``: truthy when the
    difference is significant at the given level, falsy otherwise.
  """
  # Only the p-value is needed; the z statistic is discarded.
  _, p_value = proportions_ztest([conv_a, conv_b],
                                 [size_a, size_b],
                                 alternative=alternative)
  return p_value < significance

In [22]:
test(49,52,1484,1484)

False

In [23]:
test(44,78,1484,1484)

True

# TPR (Sensitivity)

In [24]:
n = 100000
# Draw all n experiments at once instead of looping in Python: a single
# vectorized call returns one conversion count per simulated experiment.
# Group A: 1484 visitors at 3% conversion; group B: 1484 visitors at 5%.
conversions_a = np.random.binomial(1484,0.03,size=n)
conversions_b = np.random.binomial(1484,0.05,size=n)
# Keep `result` as a list of (a, b) tuples of plain Python ints, exactly the
# shape the loop used to produce, so building a DataFrame from it is unchanged.
result = list(zip(conversions_a.tolist(), conversions_b.tolist()))

In [25]:
df = pd.DataFrame(result,columns=['a','b'])

In [26]:
df

Unnamed: 0,a,b
0,50,75
1,37,62
2,41,95
3,47,61
4,48,88
...,...,...
99995,55,68
99996,39,47
99997,44,71
99998,41,64


In [27]:
# Run the significance test on every simulated experiment (one per row);
# both groups have 1484 observations.
df['test'] = df.apply(
  lambda experiment: test(experiment['a'], experiment['b'], 1484, 1484),
  axis=1,
)

In [28]:
df

Unnamed: 0,a,b,test
0,50,75,True
1,37,62,True
2,41,95,True
3,47,61,False
4,48,88,True
...,...,...,...
99995,55,68,False
99996,39,47,False
99997,44,71,True
99998,41,64,True


In [29]:
df['test'].mean()

0.79908

# FPR

In [30]:
from tqdm.notebook import tqdm

In [31]:
n = 100000
# A/A simulation: both groups are drawn with the same 3% conversion
# probability. Each entry is an (a, b) pair of conversion counts out of
# 1484 visitors per group; tqdm shows progress over the n experiments.
result = [
  (np.random.binomial(1484,0.03), np.random.binomial(1484,0.03))
  for _ in tqdm(range(n))
]

  0%|          | 0/100000 [00:00<?, ?it/s]

In [32]:
df = pd.DataFrame(result,columns=['a','b'])

In [33]:
df

Unnamed: 0,a,b
0,37,44
1,41,49
2,43,51
3,55,54
4,41,46
...,...,...
99995,39,40
99996,46,41
99997,45,46
99998,41,36


In [34]:
from tqdm import tqdm

In [35]:
tqdm.pandas()

In [36]:
# Run the significance test on every simulated experiment (one per row),
# with a progress bar; both groups have 1484 observations.
df['test'] = df.progress_apply(
  lambda experiment: test(experiment['a'], experiment['b'], 1484, 1484),
  axis=1,
)

100%|██████████| 100000/100000 [00:20<00:00, 4952.52it/s]


In [37]:
df

Unnamed: 0,a,b,test
0,37,44,False
1,41,49,False
2,43,51,False
3,55,54,False
4,41,46,False
...,...,...,...
99995,39,40,False
99996,46,41,False
99997,45,46,False
99998,41,36,False


In [38]:
df['test'].mean()

0.05003

# MDE

In [39]:
from tqdm.notebook import tqdm

In [40]:
n = 1000
# Simulate n experiments with a real effect: group A converts at 3%,
# group B at 5%, 1484 visitors each. Each entry is an (a, b) pair of
# conversion counts; tqdm shows progress over the n experiments.
result = [
  (np.random.binomial(1484,0.03), np.random.binomial(1484,0.05))
  for _ in tqdm(range(n))
]

  0%|          | 0/1000 [00:00<?, ?it/s]

In [41]:
df = pd.DataFrame(result,columns=['a','b'])

In [42]:
from tqdm import tqdm

In [43]:
tqdm.pandas()

In [44]:
# Run the significance test on every simulated experiment (one per row),
# with a progress bar; both groups have 1484 observations.
df['test'] = df.progress_apply(
  lambda experiment: test(experiment['a'], experiment['b'], 1484, 1484),
  axis=1,
)

100%|██████████| 1000/1000 [00:00<00:00, 5159.75it/s]


In [45]:
df['test'].mean()

0.788

# Testing Evan Miller

## TPR

In [46]:
from tqdm.notebook import tqdm

In [47]:
sample_size = 1245

In [48]:
n = 10000
# Simulate n experiments with a real effect (3% vs 5% conversion), each
# group having `sample_size` visitors. Each entry is an (a, b) pair of
# conversion counts; tqdm shows progress over the n experiments.
result = [
  (np.random.binomial(sample_size,0.03), np.random.binomial(sample_size,0.05))
  for _ in tqdm(range(n))
]

  0%|          | 0/10000 [00:00<?, ?it/s]

In [49]:
df = pd.DataFrame(result,columns=['a','b'])

In [50]:
from tqdm import tqdm

In [51]:
tqdm.pandas()

In [52]:
# Run the significance test on every simulated experiment (one per row),
# with a progress bar; both groups have `sample_size` observations.
df['test'] = df.progress_apply(
  lambda experiment: test(experiment['a'], experiment['b'],
                          sample_size, sample_size),
  axis=1,
)

100%|██████████| 10000/10000 [00:01<00:00, 5470.00it/s]


In [53]:
df['test'].mean()

0.728

## FPR

In [54]:
from tqdm.notebook import tqdm

In [55]:
sample_size = 1245

In [56]:
n = 10000
# A/A simulation: both groups are drawn with the same 3% conversion
# probability and `sample_size` visitors each. Each entry is an (a, b) pair
# of conversion counts; tqdm shows progress over the n experiments.
result = [
  (np.random.binomial(sample_size,0.03), np.random.binomial(sample_size,0.03))
  for _ in tqdm(range(n))
]

  0%|          | 0/10000 [00:00<?, ?it/s]

In [57]:
df = pd.DataFrame(result,columns=['a','b'])

In [58]:
from tqdm import tqdm

In [59]:
tqdm.pandas()

In [60]:
# Run the significance test on every simulated experiment (one per row),
# with a progress bar; both groups have `sample_size` observations.
df['test'] = df.progress_apply(
  lambda experiment: test(experiment['a'], experiment['b'],
                          sample_size, sample_size),
  axis=1,
)

100%|██████████| 10000/10000 [00:01<00:00, 5484.19it/s]


In [61]:
df['test'].mean()

0.0533

# Sample Sizes in Python

In [62]:
import numpy as np
from statsmodels.stats.power import NormalIndPower
import statsmodels.api as sm

# Experiment design inputs.
baseline = 0.03   # conversion rate of the control group
mde = 0.02        # minimum detectable effect, absolute (0.03 -> 0.05)
ratio = 1         # size of group 2 relative to group 1
power = 0.8       # target power of the test
alpha = 0.05      # significance level

analysis = NormalIndPower()

# Effect size for the two proportions, as computed by statsmodels.
effect_size = sm.stats.proportion_effectsize(baseline, baseline + mde)

# nobs1 is not passed, so solve_power solves for the size of the first group.
result = analysis.solve_power(effect_size=effect_size, power=power, alpha=alpha, ratio=ratio, alternative='two-sided')
sample_size = int(np.ceil(result))

# Round the second group up too, so a non-integer ratio cannot produce a
# fractional sample size (unchanged for ratio = 1).
sample_size, int(np.ceil(sample_size*ratio))

(1484, 1484)

In [64]:
70000 * 0.01

700.0

In [None]:
26000