<a href="https://colab.research.google.com/github/glebmikha/ab-testing-course/blob/main/05_hypotheses.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np

In [2]:
np.random.binomial(1,0.5,size=10)

array([1, 0, 1, 0, 1, 1, 0, 1, 1, 1])

In [3]:
np.random.binomial(10,0.5)

4

# FPR (false positive rate)

In [4]:
# Simulate n experiments under H0: each experiment is the number of successes
# in 10 trials with success probability 0.5.
n = 100_000
result = [0] * n
for trial in range(n):
  # Re-seed the global RNG with the trial index so each draw is reproducible.
  np.random.seed(trial)
  result[trial] = np.random.binomial(10, 0.5)

In [5]:
# One row per simulated experiment; the single column is labelled 0.
df = pd.DataFrame(data=result)

In [6]:
df

Unnamed: 0,0
0,5
1,5
2,5
3,5
4,8
...,...
99995,8
99996,7
99997,7
99998,3


In [7]:
# Absolute distance of each outcome from 5, the expected count for Bin(10, 0.5).
df['dev'] = (df[0] - 5).abs()

In [8]:
df

Unnamed: 0,0,dev
0,5,0
1,5,0
2,5,0
3,5,0
4,8,3
...,...,...
99995,8,3
99996,7,2
99997,7,2
99998,3,2


In [9]:
# Share of H0 experiments deviating from 5 by at least 4: the simulated FPR.
df['dev'].ge(4).mean()

0.02179

# TPR (true positive rate / power)

In [10]:
# Effect size for the alternative: the H1 simulation below draws with
# success probability 0.5 + mde.
mde = 0.3

In [11]:
# Simulate n experiments under H1: number of successes in 10 trials when the
# success probability is shifted to 0.5 + mde.
n = 100_000
result = [0] * n
for trial in range(n):
  # Same per-trial seeds as the H0 run, so both runs are reproducible.
  np.random.seed(trial)
  result[trial] = np.random.binomial(10, 0.5 + mde)

In [12]:
# One row per simulated H1 experiment; the single column is labelled 0.
df = pd.DataFrame(data=result)

In [13]:
# Deviation is still measured from 5, the count expected under H0.
df['dev'] = (df[0] - 5).abs()

In [14]:
df

Unnamed: 0,0,dev
0,8,3
1,8,3
2,8,3
3,8,3
4,6,1
...,...,...
99995,6,1
99996,6,1
99997,6,1
99998,10,5


In [15]:
# Share of H1 experiments deviating from 5 by at least 4: the simulated TPR.
df['dev'].ge(4).mean()

0.37491

# Increase FPR

## FPR

In [16]:
# Repeat the H0 simulation (10 trials, success probability 0.5) so this
# section can be evaluated with a looser rejection threshold.
n = 100_000
result = [0] * n
for trial in range(n):
  # Re-seed the global RNG with the trial index so each draw is reproducible.
  np.random.seed(trial)
  result[trial] = np.random.binomial(10, 0.5)

In [17]:
# One row per simulated experiment; the single column is labelled 0.
df = pd.DataFrame(data=result)

In [18]:
# Absolute distance of each outcome from 5, the expected count for Bin(10, 0.5).
df['dev'] = (df[0] - 5).abs()

In [19]:
# Simulated FPR with the rejection threshold lowered from 4 to 3.
df['dev'].ge(3).mean()

0.11145

## TPR

In [20]:
# Effect size for the alternative: the H1 simulation below draws with
# success probability 0.5 + mde.
mde = 0.3

In [21]:
# Repeat the H1 simulation (10 trials, success probability 0.5 + mde) for
# evaluation with the looser rejection threshold.
n = 100_000
result = [0] * n
for trial in range(n):
  # Re-seed the global RNG with the trial index so each draw is reproducible.
  np.random.seed(trial)
  result[trial] = np.random.binomial(10, 0.5 + mde)

In [22]:
# One row per simulated H1 experiment; the single column is labelled 0.
df = pd.DataFrame(data=result)

In [23]:
# Deviation is still measured from 5, the count expected under H0.
df['dev'] = (df[0] - 5).abs()

In [24]:
# Simulated TPR with the rejection threshold lowered from 4 to 3.
df['dev'].ge(3).mean()

0.67735

# Coin check

MDE: 10%  
Power (1 - beta) : 80%  
Significance (alpha): 5%

## FPR

In [25]:
# Number of trials per simulated experiment; passed as the binomial `n`
# in the simulations of this section.
sample_size = 200

In [26]:
# Simulate n experiments under H0: successes out of `sample_size` trials
# with success probability 0.5.
n = 100_000
result = [0] * n
for trial in range(n):
  # Re-seed the global RNG with the trial index so each draw is reproducible.
  np.random.seed(trial)
  result[trial] = np.random.binomial(sample_size, 0.5)

In [27]:
# One row per simulated experiment; the single column is labelled 0.
df = pd.DataFrame(data=result)

In [28]:
# Absolute distance of each outcome from the count expected under H0
# (half of the sample size).
df['dev'] = (df[0] - sample_size * 0.5).abs()

In [29]:
df

Unnamed: 0,0,dev
0,104,4.0
1,100,0.0
2,110,10.0
3,104,4.0
4,116,16.0
...,...,...
99995,114,14.0
99996,113,13.0
99997,91,9.0
99998,96,4.0


In [30]:
# Rejection threshold: 7% of the sample size.
# NOTE: for sample_size = 200 this evaluates to 14.000000000000002, not 14,
# so a deviation of exactly 14 does NOT satisfy `dev >= threshold` downstream.
threshold = 0.07 * sample_size

In [31]:
threshold

14.000000000000002

In [32]:
# Share of H0 experiments whose deviation reaches the threshold: simulated FPR.
df['dev'].ge(threshold).mean()

0.04011

## TPR

In [33]:
# Effect size for the alternative (the 10% MDE of this section): the H1
# simulation below draws with success probability 0.5 + mde.
mde = 0.1

In [34]:
# Simulate n experiments under H1: successes out of `sample_size` trials
# with success probability 0.5 + mde.
n = 100_000
result = [0] * n
for trial in range(n):
  # Re-seed the global RNG with the trial index so each draw is reproducible.
  np.random.seed(trial)
  result[trial] = np.random.binomial(sample_size, 0.5 + mde)

In [35]:
# One row per simulated H1 experiment; the single column is labelled 0.
df = pd.DataFrame(data=result)

In [36]:
# Deviation is still measured from the H0 expectation (half the sample size).
df['dev'] = (df[0] - sample_size * 0.5).abs()

In [37]:
# Share of H1 experiments whose deviation reaches the threshold: simulated TPR.
df['dev'].ge(threshold).mean()

0.78681

# Satisfaction check

H0: 80%  
H1: < 80%  

MDE: 10%  
Power (1 - beta) : 80%  
Significance (alpha): 5%

In [38]:
np.random.binomial(100,0.8)

76

## FPR

In [163]:
# Number of trials per simulated experiment; passed as the binomial `n`
# in the simulations of this section.
sample_size = 115

In [164]:
# Simulate n experiments under H0: successes out of `sample_size` trials
# with success probability 0.8.
n = 100_000
result = [0] * n
for trial in range(n):
  # Re-seed the global RNG with the trial index so each draw is reproducible.
  np.random.seed(trial)
  result[trial] = np.random.binomial(sample_size, 0.8)

In [165]:
# One row per simulated experiment; the single column is labelled 0.
df = pd.DataFrame(data=result)

In [166]:
sample_size * 0.8

92.0

In [167]:
# Signed deviation from the count expected under H0 (80% of the sample);
# negative values mean fewer successes than expected.
df['dev'] = df[0].sub(sample_size * 0.8)

In [168]:
df

Unnamed: 0,0,dev
0,92,0.0
1,93,1.0
2,93,1.0
3,92,0.0
4,84,-8.0
...,...,...
99995,84,-8.0
99996,86,-6.0
99997,87,-5.0
99998,98,6.0


In [181]:
# One-sided rejection threshold: 6% of the sample size.
# For sample_size = 115 this evaluates to 6.8999999999999995; the deviations
# are whole numbers there, so `dev <= -threshold` means a shortfall of 7 or more.
threshold = 0.06 * sample_size

In [182]:
threshold

6.8999999999999995

In [183]:
# One-sided test: share of H0 experiments falling short of the expected count
# by at least the threshold (simulated FPR).
# NOTE(review): the recorded output for this cell (0.06851) is above the 5%
# significance level listed for this section; confirm threshold / sample size.
df['dev'].le(-threshold).mean()

0.06851

## TPR

In [184]:
# Effect size for the alternative (the 10% MDE of this section): the H1
# simulation below draws with success probability 0.8 - mde.
mde = 0.1

In [185]:
# Simulate n experiments under H1: successes out of `sample_size` trials
# with success probability lowered to 0.8 - mde.
n = 100_000
result = [0] * n
for trial in range(n):
  # Re-seed the global RNG with the trial index so each draw is reproducible.
  np.random.seed(trial)
  result[trial] = np.random.binomial(sample_size, 0.8 - mde)

In [186]:
# One row per simulated H1 experiment; the single column is labelled 0.
df = pd.DataFrame(data=result)

In [187]:
# Signed deviation, still measured from the H0 expectation (80% of the sample).
df['dev'] = df[0].sub(sample_size * 0.8)

In [188]:
df

Unnamed: 0,0,dev
0,74,-18.0
1,83,-9.0
2,87,-5.0
3,79,-13.0
4,78,-14.0
...,...,...
99995,73,-19.0
99996,67,-25.0
99997,85,-7.0
99998,83,-9.0


In [189]:
# One-sided test: share of H1 experiments falling short of the H0 expectation
# by at least the threshold (simulated TPR).
df['dev'].le(-threshold).mean()

0.84279