In [11]:
import numpy as np
import pandas as pd
from scipy.stats import beta

# Seed NumPy's global random state once, after all imports, so the random
# draws made through np.random are reproducible.
np.random.seed(42)

In [2]:
# Simulate 1000 trials for each of the A (control) and B (treatment) groups.
# A single 2 x group_size array of uniform draws in [0, 1) is generated;
# row 0 becomes the control group and row 1 the treatment group.
group_size = 1000
uniform_draws = np.random.rand(2, group_size)
control_group = uniform_draws[0]
treatment_group = uniform_draws[1]

In [7]:
# A trial counts as a success when its draw falls below the group's threshold:
# 0.15 for the control group and 0.2 for the treatment group.
# np.count_nonzero counts the True entries of each boolean mask in a single
# vectorized call instead of iterating the array with the built-in sum().
control_success = int(np.count_nonzero(control_group < 0.15))
control_failure = group_size - control_success
treatment_success = int(np.count_nonzero(treatment_group < 0.2))
treatment_failure = group_size - treatment_success

In [30]:
# Prior belief about the success rate: Beta(8, 42).
# The mean of Beta(a, b) is a / (a + b), i.e. 8 / 50 = 0.16 here.
prior_shape = (8, 42)
beta_prior = beta(*prior_shape)
beta_prior.mean()

0.16

In [24]:
# The metric is a proportion, modelled with a Beta(8, 42) prior.
# Updating the prior with the observed data gives another Beta distribution:
# observed successes are added to the first shape parameter and observed
# failures to the second.
prior_a, prior_b = 8, 42
control_posterior = beta(prior_a + control_success, prior_b + control_failure)
treatment_posterior = beta(prior_a + treatment_success, prior_b + treatment_failure)
# Report the posterior mean of each group.
print('control group posterior mean: ', control_posterior.mean())
print('treatment group posterior mean: ', treatment_posterior.mean())

control group posterior mean:  0.1657142857142857
treatment group posterior mean:  0.20095238095238097


In [15]:
# Monte Carlo Simulation
## draw 100k samples from the control and treatment posterior distributions.
## rvs(size=n_trials) produces all draws in one vectorized call instead of
## calling rvs() once per sample inside a Python loop. The control draws are
## still taken before the treatment draws.
n_trials = 100000
control_sample = pd.Series(control_posterior.rvs(size=n_trials))
treatment_sample = pd.Series(treatment_posterior.rvs(size=n_trials))

In [28]:
## how many times did treatment outperform control?
## The element-wise comparison yields a boolean Series; Series.sum() counts
## its True entries in one vectorized call rather than looping with the
## built-in sum().
treatment_wins = (treatment_sample > control_sample).sum()
## percentage of treatment wins - probability to be the best,
## rounded to the nearest whole percent
perc_treatment_wins = round((treatment_wins / n_trials) * 100, 0)
print('probability that treatment is better than control: ', perc_treatment_wins, '%')

probability that treatment is better than control:  98.0 %


In [23]:
# 90% credible interval for each group: the 5th and 95th percentiles of the
# Monte Carlo samples drawn from the control and treatment posteriors.
lower_q, upper_q = 0.05, 0.95
control_interval = [control_sample.quantile(lower_q), control_sample.quantile(upper_q)]
treatment_interval = [treatment_sample.quantile(lower_q), treatment_sample.quantile(upper_q)]
print(r"the 90% credible interval of the control group is:", control_interval)
print(r"the 90% credible interval of the treatment group is:", treatment_interval)

the 90% credible interval of the control group is: [0.1470891750067972, 0.18494424162056522]
the 90% credible interval of the treatment group is: [0.1808940075468876, 0.22161401176831239]
