In [1]:
import numpy as np
import pandas as pd
%matplotlib inline
from IPython.core.pylabtools import figsize
import matplotlib.pyplot as plt
import scipy.stats as stats
import pymc3 as pm

# Lab: Apply the Bayesian approach to an advertisement CTR A/B test using user click data (impressions)

## 1. Objective

- We will consider the same case study we discussed in class, where we are interested in running an A/B test to determine the better of two possible ad designs, Ad A and Ad B, running on the same marketing channel, based on their Click-Through Rate (CTR), which is simply the number of clicks the ad gets divided by the total number of impressions (views).


- Rather than using the daily CTR data as we did in class, we will use the ad-click events data collected from the website directly


- Our objective is the same: is Ad A better than Ad B, or vice versa? If so, by how much?

## 2. Code - Fill in the blanks

In [None]:
# Synthesize the click-event data ourselves: one Bernoulli draw per impression
# (1 = the user clicked the ad, 0 = the user did not click).

# The two ads need not receive the same number of impressions during the
# 30 days the test was running, so the sample sizes below differ on purpose.

# Fixed seed so that "Restart Kernel -> Run All" regenerates exactly the same
# data set (and therefore comparable posteriors) on every run.
SEED = 42
# A single generator feeds both draws. Passing the same integer seed to each
# call instead would make the two samples reuse the same random stream.
rng = np.random.RandomState(SEED)

CTR_A_true = 3.0  # true CTR of Ad A, in percent
CTR_B_true = 3.5  # true CTR of Ad B, in percent: 0.5 points (~17% relative) better, similar to what we did in class

N_impressions_A = 5000
N_impressions_B = 7500

# We are using a Bernoulli RV here with prob('user clicks the Ad') = prob('1') = CTR/100
p_A_true = CTR_A_true / 100
p_B_true = CTR_B_true / 100

data_user_A = stats.bernoulli.rvs(p_A_true, size=N_impressions_A, random_state=rng)
data_user_B = stats.bernoulli.rvs(p_B_true, size=N_impressions_B, random_state=rng)

# TODO (fill in the blanks): replace each ... with the observed CTR of the sample
print("observed CTR_A = ", ...)
print("observed CTR_B = ", ...)

### Step 1: Choose a likelihood distribution for the observed data → how can the data be generated ?

In [None]:
# Priors: the CTR expressed as a probability p lies between 0 and 1, and we
# assume we have no prior idea of its value,

# so we can use a uniform prior for both p_A and p_B.
# TODO (fill in the blanks): set lower/upper to the bounds of that range.

with pm.Model() as ab_test:
    p_A = pm.Uniform('p_A', lower=..., upper=...)  # unknown click probability of Ad A
    p_B = pm.Uniform('p_B', lower=..., upper=...)  # unknown click probability of Ad B

In [None]:
# Define delta as a deterministic function of the two CTRs: their difference.
# The summary cell at the end of the notebook prints P(delta < 0) under the label
# "Ad B is WORSE than Ad A", so delta is presumably meant to be p_B - p_A --
# confirm the sign when filling in the expression.
# TODO (fill in the blanks): re-enter the model context (presumably ab_test) and
# supply the expression for delta.
    
with ...:    
    delta = pm.Deterministic("delta", ...)


In [None]:
# What likelihood function does our data follow?
# Since we are modeling binary data (click / no click), the Bernoulli RV is the suitable choice.
# TODO (fill in the blanks): re-enter the model context, then give each observed
# node its click probability and the matching array of observed click events.

with ...:       
    obs_A = pm.Bernoulli("obs_A", ..., observed=...)  # likelihood of the Ad A click events
    obs_B = pm.Bernoulli("obs_B", ..., observed=...)  # likelihood of the Ad B click events

In [None]:
# Sample the posterior distributions.
# TODO (fill in the blanks): re-enter the model context, choose a step method,
# and pass it to pm.sample.
# NOTE(review): no random seed is passed to pm.sample, so the trace differs
# between runs -- consider fixing one for reproducibility.

with ...:
    step = ...
    # Draw 20000 posterior samples; return_inferencedata=False is passed so the
    # result can be sliced directly on the next line.
    trace = pm.sample(20000, step=..., return_inferencedata=False)
    # Drop the first 1000 draws as burn-in.
    # NOTE(review): pm.sample may already run its own tuning phase -- confirm
    # whether this extra burn-in is still needed.
    usable_trace = trace[1000:]

In [None]:
# Extract the posterior samples of each variable.
# TODO (fill in the blanks): pull the samples for 'p_A', 'p_B' and 'delta'.
# usable_trace (the trace with the first 1000 draws removed) is not used anywhere
# else in the notebook, so it is presumably the intended source rather than trace.

p_A_samples = ...
p_B_samples = ...
delta_samples = ...

In [None]:
# Plot the histograms of the posteriors.

figsize(15, 10)  # figure size used for the plots below
ax = plt.subplot(311)  # first panel of a 3-row, 1-column grid

# TODO (fill in the blanks): draw the histograms here. The 3x1 grid presumably
# holds one panel each for p_A, p_B and delta -- confirm the intended layout.
...
...
...
...

In [None]:
# Compute the fraction of posterior samples of delta which are less than 0.
# This is equivalent to the area under the posterior curve to the left of 0.
# With delta defined as p_B - p_A (as the labels below imply), this is the
# probability that Ad B is worse than Ad A.
# TODO (fill in the blanks): replace each ... with the posterior samples of delta.

print("Probability Ad B is WORSE than Ad A = {}".format(np.mean(... < 0)))
    

# Fraction of delta samples above 0: the probability that Ad B is better than Ad A.
print("Probability Ad B is BETTER than Ad A = {}".format(np.mean(... > 0)))
    

# Posterior mean of delta, multiplied by 100 to express the CTR difference in
# percentage points (delta itself is a difference of probabilities).
print("Estimated Improvement of CTR of Ad B over Ad A = {}".format(np.mean(...)*100))

## 3. Conclusions

In [None]:
# TODO: write up your conclusions here -- which ad performs better, with what
# probability, and by how much, based on the posterior summaries printed above.
...