# A/B/C testing with Bernoulli trials

A small example of Bayesian A/B/C testing with Bernoulli trials.

In [None]:
import itertools
import warnings
import numpy as np
from scipy.fftpack import next_fast_len
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
import tensorflow_probability as tfp
import arviz as az

# Apply the ArviZ dark-grid Matplotlib style to the figures in this notebook.
az.style.use("arviz-darkgrid")
# NOTE(review): set_theme() runs after the ArviZ style and also changes
# Matplotlib defaults, so it presumably overrides part of it - confirm that
# this order is intended.
sns.set_theme()

# Short alias for the TFP distributions module used by the model cells.
tfd = tfp.distributions

In [None]:
def trace_stuff(states, previous_kernel_results):
    """Report sampling progress and hand the kernel results back unchanged.

    Meant to be passed as ``trace_fn`` to ``tfp.mcmc.sample_chain``.

    Args:
        states: Current chain state(s); only used in the progress message.
        previous_kernel_results: Kernel results for the step being traced.

    Returns:
        ``previous_kernel_results``, exactly as received.
    """
    # The step counter is a module-level iterator named ``counter``; it has to
    # be (re)created before sampling starts.
    step = next(counter)

    # Print a progress line on every 100th call only.
    if not step % 100:
        print(f"Step {step}, state: {states}")

    return previous_kernel_results

In [None]:
# Per-group outcome vectors rebuilt from the contingency-table counts
# - please double check the counts!
def _outcome_vector(n_ones, n_zeros):
    """Return a 1-D tensor of `n_ones` ones followed by `n_zeros` zeros."""
    return tf.concat([tf.ones(n_ones), tf.zeros(n_zeros)], axis=0)


data_no_action = _outcome_vector(320, 1015)
data_free_delivery = _outcome_vector(1288, 3205)
data_no_discount = _outcome_vector(1198, 3235)

# Echo the three shapes as the cell output.
data_no_action.shape, data_free_delivery.shape, data_no_discount.shape

In [None]:
# Number of chains: each group's initial state is stacked this many times.
n_chains = 4

In [None]:
# One joint distribution per group, defined so that it behaves well with
# varying parameter shapes. The three models differ only in their data, so
# they are built by a single factory.
def _make_bernoulli_joint(observations):
    """Build the prior/likelihood joint distribution for one group.

    Args:
        observations: 1-D tensor of outcomes for the group. Only its shape is
            used here, to size the event of the likelihood.

    Returns:
        A `tfd.JointDistributionSequential` with a Uniform(0, 1) prior on the
        success probability `p` followed by independent Bernoulli trials.
    """
    return tfd.JointDistributionSequential([
        tfd.Uniform(low=0., high=1.),
        # Expanding the last axis of `p` and broadcasting against the data
        # gives one probability per observation for every leading (batch)
        # entry of `p`; `Independent` then folds the observation axis into
        # the event so `log_prob` sums over the trials.
        lambda p: tfd.Independent(
            tfd.Bernoulli(
                probs=tf.expand_dims(p, -1) * tf.ones_like(observations)
            ),
            reinterpreted_batch_ndims=1
        )
    ])


joint_distr_no_action = _make_bernoulli_joint(data_no_action)
joint_distr_free_delivery = _make_bernoulli_joint(data_free_delivery)
joint_distr_no_discount = _make_bernoulli_joint(data_no_discount)

In [None]:
# Target log prob function for the sampler, built from the three joint
# distribution objects.
def unnormalized_posterior_log_prob_combined(p_control, p_treatment, p_treatment_2):
    """Return the summed joint log-probability of the three groups.

    Args:
        p_control: Success probability for the "no action" group.
        p_treatment: Success probability for the "free delivery" group.
        p_treatment_2: Success probability for the "no discount" group.

    Returns:
        The sum of the three joint log-probabilities, each evaluated at the
        given probability and that group's observed data.
    """
    log_prob_no_action = joint_distr_no_action.log_prob(
        p_control, data_no_action)
    log_prob_free_delivery = joint_distr_free_delivery.log_prob(
        p_treatment, data_free_delivery)
    log_prob_no_discount = joint_distr_no_discount.log_prob(
        p_treatment_2, data_no_discount)
    return log_prob_no_action + log_prob_free_delivery + log_prob_no_discount


# Test if the unnormalized posterior log prob behaves as expected with a
# possible initial state as the input: every chain starts at the empirical
# mean of its group's data.
state_batch = [
    tf.stack([tf.reduce_mean(tf.cast(observations, tf.float32))] * n_chains)
    for observations in (data_no_action, data_free_delivery, data_no_discount)
]

unnormalized_posterior_log_prob_combined(*state_batch)

In [None]:
number_of_steps = 2000  # Draws kept per chain.
burnin = 500  # Warm-up draws per chain, discarded by `sample_chain` itself.
leapfrog_steps = 2

# Set the chain's start state: every chain starts at the empirical mean of
# its group's data.
initial_chain_state = [
    tf.stack([tf.reduce_mean(tf.cast(observations, tf.float32))] * n_chains)
    for observations in (data_no_action, data_free_delivery, data_no_discount)
]

# HMC operates over unconstrained space, so each probability is sampled on the
# real line and mapped back to (0, 1) with a sigmoid.
unconstraining_bijectors = [
    tfp.bijectors.Sigmoid(),  # Maps R to (0, 1).
    tfp.bijectors.Sigmoid(),  # Maps R to (0, 1).
    tfp.bijectors.Sigmoid(),  # Maps R to (0, 1).
]

# Initial step size; the adapted value is tracked in the kernel results.
step_size = tf.Variable(0.5, dtype=tf.float32)

# Defining the HMC.
# Step size adaptation is done by wrapping the kernel in
# `SimpleStepSizeAdaptation`; the legacy `step_size_update_fn` hook is
# deprecated in favour of this wrapper.
# The step size adaptation prevents stationarity to occur, so the number of
# adaptation steps should be smaller than the number of burnin steps so that
# in the remaining part of the burnin phase stationarity can be reached.
hmc = tfp.mcmc.SimpleStepSizeAdaptation(
    inner_kernel=tfp.mcmc.TransformedTransitionKernel(
        inner_kernel=tfp.mcmc.HamiltonianMonteCarlo(
            target_log_prob_fn=unnormalized_posterior_log_prob_combined,
            num_leapfrog_steps=leapfrog_steps,
            step_size=step_size,
            state_gradients_are_stopped=True),
        bijector=unconstraining_bijectors),
    num_adaptation_steps=int(burnin * 0.8))

# Sampling from the chain.
print('Sampling started')

# Step counter read by `trace_stuff`; recreated so every run starts at 1.
counter = itertools.count(1)

# `num_burnin_steps` already drops the warm-up draws, so `num_results` is the
# number of draws to keep and the outputs need no further slicing.
[
    posterior_prob_no_action,
    posterior_prob_free_delivery,
    posterior_prob_no_discount
], kernel_results = tfp.mcmc.sample_chain(
    num_results=number_of_steps,
    num_burnin_steps=burnin,
    current_state=initial_chain_state,
    kernel=hmc,
    trace_fn=trace_stuff)

print('Sampling finished')

# Post-burn-in draws, one tensor per group.
trace_no_action_combined_burned = posterior_prob_no_action
trace_free_delivery_combined_burned = posterior_prob_free_delivery
trace_no_discount_combined_burned = posterior_prob_no_discount

# The traces are transposed before being handed to ArviZ.
# NOTE(review): presumably because the sampler returns (draw, chain) while
# ArviZ expects (chain, draw) - confirm.
inference_data = az.convert_to_inference_data({
    'p_no_action': tf.transpose(trace_no_action_combined_burned),
    'p_free_delivery': tf.transpose(trace_free_delivery_combined_burned),
    'p_no_discount': tf.transpose(trace_no_discount_combined_burned)
})

In [None]:
# Display the InferenceData object built from the posterior draws.
inference_data

In [None]:
# Tabular ArviZ summary of the three posterior probabilities.
az.summary(inference_data)

In [None]:
# Trace plot of the sampled parameters.
az.plot_trace(inference_data)

# Autocorrelation plot of the draws.
az.plot_autocorr(inference_data)

# Posterior plot of each parameter.
az.plot_posterior(inference_data)

# Forest plot of the three parameters.
az.plot_forest(inference_data)

In [None]:
# Pool the draws of all chains into a single 1-D sample vector per group.
# The target shape is given as the 1-D list `[-1]`, which lets TensorFlow
# infer the length, instead of a bare integer product of the two dimensions.
trace_no_action_flattened = tf.reshape(
    trace_no_action_combined_burned, [-1])

trace_free_delivery_flattened = tf.reshape(
    trace_free_delivery_combined_burned, [-1])

trace_no_discount_flattened = tf.reshape(
    trace_no_discount_combined_burned, [-1])

In [None]:
# Posterior mean and standard deviation of the pooled free delivery draws.
np.mean(trace_free_delivery_flattened.numpy()), np.std(trace_free_delivery_flattened.numpy())

In [None]:
# Posterior mean and standard deviation of the pooled no discount draws.
np.mean(trace_no_discount_flattened.numpy()), np.std(trace_no_discount_flattened.numpy())

In [None]:
# Empirical means of the observed data, for comparison with the posterior means.
np.mean(data_free_delivery.numpy()), np.mean(data_no_discount.numpy())

In [None]:
# Pairwise comparisons: the fraction of pooled draws in which the first
# group's sampled rate exceeds the second's.
for message, higher, lower in (
    (
        'Estimated probability that the no discount group returns more than the no action group:',
        trace_no_discount_flattened,
        trace_no_action_flattened,
    ),
    (
        'Estimated probability that the free delivery group returns more than the no action group:',
        trace_free_delivery_flattened,
        trace_no_action_flattened,
    ),
    (
        'Estimated probability that the free delivery group returns more than the no discount group:',
        trace_free_delivery_flattened,
        trace_no_discount_flattened,
    ),
):
    print(message, (higher > lower).numpy().mean())

In [None]:
fig = plt.figure(figsize=(14, 6))

# Convert each pooled trace to NumPy once; the groups are plotted in this
# order and take the first three colours of the current seaborn palette.
group_samples = [
    trace_no_action_flattened.numpy(),
    trace_free_delivery_flattened.numpy(),
    trace_no_discount_flattened.numpy(),
]
group_labels = [
    f'No action (est. mean: {group_samples[0].mean()})',
    f'Free delivery (est. mean: {group_samples[1].mean()})',
    f'No discount (est. mean: {group_samples[2].mean()})',
]
group_colors = sns.color_palette()[:3]

# Density histograms with a KDE overlay, one per group.
for samples, label, color in zip(group_samples, group_labels, group_colors):
    sns.histplot(
        x=samples,
        label=label,
        color=color,
        stat='density',
        kde=True)

# Full-height vertical line at each group's estimated mean, drawn after all
# histograms.
for samples, color in zip(group_samples, group_colors):
    plt.axvline(
        x=samples.mean(),
        ymin=0.,
        ymax=1.,
        color=color
    )

plt.legend()
plt.title('Rate of returning customers by CRM action', fontsize=14)