In [32]:
import pymc3 as pm
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import janitor


%load_ext autoreload
%autoreload 2
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In this notebook, I will simulate data that we will use to answer a "click data" question.

The question at hand is this: we want to run an experiment on our website in which we measure the conversion rate of our customers. Our customers are searching for flights, and we have collected the number of clicks each of them made before making a purchase.

In [29]:
import pandas_flavor as pf

@pf.register_dataframe_method
def find_replace(df, column, mapper):
    """Return a copy of ``df`` with the values of one column translated.

    Every value in ``df[column]`` that is a key of ``mapper`` is replaced by
    the mapped value; any other value is kept unchanged. The input frame is
    left untouched because the work is done on a copy.

    :param df: DataFrame to transform.
    :param column: Label of the column whose values are translated.
    :param mapper: Object exposing ``get(key, default)`` (e.g. a dict).
    :returns: A new DataFrame with ``column`` rewritten.
    """
    def translate(value):
        # Values without an entry in the mapper fall through as they are.
        return mapper.get(value, value)

    result = df.copy()
    result[column] = result[column].apply(translate)
    return result


# Integer code assigned to each experiment-group label.
mapper = dict(ctrl=0, grp1=1, grp2=2)

  register_dataframe_accessor(method.__name__)(AccessorMethod)


In [16]:
# Simulation parameters, one (p, mu, alpha) triple per group. The simulation
# model passes them to pm.ZeroInflatedNegativeBinomial as psi, mu and alpha
# for the 'ctrl', 'grp1' and 'grp2' variables respectively.
#
# NOTE(review): PyMC3 documents psi as the expected proportion of
# negative-binomial (i.e. not structurally zero) draws, so p_1 = 0.06 implies
# about 94% structural zeros (an earlier note here said 90%) -- confirm the
# intended value.
# NOTE(review): under PyMC3's (mu, alpha) parameterisation the equivalent
# per-trial success probability is alpha / (mu + alpha), e.g. 6/8 for set 1.

# Parameter set 1 (used for 'ctrl')
p_1, mu_1, alpha_1 = 0.06, 2, 6

# Parameter set 2 (used for 'grp1')
p_2, mu_2, alpha_2 = 0.11, 3, 4

# Parameter set 3 (used for 'grp2')
p_3, mu_3, alpha_3 = 0.13, 1, 3

In [25]:
# Generative model for the simulated experiment: one zero-inflated
# negative-binomial variable per group, each with its own parameter set.
expt1_sim = pm.Model()
with expt1_sim:
    ctrl = pm.ZeroInflatedNegativeBinomial(
        'ctrl', psi=p_1, mu=mu_1, alpha=alpha_1
    )
    grp1 = pm.ZeroInflatedNegativeBinomial(
        'grp1', psi=p_2, mu=mu_2, alpha=alpha_2
    )
    grp2 = pm.ZeroInflatedNegativeBinomial(
        'grp2', psi=p_3, mu=mu_3, alpha=alpha_3
    )

In [41]:
# Draw 3000 simulated click counts per group from the generative model and
# reshape them to long format: the group label goes to `experiment_group`
# and the count to `clicks`.
# A fixed random_seed keeps the simulated data set identical across kernel
# restarts, so downstream results can be reproduced.
data = (
    pd.DataFrame(pm.sample_prior_predictive(3000, expt1_sim, random_seed=42))
    .melt(
        id_vars=None,
        value_vars=['ctrl', 'grp1', 'grp2'],
        var_name='experiment_group',
        value_name='clicks',
    )
    # pyjanitor method; the inference model indexes its per-group parameters
    # with the `experiment_group_enc` column.
    .label_encode('experiment_group')
)

In [54]:
# Inference model: zero-inflated negative-binomial likelihood on the click
# counts, with a separate mu, alpha and psi for each of the three groups.
# The scales of the mu and alpha priors get HalfCauchy hyperpriors.
model = pm.Model()
with model:
    # Row-wise group code, used to pick each observation's parameters.
    group_idx = data['experiment_group_enc']

    mu_prior_lam = pm.HalfCauchy('mu_prior_lam', beta=1)
    mu = pm.HalfNormal('mu', shape=(3,), sd=mu_prior_lam)
    mu = mu[group_idx]

    alpha_prior_lam = pm.HalfCauchy('alpha_prior_lam', beta=1)
    alpha = pm.HalfNormal('alpha', shape=(3,), sd=alpha_prior_lam)
    alpha = alpha[group_idx]

    p = pm.Beta('p', shape=(3,), alpha=1, beta=1)
    p = p[group_idx]

    like = pm.ZeroInflatedNegativeBinomial(
        'like',
        psi=p,
        mu=mu,
        alpha=alpha,
        observed=data['clicks'],
    )

In [55]:
# Draw 2000 posterior samples per chain from the inference model.
# A fixed random_seed makes the chains reproducible on Restart & Run All.
with model:
    trace = pm.sample(2000, random_seed=42)

Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [p, alpha, alpha_prior_lam, mu, mu_prior_lam]
Sampling 2 chains:   7%|▋         | 342/5000 [02:44<20:45,  3.74draws/s]


ValueError: Not enough samples to build a trace.

In [None]:
# Plot the sampled traces of the inference model.
# NOTE(review): relies on `trace` from the sampling cell; the recorded run of
# that cell ended in a ValueError, so `trace` may be missing or stale here.
pm.traceplot(trace)