Scratch code for generalization to n_variant a/b test + analysis

In [1]:
import os
import sys
import pandas as pd
import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import matplotlib.ticker as mtick

from matplotlib import style
import statsmodels.api as sm
from statsmodels.stats.multitest import multipletests as mult_test
from tqdm.auto import tqdm

In [2]:
# additional imports 
sys.path.append('../../ab_testing_utils/')

In [3]:
import conversion_rate_utils

In [4]:
# Set pandas preferences
pd.options.display.max_columns=500
pd.options.display.max_colwidth=500
pd.options.display.max_rows=500

In [5]:
# Set plot style
style.use('fivethirtyeight')

In [6]:
# Initialize tqdm for pandas
tqdm.pandas()

We'll continue to work in units of weeks for the experiment run times. Suppose that we already know the average baseline conversion rate, as well as the average number of daily visitors, from historical data. There could be variance in these estimates, but we'll ignore that problem for now.

In [8]:
# Simulation inputs, assumed known from historical data
daily_num_observations = 10000  # average number of visitors per day
monthly_num_observations = daily_num_observations * 7 * 4  # a "month" here is 4 weeks of 7 days
baseline_conversion_rate = 0.05  # average baseline conversion rate

In [7]:
number_of_weeks_for_experiment = 4
number_of_days_for_experiment = number_of_weeks_for_experiment * 7

In [9]:
# First, we need to simulate the number of expected visitors per day
daily_units = []
day_list = []
for i in range(number_of_days_for_experiment):
    day_ = str(i)
    number_of_observations = np.random.poisson(daily_num_observations)
    observations = [1] * number_of_observations
    daily_units = daily_units + observations
    day_index = [day_] * number_of_observations
    day_list = day_list + day_index

In [10]:
# One row per simulated visitor: the day label and a unit count of 1
df_ab = pd.DataFrame({'day': day_list, 'units': daily_units})

In [11]:
df_ab.head()

Unnamed: 0,day,units
0,0,1
1,0,1
2,0,1
3,0,1
4,0,1


In [93]:
# Let's consider a 3 variant test with equal group sizes as a first test
# NOTE(review): assign_randomly is defined in the cell *below* this one (In [92]);
# on a fresh kernel that cell has to be run before this call.
df_ab_3_var = assign_randomly(df=df_ab, n_variants=3, p_vals='equal')

In [92]:
def assign_randomly(df, n_variants=2, p_vals='equal'):
    """Randomly assign every row of ``df`` to an experiment group.

    Group 0 is labelled ``'control'``; groups 1..k-1 are labelled
    ``'treatment_1'`` .. ``'treatment_{k-1}'``.

    Parameters
    ----------
    df : pd.DataFrame
        One row per experimental unit. The input is not modified.
    n_variants : int, default 2
        Number of groups (control included). Only used when
        ``p_vals == 'equal'``; otherwise the number of groups is
        ``len(p_vals)``.
    p_vals : 'equal' or list or np.ndarray, default 'equal'
        Assignment probability of each group, control first. ``'equal'``
        gives every group probability ``1 / n_variants``.

    Returns
    -------
    pd.DataFrame
        Copy of ``df`` with an added ``'group'`` column.

    Raises
    ------
    AssertionError
        If ``p_vals`` is neither ``'equal'`` nor a list/ndarray, or if the
        probabilities are empty, negative, or do not sum to 1 (within
        floating-point tolerance).
    """
    df_ = df.copy()

    # isinstance guard first: comparing an ndarray to a str is elementwise and
    # cannot be used directly as an `if` condition.
    if isinstance(p_vals, str) and p_vals == 'equal':
        assert n_variants >= 1, "n_variants must be at least 1"
        probabilities = np.full(n_variants, 1.0 / n_variants)
    else:
        assert isinstance(p_vals, (list, np.ndarray)), "if p_vals is not 'equal', then it must be either a list or np.ndarray"
        probabilities = np.asarray(p_vals, dtype=float)
        assert probabilities.ndim == 1 and probabilities.size > 0, "p_vals must be a non-empty 1-d sequence of probabilities"
        assert (probabilities >= 0).all(), "probabilities of assignment to each group {0} must be non-negative".format(p_vals)
        # Tolerant comparison: e.g. sum([0.1] * 10) is 0.9999999999999999, not 1
        assert np.isclose(probabilities.sum(), 1.0), "probabilities of assignment to each group {0} must sum to 1".format(p_vals)
        # Remove the residual rounding error before handing over to numpy
        probabilities = probabilities / probabilities.sum()

    # One categorical draw per row; assignments are i.i.d., so there is no
    # need to draw them day by day.
    group_codes = np.random.choice(len(probabilities), size=len(df_), p=probabilities)

    labels = np.array(
        ['control'] + ['treatment_{0}'.format(k) for k in range(1, len(probabilities))],
        dtype=object,
    )
    df_['group'] = labels[group_codes]

    return df_

In [94]:
df_ab_3_var.head()

Unnamed: 0,day,units,group
0,0,1,treatment_2
1,0,1,control
2,0,1,control
3,0,1,treatment_1
4,0,1,treatment_1


In [95]:
df_ab_3_var['group'].value_counts(normalize=True)

treatment_1    0.334286
control        0.333595
treatment_2    0.332120
Name: group, dtype: float64

In [96]:
# Looks good
df_ab_3_var[['day', 'group']].groupby('day').value_counts(normalize=True)

day  group      
0    control        0.337371
     treatment_2    0.335303
     treatment_1    0.327326
1    control        0.341557
     treatment_1    0.331728
     treatment_2    0.326715
10   treatment_1    0.335419
     control        0.334511
     treatment_2    0.330071
11   control        0.335252
     treatment_1    0.332727
     treatment_2    0.332021
12   treatment_2    0.336809
     control        0.335205
     treatment_1    0.327986
13   treatment_2    0.340163
     treatment_1    0.331626
     control        0.328211
14   treatment_1    0.335433
     control        0.332382
     treatment_2    0.332185
15   control        0.334574
     treatment_1    0.334280
     treatment_2    0.331145
16   control        0.344341
     treatment_2    0.329531
     treatment_1    0.326128
17   treatment_2    0.336070
     control        0.332566
     treatment_1    0.331365
18   treatment_2    0.334608
     treatment_1    0.334004
     control        0.331388
19   treatment_1    0.3351

In [115]:
# This is likely slow.  See below for better
# def generate_conversions(df, conversion_rate_dict, group_col='group'):
    
#     df_ = df.copy()
#     conversions = []
    
#     for n in df_[group_col].values:
#         p_ = conversion_rate_dict[n]
#         conv_ = np.random.binomial(n=1, p=p_)
#         conversions.append(conv_)
    
#     df_['conversion'] = conversions

In [114]:
def generate_conversions(df, conversion_rate_dict, group_col='group'):
    """Simulate a 0/1 conversion outcome for every row of ``df``.

    Parameters
    ----------
    df : pd.DataFrame
        One row per experimental unit; must contain ``group_col``.
        The input is not modified.
    conversion_rate_dict : dict
        Maps each group label to its true conversion probability.
    group_col : str, default 'group'
        Name of the column holding the group label.

    Returns
    -------
    pd.DataFrame
        Copy of ``df`` (with a fresh 0..n-1 index) plus two columns:
        ``'conversion_probability'`` (looked up from
        ``conversion_rate_dict``) and ``'conversion'`` (a Bernoulli draw
        with that probability).

    Raises
    ------
    AssertionError
        If any group present in ``df`` has no entry in
        ``conversion_rate_dict``.
    """
    df_ = df.copy()

    # Look the probability up directly instead of merging a helper frame: this
    # overwrites an existing 'conversion_probability' column, so the function
    # can safely be re-run on its own output (a merge would create _x/_y
    # suffixed columns and then fail).
    df_['conversion_probability'] = df_[group_col].map(conversion_rate_dict)

    # Validate the *mapped* column: a group missing from the dict shows up as NaN here
    assert not df_['conversion_probability'].isnull().values.any(), 'Ensure that all treatment groups are represented in the input conversion_rate_dict'

    # Single vectorized Bernoulli draw (binomial with n=1) for all rows
    probabilities = df_['conversion_probability'].to_numpy(dtype=float)
    df_['conversion'] = np.random.binomial(n=1, p=probabilities)

    # The previous merge-based implementation always returned a fresh
    # RangeIndex; keep that for callers relying on it.
    return df_.reset_index(drop=True)

Let's suppose that the control conversion rate is the same as the input baseline (which it should be, on average, or at least close): 0.05
Further, let's assume that treatment 1 is successful and drives a true conversion rate of 0.2, while treatment 2 is not successful and drives a conversion rate of 0.01 (i.e. worse than control)

In [116]:
simulate_conversion_rates_dict = {'control': baseline_conversion_rate, 'treatment_1': 0.2, 'treatment_2': 0.01}

In [117]:
df_ab_3_var = generate_conversions(df=df_ab_3_var, conversion_rate_dict=simulate_conversion_rates_dict, group_col='group')

In [118]:
df_ab_3_var.head()

Unnamed: 0,day,units,group,conversion_probability,conversion
0,0,1,treatment_2,0.01,0
1,0,1,control,0.05,0
2,0,1,control,0.05,0
3,0,1,treatment_1,0.2,0
4,0,1,treatment_1,0.2,0


In [119]:
# Yep, very nice :) 
df_ab_3_var[['group', 'conversion']].groupby('group').value_counts(normalize=True)

group        conversion
control      0             0.948913
             1             0.051087
treatment_1  0             0.799348
             1             0.200652
treatment_2  0             0.990389
             1             0.009611
dtype: float64

In [120]:
# This will be our simple simulation code for testing