We already have code to look at the results of a simple AB test (50/50 split with two groups only). Let's visualize the results

We'll also want to extend the existing functions to handle different splits as well as different numbers of variants

In [22]:
import os
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import scipy.stats as stats
import matplotlib.ticker as mtick
import statsmodels.api as sm
from time import gmtime, strftime
from matplotlib import style

In [23]:
sys.path.append('../../ab_testing_utils/')

In [24]:
import conversion_rate_utils

In [52]:
from importlib import reload

In [25]:
# set pandas preferences
pd.options.display.max_columns=500
pd.options.display.max_colwidth=500
pd.options.display.max_rows=500

In [26]:
# set plot style
style.use('fivethirtyeight')

In [121]:
reload(conversion_rate_utils)

<module 'conversion_rate_utils' from '/Users/danielokeeffe/Documents/src/stats_utils/stats_utils/notebooks/scratch_work/../../ab_testing_utils/conversion_rate_utils.py'>

Generate some fake data

In [27]:
daily_num_observations = 10000
monthly_num_observations = daily_num_observations * 7 * 4
baseline_conversion_rate = 0.05
number_of_weeks_for_experiment = 2
number_of_days_for_experiment = number_of_weeks_for_experiment * 7

In [28]:
daily_units = []
day_list = []
for i in range(number_of_days_for_experiment):
    day_ = str(i)
    number_of_observations = np.random.poisson(daily_num_observations)
    observations = [1] * number_of_observations
    daily_units = daily_units + observations
    day_index = [day_] * number_of_observations
    day_list = day_list + day_index

In [29]:
df_ab = pd.DataFrame()
df_ab['day'] = day_list
df_ab['units'] = daily_units

In [30]:
# Now we need to do random assignment
def assign_randomly(df, p=0.5):
    df_ = df.copy()
    df_g = df_.groupby('day', as_index=False).sum()
    assignment = []
    for day_, num_obs in zip(df_g['day'].unique(), df_g['units'].unique()):
        assignments_ = np.random.binomial(n=1, p=0.5, size=num_obs)
        assignment = assignment + list(assignments_)
    df_['group'] = assignment
    df_['group'] = df_['group'].apply(lambda x: 'treatment' if x == 1 else 'control')
    
    return df_

In [31]:
df_ab = assign_randomly(df_ab)

In [32]:
def generate_conversions(df, control_p, treatment_p, treatment_name, group_col='group'):
    df_ = df.copy()
    conversions = []
    for n in df_[group_col].values:
        if n == treatment_name:
            p_ = treatment_p
        else:
            p_ = control_p
        conv_ = np.random.binomial(n=1, p=p_)
        conversions.append(conv_)
    
    df_['conversion'] = conversions
    
    return df_

In [33]:
df_ab = generate_conversions(df=df_ab, control_p=baseline_conversion_rate, treatment_p=baseline_conversion_rate + 0.1*baseline_conversion_rate, treatment_name='treatment')

In [34]:
# Let's run this manually and compare to the results of our code base

In [35]:
df_ab_results = df_ab[['group', 'units', 'conversion']].groupby('group', as_index=False).sum()

In [36]:
df_ab_results['conversion_rate'] = df_ab_results['conversion'] / df_ab_results['units']

In [37]:
# Is this a significant result?
z_score, p_value = sm.stats.proportions_ztest([df_ab_results.query("group=='control'")['conversion'].values, df_ab_results.query("group=='treatment'")['conversion'].values], 
                                              [df_ab_results.query("group=='control'")['units'].values, df_ab_results.query("group=='treatment'")['units'].values])

In [38]:
z_score

array([-4.28414083])

In [39]:
p_value

array([1.83446695e-05])

In [40]:
lift = (df_ab_results.query("group=='treatment'")['conversion_rate'].iloc[0] - df_ab_results.query("group=='control'")['conversion_rate'].iloc[0]) / df_ab_results.query("group=='control'")['conversion_rate'].iloc[0]

In [41]:
lift = lift * 100

In [42]:
lift

10.220871367098388

In [43]:
# We'll compute a 95% confidence interval as well. We'll treat this as a Bernoulli random variable. We could probably also bootstrap this
sigma_treated = df_ab_results.query("group=='treatment'")['conversion_rate'].iloc[0] * (1 - df_ab_results.query("group=='treatment'")['conversion_rate'].iloc[0])
treatment_sample_size = df_ab_results.query("group=='treatment'")['units'].iloc[0]

In [44]:
se_treated = np.sqrt(sigma_treated / treatment_sample_size)

In [45]:
treated_lower_ci_95 = df_ab_results.query("group=='treatment'")['conversion_rate'].iloc[0] - 1.96 * se_treated
treated_upper_ci_95 = df_ab_results.query("group=='treatment'")['conversion_rate'].iloc[0] + 1.96 * se_treated

In [46]:
# And for the control measurement:
sigma_control = df_ab_results.query("group=='control'")['conversion_rate'].iloc[0] * (1 - df_ab_results.query("group=='control'")['conversion_rate'].iloc[0])
control_sample_size = df_ab_results.query("group=='control'")['units'].iloc[0]
se_control = np.sqrt(sigma_control / control_sample_size)
control_lower_ci_95 = df_ab_results.query("group=='control'")['conversion_rate'].iloc[0] - 1.96 * se_control
control_upper_ci_95 = df_ab_results.query("group=='control'")['conversion_rate'].iloc[0] + 1.96 * se_control

In [47]:
df_ab_results['lower_ci_95'] = [control_lower_ci_95, treated_lower_ci_95]
df_ab_results['upper_ci_95'] = [control_upper_ci_95, treated_upper_ci_95]

In [48]:
df_ab_results

Unnamed: 0,group,units,conversion,conversion_rate,lower_ci_95,upper_ci_95
0,control,69750,3487,0.049993,0.048375,0.05161
1,treatment,70269,3872,0.055103,0.053415,0.05679


Ok now compare this with our code base

In [50]:
df_ab.head()

Unnamed: 0,day,units,group,conversion
0,0,1,treatment,0
1,0,1,control,0
2,0,1,treatment,0
3,0,1,treatment,0
4,0,1,control,0


In [122]:
exp = conversion_rate_utils.ConversionExperiment()

In [123]:
df_code_res = exp.simple_ab_test(df=df_ab, group_column_name='group', treatment_name='treatment', outcome_column='conversion', alpha=0.05, null_hypothesis=0)

In [124]:
df_code_res

Unnamed: 0,treatment_mean,treatment_confidence_interval_95.0_percent_lower,treatment_confidence_interval_95.0_percent_upper,control_mean,control_confidence_interval_95.0_percent_lower,control_confidence_interval_95.0_percent_upper,treatment_minus_control_mean,treatment_minus_control_95.0_percent_lower,treatment_minus_control_95.0_percent_upper,z_statistic,p_value
0,0.055103,0.053415,0.05679,0.049993,0.048376,0.05161,0.00511,0.002773,0.007447,4.28512,0.999991
