In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
import seaborn as sns
import scipy.stats as st

# A/B Testing

One of the main things about A/B testing is that we need to run the experiments for two or more groups simultaneously. And the groups must be identical in observable and unobservable ways and the only way to make the groups identical in unobservable ways is to use randomisation (basicall you have to have a group of people and randomly assign them to groups)

In [88]:
# Simulating data

# np.random.seed(11)
# This code locks the random generated numbers, so you have just 1 set of 
# random numbers to work with, instead of getting new sets each time this
# cell is run. Helpful for debugging. Also useful for analysis and reproducing
# code.

# We have a subscribe button, Button A, that half of the people (50%) who see it click on
# And we have a new button that could be a better button provided that 60%
# of people click on it
A_rate = .5 
B_rate = .7

# diff between the two numbers is 10%; people will use the term LIFT
# basically the percent increase
# 60% is actually 20% higher than 50%

# simulate clicks
clicks_A = np.random.choice(['yes', 'no'],  
                            size=100, 
                            p=[A_rate, 1-A_rate]) # 50% rate of clicking the button, and a 50% rate of not clicking the button

clicks_B = np.random.choice(['yes', 'no'],  
                            size=100, 
                            p=[B_rate, 1-B_rate])

# create dataframe
# first create an joined list of clicks A and B
outcome = list(clicks_A) + list(clicks_B)

# create new list of 100 As and 100 Bs
group = ['A']*100 + ['B']*100
# is this syntax safe?

clicked_data = pd.DataFrame(data={'Group': group, 'Clicked': outcome})
pd.crosstab(clicked_data['Group'], clicked_data['Clicked'])
# we can see the results

# now we'll run a chi-squared test
ab_contingency = pd.crosstab(clicked_data['Group'], clicked_data['Clicked']) # what is a contingency?

chi2, pval, dof, expected = st.chi2_contingency(ab_contingency)
print(pval)
print(ab_contingency)
# we're going to get a p-value, which is testing the null hypothesis that there
# is no association between the two variables, no relationship between the 
# group identity and whether or not somebody clicked

# So as a researcher, after seeing the data below, we want to know if 
# the proportions are sustainable if we run the experiment again

# Hypothesis testing is really built around this idea that you can only 
# observe a sample but you want to something about a larger population that
# you can't observe. So in relation to the fake A/B Test that we're running,
# the larger sample that we can't observe is all the visitors to the website
# who view the subscribe button and what they would do

# for the data that we have, two categorical variables (what group you're in
# and whether you clicked or not), the outcome is categorical (whether you 
# click or not click) - so the hypothesis test we would use is a chi-squared test

# if the outcome we cared about was quantitative, like how much time the visitors
# spent watching a youtube video, then we would need a different kind of test,
# perhaps a two-sample t-test or a z-test

# if you're a researcher, you have to think about how big of a difference
# you care to measure in your test. So once you have some data, you definitely
# want to measure what the rate is in Group A and what the rate is in Group
# B, and how large the difference is. Then you want to run a hypothesis test
# to see if you have a significant difference, and allows you to reject the
# null hypothesis

# Usually for an A/B Test, you're comparing something that already exists to
# a new version. You'll have some baseline data. But you won't know how well
# the new feature performs.

# null hypothesis for chi-squared test is that the two rates are the same

# in performing this test, we could make a mistake by not finding a difference
# even when there is one, so a Type II error. We could have the opposite problem
# finding a Type I error

0.0008063082361477824
Clicked  no  yes
Group           
A        50   50
B        26   74


In [2]:
# simulate data:
samp_size = 100
A_rate = .5
B_rate = (1 + lift) * A_rate

clicks_A = np.random.choice(['yes', 'no'],  
                            size=int(sample_size/2), 
                            p=[A_rate, 1-A_rate])
clicks_B = np.random.choice(['yes', 'no'],  
                            size=int(sample_size/2), 
                            p=[B_rate, 1-B_rate])
outcome = list(clicks_A) + list(clicks_B)
group = ['Button A']*int(sample_size/2) + ['Button B']*int(sample_size/2)
sim_data = {"Group": group, "Clicked": outcome}
sim_data = pd.DataFrame(sim_data)
print(sim_data)

NameError: name 'lift' is not defined