In [16]:
#https://www.kaggle.com/tammyrotem/ab-tests-with-python
import math as mt
import pandas as pd
import numpy as np
import scipy
import scipy.stats
from scipy.stats import norm

In [2]:
baseline = {"Cookies":40000,"Clicks":3200,"Enrollments":660,"CTP":0.08,"GConversion":0.20625,
           "Retention":0.53,"NConversion":0.109313}

In [3]:
baseline["Cookies"] = 5000
baseline["Clicks"]=baseline["Clicks"]*(5000/40000)
baseline["Enrollments"]=baseline["Enrollments"]*(5000/40000)
baseline

{'CTP': 0.08,
 'Clicks': 400.0,
 'Cookies': 5000,
 'Enrollments': 82.5,
 'GConversion': 0.20625,
 'NConversion': 0.109313,
 'Retention': 0.53}

In [6]:
#doing standard deviation mathematically :-/
#Gross conversion: number of users enrolling in free trial / number of cookies clicking free trial
# --> probability of enrollment given a click
#Cookies = 'unit of diversion' --> element by which assign control/exp groups, also = unit of analysis

#need p (baseline probability event will occur) and sample size

GC={}
GC["d_min"]=0.01 #d_min = *MINIMUM DETECTABLE CHANGE*
GC["p"]=baseline["GConversion"]
GC["n"]=baseline["Clicks"]
GC["sd"]=round(mt.sqrt((GC["p"]*(1-GC["p"]))/GC["n"]),4)
GC["sd"]

0.0202

In [8]:
#Retention: # of paying users (enrolled after 14 free days)/number of total enrolled users
#--> probability of payment, given enrollment 
# unit of diversion =/= unit of analysis, so empirical variance would be better
         
R={}
R["d_min"]=0.01
R["p"]=baseline["Retention"]
R["n"]=baseline["Enrollments"]
R["sd"]=round(mt.sqrt((R["p"]*(1-R["p"]))/R["n"]),4)
R["sd"]             

0.0549

In [10]:
#Net conversion: # of paying users/# cookies clicking 'free trial' button
#--> probability of payment, given a click
#unit of diversion = unit of analysis
NC={}
NC["d_min"]=0.01
NC["p"]=baseline["NConversion"]
NC["n"]=baseline["Clicks"]
NC["sd"]=round(mt.sqrt((NC["p"]*(1-NC["p"]))/NC["n"]), 4)
NC["sd"]

0.0156

In [13]:
#Next is to get z-score critical value and SDs
def get_z_score(alpha):
    return norm.ppf(alpha)

def get_sds(p,d):
    sd1=mt.sqrt(2*p*(1-p))
    sd2=mt.sqrt(p*(1-p)+(p+d)*(1-(p+d)))
    sds=[sd1, sd2]
    return sds

def get_sampSize(sds,alpha,beta,d):
    n=pow((get_z_score(1-alpha/2)*sds[0]+get_z_score(1-beta)*sds[1]),2)/pow(d,2)
    return n

In [14]:
GC["d"]=0.01
R["d"]=0.01
NC["d"]=0.0075

In [17]:
GC["SampSize"]=round(get_sampSize(get_sds(GC["p"],GC["d"]),0.05,0.2,GC["d"]))
GC["SampSize"]
#This means we need 25,835 cookies to click free trial button

25835.0

In [18]:
#But if we get 400 clicks out of 5,000 page views, need to inflate 
GC["SampSize"]=round(GC["SampSize"]/0.08*2)
GC["SampSize"]

645875.0

In [19]:
R["SampSize"]=round(get_sampSize(get_sds(R["p"],R["d"]), 0.05, 0.2, R["d"]))
R["SampSize"]
#Need 39k users per group

39087.0

In [20]:
#Next, convert from cookies to click, then cookies who viewed, then multiply by 2 (for 2 groups)
R["SampSize"]=R["SampSize"]/0.08/0.20625*2
R["SampSize"]
#More than 4 mil, too many given our pace of 40k views/day, so we drop this metric

4737818.1818181816

In [21]:
NC["SampSize"]=round(get_sampSize(get_sds(NC["p"],NC["d"]),0.05,0.2,NC["d"]))
NC["SampSize"]

27413.0

In [22]:
NC["SampSize"]=NC["SampSize"]/0.08*2
NC["SampSize"]
#So, 685k is our sample size, assumed to take about 3 weeks

685325.0

In [23]:
control=pd.read_csv("//prc-cs-f9dkb42/ecozzolino$/Desktop/code/AB Test Library/control_data.csv")
experiment=pd.read_csv("//prc-cs-f9dkb42/ecozzolino$/Desktop/code/AB Test Library/experiment_data.csv")
control.head()

Unnamed: 0,Date,Pageviews,Clicks,Enrollments,Payments
0,"Sat, Oct 11",7723,687,134.0,70.0
1,"Sun, Oct 12",9102,779,147.0,70.0
2,"Mon, Oct 13",10511,909,167.0,95.0
3,"Tue, Oct 14",9871,836,156.0,105.0
4,"Wed, Oct 15",10014,837,163.0,64.0


In [24]:
#Next, sanity checks. Help to verify that experiment was conducted as expected
#3 invariant metrics:
# number of cookies in course overview page
# number clicks free trial button
# free tiral button click-thru probability

#1. Number of cookies who viewed course overview page
# --> want to make sure the same amt of cookies were diverted to A v. B

pageviews_cont=control['Pageviews'].sum()
pageviews_exp=experiment['Pageviews'].sum()
pageviews_total=pageviews_cont+pageviews_exp
print("# pgviews control:", pageviews_cont)
print("# pgviews exp:", pageviews_exp)

# pgviews control: 345543
# pgviews exp: 344660


In [25]:
#Pretty close, but wanted to check to see if statistically significant - mathematically (UGH)
p=0.5
alpha=0.05
p_hat=round(pageviews_cont/(pageviews_total),4)
sd=mt.sqrt(p*(1-p)/(pageviews_total))
ME=round(get_z_score(1-(alpha/2))*sd,4)
print ("The confidence interval is between",p-ME,"and",p+ME,"; Is",p_hat,"inside this range?")

The confidence interval is between 0.4988 and 0.5012 ; Is 0.5006 inside this range?


In [26]:
#2. Number of cookies who clicked the free trial button
clicks_cont=control['Clicks'].sum()
clicks_exp=experiment['Clicks'].sum()
clicks_total=clicks_cont+clicks_exp
print(clicks_cont)
print(clicks_exp)

28378
28325


In [27]:
#Again very close numbers, but still wants to see if statistically different (bet you they aren't)
p_hat=round(clicks_cont/clicks_total,4)
sd=mt.sqrt(p*(1-p)/clicks_total)
ME=round(get_z_score(1-(alpha/2))*sd,4)
print ("The confidence interval is between",p-ME,"and",p+ME,"; Is",p_hat,"inside this range?")

The confidence interval is between 0.4959 and 0.5041 ; Is 0.5005 inside this range?


In [30]:
#3. Click-thru-probability of 'Free Trial' button
#want to make sure it doesn't differ b/w control and exp groups

ctp_cont=clicks_cont/pageviews_cont
ctp_exp=clicks_exp/pageviews_exp
d_hat=round(ctp_exp-ctp_cont,4) #take diff b/w the 2, round to 4 decimals
p_pooled=clicks_total/pageviews_total
sd_pooled=mt.sqrt(p_pooled*(1-p_pooled)*(1/pageviews_cont+1/pageviews_exp))
ME=round(get_z_score(1-(alpha/2))*sd_pooled,4)
print ("The confidence interval is between",0-ME,"and",0+ME,"; Is",d_hat,"within this range?")

The confidence interval is between -0.0013 and 0.0013 ; Is 0.0001 within this range?


In [31]:
#Next, looking at changes b/w cont & exp WRT eval metrics to make sure diff is there/
#is significant/ is substantively big enough 

#pageviews/clicks observed for 39 days, enrl/pay only for 23 days. so drop incomplete records
clicks_cont=control["Clicks"].loc[control["Enrollments"].notnull()].sum()
clicks_exp=experiment["Clicks"].loc[experiment["Enrollments"].notnull()].sum()

In [32]:
#Gross conversion: # enrollments/# clicks
enrollments_cont=control["Enrollments"].sum()
enrollments_exp=experiment["Enrollments"].sum()

GC_cont=enrollments_cont/clicks_cont
GC_exp=enrollments_exp/clicks_exp
GC_pooled=(enrollments_cont+enrollments_exp)/(clicks_cont+clicks_exp)
GC_sd_pooled=mt.sqrt(GC_pooled*(1-GC_pooled)*(1/clicks_cont+1/clicks_exp))
GC_ME=round(get_z_score(1-alpha/2)*GC_sd_pooled,4)
GC_diff=round(GC_exp-GC_cont,4)

print("the change due to experiment is",GC_diff*100,"%")
print("confidence interval: [",GC_diff-GC_ME,",",GC_diff+GC_ME,"]")
print("the change is statistically sig if the CI doesn't include 0. it is practically sig if", 
      -GC["d_min"],"is not in the CI as well")

the change due to experiment is -2.06 %
confidence interval: [ -0.0292 , -0.012 ]
the change is statistically sig if the CI doesn't include 0. it is practically sig if -0.01 is not in the CI as well


In [33]:
#Net Conversion - number of payments divided by number of clicks
payments_cont=control["Payments"].sum()
payments_exp=experiment["Payments"].sum()

NC_cont=payments_cont/clicks_cont
NC_exp=payments_exp/clicks_exp
NC_pooled=(payments_cont+payments_exp)/(clicks_cont+clicks_exp)
NC_sd_pooled=mt.sqrt(NC_pooled*(1-NC_pooled)*(1/clicks_cont+1/clicks_exp))
NC_ME=round(get_z_score(1-alpha/2)*NC_sd_pooled,4)
NC_diff=round(NC_exp-NC_cont,4)
print("The change due to the experiment is",NC_diff*100,"%")
print("Confidence Interval: [",NC_diff-NC_ME,",",NC_diff+NC_ME,"]")
print ("The change is statistically sig if the CI doesn't include 0. it is practically sig if"
       ,NC["d_min"],"is not in the CI as well.")

The change due to the experiment is -0.49 %
Confidence Interval: [ -0.0116 , 0.0018 ]
The change is statistically sig if the CI doesn't include 0. it is practically sig if 0.01 is not in the CI as well.
