In [1]:
import numpy as np
import pandas as pd
from scipy.stats import norm

In [2]:
###1.Define the functions that we need in the following analysis.
##1.1 Define function to calculate standard deviation for normal distribution
def get_sd(p, n):
    sd = np.sqrt(p*(1-p)/n)
    return sd

##1.2 Define function to calculate pooled standard deviation for two samples
def get_pooled_sd(p, n1, n2):
    pooled_sd = np.sqrt(p*(1-p)*(1/n1+1/n2))
    return pooled_sd

##1.3 Define function to get Z-score
def get_z_score(alpha):
    z_score = norm.ppf(1-alpha/2)
    return z_score

In [3]:
###2.Calculate sample size
##2.1 Import baseline data
bsl_val = pd.read_csv("C:/Users/Dell/Desktop/AB Testing/baseline_values.csv", header=None, names=['metric_description', 'baseline_value'], index_col=False)
bsl_val

Unnamed: 0,metric_description,baseline_value
0,Unique cookies to view course overview page pe...,40000.0
1,"Unique cookies to click ""Start free trial"" per...",3200.0
2,Enrollments per day:,660.0
3,"Click-through-probability on ""Start free trial"":",0.08
4,"Probability of enrolling, given click:",0.20625
5,"Probability of payment, given enroll:",0.53
6,"Probability of payment, given click",0.109313


In [4]:
##2.2 Measuring analytical standard deviation
sp_size_pageviews = 5000
sp_size_clicks = 3200*(5000/40000)
sp_size_enrollments = 660*(5000/40000)

sd_gross_conversion = round(get_sd(0.20625, sp_size_clicks), 4)
sd_retention = round(get_sd(0.53, sp_size_enrollments), 4)
sd_net_conversion = round(get_sd(0.10931, sp_size_clicks), 4)
print ("The SD for gross conversion is ", sd_gross_conversion)
print("The SD for retention is ", sd_retention)
print("The SD for net conversion is ", sd_net_conversion)

The SD for gross conversion is  0.0202
The SD for retention is  0.0549
The SD for net conversion is  0.0156


In [5]:
##2.3 Sizing
#We use an alpha of 0.05 and a beta of 0.2
#Use online calculator to get the sample size.
#Gross Conversion: the pageview requirede is 645,875
#Retention: the pageview required is 4,741,213
#Net Conversion: the pageview required is 685,325

In [6]:
###3.Experiment Analysis
##3.1 Import the data for analyzing
control = pd.read_csv("C:/Users/Dell/Desktop/AB Testing/control_data.csv", index_col=False)
experiment = pd.read_csv("C:/Users/Dell/Desktop/AB Testing/experiment_data.csv", index_col=False)
control.head()

Unnamed: 0,Date,Pageviews,Clicks,Enrollments,Payments
0,"Sat, Oct 11",7723,687,134.0,70.0
1,"Sun, Oct 12",9102,779,147.0,70.0
2,"Mon, Oct 13",10511,909,167.0,95.0
3,"Tue, Oct 14",9871,836,156.0,105.0
4,"Wed, Oct 15",10014,837,163.0,64.0


In [7]:
##3.2 Sanity checks
#3.2.1 Number of cookies: the number of unique cookies to view the course overview page
pageviews_ctrl = control['Pageviews'].sum()
pageviews_exp = experiment['Pageviews'].sum()
pageviews_total = pageviews_ctrl + pageviews_exp
print ("The number of unique cookies to view the course overview page in control:", pageviews_ctrl)
print ("number of unique cookies to view the course overview page in experiment:" , pageviews_exp)

The number of unique cookies to view the course overview page in control: 345543
number of unique cookies to view the course overview page in experiment: 344660


In [8]:
#The two numbers of cookies in control and experiment group seem close
#We expect 
p = 0.5
alpha = 0.05
p_hat = round(pageviews_exp/pageviews_total, 4)
sd = get_sd(p_hat, pageviews_total)
margin_error = round(get_z_score(alpha)*sd, 4)
print("The p-hat is", p_hat)
print("The", (1-alpha)*100, "% confidence interval is [", p-margin_error, ",", p+margin_error, "]")

The p-hat is 0.4994
The 95.0 % confidence interval is [ 0.4988 , 0.5012 ]


In [9]:
#3.2.2 Number of clicks: The number of unique cookies to click the "Start free trial" button
clicks_ctrl = control['Clicks'].sum()
clicks_exp = experiment['Clicks'].sum()
clicks_total = clicks_ctrl + clicks_exp
print('The number of unique cookies to click the "Start free trial" button in control:', clicks_ctrl)
print('The number of unique cookies to click the "Start free trial" button in experiment:', clicks_exp)

The number of unique cookies to click the "Start free trial" button in control: 28378
The number of unique cookies to click the "Start free trial" button in experiment: 28325


In [10]:
#The two numbers of cookies in control and experiment group seem close too
#
p = 0.5
alpha = 0.05
p_hat = round(clicks_exp/clicks_total, 4)
sd = get_sd(p_hat, clicks_total)
margin_error = round(get_z_score(alpha)*sd, 4)
print("The p-hat is", p_hat)
print("The", (1-alpha)*100, "% confidence interval is [", p-margin_error, ",", p+margin_error, "]")

The p-hat is 0.4995
The 95.0 % confidence interval is [ 0.4959 , 0.5041 ]


In [11]:
#3.2.3 Click-through-probability: The number of unique cookies to click the "Start free trial" button
##                                divided by number of unique cookies to view the course overview page
#When compare two samples of probability, 
ctp_ctrl = clicks_ctrl/pageviews_ctrl
ctp_exp = clicks_exp/pageviews_exp
ctp_diff = round(ctp_exp-ctp_ctrl, 4)

alpha = 0.05
p_hat_pooled = clicks_total/pageviews_total
sd_pooled = get_pooled_sd(p_hat_pooled, pageviews_ctrl, pageviews_exp)
margin_error = round(get_z_score(alpha)*sd_pooled, 4)
print("The difference is", ctp_diff)
print("The", (1-alpha)*100, "% confidence interval is [", 0-margin_error, ",", 0+margin_error, "]")

The difference is 0.0001
The 95.0 % confidence interval is [ -0.0013 , 0.0013 ]


In [12]:
##3.3 Effect Size Tests
#We noticed that the data given by Udacity listed pageviews and clicks for 39 days,
#while it only lists enrollments and payments for 23 days.
#When examining the effect size, we could only use data of complete information, that is, the former 23 days data.
#3.3.1 Keep the data with complete information
control_notnull = control[pd.notnull(control['Enrollments'])]
experiment_notnull = experiment[pd.notnull(experiment['Enrollments'])]

In [13]:
#3.3.2 Gross Conversion: the probability of enrolling, given click
clicks_ctrl = control_notnull['Clicks'].sum()
clicks_exp = experiment_notnull['Clicks'].sum()
clicks_total = clicks_ctrl+clicks_exp

enrollments_ctrl = control_notnull['Enrollments'].sum()
enrollments_exp = experiment_notnull['Enrollments'].sum()
enrollments_total = enrollments_ctrl+enrollments_exp

GC_ctrl = enrollments_ctrl/clicks_ctrl
GC_exp = enrollments_exp/clicks_exp
GC_diff = round(GC_exp-GC_ctrl, 4)

alpha = 0.05
GC_pooled = enrollments_total/clicks_total
GC_sd_pooled = get_pooled_sd(GC_pooled, clicks_ctrl, clicks_exp)
GC_margin_error = round(get_z_score(alpha)*GC_sd_pooled, 4)
GC_dmin = 0.01

print("The change due to the experiment is", GC_diff*100,"%")
print("The", (1-alpha)*100, "% confidence Interval is [", GC_diff-GC_margin_error, ",", GC_diff+GC_margin_error, "]")
print ("A meaningfull change for business is", -GC_dmin)

The change due to the experiment is -2.06 %
The 95.0 % confidence Interval is [ -0.0292 , -0.012 ]
A meaningfull change for business is -0.01


In [14]:
#Conclusion:
#The GC change is both statistically significant and pratically significant.

In [15]:
#3.3.3 Net Conversion: the probability of payment, given click
payments_ctrl = control_notnull['Payments'].sum()
payments_exp = experiment_notnull['Payments'].sum()
payments_total = payments_ctrl+payments_exp

NC_ctrl = payments_ctrl/clicks_ctrl
NC_exp = payments_exp/clicks_exp
NC_pooled = payments_total/clicks_total

NC_ctrl = payments_ctrl/clicks_ctrl
NC_exp = payments_exp/clicks_exp
NC_diff = round(NC_exp-NC_ctrl, 4)

alpha = 0.05
NC_pooled = payments_total/clicks_total
NC_sd_pooled = get_pooled_sd(NC_pooled, clicks_ctrl, clicks_exp)
NC_margin_error = round(get_z_score(alpha)*NC_sd_pooled, 4)
NC_dmin = 0.0075

print("The change due to the experiment is", NC_diff*100,"%")
print("The", (1-alpha)*100, "% confidence Interval is [", NC_diff-NC_margin_error, ",", NC_diff+NC_margin_error, "]")
print ("A meaningfull change for business is", -NC_dmin)

The change due to the experiment is -0.49 %
The 95.0 % confidence Interval is [ -0.0116 , 0.0018000000000000004 ]
A meaningfull change for business is -0.0075


In [16]:
#Conclusion:
#The NC change is neither statistically significant nor practically significant.

In [17]:
##3.4 Sign Tests
df_sign_test = pd.merge(control_notnull, experiment_notnull, how='inner', on='Date', suffixes=('_ctrl', '_exp'))
df_sign_test.head()

Unnamed: 0,Date,Pageviews_ctrl,Clicks_ctrl,Enrollments_ctrl,Payments_ctrl,Pageviews_exp,Clicks_exp,Enrollments_exp,Payments_exp
0,"Sat, Oct 11",7723,687,134.0,70.0,7716,686,105.0,34.0
1,"Sun, Oct 12",9102,779,147.0,70.0,9288,785,116.0,91.0
2,"Mon, Oct 13",10511,909,167.0,95.0,10480,884,145.0,79.0
3,"Tue, Oct 14",9871,836,156.0,105.0,9867,827,138.0,92.0
4,"Wed, Oct 15",10014,837,163.0,64.0,9793,832,140.0,94.0


In [18]:
df_sign_test['GC_ctrl'] = df_sign_test['Enrollments_ctrl']/df_sign_test['Clicks_ctrl']
df_sign_test['GC_exp'] = df_sign_test['Enrollments_exp']/df_sign_test['Clicks_exp']
df_sign_test['GC'] = np.where(df_sign_test['GC_exp']>df_sign_test['GC_ctrl'], 1, 0)

df_sign_test['NC_ctrl'] = df_sign_test['Payments_ctrl']/df_sign_test['Clicks_ctrl']
df_sign_test['NC_exp'] = df_sign_test['Payments_exp']/df_sign_test['Clicks_exp']
df_sign_test['NC'] = np.where(df_sign_test['NC_exp']>df_sign_test['NC_ctrl'], 1, 0)

GC_success = df_sign_test['GC'].sum()
NC_success = df_sign_test['NC'].sum()
num_trials = df_sign_test['GC'].count()
print("For gross conversion, the number of successes we observed is", GC_success)
print("For net conversion, the number of successes we observed is", NC_success)
print("The total number of trials is", num_trials)

For gross conversion, the number of successes we observed is 4
For net conversion, the number of successes we observed is 10
The total number of trials is 23


In [19]:
#Use online calculater to get the p value for each metric.
#GC: The two-tail P value is 0.0026, less than 0.05 and we can reject H0, which means GC change is significant
#NC: The two-tail P value is 0.6776, greater than 0.05 and we can't reject H0, which means NC change is not significant
#The conclusion from sign tests aligns with our parametric hypothesis test.