## Measuring Variability

In [95]:
import math
import numpy as np
import pandas as pd

In [4]:
# Load the baseline metrics. The CSV has no header row, so column names are
# supplied here; `dmin` holds one minimum-detectable-effect value per row,
# listed in the same order as the rows of the file.
baseline_v = pd.read_csv(
    'Final Project Baseline Values.csv',
    index_col=False,
    header=None,
    names=['metric', 'baseline_values'],
)
baseline_v['dmin'] = [3000, 50, 240, 0.01, 0.01, 0.01, 0.0075]
baseline_v

Unnamed: 0,metric,baseline_values,dmin
0,Unique cookies to view course overview page pe...,40000.0,3000.0
1,"Unique cookies to click ""Start free trial"" per...",3200.0,50.0
2,Enrollments per day:,660.0,240.0
3,"Click-through-probability on ""Start free trial"":",0.08,0.01
4,"Probability of enrolling, given click:",0.20625,0.01
5,"Probability of payment, given enroll:",0.53,0.01
6,"Probability of payment, given click",0.109313,0.0075


In [14]:
# Pull each baseline value out of the table by row position (rows are in the
# order shown in the table above).

# unique cookies viewing the course overview page per day
cv = baseline_v.baseline_values[0]

# unique cookies clicking "Start free trial" per day
cc = baseline_v.baseline_values[1]

# enrollments per day
# NOTE: `id` shadows the Python builtin; the name is kept because later cells read it.
id = baseline_v.baseline_values[2]

# click-through probability on "Start free trial"
ctp = baseline_v.baseline_values[3]

# gross conversion: probability of enrolling, given click
gc = baseline_v.baseline_values[4]

# retention: probability of payment, given enroll
r = baseline_v.baseline_values[5]

# net conversion: probability of payment, given click
nc = baseline_v.baseline_values[6]

In [22]:
# Analytic standard errors, sqrt(p*(1-p)/N), for a sample of 5000 pageviews.
# N is the metric's denominator scaled from the daily baseline to that sample:
# clicks for gross/net conversion, enrollments for retention.
clicks_in_sample = 5000*cc/cv
enrollments_in_sample = 5000*id/cv

se_gc = np.sqrt(gc*(1-gc)/clicks_in_sample)
se_r = np.sqrt(r*(1-r)/enrollments_in_sample)
se_nc = np.sqrt(nc*(1-nc)/clicks_in_sample)

print('gross_convetion SE:', round(se_gc,4))
print('retention SE:', round(se_r,4))
print('net_convetion SE:', round(se_nc,4))

gross_convetion SE: 0.0202
retention SE: 0.0549
net_convetion SE: 0.0156


## Sizing

Given the standard errors above, we now compute the number of samples each metric needs to achieve the desired size (alpha) and power (1 - beta). The per-group sample sizes below come from an online sample size calculator.

#### Gross Conversion:

Baseline Conversion: 20.625%

Minimum Detectable Effect: 1%

Alpha: 5%

Beta: 20%

Sensitivity (1 - Beta): 80%

Sample Size: 25,835*2 = 51,670 clicks

Pageviews needed:  sample clicks/(clicks/pageviews) = 645,875
    
#### Retention:

Baseline Conversion: 53%

Minimum Detectable Effect: 1%

Alpha: 5%

Beta: 20%

Sensitivity (1 - Beta): 80%

Sample size: 39,115*2 = 78,230 enrollments

Pageviews needed: sample enrollments/(enrollments/pageviews) = 4,741,212
    
#### Net Conversion:

Baseline Conversion: 10.9313%

Minimum Detectable Effect: 0.75%

Alpha: 5%

Beta: 20%

Sensitivity (1 - Beta): 80%

Sample size: 27,413*2 = 54,826 clicks

Pageviews needed: sample clicks/(clicks/pageviews) = 685,325

## Duration and Exposure

In [24]:
# Pageviews required when retention is kept as a metric (largest requirement
# in the Sizing section).
sample_size = 4741212

# Days needed if 100% of daily traffic is diverted to the experiment.
fraction_of_traffic = 1
duration_100 = sample_size/(cv*fraction_of_traffic)
print(duration_100)

118.5303


That's too risky for Udacity. If the experiment harms the user experience, such a long-running experiment would frustrate many students and could cause a substantial business loss.

Besides, it's impossible to run other experiments during the 4 months.

Since the net conversion metric also measures the effect of the free trial screener past the 14-day boundary, we choose to discard the retention metric and reduce our sample size.

In [25]:
# Pageviews required once retention is dropped (net conversion now has the
# largest requirement in the Sizing section).
sample_size = 685325

# Days needed if 100% of daily traffic is diverted to the experiment.
fraction_of_traffic = 1
duration_100 = sample_size/(cv*fraction_of_traffic)
print(duration_100)

17.133125


In [74]:
# If we divert 75% of traffic per day to the experiment, we will need (days):
fraction_of_traffic = 0.75
duration_75 = sample_size/(cv*fraction_of_traffic)
# `duration_50` was the original (misleading) name for this 75% figure; it is
# kept as an alias so any cell that still reads it keeps working.
duration_50 = duration_75
print(duration_75)

22.844166666666666


## Sanity Check
Our invariant metrics are the number of cookies, the number of clicks, and the click-through probability, so we run a sanity check on each of the three.

In [41]:
# Load the daily results for the experiment group and preview the first rows.
experiment_df = pd.read_csv('Final Project Results - Experiment.csv')
experiment_df.head()

Unnamed: 0,Date,Pageviews,Clicks,Enrollments,Payments
0,"Sat, Oct 11",7716,686,105.0,34.0
1,"Sun, Oct 12",9288,785,116.0,91.0
2,"Mon, Oct 13",10480,884,145.0,79.0
3,"Tue, Oct 14",9867,827,138.0,92.0
4,"Wed, Oct 15",9793,832,140.0,94.0


In [42]:
# Load the daily results for the control group and preview the first rows.
control_df = pd.read_csv('Final Project Results - Control.csv')
control_df.head()

Unnamed: 0,Date,Pageviews,Clicks,Enrollments,Payments
0,"Sat, Oct 11",7723,687,134.0,70.0
1,"Sun, Oct 12",9102,779,147.0,70.0
2,"Mon, Oct 13",10511,909,167.0,95.0
3,"Tue, Oct 14",9871,836,156.0,105.0
4,"Wed, Oct 15",10014,837,163.0,64.0


In [44]:
# Summary statistics for the control group. Despite the name, `c_std` holds the
# full describe() table (count, mean, std, quantiles), not just the std row.
c_std = control_df.describe()
c_std

Unnamed: 0,Pageviews,Clicks,Enrollments,Payments
count,37.0,37.0,23.0,23.0
mean,9339.0,766.972973,164.565217,88.391304
std,740.239563,68.286767,29.977,20.650202
min,7434.0,632.0,110.0,56.0
25%,8896.0,708.0,146.5,70.0
50%,9420.0,759.0,162.0,91.0
75%,9871.0,825.0,175.0,102.5
max,10667.0,909.0,233.0,128.0


In [45]:
# Summary statistics for the experiment group. Despite the name, `x_std` holds
# the full describe() table (count, mean, std, quantiles), not just the std row.
x_std = experiment_df.describe()
x_std

Unnamed: 0,Pageviews,Clicks,Enrollments,Payments
count,37.0,37.0,23.0,23.0
mean,9315.135135,765.540541,148.826087,84.565217
std,708.070781,64.578374,33.234227,23.060841
min,7664.0,642.0,94.0,34.0
25%,8881.0,722.0,127.0,69.0
50%,9359.0,770.0,142.0,91.0
75%,9737.0,827.0,172.0,99.0
max,10551.0,884.0,213.0,123.0


In [46]:
# Totals over every day in the results files, used for the invariant-metric
# sanity checks.
pageview_cont = control_df.Pageviews.sum()
click_cont = control_df.Clicks.sum()
pageview_exp = experiment_df.Pageviews.sum()
click_exp = experiment_df.Clicks.sum()

print(
    "the control group cookies are", pageview_cont,
    "\nthe experiment group cookies are", pageview_exp,
    "\nthe control group clicks are", click_cont,
    "\nthe experiment group clicks are", click_exp,
)

the control group cookies are 345543 
the experiment group cookies are 344660 
the control group clicks are 28378 
the experiment group clicks are 28325


In [47]:
from scipy.stats import norm
def get_z_star(alpha):
    """Return the z-critical value for a two-tailed test.

    Parameters
    ----------
    alpha : float
        Desired significance level of the two-tailed test.

    Returns
    -------
    float
        The standard-normal quantile at probability ``1 - alpha/2``.
    """
    upper_tail_probability = 1 - alpha/2
    return norm.ppf(upper_tail_probability)

In [62]:
# Sanity check for clicks and cookies.
# Standard deviation of the control-group share under an expected 50/50 split:
# sqrt(0.5 * 0.5 / N), with N the combined count of both groups.
pageview_sd = math.sqrt(0.5*0.5/(pageview_cont+pageview_exp))
click_sd = math.sqrt(0.5*0.5/(click_cont+click_exp))

def sanity_check(sd, d, alpha, observed=None):
    """Check that an observed fraction lies inside the interval d +/- z*sd.

    Parameters
    ----------
    sd : float
        Standard deviation of the fraction under the expected split.
    d : float
        Expected value of the fraction (the centre of the interval).
    alpha : float
        Significance level of the two-tailed interval.
    observed : float, optional
        The observed fraction. When omitted, the module-level variable
        ``fraction`` is used, which is how the original cells call this
        function; passing it explicitly avoids depending on hidden state.

    Returns
    -------
    None or str
        ``None`` when the check passes (the verdict is printed), otherwise
        the string ``"no pass sanity check"``. The bounds and the observed
        value are printed in both cases.
    """
    if observed is None:
        # Legacy behaviour: callers set the global `fraction` before calling.
        observed = fraction
    z = get_z_star(alpha)
    m = sd*z
    lower, upper = d-m, d+m
    if lower <= observed <= upper:
        print('lower bound: ', lower,
             '\nupper bound: ', upper,
             '\nobserved: ', observed,
             '\npass sanity check')
    else:
        # Show the numbers on failure too, so the reader can see how far off
        # the observed value is.
        print('lower bound: ', lower,
             '\nupper bound: ', upper,
             '\nobserved: ', observed)
        return "no pass sanity check"

In [63]:
# Share of all pageviews that fell in the control group; sanity_check reads
# this value through the global `fraction`.
fraction = pageview_cont/(pageview_exp+pageview_cont)
sanity_check(sd=pageview_sd, d=0.5, alpha=0.05)

lower bound:  0.4988204138245942 
upper bound:  0.5011795861754058 
observed:  0.5006396668806133 
pass sanity check


In [64]:
# Share of all clicks that fell in the control group; sanity_check reads this
# value through the global `fraction`.
fraction = click_cont/(click_exp+click_cont)
sanity_check(sd=click_sd, d=0.5, alpha=0.05)

lower bound:  0.4958845713471463 
upper bound:  0.5041154286528536 
observed:  0.5004673474066628 
pass sanity check


In [71]:
# Sanity check for click-through probability: the difference between the two
# groups' click/pageview ratios should fall inside +/- z * pooled SE around 0.
pool_p = (click_cont + click_exp) / (pageview_cont + pageview_exp)
se_p = math.sqrt(pool_p*(1-pool_p)*(1/pageview_cont + 1/pageview_exp))
d = click_exp/pageview_exp - click_cont/pageview_cont
z = get_z_star(0.05)
if not (-z*se_p <= d <= z*se_p):
    print("no pass sanity check")
else:
    print(
        'lower bound: ', -z*se_p,
        '\nupper bound: ', z*se_p,
        '\nobserved: ', d,
        '\npass sanity check',
    )

lower bound:  -0.001295655390242568 
upper bound:  0.001295655390242568 
observed:  5.662709158693602e-05 
pass sanity check


## Effect Size Test

According to the data from Udacity, we have 690,203 pageviews in total, but enrollments and payments are missing for the last 14 days, because payments from students who started a free trial in those days could not yet be tracked. We therefore use only the first 23 days of data, which contain 423,525 pageviews in total.

In [77]:
# Combined totals (experiment + control) over the first 23 rows of each file.
pageviews = experiment_df['Pageviews'].head(23).sum() + control_df['Pageviews'].head(23).sum()
clicks = experiment_df['Clicks'].head(23).sum() + control_df['Clicks'].head(23).sum()
enrollments = experiment_df['Enrollments'].head(23).sum() + control_df['Enrollments'].head(23).sum()
payments = experiment_df['Payments'].head(23).sum() + control_df['Payments'].head(23).sum()

print(
    "pageviews: ", pageviews,
    "\nclicks: ", clicks,
    "\nenrollments: ", enrollments,
    "\npayments: ", payments,
)

pageviews:  423525 
clicks:  34553 
enrollments:  7208.0 
payments:  3978.0


In [90]:
# Per-group totals over the first 23 rows of each file.
# Control group
clicks_cont = control_df['Clicks'].head(23).sum()
enrollments_cont = control_df['Enrollments'].head(23).sum()
payments_cont = control_df['Payments'].head(23).sum()
# Experiment group
clicks_exp = experiment_df['Clicks'].head(23).sum()
enrollments_exp = experiment_df['Enrollments'].head(23).sum()
payments_exp = experiment_df['Payments'].head(23).sum()

In [91]:
# Observed differences (experiment minus control), where
#   gross conversion = enrollments / clicks
#   net conversion   = payments / clicks
d_gc = enrollments_exp/clicks_exp - enrollments_cont/clicks_cont
d_nc = payments_exp/clicks_exp - payments_cont/clicks_cont

print(
    "observed difference in gross conversion: ", d_gc,
    "\nobserved difference in net conversion: ", d_nc,
)

observed difference in gross conversion:  -0.020554874580361565 
observed difference in net conversion:  -0.0048737226745441675


In [92]:
# Pooled probabilities: both groups' successes over both groups' clicks.
p_pool_gc = (enrollments_exp + enrollments_cont)/(clicks_exp + clicks_cont)
p_pool_nc = (payments_exp + payments_cont)/(clicks_exp + clicks_cont)

print(
    "pool probability of gross conversion: ", p_pool_gc,
    "\npool probability of net conversion: ", p_pool_nc,
)

pool probability of gross conversion:  0.20860706740369866 
pool probability of net conversion:  0.1151274853124186


In [110]:
def result_analysis(p_pool,Nexp,Ncont,d,dmin,alpha):
    """Report statistical and practical significance of an observed difference.

    A confidence interval ``d +/- z*se`` is built from the pooled standard
    error ``sqrt(p_pool*(1-p_pool)*(1/Nexp + 1/Ncont))``.

    * Statistically significant: the interval excludes 0.
    * Practically significant: the whole interval lies beyond the practical
      significance boundary, i.e. entirely above ``+dmin`` or entirely below
      ``-dmin``. (The previous version compared ``dmin`` with the margin of
      error rather than with the interval bounds, and in the insignificant
      branch only looked at the positive side.)

    Parameters
    ----------
    p_pool : float
        Pooled probability across both groups.
    Nexp, Ncont : number
        Denominator counts of the experiment and control groups.
    d : float
        Observed difference (experiment minus control).
    dmin : float
        Practical significance boundary; its absolute value is used.
    alpha : float
        Significance level of the two-tailed interval.

    Returns
    -------
    None or str
        ``None`` when the result is practically significant (the verdict is
        printed), otherwise the string ``'practically insignificant'``.
    """
    z = get_z_star(alpha)
    se = math.sqrt(p_pool*(1-p_pool)*(1/Nexp + 1/Ncont))
    m = z*se
    lower, upper = d-m, d+m

    if d > m or d < -m:
        verdict = 'statistically significant'
    else:
        verdict = 'statistically insignificant'
    print(verdict,
         '\nlower bound: ', lower,
         '\nupper bound: ', upper)

    # Compare the interval, not the margin, with the practical boundary.
    boundary = abs(dmin)
    if lower > boundary or upper < -boundary:
        print('practically significant')
    else:
        return 'practically insignificant'

In [111]:
# Gross conversion: practical significance boundary dmin = 0.01, alpha = 0.05.
result_analysis(
    p_pool=p_pool_gc,
    Nexp=clicks_exp,
    Ncont=clicks_cont,
    d=d_gc,
    dmin=0.01,
    alpha=0.05,
)

statistically significant 
lower bound:  -0.02912320088750467 
upper bound:  -0.011986548273218463
practically significant


In [112]:
# Net conversion: practical significance boundary dmin = 0.0075, alpha = 0.05.
result_analysis(
    p_pool=p_pool_nc,
    Nexp=clicks_exp,
    Ncont=clicks_cont,
    d=d_nc,
    dmin=0.0075,
    alpha=0.05,
)

statistically insignificant 
lower bound:  -0.011604500677993734 
upper bound:  0.0018570553289053993


'practically insignificant'

## Sign Test

Used the sign test to double check the results.

In [142]:
# scipy.stats.binom_test was deprecated in SciPy 1.10 and removed in 1.12.
# Fall back to a thin wrapper around its replacement, scipy.stats.binomtest,
# so the cells below keep working on both old and new SciPy versions.
try:
    from scipy.stats import binom_test
except ImportError:
    from scipy.stats import binomtest

    def binom_test(x, n=None, p=0.5, alternative='two-sided'):
        """Return the binomial-test p-value for `x` successes in `n` trials.

        Only the scalar-`x` form of the old API is supported, which is the
        only form this notebook uses.
        """
        return binomtest(x, n=n, p=p, alternative=alternative).pvalue

alpha = 0.05
# NOTE(review): Beta and k are not read by any cell visible here; they are
# kept in case other cells depend on them.
Beta = 0.2
k=0

# Daily rates over the first 23 rows of each file:
# enrollments/clicks (gross conversion) and payments/clicks (net conversion).
enroll_exp = [i/j for i,j in zip(experiment_df['Enrollments'][:23],experiment_df['Clicks'][:23])]
enroll_cont = [i/j for i,j in zip(control_df['Enrollments'][:23],control_df['Clicks'][:23])]
pay_exp = [i/j for i,j in zip(experiment_df['Payments'][:23],experiment_df['Clicks'][:23])]
pay_cont = [i/j for i,j in zip(control_df['Payments'][:23],control_df['Clicks'][:23])]

In [152]:
# Number of days that count as a "success" for each metric:
#   gross conversion - experiment rate below the control rate
#   net conversion   - experiment rate above the control rate
success_gc = sum(exp_rate < cont_rate for exp_rate, cont_rate in zip(enroll_exp, enroll_cont))
success_nc = sum(exp_rate > cont_rate for exp_rate, cont_rate in zip(pay_exp, pay_cont))

print(success_gc, success_nc)

19 10


In [150]:
# Two-sided binomial (sign) test on the gross-conversion success count over
# 23 days, against a success probability of 0.5.
p_value = binom_test(x=success_gc, n=23, p=0.5)
print(
    'gross conversion p-value: ', p_value,
    '\nstatistically significant: ', p_value < alpha,
)

gross conversion p-value:  0.002599477767944336 
statistically significant:  True


In [151]:
# Two-sided binomial (sign) test on the net-conversion success count over
# 23 days, against a success probability of 0.5.
p_value = binom_test(x=success_nc, n=23, p=0.5)
print(
    'net conversion p-value: ', p_value,
    '\nstatistically significant: ', p_value < alpha,
)

net conversion p-value:  0.6776394844055176 
statistically significant:  False
