In [2]:
import math

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy import stats
from scipy.stats import norm
from statsmodels.stats.proportion import proportions_ztest

%matplotlib inline

In [3]:
data_df = pd.read_csv('data.csv', header=None, names = ['metric', 'estimate_baseline_value'])

In [4]:
data_df

Unnamed: 0,metric,estimate_baseline_value
0,Unique cookies to view course overview page pe...,40000.0
1,"Unique cookies to click ""Start free trial"" per...",3200.0
2,Enrollments per day:,660.0
3,"Click-through-probability on ""Start free trial"":",0.08
4,"Probability of enrolling, given click:",0.20625
5,"Probability of payment, given enroll:",0.53
6,"Probability of payment, given click",0.109313


In [5]:
pd.set_option('max_colwidth', None)

In [6]:
data_df['metric'] = data_df.metric.str.lower()

In [7]:
metric_name = ['number of cookies', 'number of clicks', 'number of enrollments','CTP','gross conversion','retention','net conversion']
data_df['metric_name'] = metric_name

In [8]:
data_df

Unnamed: 0,metric,estimate_baseline_value,metric_name
0,unique cookies to view course overview page per day:,40000.0,number of cookies
1,"unique cookies to click ""start free trial"" per day:",3200.0,number of clicks
2,enrollments per day:,660.0,number of enrollments
3,"click-through-probability on ""start free trial"":",0.08,CTP
4,"probability of enrolling, given click:",0.20625,gross conversion
5,"probability of payment, given enroll:",0.53,retention
6,"probability of payment, given click",0.109313,net conversion


In [9]:
dmin = [3000, -50, 240, 0.01, 0.01, 0.01, 0.0075]
data_df['dmin'] = dmin

In [10]:
data_df

Unnamed: 0,metric,estimate_baseline_value,metric_name,dmin
0,unique cookies to view course overview page per day:,40000.0,number of cookies,3000.0
1,"unique cookies to click ""start free trial"" per day:",3200.0,number of clicks,-50.0
2,enrollments per day:,660.0,number of enrollments,240.0
3,"click-through-probability on ""start free trial"":",0.08,CTP,0.01
4,"probability of enrolling, given click:",0.20625,gross conversion,0.01
5,"probability of payment, given enroll:",0.53,retention,0.01
6,"probability of payment, given click",0.109313,net conversion,0.0075


In [11]:
new_index = ['C','CL','EN','CTP','GC','R','NC']
data_df.index = new_index
data_df

Unnamed: 0,metric,estimate_baseline_value,metric_name,dmin
C,unique cookies to view course overview page per day:,40000.0,number of cookies,3000.0
CL,"unique cookies to click ""start free trial"" per day:",3200.0,number of clicks,-50.0
EN,enrollments per day:,660.0,number of enrollments,240.0
CTP,"click-through-probability on ""start free trial"":",0.08,CTP,0.01
GC,"probability of enrolling, given click:",0.20625,gross conversion,0.01
R,"probability of payment, given enroll:",0.53,retention,0.01
NC,"probability of payment, given click",0.109313,net conversion,0.0075


### Measuring Variability

Evaluation Metrics

- Gross conversion
- Retention
- Net conversion

Calculate the standard deviation of the sampling distribution (the standard error) for each evaluation metric, given a sample size of 5000 cookies visiting the course overview page.

In [12]:
# Rescale the three daily count baselines (cookies, clicks, enrollments) from the
# 40000-cookie baseline down to the 5000-cookie sample used for variance estimation.
scaling_factor = 5000/data_df.iloc[0,1]
scaled_counts = data_df.iloc[:3, 1].to_numpy()*scaling_factor
# the four probability rows have no count to rescale, so they are filled with NaN
scaled_bv = np.concatenate([scaled_counts, np.full(4, np.nan)])
data_df['scaled_est'] = scaled_bv
data_df

Unnamed: 0,metric,estimate_baseline_value,metric_name,dmin,scaled_est
C,unique cookies to view course overview page per day:,40000.0,number of cookies,3000.0,5000.0
CL,"unique cookies to click ""start free trial"" per day:",3200.0,number of clicks,-50.0,400.0
EN,enrollments per day:,660.0,number of enrollments,240.0,82.5
CTP,"click-through-probability on ""start free trial"":",0.08,CTP,0.01,
GC,"probability of enrolling, given click:",0.20625,gross conversion,0.01,
R,"probability of payment, given enroll:",0.53,retention,0.01,
NC,"probability of payment, given click",0.109313,net conversion,0.0075,


In [13]:
data_df['std_error'] = np.nan

In [14]:
def standarderror(p, n):
    """Return the standard error of a proportion, sqrt(p * (1 - p) / n).

    p -- the proportion (probability of success)
    n -- the number of observations the proportion is estimated from
    """
    variance_of_estimate = p*(1-p)/n
    return np.sqrt(variance_of_estimate)

In [15]:
#calc std_error for gross conversion
# data_df is indexed by metric code (C, CL, EN, CTP, GC, R, NC), so rows are looked up
# by label; integer keys on such a Series rely on deprecated positional fallback.
gc_p = data_df.loc['GC','estimate_baseline_value']
# gross conversion is measured per click, so n is the scaled number of clicks
gc_n = data_df.loc['CL','scaled_est']
gc_stderror = standarderror(gc_p, gc_n)
gc_stderror

0.020230604137049392

In [16]:
#calc std_error for retention
# data_df is indexed by metric code (C, CL, EN, CTP, GC, R, NC), so rows are looked up
# by label; integer keys on such a Series rely on deprecated positional fallback.
r_p = data_df.loc['R','estimate_baseline_value']
# retention is measured per enrollment, so n is the scaled number of enrollments
r_n = data_df.loc['EN','scaled_est']
r_stderror = standarderror(r_p, r_n)
r_stderror

0.05494901217850908

In [17]:
#calc std_error for net conversion
# data_df is indexed by metric code (C, CL, EN, CTP, GC, R, NC), so rows are looked up
# by label; integer keys on such a Series rely on deprecated positional fallback.
nc_p = data_df.loc['NC','estimate_baseline_value']
# net conversion is measured per click, so n is the scaled number of clicks
nc_n = data_df.loc['CL','scaled_est']
nc_stderror = standarderror(nc_p, nc_n)
nc_stderror

0.01560154458248846

In [18]:
# Store the three standard errors in the std_error column of their metric rows.
data_df.loc[['GC','R','NC'], 'std_error'] = [gc_stderror, r_stderror, nc_stderror]
data_df

Unnamed: 0,metric,estimate_baseline_value,metric_name,dmin,scaled_est,std_error
C,unique cookies to view course overview page per day:,40000.0,number of cookies,3000.0,5000.0,
CL,"unique cookies to click ""start free trial"" per day:",3200.0,number of clicks,-50.0,400.0,
EN,enrollments per day:,660.0,number of enrollments,240.0,82.5,
CTP,"click-through-probability on ""start free trial"":",0.08,CTP,0.01,,
GC,"probability of enrolling, given click:",0.20625,gross conversion,0.01,,0.020231
R,"probability of payment, given enroll:",0.53,retention,0.01,,0.054949
NC,"probability of payment, given click",0.109313,net conversion,0.0075,,0.015602


In [19]:
data_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 7 entries, C to NC
Data columns (total 6 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   metric                   7 non-null      object 
 1   estimate_baseline_value  7 non-null      float64
 2   metric_name              7 non-null      object 
 3   dmin                     7 non-null      float64
 4   scaled_est               3 non-null      float64
 5   std_error                3 non-null      float64
dtypes: float64(4), object(2)
memory usage: 712.0+ bytes


### Sizing

1. choosing number of samples given power 
    - how many pageviews in total (across both groups) need to be collected to adequately power the experiment?
    - alpha = 0.05  beta = 0.2
    
2. choosing duration vs. exposure
    - what percentage of traffic would you divert to this experiment
    - given the percentage, how long would the experiment take to run?

In [20]:
#determining sample size
alpha = 0.05
beta = 0.2

def zscore(alpha):
    """Return the standard-normal quantile (inverse CDF) at probability `alpha`."""
    quantile = norm.ppf(alpha)
    return quantile

def effectsize(p, dmin):
    """Return dmin expressed in units of the Bernoulli standard deviation sqrt(p * (1 - p)).

    p    -- baseline proportion
    dmin -- minimum difference to detect
    """
    baseline_sd = np.sqrt(p*(1-p))
    return dmin/baseline_sd

def samplesize(alpha, beta, es):
    """Return 2 * ((z_{1-alpha/2} + z_{1-beta}) / es) ** 2.

    alpha -- significance level (two-sided, hence alpha/2)
    beta  -- type II error rate (power is 1 - beta)
    es    -- standardized effect size, as produced by effectsize()
    """
    z_alpha = norm.ppf(1-alpha/2)
    z_beta = norm.ppf(1-beta)
    ratio = (z_alpha+z_beta)/es
    return 2*ratio**2


In [21]:
data_df['sample_size'] = np.nan

In [22]:
#sample size >> number of pageviews needed for gross conversion
# samplesize() is evaluated on a per-click metric; dividing by the click-through-probability
# converts that count into pageviews. The trailing *2 presumably covers both groups
# (control + experiment) -- TODO confirm.
# NOTE(review): gc_p is taken from the standard-error cell, and gc_n is rebound here
# (it previously held the scaled click count).
gc_dmin = data_df.loc['GC','dmin']
gc_n = samplesize(alpha, beta, effectsize(gc_p, gc_dmin))/(data_df.loc['CTP','estimate_baseline_value'])*2
data_df.loc['GC','sample_size'] = gc_n
gc_n

642473.7298175199

In [23]:
samplesize(alpha, beta, effectsize(gc_p, gc_dmin))

25698.9491927008

In [25]:
#sample size to number of pageviews needed for retention
# samplesize() is evaluated on a per-enrollment metric; dividing by the click-through-probability
# and then by the gross conversion converts that count into pageviews. The trailing *2
# presumably covers both groups (control + experiment) -- TODO confirm.
# NOTE(review): r_p is taken from the standard-error cell, and r_n is rebound here
# (it previously held the scaled enrollment count).
r_dmin = data_df.loc['R','dmin']
r_n = samplesize(alpha, beta, effectsize(r_p, r_dmin))/(data_df.loc['CTP','estimate_baseline_value'])/(data_df.loc['GC','estimate_baseline_value'])*2
data_df.loc['R','sample_size'] = r_n

In [26]:
samplesize(alpha, beta, effectsize(r_p, r_dmin))

39103.11883652715

In [27]:
r_n

4739771.980185109

In [29]:
#sample size to number of pageviews needed for net conversion
# samplesize() is evaluated on a per-click metric; dividing by the click-through-probability
# converts that count into pageviews. The trailing *2 presumably covers both groups
# (control + experiment) -- TODO confirm.
# NOTE(review): nc_p is taken from the standard-error cell, and nc_n is rebound here
# (it previously held the scaled click count).
nc_dmin = data_df.loc['NC','dmin']
nc_n = samplesize(alpha, beta, effectsize(nc_p, nc_dmin))/(data_df.loc['CTP','estimate_baseline_value'])*2
data_df.loc['NC','sample_size'] = nc_n

In [30]:
samplesize(alpha, beta, effectsize(nc_p, nc_dmin))

27171.294379134903

In [31]:
nc_n

679282.3594783725

In [32]:
data_df

Unnamed: 0,metric,estimate_baseline_value,metric_name,dmin,scaled_est,std_error,sample_size
C,unique cookies to view course overview page per day:,40000.0,number of cookies,3000.0,5000.0,,
CL,"unique cookies to click ""start free trial"" per day:",3200.0,number of clicks,-50.0,400.0,,
EN,enrollments per day:,660.0,number of enrollments,240.0,82.5,,
CTP,"click-through-probability on ""start free trial"":",0.08,CTP,0.01,,,
GC,"probability of enrolling, given click:",0.20625,gross conversion,0.01,,0.020231,642473.7
R,"probability of payment, given enroll:",0.53,retention,0.01,,0.054949,4739772.0
NC,"probability of payment, given click",0.109313,net conversion,0.0075,,0.015602,679282.4


In [40]:
#exposure & duration
#try diverting 70% of total traffic (traffic = 0.7 below)

traffic = 0.7


In [41]:
#days needed for testing metric GC only
gc_duration = data_df.loc['GC','sample_size']/(data_df.loc['C','estimate_baseline_value']*traffic)

print('Days needed for teting metric Gross Conversion:', math.ceil(gc_duration))

Days needed for teting metric Gross Conversion: 23


In [42]:
#days needed for testing metric NC 
nc_duration = data_df.loc['NC','sample_size']/(data_df.loc['C','estimate_baseline_value']*traffic)

print('Days needed for teting metric Net Conversion:', math.ceil(nc_duration))

Days needed for teting metric Net Conversion: 25


In [36]:
r_duration = data_df.loc['R','sample_size']/(data_df.loc['C','estimate_baseline_value']*traffic)

print('Days needed for teting metric Retention:', math.ceil(r_duration))

Days needed for teting metric Retention: 170


### Analysis w/ Collected Data

In [44]:
#sanity check - invariant metrics are equivalent between two groups
#invariant metrics -'# of cookies' & '# of clicks' & 'ctp'
#load collected data

control_df = pd.read_csv('data_control.csv')
experiment_df = pd.read_csv('data_experiment.csv')
#control_df.head()
experiment_df.head()

Unnamed: 0,Date,Pageviews,Clicks,Enrollments,Payments
0,"Sat, Oct 11",7716,686,105.0,34.0
1,"Sun, Oct 12",9288,785,116.0,91.0
2,"Mon, Oct 13",10480,884,145.0,79.0
3,"Tue, Oct 14",9867,827,138.0,92.0
4,"Wed, Oct 15",9793,832,140.0,94.0


In [45]:
control_df.describe()

Unnamed: 0,Pageviews,Clicks,Enrollments,Payments
count,37.0,37.0,23.0,23.0
mean,9339.0,766.972973,164.565217,88.391304
std,740.239563,68.286767,29.977,20.650202
min,7434.0,632.0,110.0,56.0
25%,8896.0,708.0,146.5,70.0
50%,9420.0,759.0,162.0,91.0
75%,9871.0,825.0,175.0,102.5
max,10667.0,909.0,233.0,128.0


In [46]:
experiment_df.describe()

Unnamed: 0,Pageviews,Clicks,Enrollments,Payments
count,37.0,37.0,23.0,23.0
mean,9315.135135,765.540541,148.826087,84.565217
std,708.070781,64.578374,33.234227,23.060841
min,7664.0,642.0,94.0,34.0
25%,8881.0,722.0,127.0,69.0
50%,9359.0,770.0,142.0,91.0
75%,9737.0,827.0,172.0,99.0
max,10551.0,884.0,213.0,123.0


##### sanity check for pageviews/cookies


In [47]:
# Total pageviews recorded for each group over the whole experiment.
pg_control = control_df['Pageviews'].sum()
pg_exp = experiment_df['Pageviews'].sum()
pg_total = pg_control + pg_exp

print('control pageview:', pg_control)
# label fixed: this line reports the experiment group, not the control group
print('experiment pageview:', pg_exp)

control pageview: 345543
control pageview: 344660


In [48]:
# expected share of pageviews in the control group used for the sanity check
p = 0.5
alpha = 0.05

# observed control share of all pageviews, rounded to 4 decimals
p_hat_pg = round(pg_control/pg_total,4)
# standard error of a proportion p estimated from pg_total pageviews
sde_pg = standarderror(p, pg_total)
# margin of error at confidence level 1-alpha; rounded to 4 decimals before the bounds are built
margin_of_error_pg = round(norm.ppf(1-alpha/2)*(sde_pg),4)
# interval is centred on the expected share p, not on the observed share
pg_left = p - margin_of_error_pg
pg_right = p + margin_of_error_pg

print('pageviews confidence interval is between {} and {}'.format(pg_left, pg_right))

pageviews confidence interval is between 0.4988 and 0.5012


In [49]:
def check_ci(p_hat, left, right):
    """Check whether an observed value lies inside a confidence interval.

    p_hat -- observed value
    left  -- lower bound of the interval (inclusive)
    right -- upper bound of the interval (inclusive)

    Returns a tuple (round(p_hat, 4), message) for both outcomes, so callers can
    always unpack the result the same way. Previously the failing branch returned
    only the message string.
    """
    if left <= p_hat <= right:
        verdict = 'sanity check passed'
    else:
        verdict = 'sanity check not passed'
    return round(p_hat,4), verdict

In [50]:
print(check_ci(p_hat_pg, pg_left, pg_right))

(0.5006, 'sanity check passed')


In [51]:
#using another method: calculate the test statistic and compare the corresponding p-value against alpha
#if p-value is greater than alpha >> no significant difference between the number of pageviews assigned to the two groups
z_statistic_pg, p_value_pg = proportions_ztest(pg_control, pg_total, value=0.5)
if p_value_pg > alpha:
    print('p-value:', round(p_value_pg,4),',' ' pageviews - sanity check is passed')
else:
    print('sanity check is not passed, number of pageviews is different between two groups')

p-value: 0.2878 , pageviews - sanity check is passed


##### sanity check for clicks

In [52]:
# Total clicks recorded for each group over the whole experiment.
cl_control = control_df['Clicks'].sum()
cl_exp = experiment_df['Clicks'].sum()
cl_total = cl_control + cl_exp

print('control clicks:', cl_control)
# label fixed: this line reports the experiment group, not the control group
print('experiment clicks:', cl_exp)

control clicks: 28378
control clicks: 28325


In [53]:
# observed control share of all clicks, rounded to 4 decimals
p_hat_cl = round(cl_control/cl_total,4)
# standard error of the expected share p estimated from cl_total clicks
sde_cl = standarderror(p, cl_total)
# margin of error at confidence level 1-alpha; rounded to 4 decimals before the bounds are built
margin_of_error_cl = round(norm.ppf(1-alpha/2)*(sde_cl),4)
cl_left = p - margin_of_error_cl
cl_right = p + margin_of_error_cl

# label fixed: this interval is for clicks, not pageviews
print('clicks confidence interval is between {} and {}'.format(cl_left, cl_right))

pageviews confidence interval is between 0.4959 and 0.5041


In [54]:
print(check_ci(p_hat_cl, cl_left, cl_right))

(0.5005, 'sanity check passed')


In [55]:
# same one-sample proportion z-test as for pageviews: is the control share of clicks 0.5?
z_statistic_cl, p_value_cl = proportions_ztest(cl_control, cl_total, value=0.5)
# report the outcome; previously the p-value was computed but never compared to alpha
if p_value_cl > alpha:
    print('p-value:', round(p_value_cl,4),', clicks - sanity check is passed')
else:
    print('sanity check is not passed, number of clicks is different between two groups')


##### sanity check for click through probability

In [56]:
#calculate click-through-probability (CTP = clicks / pageviews) for both groups
ctp_control = control_df['Clicks'].sum()/control_df['Pageviews'].sum()
ctp_exp = experiment_df['Clicks'].sum()/experiment_df['Pageviews'].sum()

#calculate observed difference between two groups (control minus experiment)
d_hat = ctp_control-ctp_exp

#calculate sqrt(p*(1-p)) for both groups
#NOTE(review): despite the sde_ names these are not yet divided by a sample size
sde_control = np.sqrt(ctp_control*(1-ctp_control))
sde_exp = np.sqrt(ctp_exp*(1-ctp_exp))

#standard error of the difference: each group's variance is divided by its own pageview count
#NOTE(review): despite the name, no common pooled proportion is used here (unpooled form)
se_pooled = np.sqrt(sde_control**2/pg_control + sde_exp**2/pg_exp)

#calculate margin of error (rounded to 4 decimals before the bounds are built)
me_ctp = round(zscore(1-alpha/2)*(se_pooled),4)

#calculate left & right CI bounds, centred on a difference of 0
ctp_left = 0-me_ctp
ctp_right = 0+me_ctp

print('Click-through-probability confidence interval is between {} and {}'.format(ctp_left, ctp_right))



Click-through-probability confidence interval is between -0.0013 and 0.0013


In [57]:
print(check_ci(d_hat, ctp_left, ctp_right))

(-0.0001, 'sanity check passed')


In [58]:
#alternative approach using statsmodels' proportions_ztest function
#comparing p-value to alpha
# element 0 is the control group, element 1 the experiment group
n_total = np.array([pg_control, pg_exp])
n_clicks = np.array([cl_control, cl_exp])

# value=0: the null hypothesis is that the two CTPs do not differ
z_statistic_ctp, p_value_ctp = proportions_ztest(n_clicks, n_total, value=0)

#if p-value is greater than alpha, we fail to reject the null hypothesis, i.e. no significant difference in CTP between the experiment & control groups
if p_value_ctp > alpha:
    print('p-value:', round(p_value_ctp,4),',' ' CTP - sanity check is passed')
else:
    print('sanity check is not passed, CTP is different between two groups')
    

    
# d_hat = ctp_control-ctp_exp
# p_pooled = cl_total/pg_total
# z_statistic_ctp = (d_hat-0)/np.sqrt(p_pooled*(1-p_pooled)*(1/pg_control+1/pg_exp))
# p_value_ctp = norm.cdf(z_statistic_ctp)*2
# p_value_ctp

p-value: 0.9317 , CTP - sanity check is passed


### Result Analysis

In [59]:
#check if there's null values
control_df.isnull().sum()

Date            0
Pageviews       0
Clicks          0
Enrollments    14
Payments       14
dtype: int64

In [60]:
experiment_df.isnull().sum()

Date            0
Pageviews       0
Clicks          0
Enrollments    14
Payments       14
dtype: int64

In [61]:
# days on which enrollment data is present, one mask per group
cont_has_enrol = control_df['Enrollments'].notnull()
exp_has_enrol = experiment_df['Enrollments'].notnull()

# clicks summed over those days only
cl_cont_r = control_df.loc[cont_has_enrol, 'Clicks'].sum()
cl_exp_r = experiment_df.loc[exp_has_enrol, 'Clicks'].sum()

# pageviews summed over those days only
pg_cont_r = control_df.loc[cont_has_enrol, 'Pageviews'].sum()
pg_exp_r = experiment_df.loc[exp_has_enrol, 'Pageviews'].sum()

# total enrollments per group (missing days are skipped by sum())
enrol_cont_r = control_df['Enrollments'].sum()
enrol_exp_r = experiment_df['Enrollments'].sum()

##### gross conversion analysis

In [65]:
#calculate gross conversion (GC = enrollments / clicks) for both groups
gc_cont_result = enrol_cont_r/cl_cont_r #p1
gc_exp_result = enrol_exp_r/cl_exp_r #p2

# observed difference, experiment minus control
gc_diff_r = gc_exp_result - gc_cont_result
#gc_diff
#the difference is negative, indicating the experiment group has lower gross conversion than the control group, which is expected
#since the pop-up screen (how many hrs devote to study) should filter out people who do not have enough time to study

#calculate standard error of the difference
#NOTE(review): each group's own proportion and click count is used, i.e. the unpooled form
gc_se_r = np.sqrt(gc_exp_result*(1-gc_exp_result)/cl_exp_r + gc_cont_result*(1-gc_cont_result)/cl_cont_r)

#calculate left and right bounds of the CI, centred on the observed difference
alpha = 0.05
gc_r_left = gc_diff_r - zscore(1-alpha/2)*gc_se_r
gc_r_right = gc_diff_r + zscore(1-alpha/2)*gc_se_r
print('the confidence interval for gross conversion: [{}, {}]'.format(round(gc_r_left,4), round(gc_r_right,4)))

the confidence interval for gross conversion: [-0.0291, -0.012]


In [66]:
#another approach using p-value
#H0: GCcontrol = GCexp
#Ha: GCcontrol != GCexp

gc_parameter_count = np.array([enrol_cont_r, enrol_exp_r])
gc_parameter_nobs = np.array([cl_cont_r, cl_exp_r])

gc_zscore_r, gc_pvalue_r = proportions_ztest(gc_parameter_count, gc_parameter_nobs, 0)
print('z-statistic: {}, p-value: {}'.format(round(gc_zscore_r,4), gc_pvalue_r))

z-statistic: 4.7018, p-value: 2.578401033720593e-06


In [67]:
gc_diff_r

-0.020554874580361565

In [68]:
#calculating p-value using a self-defined function
def two_proportions_test(succ_1, n_1, succ_2, n_2):
    '''
    Pooled two-proportion z-test.

    succ_1, n_1 -- number of successes and sample size of the first group
    succ_2, n_2 -- number of successes and sample size of the second group

    Returns (z, p): the absolute value of the z-statistic and the two-sided p-value.
    '''

    p1 = succ_1/n_1
    p2 = succ_2/n_2
    p_pooled = (succ_1+succ_2)/(n_1+n_2)

    # named z_abs so the local does not shadow the module-level zscore() helper
    z_abs = abs(p2-p1)/np.sqrt(p_pooled*(1-p_pooled)*(1/n_1+1/n_2))

    # norm.sf is the survival function (1 - cdf) evaluated directly; 1-norm.cdf(z)
    # loses all precision and collapses to 0.0 once z is large
    pvalue_oneside = norm.sf(z_abs)
    pvalue_twosides = pvalue_oneside*2

    return z_abs, pvalue_twosides


In [69]:
#function for determin practical significance 
def test_practical_sig(result_d, dmin, ci_left, ci_right):
    '''
    given previously determined practical significance boundary for each metric
    compare result to the practical boundary to check for practical significance
    '''
    
    if dmin >= 0:
        if result_d > dmin and ci_left > dmin:
            return 'metric is practically significant'
        else:
            return 'metric is not practically significant'
    else:
        if result_d < dmin and ci_right < dmin:
            return 'metric is practically significant'
        else:
            return 'metric is not practically significant'

In [70]:
gc_zscore_r2, gc_pvalue_r2 = two_proportions_test(enrol_cont_r, cl_cont_r, enrol_exp_r, cl_exp_r)

print('gross conversion: z-statistic = {}, p-value = {}'.format(gc_zscore_r2, gc_pvalue_r2))

gross conversion: z-statistic = 4.701830023753982, p-value = 2.5784010337925878e-06


In [71]:
test_practical_sig(gc_diff_r, -data_df.loc['GC','dmin'], gc_r_left, gc_r_right)

'metric is practically significant'

##### net conversion analysis

In [72]:
#compute number of payments for each group
pm_cont_r = control_df['Payments'].sum()
pm_exp_r = experiment_df['Payments'].sum()


#calculate net conversion (NC) for both groups 
nc_cont_r = pm_cont_r/cl_cont_r
nc_exp_r = pm_exp_r/cl_exp_r

In [73]:
# observed difference in net conversion, experiment minus control
nc_diff_r = nc_exp_r - nc_cont_r

#calculate standard error of the difference
#NOTE(review): each group's own proportion and click count is used, i.e. the unpooled form despite the variable name
nc_sepool_r = np.sqrt(nc_exp_r*(1-nc_exp_r)/cl_exp_r + nc_cont_r*(1-nc_cont_r)/cl_cont_r)

#calculate left and right bounds of the CI, centred on the observed difference
alpha = 0.05
nc_r_left = nc_diff_r - zscore(1-alpha/2)*nc_sepool_r
nc_r_right = nc_diff_r + zscore(1-alpha/2)*nc_sepool_r
print('the confidence interval for net conversion: [{}, {}]'.format(round(nc_r_left,4), round(nc_r_right,4)))

the confidence interval for net conversion: [-0.0116, 0.0019]


In [74]:
#another approach using p-value
#H0: NCcontrol = NCexp
#Ha: NCcontrol != NCexp

nc_parameter_count = np.array([pm_cont_r, pm_exp_r])
nc_parameter_nobs = np.array([cl_cont_r, cl_exp_r])

nc_zscore_r, nc_pvalue_r = proportions_ztest(nc_parameter_count, nc_parameter_nobs, 0)
print('z-statistic: {}, p-value: {}'.format(round(nc_zscore_r,4), nc_pvalue_r))

z-statistic: 1.4192, p-value: 0.15584068262150205


In [77]:
nc_diff_r

-0.0048737226745441675

In [75]:
nc_zscore_r2, nc_pvalue_r2 = two_proportions_test(pm_cont_r, cl_cont_r, pm_exp_r, cl_exp_r)
print('net conversion: z-statistic = {}, p-value = {}'.format(nc_zscore_r2, nc_pvalue_r2))

net conversion: z-statistic = 1.4192001144365733, p-value = 0.15584068262150197


In [76]:
#determine practical significance 
test_practical_sig(nc_diff_r, data_df.loc['NC','dmin'], nc_r_left, nc_r_right)

'metric is not practically significant'

### Double Check with Sign Test

Do a sign test for each evaluation metric using day-by-day breakdown. Check if it agrees with the confidence interval for the difference. 

In [319]:
#merge the two datasets side by side, suffixing control columns with _cont and experiment columns with _exp
# NOTE(review): join() matches rows on the index, not on Date -- this assumes both
# files list the same days in the same order; TODO confirm.
result_all = control_df.join(experiment_df, how='inner', lsuffix='_cont', rsuffix='_exp')
result_all.head()

Unnamed: 0,Date_cont,Pageviews_cont,Clicks_cont,Enrollments_cont,Payments_cont,Date_exp,Pageviews_exp,Clicks_exp,Enrollments_exp,Payments_exp
0,"Sat, Oct 11",7723,687,134.0,70.0,"Sat, Oct 11",7716,686,105.0,34.0
1,"Sun, Oct 12",9102,779,147.0,70.0,"Sun, Oct 12",9288,785,116.0,91.0
2,"Mon, Oct 13",10511,909,167.0,95.0,"Mon, Oct 13",10480,884,145.0,79.0
3,"Tue, Oct 14",9871,836,156.0,105.0,"Tue, Oct 14",9867,827,138.0,92.0
4,"Wed, Oct 15",10014,837,163.0,64.0,"Wed, Oct 15",9793,832,140.0,94.0


In [320]:
# keep only the days that have enrollment data for the control group;
# .copy() makes the result an independent frame, so adding columns to it later
# does not act on a slice of the joined frame (SettingWithCopyWarning)
result_all = result_all.loc[result_all['Enrollments_cont'].notnull()].copy()
result_all.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 23 entries, 0 to 22
Data columns (total 10 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Date_cont         23 non-null     object 
 1   Pageviews_cont    23 non-null     int64  
 2   Clicks_cont       23 non-null     int64  
 3   Enrollments_cont  23 non-null     float64
 4   Payments_cont     23 non-null     float64
 5   Date_exp          23 non-null     object 
 6   Pageviews_exp     23 non-null     int64  
 7   Clicks_exp        23 non-null     int64  
 8   Enrollments_exp   23 non-null     float64
 9   Payments_exp      23 non-null     float64
dtypes: float64(4), int64(4), object(2)
memory usage: 2.0+ KB


In [321]:
#check daily if the experiment result is greater than control result

#gross conversion
signt_gc_cont = result_all['Enrollments_cont']/result_all['Clicks_cont']
signt_gc_exp = result_all['Enrollments_exp']/result_all['Clicks_exp']
result_all['gc_test'] = np.where(signt_gc_cont < signt_gc_exp, 1, 0)

#net conversion
signt_nc_cont = result_all['Payments_cont']/result_all['Clicks_cont']
signt_nc_exp = result_all['Payments_exp']/result_all['Clicks_exp']
result_all['nc_test'] = np.where(signt_nc_cont < signt_nc_exp, 1, 0)


In [322]:
result_all.head()

Unnamed: 0,Date_cont,Pageviews_cont,Clicks_cont,Enrollments_cont,Payments_cont,Date_exp,Pageviews_exp,Clicks_exp,Enrollments_exp,Payments_exp,gc_test,nc_test
0,"Sat, Oct 11",7723,687,134.0,70.0,"Sat, Oct 11",7716,686,105.0,34.0,0,0
1,"Sun, Oct 12",9102,779,147.0,70.0,"Sun, Oct 12",9288,785,116.0,91.0,0,1
2,"Mon, Oct 13",10511,909,167.0,95.0,"Mon, Oct 13",10480,884,145.0,79.0,0,0
3,"Tue, Oct 14",9871,836,156.0,105.0,"Tue, Oct 14",9867,827,138.0,92.0,0,0
4,"Wed, Oct 15",10014,837,163.0,64.0,"Wed, Oct 15",9793,832,140.0,94.0,0,1


In [329]:
#number of days flagged 1 (experiment value greater than control value) for each metric
gc_true = (result_all['gc_test'] == 1).sum()
nc_true = (result_all['nc_test'] == 1).sum()

#total number of days compared
n_total = result_all.shape[0]

In [338]:
#getting the p-value of the sign test using scipy's exact binomial test
# stats.binomtest replaces stats.binom_test, which has been removed from SciPy;
# it returns a result object, so the p-value is read from .pvalue.
# int() turns the numpy integer counts into plain Python ints.
gc_signt_pvalue = stats.binomtest(int(gc_true), int(n_total), 0.5, alternative='two-sided').pvalue
if gc_signt_pvalue < 0.05:
    print('gross conversion: sign test passed, change is significant')
else:
    print('gross conversion: sign test not passed, change is not significant ')



gross conversion: sign test passed, change is significant


In [339]:
# sign test for net conversion using scipy's exact binomial test
# stats.binomtest replaces stats.binom_test, which has been removed from SciPy;
# it returns a result object, so the p-value is read from .pvalue.
# int() turns the numpy integer counts into plain Python ints.
nc_signt_pvalue = stats.binomtest(int(nc_true), int(n_total), 0.5, alternative='two-sided').pvalue
if nc_signt_pvalue < 0.05:
    print('net conversion: sign test passed, change is significant')
else:
    print('net conversion: sign test not passed, change is not significant ')


net conversion: sign test not passed, change is not significant 


In [340]:
gc_signt_pvalue

0.0025994777679443364

In [341]:
nc_signt_pvalue

0.6776394844055175