# Load Data

In [10]:
import pandas as pd
import math

In [2]:
# Load the experiment data from the 'Data' sheet of Analysis.xlsx into a DataFrame.
df = pd.read_excel('Analysis.xlsx', sheet_name='Data')

In [3]:
# Summarize column dtypes and non-null counts of the freshly loaded frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 74 entries, 0 to 73
Data columns (total 6 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Group        74 non-null     object 
 1   Date         74 non-null     object 
 2   Pageviews    74 non-null     int64  
 3   Clicks       74 non-null     int64  
 4   Enrollments  46 non-null     float64
 5   Payments     46 non-null     float64
dtypes: float64(2), int64(2), object(2)
memory usage: 3.6+ KB


In [4]:
# Preview the first rows to confirm the column layout.
df.head()

Unnamed: 0,Group,Date,Pageviews,Clicks,Enrollments,Payments
0,Experiment,"Sat, Oct 11",7716,686,105.0,34.0
1,Experiment,"Sun, Oct 12",9288,785,116.0,91.0
2,Experiment,"Mon, Oct 13",10480,884,145.0,79.0
3,Experiment,"Tue, Oct 14",9867,827,138.0,92.0
4,Experiment,"Wed, Oct 15",9793,832,140.0,94.0


# Sanity Checks

In [21]:
def calc_sanity(metric, data=None):
    """Print the 95% CI for an even control/experiment split and the observed split.

    The ``metric`` column is summed per group. Assuming each unit is assigned to
    either group with probability 0.5, the standard deviation of the control
    fraction is ``sqrt(0.5 * 0.5 / total)`` and the printed interval is
    ``0.5 +/- 1.96 * SD``. The observed fraction is control / (control + experiment).

    Parameters
    ----------
    metric : str
        Name of the count column to check (e.g. 'Pageviews').
    data : pandas.DataFrame, optional
        Frame with a 'Group' column holding 'Control' / 'Experiment' labels and
        the ``metric`` column. Defaults to the notebook-level ``df``.

    Raises
    ------
    ValueError
        If the two groups together sum to zero for ``metric``.
    """
    if data is None:
        data = df  # fall back to the notebook-level frame

    expected_fraction = 0.5  # intended share of traffic per group
    z_95 = 1.96              # two-sided 95% critical value of the normal distribution

    control_sum = data.loc[data['Group'] == 'Control', metric].sum()
    experiment_sum = data.loc[data['Group'] == 'Experiment', metric].sum()
    total = control_sum + experiment_sum
    if total == 0:
        raise ValueError("No '" + str(metric) + "' counts found for the Control/Experiment groups")

    SD = math.sqrt(expected_fraction * (1 - expected_fraction) / total)
    m = SD * z_95
    CI_LB = expected_fraction - m
    CI_UB = expected_fraction + m
    print("CI 95%: [" + str(round(CI_LB, 4)) + " - " + str(round(CI_UB, 4)) + "]")

    observed_fraction = control_sum / total
    print("Observed fraction: " + str(round(observed_fraction, 4)))

In [22]:
# Invariant check: the control group's share of total Pageviews vs. the 95% CI around 0.5.
calc_sanity('Pageviews')

CI 95%: [0.4988 - 0.5012]
Observed fraction: 0.5006


In [23]:
# Invariant check: the control group's share of total Clicks vs. the 95% CI around 0.5.
calc_sanity('Clicks')

CI 95%: [0.4959 - 0.5041]
Observed fraction: 0.5005


In [52]:
def compare_two_samples(X, N, data=None):
    """Print a zero-centred 95% interval for a difference in proportions and the observed difference.

    For each group the proportion is ``sum(X) / sum(N)``. The pooled proportion
    and pooled standard error are computed from both groups, and the printed
    interval is ``0 +/- 1.96 * SE_pool`` (i.e. the range expected if the two
    groups do not differ). The observed difference is experiment minus control.

    Parameters
    ----------
    X : str
        Name of the numerator count column (e.g. 'Clicks').
    N : str
        Name of the denominator count column (e.g. 'Pageviews').
    data : pandas.DataFrame, optional
        Frame with a 'Group' column holding 'Control' / 'Experiment' labels and
        the ``X`` and ``N`` columns. Defaults to the notebook-level ``df``.

    Raises
    ------
    ValueError
        If either group has a zero total for ``N``.
    """
    if data is None:
        data = df  # fall back to the notebook-level frame

    z_95 = 1.96  # two-sided 95% critical value of the normal distribution

    # Filter each group once instead of once per column.
    control = data[data['Group'] == 'Control']
    experiment = data[data['Group'] == 'Experiment']

    X_cont = control[X].sum()
    N_cont = control[N].sum()
    X_exp = experiment[X].sum()
    N_exp = experiment[N].sum()
    if N_cont == 0 or N_exp == 0:
        raise ValueError("Column '" + str(N) + "' sums to zero for at least one group")

    p_cont = X_cont / N_cont
    p_exp = X_exp / N_exp
    p_pool = (X_cont + X_exp) / (N_cont + N_exp)
    SE_pool = math.sqrt(p_pool * (1 - p_pool) * (1 / N_cont + 1 / N_exp))
    CI_LB = -1 * SE_pool * z_95
    CI_UB = SE_pool * z_95
    d_hat = p_exp - p_cont

    print("CI 95%: [" + str(round(CI_LB, 4)) + " - " + str(round(CI_UB, 4)) + "]")
    print("Observed difference: " + str(round(d_hat, 4)))

In [36]:
# Click-through probability invariant: Clicks (numerator) over Pageviews (denominator).
X = 'Clicks'
N = 'Pageviews'

# Pass the names defined above instead of repeating the literals.
compare_two_samples(X, N)

CI 95%: [-0.0013 - 0.0013]
Observed difference: 0.0001


For all three invariant metrics (pageviews, clicks, and click-through probability), the observed fraction/difference falls within its 95% confidence interval. Therefore, the invariant sanity checks pass.

# Check for Practical and Statistical Significance

Gross Conversion (GC) = # enrolled in free trial / # who clicked start free trial

In [37]:
# Keep only rows with no missing values in any column (the df.info() output above
# shows Enrollments and Payments populated for 46 of the 74 rows).
df = df.dropna()

In [112]:
# NOTE(review): the saved output of this cell lists GC/NC/Ret columns that are only
# created in a later cell (and an index label 'total'), so it appears to come from
# out-of-order execution — re-run the notebook top to bottom to refresh it.
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 47 entries, 0 to total
Data columns (total 9 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Group        46 non-null     object 
 1   Date         46 non-null     object 
 2   Pageviews    47 non-null     float64
 3   Clicks       47 non-null     float64
 4   Enrollments  47 non-null     float64
 5   Payments     47 non-null     float64
 6   GC           47 non-null     float64
 7   NC           47 non-null     float64
 8   Ret          47 non-null     float64
dtypes: float64(7), object(2)
memory usage: 4.7+ KB


In [64]:
def analyze_results(X, N, data=None):
    """Print the 95% CI around the observed difference in proportions (experiment - control).

    For each group the proportion is ``sum(X) / sum(N)``. The interval is
    ``d_hat +/- 1.96 * SE_pool``, where ``SE_pool`` is the pooled standard error
    computed from both groups and ``d_hat`` is experiment minus control.

    Parameters
    ----------
    X : str
        Name of the numerator count column (e.g. 'Enrollments').
    N : str
        Name of the denominator count column (e.g. 'Clicks').
    data : pandas.DataFrame, optional
        Frame with a 'Group' column holding 'Control' / 'Experiment' labels and
        the ``X`` and ``N`` columns. Defaults to the notebook-level ``df``.

    Raises
    ------
    ValueError
        If either group has a zero total for ``N``.
    """
    if data is None:
        data = df  # fall back to the notebook-level frame

    z_95 = 1.96  # two-sided 95% critical value of the normal distribution

    # Filter each group once instead of once per column.
    control = data[data['Group'] == 'Control']
    experiment = data[data['Group'] == 'Experiment']

    X_cont = control[X].sum()
    N_cont = control[N].sum()
    X_exp = experiment[X].sum()
    N_exp = experiment[N].sum()
    if N_cont == 0 or N_exp == 0:
        raise ValueError("Column '" + str(N) + "' sums to zero for at least one group")

    p_cont = X_cont / N_cont
    p_exp = X_exp / N_exp
    p_pool = (X_cont + X_exp) / (N_cont + N_exp)
    SE_pool = math.sqrt(p_pool * (1 - p_pool) * (1 / N_cont + 1 / N_exp))
    d_hat = p_exp - p_cont
    # Unlike the sanity check, the interval is centred on the observed difference.
    CI_LB = -1 * SE_pool * z_95 + d_hat
    CI_UB = SE_pool * z_95 + d_hat

    print("CI 95%: [" + str(round(CI_LB, 4)) + " - " + str(round(CI_UB, 4)) + "]")
    print("Observed difference: ", round(d_hat, 4))

In [65]:
# Gross conversion: Enrollments (numerator) over Clicks (denominator).
analyze_results('Enrollments', 'Clicks')

CI 95%: [-0.0291 - -0.012]
Observed difference:  -0.0206


For Gross Conversion, the 95% CI does not include 0, so the difference is statistically significant. However, the difference is negative, which means the control group had a higher gross conversion than the experiment group.

Net conversion (NC) = # paid / # clicked start free trial

In [66]:
# Net conversion: Payments (numerator) over Clicks (denominator).
analyze_results('Payments', 'Clicks')

CI 95%: [-0.0116 - 0.0019]
Observed difference:  -0.0049


For Net Conversion, the 95% CI includes 0, so the difference is not statistically significant.

Retention (R): # paid / # enrolled in free trial

In [67]:
# Retention: Payments (numerator) over Enrollments (denominator).
analyze_results('Payments', 'Enrollments')

CI 95%: [0.0081 - 0.0541]
Observed difference:  0.0311


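For Retention, the 95% CI does not include 0, so the difference is statistically significant. However, the practical significance boundary dmin = 0.01 lies within the 95% CI [0.0081, 0.0541], so practical significance is not established: the entire CI would need to lie above dmin to justify a launch. Typically, this means additional tests should be run.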

# Run Sign Tests

In [114]:
from scipy import stats

In [100]:
# Per-row ratio metrics used by the sign tests:
#   GC  = Enrollments / Clicks
#   NC  = Payments / Clicks
#   Ret = Payments / Enrollments
df = df.assign(
    GC=df['Enrollments'] / df['Clicks'],
    NC=df['Payments'] / df['Clicks'],
    Ret=df['Payments'] / df['Enrollments'],
)

In [108]:
def sign_test(metric, data=None):
    """Run a two-sided sign test on the per-date experiment-vs-control values of ``metric``.

    The control and experiment rows are aligned on 'Date', the difference
    (experiment - control) is taken per date, and the number of dates with a
    positive difference is tested against a Binomial(n, 0.5) distribution.
    Dates that are missing in one group or where both groups are exactly equal
    carry no sign and are excluded from ``n``.

    Prints the number of dates where the experiment was higher, then the p-value.

    Parameters
    ----------
    metric : str
        Name of the per-row metric column (e.g. 'GC').
    data : pandas.DataFrame, optional
        Frame with 'Group' ('Control' / 'Experiment'), 'Date' and ``metric``
        columns. Defaults to the notebook-level ``df``.

    Raises
    ------
    ValueError
        If no date has a non-zero difference between the two groups.
    """
    if data is None:
        data = df  # fall back to the notebook-level frame

    exp_col = metric + '_exp'
    cont_col = metric + '_cont'

    # Index each group's values by date so the two series align on concat.
    control = data[data['Group'] == 'Control'].set_index('Date')[metric]
    experiment = data[data['Group'] == 'Experiment'].set_index('Date')[metric]
    paired = pd.concat([experiment, control], axis=1, keys=[exp_col, cont_col])

    diff = (paired[exp_col] - paired[cont_col]).dropna()  # unmatched dates have no sign
    diff = diff[diff != 0]                                # ties have no sign either

    n_days = len(diff)
    if n_days == 0:
        raise ValueError("No dates with a non-zero difference for '" + str(metric) + "'")
    n_higher = int((diff > 0).sum())

    print(n_higher, "days where the experiment was higher than the control")

    # stats.binom_test was deprecated and later removed from SciPy; prefer
    # stats.binomtest and only fall back on older SciPy versions that lack it.
    if hasattr(stats, 'binomtest'):
        p_value = stats.binomtest(n_higher, n=n_days, p=0.5, alternative='two-sided').pvalue
    else:
        p_value = stats.binom_test(n_higher, n=n_days, p=0.5, alternative='two-sided')
    print(p_value)

In [109]:
# Sign test on the per-date gross conversion (GC) values.
sign_test('GC')

4 days where the experiment was higher than the control
0.002599477767944336


The p-value is below 0.05, so this result is unlikely to be due to chance: the change in GC is statistically significant.

In [110]:
# Sign test on the per-date net conversion (NC) values.
sign_test('NC')

10 days where the experiment was higher than the control
0.6776394844055176


The p-value is above 0.05, so this result could plausibly be due to chance: the change in NC is not statistically significant.

In [115]:
# Sign test on the per-date retention (Ret) values.
sign_test('Ret')

13 days where the experiment was higher than the control
0.6776394844055176


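The p-value is above 0.05, so this result could plausibly be due to chance: the change in Ret is not statistically significant according to the sign test. Note that this disagrees with the confidence-interval analysis above, where the Retention CI excluded 0.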

In [116]:
# Per-date gross conversion for both groups and their difference. Built explicitly
# here because the frame named df3 inside sign_test() is local to that function,
# so a bare `df3` raises NameError when the notebook is run top to bottom.
gc_exp = df[df['Group'] == 'Experiment'].set_index('Date')['GC']
gc_cont = df[df['Group'] == 'Control'].set_index('Date')['GC']
gc_daily = pd.concat([gc_exp, gc_cont], axis=1, keys=['GC_exp', 'GC_cont'])
gc_daily['GC_diff'] = gc_daily['GC_exp'] - gc_daily['GC_cont']
gc_daily

Unnamed: 0_level_0,GC_exp,GC_cont,GC_diff
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"Sat, Oct 11",0.153061,0.195051,-0.04199
"Sun, Oct 12",0.147771,0.188703,-0.040933
"Mon, Oct 13",0.164027,0.183718,-0.019691
"Tue, Oct 14",0.166868,0.186603,-0.019735
"Wed, Oct 15",0.168269,0.194743,-0.026474
"Thu, Oct 16",0.163706,0.167679,-0.003974
"Fri, Oct 17",0.162821,0.195187,-0.032367
"Sat, Oct 18",0.144172,0.174051,-0.029879
"Sun, Oct 19",0.172166,0.18958,-0.017414
"Mon, Oct 20",0.177907,0.191638,-0.013731
