In [178]:
import pandas as pd
import numpy as np

In [179]:
ab_testing = pd.read_csv('AB_test_data.csv')

In [180]:
ab_testing

Unnamed: 0,purchase_TF,Variant,date,id
0,False,A,2019-11-08,0x25b44a
1,False,B,2020-08-27,0x46271e
2,False,A,2020-06-11,0x80b8f1
3,False,B,2020-08-22,0x8d736d
4,False,A,2020-08-05,0x96c9c8
...,...,...,...,...
129995,False,A,2020-07-23,0x4089c2
129996,False,A,2020-06-24,0x6a5e3a
129997,False,A,2019-10-12,0x95e302
129998,False,A,2020-03-18,0x7c4afa


In [181]:
ab_testing.isnull().any()

purchase_TF    False
Variant        False
date           False
id             False
dtype: bool

# Question 1

In [182]:
control = ab_testing[ab_testing.Variant=='A']
treatment = ab_testing[ab_testing.Variant=='B']
#control
#treatment

In [184]:
control_F = control[control.purchase_TF == False]
control_T = control[control.purchase_TF == True]

In [185]:
conv_A = control_T.shape[0]/control.shape[0]
conv_A# control_F
# control_T

0.149616

In [186]:
treatment_F = treatment[treatment.purchase_TF == False]
treatment_T = treatment[treatment.purchase_TF == True]

In [187]:
#treatment_F
#treatment_T

In [188]:
conv_A = control_T.shape[0]/control.shape[0]
conv_A

0.149616

In [189]:
conv_B = treatment_T.shape[0]/treatment.shape[0]
conv_B

0.1766

# Question 2

# H0: p2 - p1 = 0
# Ha: p2 - p1 ≠ 0

In [190]:
# H0 is the ammsumption we want to reject. only when we think there is difference between these two group,
# it is reasonable for us to conduct A/B testing.

In [191]:
# 95% confidence rate =(1-α)
# α = 0.05 = Significance Level = probability of type 1 error 
# 80% Statistical Power = (1-β)
# β = 0.2 = probability of type 2 error 

# Baseline Rate = conversionrate_A = 0.149616
# Minimum Detectable Effect = 0.05

In [192]:
import math

In [193]:
# z_A = z_α/2 = z_0.025
# calculate z score：
# z=norm.isf(0.05/2)
#z_A = 1.96 

# z_B = z_β = z_0.2
# calculate z score：
# z=norm.isf(0.2)
#z_B = 0.84

In [194]:
from scipy.stats import norm
z_A = abs(norm(0,1).ppf(0.025))
z_B = abs(norm(0,1).ppf(0.2))

In [195]:
p1 = conv_A
# 0.149616
p2 = conv_B
# 0.1766

In [196]:
# using formula to calculate optimal sample size

In [197]:
a = 2*(p1+p2)/2
b = 1- (p1+p2)/2
c = p1*(1-p1)
d = p2*(1-p2)
e = abs(p1-p2)

opt_size = ((z_A * math.sqrt(a * b)+z_B * math.sqrt(c+d))**2)/e**2
opt_size = math.ceil(opt_size)
opt_size

2942

In [198]:
# the optimal sample size is 2942

In [199]:
# use library to double check my optimal sample size

In [200]:
from statsmodels.stats.power import zt_ind_solve_power
from statsmodels.stats.proportion import proportion_effectsize as es

In [201]:
zt_ind_solve_power(effect_size=es(prop1=p1, prop2=p2), alpha=0.05, power=0.8, alternative="two-sided")

2938.2739443584583

In [202]:
# another method

In [203]:
from scipy.stats import norm,zscore

In [204]:
def sample_power_probtest(p1,p2,power=0.8,sig=0.05):
    z=norm.isf([sig/2])
    zp=-1*norm.isf([power])
    d=(p1-p2)
    s=2*((p1+p2)/2)*(1-((p1+p2)/2))
    n=s*((zp+z)**2)/(d**2)
    return int(round(n[0]))

In [205]:
sample_power_probtest(p1,p2)

2943

In [206]:
# another method

In [207]:
import numpy as np
import pandas as pd
import scipy.stats as stats
import statsmodels.stats.api as sms
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
from math import ceil

In [208]:
effect_size = sms.proportion_effectsize(p1, p2)   
required_n = sms.NormalIndPower().solve_power(
    effect_size, 
    power=0.8, 
    alpha=0.05, 
    ratio=1
    )  
print(required_n)

2938.2739443584583


In [209]:
# Conduct the test 10 times using samples of the optimal size

# 10 samples

In [210]:
import pandas as pd
import numpy as np

In [211]:
new_ab_testing = pd.read_csv('AB_test_data.csv')

In [212]:
new_ab_testing.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 130000 entries, 0 to 129999
Data columns (total 4 columns):
 #   Column       Non-Null Count   Dtype 
---  ------       --------------   ----- 
 0   purchase_TF  130000 non-null  bool  
 1   Variant      130000 non-null  object
 2   date         130000 non-null  object
 3   id           130000 non-null  object
dtypes: bool(1), object(3)
memory usage: 3.1+ MB


In [213]:
pd.crosstab(new_ab_testing['Variant'], new_ab_testing['purchase_TF'])

purchase_TF,False,True
Variant,Unnamed: 1_level_1,Unnamed: 2_level_1
A,106298,18702
B,4117,883


In [214]:
# Before we go ahead and sample the data to get our subset, 
#let’s make sure there are no users that have been sampled multiple times.

In [215]:
session_counts = new_ab_testing['id'].value_counts(ascending=False)

In [216]:
multi_users = session_counts[session_counts > 1].count()

print(f'There are {multi_users} users that appear multiple times in the dataset')

There are 0 users that appear multiple times in the dataset


In [217]:
samplelist = []
for i in range(0,10):
    control_sample = new_ab_testing[new_ab_testing['Variant'] == 'A'].sample(n=opt_size, random_state=i)
    treatment_sample = new_ab_testing[new_ab_testing['Variant'] == 'B'].sample(n=opt_size, random_state=i)
    ab_test = pd.concat([control_sample, treatment_sample], axis=0)
    samplelist.append(ab_test)
    ab_test.reset_index(drop=True, inplace=True)
samp_1 = samplelist[0]
samp_2 = samplelist[1]
samp_3 = samplelist[2]
samp_4 = samplelist[3]
samp_5  = samplelist[4]
samp_6  = samplelist[5]
samp_7  = samplelist[6]
samp_8  = samplelist[7]
samp_9  = samplelist[8]
samp_10  = samplelist[9]

In [218]:
samp_1

Unnamed: 0,purchase_TF,Variant,date,id
0,False,A,2020-08-08,0x776020
1,False,A,2020-03-20,0x66a543
2,False,A,2020-03-04,0x2e6bec
3,False,A,2019-08-27,0x627a0c
4,False,A,2019-10-04,0x1c7e95
...,...,...,...,...
5879,False,B,2020-08-26,0x895869
5880,False,B,2020-08-19,0x6e4eb4
5881,False,B,2020-08-21,0x2779ac
5882,True,B,2020-08-30,0x2c12ac


In [219]:
samp_2

Unnamed: 0,purchase_TF,Variant,date,id
0,True,A,2020-07-09,0x11b683
1,False,A,2020-07-12,0x4c2024
2,True,A,2020-01-02,0x62e974
3,False,A,2020-03-17,0x550902
4,False,A,2020-07-03,0x499a93
...,...,...,...,...
5879,True,B,2020-08-26,0x1769ab
5880,False,B,2020-08-24,0x3afb2f
5881,True,B,2020-08-15,0x3e7bd0
5882,False,B,2020-08-15,0x5e8596


In [220]:
#control_sample = new_ab_testing[new_ab_testing['Variant'] == 'A'].sample(n=opt_size, random_state=68)
#treatment_sample = new_ab_testing[new_ab_testing['Variant'] == 'B'].sample(n=opt_size, random_state=68)
# n needs to be int, so we need to math.ceil or int our optimal size calculating before

In [221]:
#ab_test = pd.concat([control_sample, treatment_sample], axis=0)
#ab_test.reset_index(drop=True, inplace=True)

In [222]:
#ab_test

# Question 3

In [223]:
# 95% confidence rate =(1-α)
# α = 0.05 = Significance Level = probability of type 1 error 
# 80% Statistical Power = (1-β)
# β = 0.2 = probability of type 2 error 

# Baseline Rate = conversionrate_A = 0.149616
# Minimum Detectable Effect = 0.05

In [224]:
sample_F = control[control.purchase_TF == False]
control_T = control[control.purchase_TF == True]

In [225]:
p1 = conv_A #p(h0)
# 0.149616
p2 = conv_B #p(h1)
# 0.1766

In [235]:
α = 0.05
β = 0.2
boundA = np.log(1/α) #lnA
print("BoundA is",boundA)
boundB = np.log(β) #lnB
print("BoundB is",boundB)

BoundA is 2.995732273553991
BoundB is -1.6094379124341003


In [231]:
iteration = 0
time = 0
for i in range (1,11):
    #print(i)
    curln = 0
    ln =0
    j = 0
    cur = samplelist[i-1]
    #print(cur)
    A = cur[cur['Variant']=='A']
    m0 = np.mean(A["purchase_TF"])
    B = cur[cur['Variant']=='B']
    m1 = np.mean(B["purchase_TF"])
    while j<2942:
        #print(B["purchase_TF"][j:j+1])
        diff = int(B["purchase_TF"][j:j+1])-int(A["purchase_TF"][j:j+1])
        #print(diff)
        if diff == 1:
            #print(diff)
            ln = math.log(m1/m0)
        elif diff == 0:
            ln = math.log((1-m1)/(1-m0))
            #print(ln)
        curln += ln
        if curln <= boundB:
            print("We will accept H_0 for sample",i,"on trail",j)
            iteration += j
            time+=1
            break
        elif curln >= boundA:
            print("We will reject H_0 for sample",i,"on trail",j)
            iteration += j
            time+=1
            break
        
        j+=1
average = iteration/time
print("The average is",average)
    
        

We will accept H_0 for sample 1 on trail 788
We will accept H_0 for sample 2 on trail 2206
We will accept H_0 for sample 3 on trail 113
We will reject H_0 for sample 4 on trail 1656
We will accept H_0 for sample 5 on trail 976
We will reject H_0 for sample 6 on trail 573
We will accept H_0 for sample 8 on trail 271
We will accept H_0 for sample 9 on trail 235
We will reject H_0 for sample 10 on trail 1047
The average is 873.8888888888889
