In [1]:
import math

import numpy as np
import pandas as pd
from scipy.stats import norm

In [2]:
# Load the raw A/B test observations; the path is relative to the working directory.
ab_testing = pd.read_csv('AB_test_data.csv')

In [3]:
# Display the loaded frame (the rendered output elides the middle rows).
ab_testing

Unnamed: 0,purchase_TF,Variant,date,id
0,False,A,2019-11-08,0x25b44a
1,False,B,2020-08-27,0x46271e
2,False,A,2020-06-11,0x80b8f1
3,False,B,2020-08-22,0x8d736d
4,False,A,2020-08-05,0x96c9c8
...,...,...,...,...
129995,False,A,2020-07-23,0x4089c2
129996,False,A,2020-06-24,0x6a5e3a
129997,False,A,2019-10-12,0x95e302
129998,False,A,2020-03-18,0x7c4afa


In [4]:
# Per-column flag: True when the column contains at least one missing value.
ab_testing.isnull().any()

purchase_TF    False
Variant        False
date           False
id             False
dtype: bool

# Question 1

In [5]:
# Split the observations by experiment arm: Variant 'A' -> control, 'B' -> treatment.
control = ab_testing.loc[ab_testing['Variant'].eq('A')]
treatment = ab_testing.loc[ab_testing['Variant'].eq('B')]

In [6]:
# Display the control-arm rows (Variant == 'A').
control

Unnamed: 0,purchase_TF,Variant,date,id
0,False,A,2019-11-08,0x25b44a
2,False,A,2020-06-11,0x80b8f1
4,False,A,2020-08-05,0x96c9c8
5,False,A,2020-03-10,0x751c24
6,False,A,2019-11-05,0x60d2bd
...,...,...,...,...
129995,False,A,2020-07-23,0x4089c2
129996,False,A,2020-06-24,0x6a5e3a
129997,False,A,2019-10-12,0x95e302
129998,False,A,2020-03-18,0x7c4afa


In [7]:
# Partition the control arm by outcome: *_T purchased, *_F did not.
control_T = control.loc[control['purchase_TF'].eq(True)]
control_F = control.loc[control['purchase_TF'].eq(False)]

In [8]:
# Partition the treatment arm by outcome: *_T purchased, *_F did not.
treatment_T = treatment.loc[treatment['purchase_TF'].eq(True)]
treatment_F = treatment.loc[treatment['purchase_TF'].eq(False)]

In [9]:
# Control conversion rate: purchasers in the control arm divided by all control users.
conv_A = len(control_T) / len(control)
conv_A

0.149616

In [10]:
# Treatment conversion rate: purchasers in the treatment arm divided by all treatment users.
conv_B = len(treatment_T) / len(treatment)
conv_B

0.1766

In [11]:
# One-sample, one-sided z-test of the treatment conversion rate against the
# control rate, which is used as the baseline proportion:
#   H0: conv_B <= conv_A      Ha: conv_B > conv_A
alpha = 0.05
# Upper-tail critical value of the standard normal (about 1.645), derived from
# alpha instead of relying on a rounded constant.
z_critical = norm.ppf(1 - alpha)

# Standard error of a proportion under H0, with n = size of the treatment arm.
std_error = math.sqrt(conv_A * (1 - conv_A) / len(treatment))
z_score = (conv_B - conv_A) / std_error
print(f'The z score for this A/B test is {z_score:.2f}')
if z_score >= z_critical:
    print('Therefore we reject the null hypothesis.')
else:
    print('Therefore we fail to Reject null hypothesis.')

The z score for this A/B test is 5.35
Therefore we reject the null hypothesis.


# Question 2

# H0: p1 ≥ p2
# Ha: p1 < p2 

In [12]:
# H0 is the assumption we want to reject; we reject it if we find that the treatment group performs better than the control group

In [13]:
# 95% confidence level = (1-α)
# α = 0.05 = Significance Level = probability of type 1 error 
# 80% Statistical Power = (1-β)
# β = 0.2 = probability of type 2 error 

# Baseline Rate = conversionrate_A = 0.149616
# Minimum Detectable Effect = 0.05

In [14]:
import math

In [15]:
# z_A = z_α/2 = z_0.025
# calculate z score:
# z=norm.isf(0.05/2)
#z_A = 1.96 

# z_B = z_β = z_0.2
# calculate z score:
# z=norm.isf(0.2)
#z_B = 0.84

In [16]:
from scipy.stats import norm

# Critical values of the standard normal used by the sample-size formula.
standard_normal = norm(loc=0, scale=1)
t_A = abs(standard_normal.ppf(0.025))  # two-sided 5% significance: z_{alpha/2}
t_B = abs(standard_normal.ppf(0.2))    # 80% power: z_{beta}

In [17]:
# Show both critical values, one per line.
print(t_A, t_B, sep='\n')

1.9599639845400545
0.8416212335729142


In [18]:
# Control and treatment conversion rates measured on the full data set
# (0.149616 and 0.1766 respectively).
p1, p2 = conv_A, conv_B

In [19]:
# using formula to calculate optimal sample size

In [20]:
# Per-group sample size for comparing two proportions:
#   n = (z_{a/2} * sqrt(2 * p_bar * (1 - p_bar)) + z_b * sqrt(p1(1-p1) + p2(1-p2)))^2 / (p1 - p2)^2
p1, p2 = conv_A, conv_B  # 0.149616 and 0.1766 on the full data set
p_bar = (p1 + p2) / 2
pooled_term = 2 * p_bar * (1 - p_bar)
unpooled_term = p1 * (1 - p1) + p2 * (1 - p2)
effect = abs(p1 - p2)  # the observed difference is used as the detectable effect

numerator = (t_A * math.sqrt(pooled_term) + t_B * math.sqrt(unpooled_term)) ** 2
# Round up so the result is a whole number of users.
opt_size = math.ceil(numerator / effect ** 2)
opt_size

2942

In [21]:
# the optimal sample size is 2942

# 10 samples

In [22]:
import pandas as pd
import numpy as np

In [23]:
# Read the same CSV file again into a separate frame used for the sampling steps below.
new_ab_testing = pd.read_csv('AB_test_data.csv')

In [24]:
# Summarize the column dtypes and non-null counts of the reloaded frame.
new_ab_testing.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 130000 entries, 0 to 129999
Data columns (total 4 columns):
 #   Column       Non-Null Count   Dtype 
---  ------       --------------   ----- 
 0   purchase_TF  130000 non-null  bool  
 1   Variant      130000 non-null  object
 2   date         130000 non-null  object
 3   id           130000 non-null  object
dtypes: bool(1), object(3)
memory usage: 3.1+ MB


In [25]:
# Contingency table: experiment arm (rows) against purchase outcome (columns).
pd.crosstab(new_ab_testing['Variant'], new_ab_testing['purchase_TF'])

purchase_TF,False,True
Variant,Unnamed: 1_level_1,Unnamed: 2_level_1
A,106298,18702
B,4117,883


In [26]:
# Before we go ahead and sample the data to get our subset, 
#let’s make sure there are no users that have been sampled multiple times.

In [27]:
# Count how many rows each user id has, ordered from most to fewest occurrences.
session_counts = new_ab_testing['id'].value_counts(ascending=False)

In [28]:
# Number of user ids that occur on more than one row.
appears_more_than_once = session_counts > 1
multi_users = appears_more_than_once.sum()

print(f'There are {multi_users} users that appear multiple times in the dataset')

There are 0 users that appear multiple times in the dataset


In [29]:
# Draw 10 random samples of opt_size users per arm, run the one-sided z-test on
# each, and keep every combined sample in `samplelist` for later use.
N_SAMPLES = 10
BASE_SEED = 30  # draw k (0-based) uses random_state = BASE_SEED + k
# Upper-tail 5% critical value of the standard normal (about 1.645), computed
# rather than hard-coded as a rounded constant.
z_critical = norm.ppf(1 - 0.05)

# The per-arm subsets are the same for every draw, so filter them once.
control_pool = new_ab_testing[new_ab_testing['Variant'] == 'A']
treatment_pool = new_ab_testing[new_ab_testing['Variant'] == 'B']

samplelist = []
for i in range(N_SAMPLES):
    control_sample = control_pool.sample(n=opt_size, random_state=BASE_SEED + i)
    treatment_sample = treatment_pool.sample(n=opt_size, random_state=BASE_SEED + i)

    # Conversion rate of each arm within this draw.
    cvr_A = control_sample['purchase_TF'].sum() / len(control_sample)
    cvr_B = treatment_sample['purchase_TF'].sum() / len(treatment_sample)
    # One-sample z statistic with the sampled control rate as the baseline.
    z_score = (cvr_B - cvr_A) / math.sqrt(cvr_A * (1 - cvr_A) / len(treatment_sample))

    if z_score >= z_critical:
        print('The z score is %.2f. Therefore we reject the null hypothesis.'%z_score)
    else:
        print('The z score is %.2f. Therefore we fail to Reject null hypothesis.'%z_score)

    # Control rows first, then treatment rows, re-indexed from 0 in one step.
    ab_test = pd.concat([control_sample, treatment_sample], axis=0, ignore_index=True)
    samplelist.append(ab_test)


The z score is 5.51. Therefore we reject the null hypothesis.
The z score is 2.77. Therefore we reject the null hypothesis.
The z score is 4.08. Therefore we reject the null hypothesis.
The z score is 4.52. Therefore we reject the null hypothesis.
The z score is 6.70. Therefore we reject the null hypothesis.
The z score is 4.41. Therefore we reject the null hypothesis.
The z score is 5.15. Therefore we reject the null hypothesis.
The z score is 3.62. Therefore we reject the null hypothesis.
The z score is 5.54. Therefore we reject the null hypothesis.
The z score is 5.05. Therefore we reject the null hypothesis.


In [30]:
#control_sample = new_ab_testing[new_ab_testing['Variant'] == 'A'].sample(n=opt_size, random_state=68)
#treatment_sample = new_ab_testing[new_ab_testing['Variant'] == 'B'].sample(n=opt_size, random_state=68)
# n must be an int, so the optimal size computed earlier has to go through math.ceil (or int) first

In [31]:
#ab_test = pd.concat([control_sample, treatment_sample], axis=0)
#ab_test.reset_index(drop=True, inplace=True)

In [32]:
#ab_test

# Question 3

In [33]:
# 95% confidence level = (1-α)
# α = 0.05 = Significance Level = probability of type 1 error 
# 80% Statistical Power = (1-β)
# β = 0.2 = probability of type 2 error 

# Baseline Rate = conversionrate_A = 0.149616
# Minimum Detectable Effect = 0.05

In [34]:
# Partition the control arm by outcome: *_T purchased, *_F did not.
control_T = control.loc[control['purchase_TF'].eq(True)]
control_F = control.loc[control['purchase_TF'].eq(False)]

In [35]:
# Rates under each hypothesis: p1 is the control rate (H0, 0.149616) and
# p2 is the treatment rate (H1, 0.1766).
p1, p2 = conv_A, conv_B

In [36]:
# Error rates for the sequential test.
α = 0.05  # significance level (type 1 error probability)
β = 0.2   # type 2 error probability (1 - power)

# Log-scale stopping thresholds: upper = ln(1/α), lower = ln(β).
boundA = np.log(1/α)
boundB = np.log(β)

print("Boundary A is", boundA)
print("Boundary B is", boundB)

Boundary A is 2.995732273553991
Boundary B is -1.6094379124341003


In [37]:
# Sequential test on the treatment arm of each stored sample.
#
# For every sample, m0 and m1 are the conversion rates of its 'A' and 'B' rows.
# The treatment observations are then read one at a time; each one adds
# log(m1/m0) (purchase) or log((1-m1)/(1-m0)) (no purchase) to a running sum.
# The walk stops as soon as the sum reaches boundA (reject H_0) or drops to
# boundB (do not reject H_0). The reported "iteration" is the 0-based position
# of the observation at which the walk stopped.
iteration = 0  # sum of stopping positions over the samples that stopped
time = 0       # number of samples that reached a boundary
for i, cur in enumerate(samplelist, start=1):
    A = cur[cur['Variant'] == 'A']
    B = cur[cur['Variant'] == 'B']
    m0 = A['purchase_TF'].sum() / len(A)
    m1 = B['purchase_TF'].sum() / len(B)

    # Iterate over the actual treatment observations instead of a hard-coded
    # count, and read plain values rather than calling int() on a one-row Series.
    outcomes = B['purchase_TF'].to_numpy()
    curln = 0
    for j, purchased in enumerate(outcomes):
        # The log term is computed only for the branch taken, so an outcome that
        # never occurs in the sample cannot trigger a log/divide error.
        if purchased:
            ln = math.log(m1 / m0)
        else:
            ln = math.log((1 - m1) / (1 - m0))
        curln += ln
        if curln <= boundB:
            print("We will not reject H_0 for sample", i, "on iteration", j)
            break
        elif curln >= boundA:
            print("We will reject H_0 for sample", i, "on iteration", j)
            break
    else:
        # Neither boundary was reached: report it and leave it out of the average.
        print("No decision was reached for sample", i, "after", len(outcomes), "observations")
        continue

    iteration += j
    time += 1

if time:
    average = iteration / time
    print("The average number of iterations is", average)
else:
    # Avoid dividing by zero when no sample stopped at a boundary.
    average = float('nan')
    print("No sample reached a decision boundary, so there is no average to report")



We will reject H_0 for sample 1 on iteration 334
We will not reject H_0 for sample 2 on iteration 457
We will reject H_0 for sample 3 on iteration 853
We will reject H_0 for sample 4 on iteration 254
We will reject H_0 for sample 5 on iteration 475
We will reject H_0 for sample 6 on iteration 587
We will reject H_0 for sample 7 on iteration 191
We will reject H_0 for sample 8 on iteration 384
We will reject H_0 for sample 9 on iteration 464
We will reject H_0 for sample 10 on iteration 346
The average number of iterations is 434.5
