In [9]:
import pandas as pd
import numpy as np
from scipy import stats

In [10]:
# Load the raw A/B test records from the CSV file (path is relative to the working directory).
df = pd.read_csv('AB_test_data.csv')

In [11]:
# Drop the 'date' column, which the analysis below does not use.
# errors='ignore' keeps this cell safe to re-run: because `df` is overwritten,
# a second execution would otherwise fail with KeyError ('date' already gone).
df = df.drop(columns='date', errors='ignore')

In [6]:
# Preview the first rows of the frame.
df.head()

Unnamed: 0,Variant,purchase_TF,id
0,A,False,0x6f9421
1,A,False,0x59d442
2,A,True,0x6db8f8
3,A,False,0x68245d
4,A,False,0x28566e


In [12]:
# Control group: keep only the rows whose Variant is 'A', then preview them.
groupA = df[df['Variant'].eq('A')]
groupA.head()

Unnamed: 0,Variant,purchase_TF,id
0,A,False,0x6f9421
1,A,False,0x59d442
2,A,True,0x6db8f8
3,A,False,0x68245d
4,A,False,0x28566e


In [13]:
# Treatment group: keep only the rows whose Variant is 'B', then preview them.
groupB = df[df['Variant'].eq('B')]
groupB.head()

Unnamed: 0,Variant,purchase_TF,id
7,B,False,0x724b78
55,B,False,0x83895b
69,B,False,0x202cfc
73,B,False,0x2bc5c5
76,B,True,0x58d794


In [5]:
import scipy
# Frozen normal distribution built with scipy's default parameters (the standard normal);
# later cells call norm.ppf(...) on this shared object to obtain z quantiles.
norm = scipy.stats.norm()

In [26]:
def t_test(A,B,confidence):
    """One-tailed, pooled-variance two-sample t-test of B (treatment) against A (control).

    Assumes equal variances in both groups. Prints whether B's mean is
    significantly larger than A's mean at the given confidence level, prints
    the t statistic, and returns it.

    @param A: the control group (any 1-D sequence/array/Series of numbers or booleans)
    @param B: the treatment group (same kinds of input as A)
    @param confidence: the accepted confidence level, e.g. 0.95
    @return: the t statistic
    @raise ValueError: if either group is empty, or if the two groups together
                       leave no degrees of freedom (one observation each)
    """
    # Convert once to float arrays: works for lists, tuples, numpy arrays and
    # pandas Series regardless of their index labels, and lets numpy do the sums.
    a = np.asarray(A, dtype=float)
    b = np.asarray(B, dtype=float)
    if a.size == 0 or b.size == 0:
        raise ValueError('Both groups must contain at least one observation.')
    dof = a.size + b.size - 2
    if dof < 1:
        raise ValueError('At least three observations in total are needed to pool the variance.')

    x1 = a.mean()
    x2 = b.mean()
    # Pooled standard deviation: summed squared deviations of both groups
    # around their own means, divided by the degrees of freedom.
    squared_dev = ((a - x1) ** 2).sum() + ((b - x2) ** 2).sum()
    s = np.sqrt(squared_dev / dof)
    # If every observation is identical s is 0 and numpy yields inf/nan here
    # (with a RuntimeWarning) instead of raising.
    t = (x2 - x1) / (s * np.sqrt(1 / a.size + 1 / b.size))

    # Compare against the Student-t quantile with `dof` degrees of freedom;
    # a normal quantile is only an approximation that is too lenient for small samples.
    if t >= stats.t.ppf(confidence, dof):
        print('The improvment of B is statistically siginificant.')
    else:
        print('The improvment of B is not statistically siginificant.')
    print('t score is: ', t)
    return t

In [28]:
# Pull the purchase flags of each variant out as plain Python lists,
# then run the one-tailed test with a 0.95 confidence level.
A_list = list(groupA['purchase_TF'])
B_list = list(groupB['purchase_TF'])
t_test(A_list,B_list,0.95)

The improvment of B is statistically siginificant.
t score is:  8.204699796739607


8.204699796739607

In [40]:
def sample_size_estimation(confidence,power,A,B):
    """Estimate the per-group sample size for a one-tailed test with equal group sizes.

    @param confidence: the accepted confidence level
    @param power: the desired power
    @param A: the control group (as a 1-D list)
    @param B: the treatment group (as a 1-D list)
    @return: the minimal sample size for each group (a float, not rounded)

    NOTE(review): `mde` is built as z_power * standard error plus the signed
    observed difference (mean of A minus mean of B) — confirm this is the
    intended definition of the minimum detectable effect.
    """
    count_a, count_b = len(A), len(B)
    rate_a, rate_b = np.mean(A), np.mean(B)
    z_conf = norm.ppf(confidence)
    z_power = norm.ppf(power)

    # Standard error of the difference from the observed (population) variances.
    std_err = np.sqrt(np.var(A)/count_a+np.var(B)/count_b)
    mde = z_power*std_err+(rate_a-rate_b)

    # Pooled rate across both groups.
    pooled = (np.sum(A)+np.sum(B))/(count_a+count_b)
    numerator = (z_conf*np.sqrt(2*pooled*(1-pooled)) + z_power*np.sqrt(rate_a*(1-rate_a)+rate_b*(1-rate_b)))**2
    return numerator/(mde**2)

In [41]:
# Estimate the per-group sample size for 0.95 confidence and 0.8 power
# from the observed purchase flags, and show the (unrounded) result.
A_list = list(groupA['purchase_TF'])
B_list = list(groupB['purchase_TF'])
optimal_size = sample_size_estimation(0.95,0.8,A_list,B_list)
print(optimal_size)

1089.727979264397


In [19]:
from random import sample
import math
def random_t_test(A,B,iterations,size,confidence):
    """Run the one-tailed t_test repeatedly on equally sized random subsamples.

    @param A: the control group (as a 1-D list)
    @param B: the treatment group (as a 1-D list)
    @param iterations: the number of random samplings to perform
    @param size: sample size drawn from each group (rounded up to an integer)
    @param confidence: the accepted confidence level, passed through to t_test
    @return: none (each t_test call prints its own result)
    """
    draw_count = math.ceil(size)
    for _ in range(iterations):
        # Draw without replacement from A first, then from B, and test the pair.
        t_test(sample(A,draw_count),sample(B,draw_count),confidence)



In [20]:
# Repeat the t-test on 10 random subsamples of ceil(optimal_size) rows per group.
# NOTE(review): no random seed is set in the visible cells, so the printed
# results will presumably differ between runs — confirm whether that is intended.
A_list = list(groupA['purchase_TF'])
B_list = list(groupB['purchase_TF'])

random_t_test(A_list,B_list,10,optimal_size,0.95)

t-stat: 1.3465299060063374
The improvment of B is not statistically siginificant with 95% confidence
t-stat: 1.597182087807784
The improvment of B is not statistically siginificant with 95% confidence
t-stat: 3.160706537839517
The improvment of B is statistically siginificant with 95% confidence
t-stat: 1.8814352294188352
The improvment of B is statistically siginificant with 95% confidence
t-stat: 2.2368062556718353
The improvment of B is statistically siginificant with 95% confidence
t-stat: 2.9235982468722783
The improvment of B is statistically siginificant with 95% confidence
t-stat: 1.745101207558235
The improvment of B is statistically siginificant with 95% confidence
t-stat: 4.554758043139889
The improvment of B is statistically siginificant with 95% confidence
t-stat: 2.5404220331710015
The improvment of B is statistically siginificant with 95% confidence
t-stat: 3.1402653058458414
The improvment of B is statistically siginificant with 95% confidence


In [60]:
import random
def sequential_t_test(A,B,iterations,size,alpha,power):
    """Run a sequential (accumulated log-ratio) test on repeated random subsamples.

    For each run, `size` observations are drawn from each group, pooled and
    shuffled; a log ratio is accumulated one observation at a time until it
    leaves the interval (log(1-power), log(1/alpha)) or the pooled sample is
    exhausted. Exactly one outcome line is printed per run.

    @param A: the control group (as a 1-D list)
    @param B: the treatment group (as a 1-D list)
    @param iterations: the number of random samplings (runs)
    @param size: sample size drawn from each group (rounded up to an integer)
    @param alpha: 1 - confidence level
    @param power: power
    @return: average number of observations consumed per run; a run that
             reaches no decision counts the full pooled sample length

    NOTE(review): each step adds log(p_A/p_B) for a truthy observation and
    log((1-p_A)/(1-p_B)) otherwise, over the pooled A+B sample — confirm the
    direction of the ratio and the pooling match the intended hypotheses.
    """
    size = math.ceil(size)
    # Decision boundaries depend only on alpha and power, so compute them once.
    ln_A = np.log(1/alpha)
    ln_B = np.log(1-power)

    iteration = []
    for _ in range(iterations):
        A_sample = sample(A,size)
        B_sample = sample(B,size)
        total_sample = A_sample + B_sample
        random.shuffle(total_sample)
        p_A = np.mean(A_sample)
        p_B = np.mean(B_sample)

        accumulative_log_lamda = 0
        i = 0
        # Stop as soon as a boundary is crossed OR the observations run out.
        # Checking exhaustion here (rather than after the update) means a
        # decision reached on the very last observation is reported only as a
        # decision, not additionally as "Cannot reject or accept H0".
        while ln_B < accumulative_log_lamda < ln_A and i < len(total_sample):
            # The step is evaluated lazily so the unused branch never computes
            # a log of 0 or a division by 0.
            if total_sample[i]:
                accumulative_log_lamda += np.log(p_A/p_B)
            else:
                accumulative_log_lamda += np.log((1-p_A)/(1-p_B))
            i += 1

        if accumulative_log_lamda <= ln_B:
            print('Accept H0 in {} trials'.format(i))
        elif accumulative_log_lamda >= ln_A:
            print('Reject H0 in {} trials'.format(i))
        else:
            print('Cannot reject or accept H0')
        iteration.append(i)
    return np.average(iteration)

In [61]:
# Run the sequential test 10 times on subsamples of ceil(optimal_size) rows per group,
# with alpha = 0.05 and power = 0.8; the cell's value is the average number of trials used.
A_list = list(groupA['purchase_TF'])
B_list = list(groupB['purchase_TF'])
sequential_t_test(A_list,B_list,10,optimal_size,0.05,0.8)

Accept H0 in 139 trials
Accept H0 in 1049 trials
Reject H0 in 638 trials
Accept H0 in 135 trials
Accept H0 in 425 trials
Accept H0 in 38 trials
Reject H0 in 83 trials
Accept H0 in 107 trials
Cannot reject or accept H0
Accept H0 in 261 trials


505.5