In [1]:
# I started by importing the libraries

import pandas as pd
import numpy as np
import math
import statsmodels.stats.api as sms
import scipy.stats as st
import matplotlib.pyplot as plt

In [4]:
# Load the A/B test dataset (obtained from Kaggle) into a DataFrame

df = pd.read_csv("ab_data.csv")

In [6]:
# Initial checks to understand the underlying trends and statistical characteristics of the features

df.head()

Unnamed: 0,user_id,timestamp,group,landing_page,converted
0,851104,11:48.6,control,old_page,0
1,804228,01:45.2,control,old_page,0
2,661590,55:06.2,treatment,new_page,0
3,853541,28:03.1,treatment,new_page,0
4,864975,52:26.2,control,old_page,1


In [7]:
# Flag every column that contains at least one missing value
df.isna().any()

user_id         False
timestamp       False
group           False
landing_page    False
converted       False
dtype: bool

In [8]:
# Fraction of missing values in each column
df.isnull().sum() / len(df)

user_id         0.0
timestamp       0.0
group           0.0
landing_page    0.0
converted       0.0
dtype: float64

In [9]:
# Fraction of missing values in each column (same check as the previous cell,
# expressed as the mean of the boolean null mask)
df.isna().mean()

user_id         0.0
timestamp       0.0
group           0.0
landing_page    0.0
converted       0.0
dtype: float64

In [10]:
# dtype of the group column
df["group"].dtype

dtype('O')

In [11]:
# Report the dataset's dimensions
print("Number of rows:", len(df), "Number of columns:", len(df.columns))

Number of rows: 294478 Number of columns: 5


In [12]:
# Number of rows in each experiment group
df["group"].value_counts()

treatment    147276
control      147202
Name: group, dtype: int64

In [13]:
# Some control-group users saw the new page and some treatment-group users saw the old page.
# Drop the control rows that were served the new page.

control_new_page = df["group"].eq("control") & df["landing_page"].eq("new_page")
Index_to_expunge1 = df.loc[control_new_page].index
df = df.drop(index=Index_to_expunge1)

In [14]:
# Drop the treatment rows that were served the old page.
treatment_old_page = df["group"].eq("treatment") & df["landing_page"].eq("old_page")
Index_to_expunge2 = df.loc[treatment_old_page].index
df = df.drop(index=Index_to_expunge2)

In [16]:
# Dimensions (rows, columns) of df after the mismatched group/landing_page rows were dropped
df.shape


(290585, 5)

In [17]:
# Number of rows left in each experiment group after the mismatched rows were dropped
df["group"].value_counts()

treatment    145311
control      145274
Name: group, dtype: int64

In [20]:
# Count how many rows repeat an already-seen user_id
# (non-null user_id entries minus the number of distinct user_ids)

df["user_id"].count() - df["user_id"].nunique()

1

In [21]:
# Keep only the first row for each user_id so that every row identifies a unique user.
# The result is rebound to df instead of using inplace=True, matching the
# `df = df.drop(...)` pattern of the earlier cleaning cells and avoiding
# in-place mutation of a frame that earlier cells have already displayed.

df = df.drop_duplicates(subset="user_id", keep="first")

In [23]:
# Build a boolean mask for each experiment group, tally conversions and user counts
# per group, then show the percentage split between old-page (control) and
# new-page (treatment) users.

control = df["group"] == "control"
control_converted = df.loc[control, "converted"].sum()
total_users_control = df.loc[control, "converted"].count()

treatment = df["group"] == "treatment"
treatment_converted = df.loc[treatment, "converted"].sum()
total_users_treatment = df.loc[treatment, "converted"].count()

print(
    "Split of control users who saw old page vs treatment users who saw new page:",
    round(total_users_control / df["converted"].count() * 100, 2), '%',
    round(total_users_treatment / df["converted"].count() * 100, 2), '%',
)

Split of control users who saw old page vs treatment users who saw new page: 49.99 % 50.01 %


In [24]:
# Number of converted users and the conversion rate (as a percentage) for each group

print("Users in control group that converted on old page:", control_converted)
print(
    "% of the control group that converted: ",
    round(control_converted / total_users_control * 100, 2),
    "%",
)

print("Users in treatment group that converted on new page:", treatment_converted)
print(
    "% of the treatment group that converted: ",
    round(treatment_converted / total_users_treatment * 100, 2),
    "%",
)

Users in control group that converted on old page: 17489
% of the control group that converted:  12.04 %
Users in treatment group that converted on new page: 17264
% of the treatment group that converted:  11.88 %


In [25]:
# Defining the test parameters
#
# First, check the sample size required to run the experiment.
# proportion_effectsize gives the effect size for a test comparing two proportions;
# it is the input to the power calculation below. Unlike statistical significance,
# which conflates effect size with sample size, the effect size measures the size
# of the difference on its own.
#
# NormalIndPower is the statistical power calculation for a z-test on 2 independent samples.

Baseline_rate = control_converted / total_users_control
Practical_significance = 0.01  # absolute change added to the baseline rate for the effect size
Confidence_level = 0.05  # NOTE: despite the name, this is passed as alpha (significance level)
Sensitivity = 0.8  # passed as the target power

effect_size = sms.proportion_effectsize(
    Baseline_rate,
    Baseline_rate + Practical_significance,
)
sample_size = sms.NormalIndPower().solve_power(
    effect_size=effect_size,
    alpha=Confidence_level,
    power=Sensitivity,
    ratio=1,
)

print("Required sample size:", round(sample_size), "per group")

Required sample size: 17209 per group


In [40]:
# Proceed to the A/B test using the frequentist approach.
# The Bayesian ML approach will be treated later.

# First, the pooled conversion probability: all conversions over all users in both groups

pooled_prob = (
    (control_converted + treatment_converted)
    / (total_users_control + total_users_treatment)
)

print("pooled_prob=", round(pooled_prob, 3))

pooled_prob= 0.12


In [43]:
# Pooled standard error, z-score and margin of error (2-tailed standard normal distribution).
# Each tail = 0.025, therefore z-score = st.norm.ppf(1 - 0.025).
# ppf maps a cumulative probability to its z-score; st.norm.cdf(z_score) maps it back.

pooled_se = math.sqrt(
    pooled_prob * (1 - pooled_prob) * (1 / total_users_control + 1 / total_users_treatment)
)

z_score = st.norm.ppf(1 - Confidence_level / 2)

margin_of_error = z_score * pooled_se


In [45]:
# d_hat: estimated difference between the conversion probabilities (treatment minus control)
#
# A d_hat of about -0.0016 indicates the probability of conversion is higher in the
# control group than in the treatment group. What remains to be checked is whether the
# difference is large enough to reject the null hypothesis.

d_hat = (
    treatment_converted / total_users_treatment
    - control_converted / total_users_control
)

print("d_hat=", d_hat)

d_hat= -0.0015782389853555567


In [46]:
# Decision boundaries: the interval d_hat +/- margin_of_error, needed to decide
# whether the null hypothesis can be rejected

Lower_bound, Upper_bound = d_hat - margin_of_error, d_hat + margin_of_error

In [47]:
# Finally, evaluate the treatment effect using the decision rule:
# the null hypothesis is rejected only when the whole interval lies above
# Practical_significance, i.e. when its lower bound exceeds that threshold.

if Lower_bound > Practical_significance:
    print("Reject the null hypothesis that there treatment effect is insignificant")
else:
    print("Fail to reject the null hypothesis")

print("Lower bound of the confidence interval will be:", round(Lower_bound * 100, 2), "%")
print("Upper bound of the confidence interval will be:", round(Upper_bound * 100, 2), "%")

Fail to reject the null hypothesis
Lower bound of the confidence interval will be: -0.39 %
Upper bound of the confidence interval will be: 0.08 %


In [48]:
# In conclusion, at the 95% confidence level, we fail to reject the null hypothesis that the effect of the treatment on the experiment group is insignificant