In [3]:
# Introduction

In [6]:
# # Introduction¶
# A/B tests are very commonly performed by data analysts and data scientists. It is important that we get some practice working with the difficulties of these.

# For this project, we will be working to understand the results of an A/B test run by an e-commerce website. Our goal is to work through this notebook to help the company understand if they should implement the new page, keep the old page, or perhaps run the experiment longer to make their decision.

In [7]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import matplotlib.pyplot as plt # basic visualizations 
import seaborn as sns # advanced visualizations

import random
random.seed(42) #We are setting the seed to assure you get the same answers

import warnings
warnings.filterwarnings('ignore')

In [9]:
#read the data and display the top 5 rows
df = pd.read_csv("C:/Users/aruni/Desktop/Python/AB TESTING/ab_test.csv")
df.head()

Unnamed: 0,id,time,con_treat,page,converted
0,851104,11:48.6,control,old_page,0
1,804228,01:45.2,control,old_page,0
2,661590,55:06.2,treatment,new_page,0
3,853541,28:03.1,treatment,new_page,0
4,864975,52:26.2,control,old_page,1


In [10]:
# change column names 
df.columns = ["user_id", "timestamp", "group", "landing_page", "converted"]
df.head()

Unnamed: 0,user_id,timestamp,group,landing_page,converted
0,851104,11:48.6,control,old_page,0
1,804228,01:45.2,control,old_page,0
2,661590,55:06.2,treatment,new_page,0
3,853541,28:03.1,treatment,new_page,0
4,864975,52:26.2,control,old_page,1


In [11]:
#numer of rows and unique users
print(f'Number of rows: {df.shape[0]}')
print(f'Number of unique users: {df.user_id.nunique()}')

Number of rows: 294478
Number of unique users: 290584


In [12]:
#general info
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 294478 entries, 0 to 294477
Data columns (total 5 columns):
 #   Column        Non-Null Count   Dtype 
---  ------        --------------   ----- 
 0   user_id       294478 non-null  int64 
 1   timestamp     294478 non-null  object
 2   group         294478 non-null  object
 3   landing_page  294478 non-null  object
 4   converted     294478 non-null  int64 
dtypes: int64(2), object(3)
memory usage: 11.2+ MB


In [13]:
#missing values
df.isna().sum()

user_id         0
timestamp       0
group           0
landing_page    0
converted       0
dtype: int64

In [14]:
#Does the number of new_page and treatment match?
n_treat = df[df["group"] == "treatment"].shape[0]
n_new_page = df[df["landing_page"] == "new_page"].shape[0]
difference = n_treat - n_new_page

pd.DataFrame({
    'N treatment': [n_treat],
    'N new_page': [n_new_page],
    'Difference': [difference]
})

Unnamed: 0,N treatment,N new_page,Difference
0,147276,147239,37


In [15]:
# lets see those rows 
df[(df["group"] == "treatment") & (df["landing_page"] == "old_page")]

Unnamed: 0,user_id,timestamp,group,landing_page,converted
308,857184,34:59.8,treatment,old_page,0
327,686623,26:40.7,treatment,old_page,0
357,856078,29:30.4,treatment,old_page,0
685,666385,11:54.8,treatment,old_page,0
713,748761,47:44.4,treatment,old_page,0
...,...,...,...,...,...
293773,688144,34:50.5,treatment,old_page,1
293817,876037,15:09.0,treatment,old_page,1
293917,738357,37:55.7,treatment,old_page,0
294014,813406,25:33.2,treatment,old_page,0


In [16]:
df_mismatch = df[(df["group"] == "treatment") & (df["landing_page"] == "old_page")
               |(df["group"] == "control") & (df["landing_page"] == "new_page")]

n_mismatch = df_mismatch.shape[0]

percent_mismatch = round(n_mismatch / len(df) * 100, 2)
print(f'Number of mismatched rows: {n_mismatch} rows')
print(f'Percent of mismatched rows: {percent_mismatch} percent')

Number of mismatched rows: 3893 rows
Percent of mismatched rows: 1.32 percent


In [17]:
df2 = df[(df["group"] == "treatment") & (df["landing_page"] == "new_page")
        |(df["group"] == "control") & (df["landing_page"] == "old_page")]

len(df2)

290585

In [18]:
df2.head()

Unnamed: 0,user_id,timestamp,group,landing_page,converted
0,851104,11:48.6,control,old_page,0
1,804228,01:45.2,control,old_page,0
2,661590,55:06.2,treatment,new_page,0
3,853541,28:03.1,treatment,new_page,0
4,864975,52:26.2,control,old_page,1


In [19]:
# Double Check all of the correct rows were removed - this should be 0
df2[((df2['group'] == 'treatment') == (df2['landing_page'] == 'new_page')) == False].shape[0]

0

In [20]:
# Another way to double Check all of the correct rows were removed 
df_mismatch = df2[(df2["group"] == "treatment") & (df2["landing_page"] == "old_page")
               |(df2["group"] == "control") & (df2["landing_page"] == "new_page")]

n_mismatch = df_mismatch.shape[0]

percent_mismatch = round(n_mismatch / len(df2) * 100, 2)
print(f'Number of mismatched rows: {n_mismatch} rows')
print(f'Percent of mismatched rows: {percent_mismatch} percent')

Number of mismatched rows: 0 rows
Percent of mismatched rows: 0.0 percent


In [21]:
# unique user id in df2 
df2.user_id.nunique()

290584

In [22]:
# number of repeated ids in df2
len(df2) - df2.user_id.nunique()

1

In [23]:
# Display the duplicated row 
df2[df2.duplicated("user_id") == True]

Unnamed: 0,user_id,timestamp,group,landing_page,converted
2893,773192,55:59.6,treatment,new_page,0


In [24]:
#drop the duplicated row
df2 = df2.drop_duplicates("user_id") 

In [25]:
# Douple Check that it is actually dropped
len(df2) - df2.user_id.nunique()

0

In [26]:
# Part II - Probability

In [27]:
# Percent of convergance
# The probability of an individual converting regardless of the page they receive
df2.converted.mean() * 100

11.959708724499627

In [31]:
#Given that an individual was in the control group, what is the probability they converted?
#Given that an individual was in the treatment group, what is the probability they converted?
df2.user_id = df2.user_id.astype(str)
df2.groupby("group").mean() * 100

TypeError: agg function failed [how->mean,dtype->object]

In [30]:
#What is the probability that an individual received the new page?
pd.DataFrame(df2.landing_page.value_counts(normalize = True) * 100)

Unnamed: 0_level_0,proportion
landing_page,Unnamed: 1_level_1
new_page,50.006194
old_page,49.993806


In [32]:
# # Is there a sufficient evidence to conclude that the new treatment page leads to more conversions?¶
# The probability that an individual received the new page is 50%
# The probability of an individual converting regardless of the page they receive is 11.96%
# Given that an individual was in the control group, the probability they converted is 12.04%
# Given that an individual was in the treatment group, the probability they converted is 11.88%
# 1 to 4 suggests that there is no significant difference in convergence between treatment and control groups. Therefore we may conclude that the new treatment page has no impact and does not lead to more conversions.

In [33]:
# Part III - A/B Test

In [35]:
# # Notice that because of the time stamp associated with each event, you could technically run a hypothesis test continuously as each observation was observed.

# However, then the hard question is do you stop as soon as one page is considered significantly better than another or does it need to happen consistently for a certain amount of time? How long do you run to render a decision that neither page is better than another?

# These questions are the difficult parts associated with A/B tests in general.

In [36]:
# For now, consider you need to make the decision just based on all the data provided. If you want to assume that the old page is better unless the new page proves to be definitely better at a Type I error rate of 5%

In [38]:
#

In [39]:
# 

In [None]:
# Creating the sampling distribution of difference in means 
means_diff = []
size = df.shape[0]
for _ in range(10000):
    sample = df2.sample(size, replace = True)
    control_mean = sample[sample["group"] == "control"]["converted"].mean()
    treat_mean = sample[sample["group"] == "treatment"]["converted"].mean()
    means_diff.append(treat_mean - control_mean)

In [None]:
# Plotting the sampling distribution 
plt.figure(figsize = (8,4), dpi = 100)
plt.hist(means_diff, bins = 25)
plt.show()

In [None]:
# Simulate distribution under the null hypothesis
means_diff = np.array(means_diff)
null_vals = np.random.normal(0, means_diff.std(), means_diff.size)

In [None]:
# Plot the null distribution
plt.figure(figsize = (8,4), dpi = 100)
plt.hist(null_vals, bins = 25)
plt.show()

In [None]:
# Plot observed statistic with the null distibution
control_mean = df2[df2["group"] == "control"]["converted"].mean()
treat_mean = df2[df2["group"] == "treatment"]["converted"].mean()
obs_diff = treat_mean - control_mean

plt.figure(figsize = (8,4), dpi = 100)
plt.hist(null_vals, bins = 25)
plt.axvline(obs_diff, c='red')
plt.show()

In [None]:
# calculating the p value 
(null_vals > obs_diff).mean()

In [None]:
import statsmodels.api as sm

convert_old = df2[(df2["converted"] == 1) & (df2["landing_page"] == "old_page")]['user_id'].nunique()
convert_new = df2[(df2["converted"] == 1) & (df2["landing_page"] == "new_page")]['user_id'].nunique()
n_old = df2[df["landing_page"] == "old_page"]['user_id'].nunique()
n_new = df2[df["landing_page"] == "new_page"]['user_id'].nunique()

In [None]:
#Compute test statistic and p-value
z_score, p_value = sm.stats.proportions_ztest(np.array([convert_new,convert_old]),np.array([n_new,n_old]), alternative = 'larger')

In [None]:
# Print Z Score and P_Value
z_score, p_value 

In [None]:
# Using test statistic and p-value, we reach the same coclusion: we can not reject the null

In [None]:
# Part IV - Regression
# In this final part, you will see that the result we achieved in the A/B test Part above can also be achieved by performing regression. Since each row is either a conversion or no conversion, we will use logestic regression to see if there is a significant difference in conversion based on which page a customer receives. However, we first need to create in df2 a column for the intercept, and create a dummy variable column for which page each user received.

In [None]:
# Creat the intercept 
df2["intercept"] = 1
df2.head()

In [None]:
# Create ab_page column, which is 1 when an individual receives the treatment and 0 if control.
df2["ab_page"] = df2.group.apply(lambda x: 1 if (x == "treatment") else 0)
df2.head()

In [None]:
# Instantiate and fit the regression model
model = sm.Logit(df2['converted'], df2[['intercept','ab_page']])
result = model.fit()
result.summary()

In [None]:
# Optimization terminated successfully.
#          Current function value: 0.366118
#          Iterations 6

In [None]:
# The P-Value is 0.190, It is different from the one we obtained from the previous analysis because the null hypothesis is different in both cases.

# We might add additional features to our model such timestamp, the reason for that conversion might differ according to the time at which the user visits the website. But this might come with a disadvantage, the model will become more complicated and less interpretable. It might be also susceptible to overfitting.

# We will leave timestamp for now. Instead, along with testing if the conversion rate changes for different pages, we will also add an effect based on which country a user lives in. we will need to read in the countries.csv dataset and merge together your datasets on the appropriate rows.

In [None]:
# Read the country data
countries = pd.read_csv("../input/ecommerce-ab-testing/countries_ab.csv")
countries.head()

In [None]:
# Merge the countries dataframe with df2 
countries.columns = ["user_id", "country"]
countries["user_id"] = countries["user_id"].astype(str)
df3 = df2.merge(countries, on = "user_id", how = "left")
df3.head()

In [None]:
# creating dummies for country and landing_page columns 
df3[['CA','UK','US']] = pd.get_dummies(df3['country'])
df3[['new_page','old_page']] = pd.get_dummies(df3['landing_page'])
df3.head()

In [None]:
# lest see if there is a relation between country and conversion
pd.pivot_table(data = df3, index = "country", values = "converted").sort_values(by = "converted", ascending = False) * 100

In [None]:
# It seems that coutry has a very minor impact on convergance. we will see its true impact along with ather features in the regression# Instantiate and fit the regression model model = sm.Logit(df2['converted'], df2[['intercept','ab_page']]) result = model.fit() result.summary()# Instantiate and fit the regression model model = sm.Logit(df2['converted'], df2[['intercept','ab_page']]) result = model.fit() result.summary()

In [None]:
# Instantiate and fit the regression model with country as an additional variable: 'CA' is a baseline
model = sm.Logit(df3['converted'], df3[['intercept','ab_page', 'UK','US']])
result = model.fit()
result.summary()

In [None]:
# Optimization terminated successfully.
#          Current function value: 0.366113
#          Iterations 6

In [None]:
# exponentiate the parameters to inteprete the result
np.exp(result.params)

In [None]:
# All the coefficients are statistically insignificant except the intercept. This comes inline with the initial conclusions that we have just made.

In [None]:
# Summary:
# In this notebook we conducted a detailed A/B testing using 3 main methods:

# Sampling distribution
# Z test
# Logestic regression
# All three methods resulted in the same conclusion: the treatment has no impact.