In [24]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Recursively list every file under the Kaggle input directory, one full path per line.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/ab-testing/ab_data.csv


In [25]:
import pandas as pd
import scipy.stats as stats
import statsmodels.api as sms
import matplotlib.pyplot as plt
import seaborn as sns

# Background
Company XYZ places significant emphasis on the design of its landing page and aims to enhance its conversion rate through improvements in its layout. Historically, the company has maintained an average annual conversion rate of 13%. However, it now seeks to achieve a higher conversion rate with the introduction of a new page design. The company anticipates that the new design will yield a 2-percentage-point increase, resulting in a target conversion rate of 15%. Prior to the official launch of the new page, the company wants its data scientists to conduct a small-scale A/B test to verify that the anticipated improvement is realized.

# Metrics Determination
In this case study, our metric will be CVR (Conversion Rate), as our objective is to raise the CVR by 2 percentage points (from 13% to 15%) with the introduction of the new landing page design.

# A/B Test Design
## Propose a hypothesis
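In this scenario, we will employ a two-tailed test, as we aim to ascertain whether the new design yields a significant difference from the old design before drawing any conclusions regarding whether the conversion rate of the new page is greater than that of the old page. Formally, with $p_0$ and $p$ denoting the conversion rates of the old and new pages:
- $H_0: p = p_0$ (the new design makes no difference)
- $H_1: p \neq p_0$ (the new design changes the conversion rate)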


## Calculate Sample size
We will begin by establishing our key variables:
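- alpha = 0.05 (significance level)
- beta = 0.20 (so power = 1 - beta = 0.80)
- delta = 15% - 13% = 2 percentage points (minimum effect we want to detect)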

In [62]:
import statsmodels.stats.api as sms

# Power-analysis inputs
alpha, power = 0.05, 0.80   # significance level; desired power (1 - beta)
p1, p2 = 0.13, 0.15         # baseline (control) rate; target (treatment) rate

# Standardized effect size between the two proportions
effect_size = sms.proportion_effectsize(p1, p2)

# Solve for the per-group sample size (equal allocation, ratio=1) and round up
# to a whole number of users.
required_n = np.ceil(
    sms.NormalIndPower().solve_power(
        effect_size,
        power=power,
        alpha=alpha,
        ratio=1,
    )
)

print(f"Minimum sample size per group: {required_n}")

Minimum sample size per group: 4720.0


## Calculate Experiment duration

Let's assume the average number of pageviews per day is 1000. In this case, the duration of the experiment would be calculated as follows: (4720 * 2) / 1000 = 9.44, which rounds up to 10 days.

Now, let's explore another example where the average number of pageviews per day is 10000. The required sample (4720 * 2 = 9440 users) would then be reached in under one day, but we would still run the experiment for a minimum of 7 days so that the results cover a full weekly cycle and are not driven by the behavior of any single day.

# Data Collection

In [26]:
# Load the raw A/B test records from the Kaggle input directory into a DataFrame.
df = pd.read_csv('/kaggle/input/ab-testing/ab_data.csv')

In [27]:
# Display the loaded frame (the notebook renders only the first and last rows).
df

Unnamed: 0,user_id,timestamp,group,landing_page,converted
0,851104,2017-01-21 22:11:48.556739,control,old_page,0
1,804228,2017-01-12 08:01:45.159739,control,old_page,0
2,661590,2017-01-11 16:55:06.154213,treatment,new_page,0
3,853541,2017-01-08 18:28:03.143765,treatment,new_page,0
4,864975,2017-01-21 01:52:26.210827,control,old_page,1
...,...,...,...,...,...
294473,751197,2017-01-03 22:28:38.630509,control,old_page,0
294474,945152,2017-01-12 00:51:57.078372,control,old_page,0
294475,734608,2017-01-22 11:45:03.439544,control,old_page,0
294476,697314,2017-01-15 01:20:28.957438,control,old_page,0


In [28]:
# Summarize column dtypes, non-null counts, and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 294478 entries, 0 to 294477
Data columns (total 5 columns):
 #   Column        Non-Null Count   Dtype 
---  ------        --------------   ----- 
 0   user_id       294478 non-null  int64 
 1   timestamp     294478 non-null  object
 2   group         294478 non-null  object
 3   landing_page  294478 non-null  object
 4   converted     294478 non-null  int64 
dtypes: int64(2), object(3)
memory usage: 11.2+ MB


# Data cleaning

In [29]:
# Count missing values per column.
df.isnull().sum()

user_id         0
timestamp       0
group           0
landing_page    0
converted       0
dtype: int64

In [30]:
# Count rows that are exact duplicates (all columns equal) of an earlier row.
df.duplicated().sum()

0

In [31]:
# Count rows whose user_id already appeared in an earlier row,
# i.e. users recorded more than once.
df['user_id'].duplicated().sum()

3894

In [32]:
# Show the repeat rows: second and later occurrences of each user_id
# (duplicated() leaves the first occurrence unmarked by default).
df[df['user_id'].duplicated()]

Unnamed: 0,user_id,timestamp,group,landing_page,converted
2656,698120,2017-01-15 17:13:42.602796,control,old_page,0
2893,773192,2017-01-14 02:55:59.590927,treatment,new_page,0
7500,899953,2017-01-07 03:06:54.068237,control,new_page,0
8036,790934,2017-01-19 08:32:20.329057,treatment,new_page,0
10218,633793,2017-01-17 00:16:00.746561,treatment,old_page,0
...,...,...,...,...,...
294308,905197,2017-01-03 06:56:47.488231,treatment,new_page,0
294309,787083,2017-01-17 00:15:20.950723,control,old_page,0
294328,641570,2017-01-09 21:59:27.695711,control,old_page,0
294331,689637,2017-01-13 11:34:28.339532,control,new_page,0


In [33]:
# Inspect every record for one repeated user (id 633793) to see how its rows differ.
df[df['user_id']==633793] 

Unnamed: 0,user_id,timestamp,group,landing_page,converted
7691,633793,2017-01-17 20:45:03.581306,treatment,new_page,0
10218,633793,2017-01-17 00:16:00.746561,treatment,old_page,0


Spot-checking one of the duplicated user IDs shows a user in the treatment group who was served both the old page and the new page. To maintain the independence of the control and treatment groups, we remove every user whose ID appears more than once from the analysis.

In [34]:
# Collect the user_ids found on repeat rows (second and later occurrences) as an array.
repeat_rows = df['user_id'].duplicated()
del_id = df.loc[repeat_rows, 'user_id'].to_numpy()
del_id

array([698120, 773192, 899953, ..., 641570, 689637, 744456])

In [35]:
# Remove every row belonging to a user_id listed in del_id, so that users who
# appear more than once are excluded entirely (first occurrence included).
is_repeat_user = df['user_id'].isin(del_id)
df = df.loc[~is_repeat_user]
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 286690 entries, 0 to 294477
Data columns (total 5 columns):
 #   Column        Non-Null Count   Dtype 
---  ------        --------------   ----- 
 0   user_id       286690 non-null  int64 
 1   timestamp     286690 non-null  object
 2   group         286690 non-null  object
 3   landing_page  286690 non-null  object
 4   converted     286690 non-null  int64 
dtypes: int64(2), object(3)
memory usage: 13.1+ MB


In [43]:
# Parse the timestamp strings into datetime values.
# `df` is a boolean-filtered subset of the frame that was read from disk, so
# assigning a column in place (df['timestamp'] = ...) triggers pandas'
# SettingWithCopyWarning. assign() returns a new frame with the converted
# column, which avoids the ambiguous write and keeps the cell re-runnable.
df = df.assign(timestamp=pd.to_datetime(df['timestamp']))

In [44]:
# Time span between the earliest and latest record remaining in df.
df['timestamp'].max() - df['timestamp'].min()

Timedelta('21 days 23:59:49.081927')

In [36]:
# Keep only rows where the page shown matches the assigned group:
# control -> old_page, treatment -> new_page. Any other group value maps to
# NaN, which never compares equal, so those rows are dropped as well.
expected_page = df['group'].map({'control': 'old_page', 'treatment': 'new_page'})
df = df[df['landing_page'] == expected_page]

We also want to ensure that the control group views only old_page and the treatment group views only new_page, so rows where group and landing_page do not match are removed.

In [45]:
# Sanity check: cross-tabulate group against landing_page to confirm that
# no mismatched group/page combinations remain.
pd.crosstab(df['group'],df['landing_page'])

landing_page,new_page,old_page
group,Unnamed: 1_level_1,Unnamed: 2_level_1
control,0,143293
treatment,143397,0


# Sampling

In [47]:
# NOTE(review): this overrides the minimum printed by the power analysis (4720)
# with a hard-coded 5000 users per group.
required_n = 5000

# Draw required_n users from each arm, using the same fixed seed for both draws
# so the sample is reproducible.
control_sample, treatment_sample = (
    df[df['group'] == arm].sample(n=required_n, random_state=22)
    for arm in ('control', 'treatment')
)

# Stack control on top of treatment and renumber the rows 0..N-1,
# discarding the original index.
ab_test = pd.concat([control_sample, treatment_sample], axis=0).reset_index(drop=True)
ab_test

Unnamed: 0,user_id,timestamp,group,landing_page,converted
0,763854,2017-01-21 03:43:17.188315,control,old_page,0
1,690555,2017-01-18 06:38:13.079449,control,old_page,0
2,861520,2017-01-06 21:13:40.044766,control,old_page,0
3,630778,2017-01-05 16:42:36.995204,control,old_page,0
4,656634,2017-01-04 15:31:21.676130,control,old_page,0
...,...,...,...,...,...
9995,787786,2017-01-06 18:10:45.577881,treatment,new_page,0
9996,770196,2017-01-24 07:07:24.954319,treatment,new_page,0
9997,775724,2017-01-18 10:54:36.300523,treatment,new_page,0
9998,920254,2017-01-23 00:40:30.728885,treatment,new_page,0


In [48]:
# Confirm the sample holds the requested number of users per group and that
# each group saw only its intended page.
pd.crosstab(ab_test['group'], ab_test['landing_page'])

landing_page,new_page,old_page
group,Unnamed: 1_level_1,Unnamed: 2_level_1
control,0,5000
treatment,5000,0


In [51]:
# Per-group conversion rate (mean of the 0/1 `converted` flag) and its
# standard deviation.
converted_by_group = ab_test.groupby('group')['converted']
conversion_rate = converted_by_group.agg(['mean', 'std'])
conversion_rate


Unnamed: 0_level_0,mean,std
group,Unnamed: 1_level_1,Unnamed: 2_level_1
control,0.1232,0.3287
treatment,0.1266,0.332558


While we noticed a slight increase in the average conversion rate for the treatment group, it's imperative to conduct statistical testing to ascertain the significance of this difference.

# Statistical Testing
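- We use a two-sided two-proportion z-test at alpha = 0.05 and report a 95% confidence interval for each group's conversion rate.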

In [55]:
from statsmodels.stats.proportion import proportions_ztest, proportion_confint

In [56]:
# Split the 0/1 conversion outcomes by experiment arm.
control_result = ab_test.loc[ab_test['group'] == 'control', 'converted']
treatment_result = ab_test.loc[ab_test['group'] == 'treatment', 'converted']

In [59]:
# Observations and conversions per arm, always ordered [control, treatment].
n_con, n_treat = control_result.count(), treatment_result.count()
nobs = [n_con, n_treat]
success = [control_result.sum(), treatment_result.sum()]

# Two-proportion z-test on the conversion counts.
zstat, p_value = proportions_ztest(success, nobs=nobs)

# 95% confidence interval for each arm's conversion rate; the call returns the
# lower bounds and upper bounds as two separate sequences.
ci_lower, ci_upper = proportion_confint(success, nobs=nobs, alpha=0.05)
lower_con, lower_treat = ci_lower
upper_con, upper_treat = ci_upper

print(f"Z-test statistic: {zstat:.2f}")
print(f"P-value: {p_value:.2f}")
print(f"95% CI for control group: [{lower_con:.3f},{upper_con:.3f}]")
print(f"95% CI for treatment group: [{lower_treat:.3f},{upper_treat:.3f}]")

Z-test statistic: -0.51
P-value: 0.61
95% CI for control group: [0.114,0.132]
95% CI for treatment group: [0.117,0.136]


# Result Explanation

Given that the p-value (0.61) substantially exceeds the alpha threshold (0.05), we lack sufficient evidence to reject the null hypothesis. This suggests that the new landing page does not yield a significant difference compared to the old landing page.

Additionally, examining the confidence interval for the treatment group [0.117, 0.136], we find that:
- The baseline conversion rate (0.13) falls within this range.
- However, the target conversion rate (0.15) is outside of this interval.

As a result, we can conclude that the new landing page does not represent a notable improvement and should not be implemented.