# 0.1 Imports and Loading

In [64]:
import pandas as pd
import math
from statsmodels.stats import api as sms
from scipy.stats import chi2_contingency as chi2

In [2]:
df = pd.read_csv('ab_data.csv')

# 1.0 Experiment Design

## 1.1 Experiment Parameters

H0 : The new page has no effect on the conversion rate

In [15]:
confidence_level = 0.95
significance_level = 0.05

p1= 0.13
p2 = 0.15

effect_size = sms.proportion_effectsize(p1, p2)
power = 0.8

In [16]:
#sample size
sample_n = sms.NormalIndPower().solve_power(effect_size, power=power, alpha=significance_level)
sample_n = math.ceil(sample_n)

print('Control group size: {}'.format(sample_n))
print('Treatment group size: {}'.format(sample_n))
print('Total sample size: {}'.format(2*sample_n))

Control group size: 4720
Treatment group size: 4720
Total sample size: 9440


# 2.0 Data Preparation

In [18]:
print('Number of rows: {}'.format(df.shape[0]))
print('Number of columns: {}'.format(df.shape[1]))

Number of rows: 294478
Number of columns: 5


## 2.1 Checking Flags

In [19]:
df[['user_id','group','landing_page']].groupby(['group','landing_page']).count().reset_index()

Unnamed: 0,group,landing_page,user_id
0,control,new_page,1928
1,control,old_page,145274
2,treatment,new_page,145311
3,treatment,old_page,1965


In [23]:
user_delete = df[['user_id', 'group']].groupby('user_id').count().reset_index().query('group > 1')['user_id']
df1 = df[~df['user_id'].isin(user_delete)]
df1.shape

(286690, 5)

In [24]:
df1[['user_id','group','landing_page']].groupby(['group','landing_page']).count().reset_index()

Unnamed: 0,group,landing_page,user_id
0,control,old_page,143293
1,treatment,new_page,143397


## 2.2 Data Sampling

In [61]:
df_control_sample = df1[df1['group'] == 'control'].sample(sample_n, random_state=42) 
print('Size of control group: {}'.format(df_control_sample.shape[0]))

df_treatment_sample = df1[df1['group'] == 'treatment'].sample(sample_n, random_state=42)
print('Size of treatment group: {}'.format(df_treatment_sample.shape[0]))

df_total = pd.concat([df_control_sample, df_treatment_sample]).reset_index(drop=True)

Size of control group: 4720
Size of treatment group: 4720


# 3.0 Calculating Conversion Rate

In [62]:
sales_control = df_control_sample['converted'].sum()
visit_control = df_control_sample['converted'].count()
cr_control = sales_control/visit_control
print('Conversion rate of the control group: {}'.format(cr_control))

sales_treatment = df_treatment_sample['converted'].sum()
visit_treatment = df_treatment_sample['converted'].count()
cr_treatment = sales_treatment/visit_treatment
print('Conversion rate of the treatment group: {}'.format(cr_treatment))

Conversion rate of the control group: 0.11546610169491525
Conversion rate of the treatment group: 0.12902542372881357


# 4.0 Statistical Hypothesis Testing

In [67]:
df_table = df_total[['group','converted']].groupby('group').agg({'converted':['sum','count']})
df_table.columns = ['converted', 'non_converted']
chi_val, pval, dof, expected = chi2(df_table)

print('p-value: {:.2f}'.format(pval))

if pval < significance_level:
    print('H0 is rejected')
else:
    print('Failed to reject H0')

p-value: 0.08
Failed to reject H0
