# 0.0. IMPORTS

In [1]:
import numpy                 as np 
import pandas                as pd
import scipy.stats           as stats
import statsmodels.stats.api as sms
import matplotlib            as mpl
import matplotlib.pyplot     as plt
import seaborn               as sns

from math                         import ceil
from statsmodels.stats.proportion import proportions_ztest, proportion_confint

In [2]:
%matplotlib inline

# Some plot styling preferences
plt.style.use('seaborn-whitegrid')
font = {'family' : 'Helvetica',
        'weight' : 'bold',
        'size'   : 14}

mpl.rc('font', **font)

## 0.1. Aux Functions

## 0.2. Reading Datasets

In [3]:
# Load the raw A/B test records from the CSV at this relative path.
ab_data_path = "../datasets/ab_data.csv"
df = pd.read_csv(ab_data_path)

# 1.0. Power Analysis

In [39]:
# Effect size implied by the two expected conversion rates (13% vs 15%).
effect_size = sms.proportion_effectsize(0.13, 0.15)

# Solve for the per-group sample size with equally sized groups (ratio=1),
# then round up because a fraction of an observation cannot be collected.
# NOTE(review): alpha is 0.01 here, whereas the confidence intervals and the
# written conclusion in section 3.0 use 0.05 — confirm which level is intended.
required_n = ceil(
    sms.NormalIndPower().solve_power(
        effect_size=effect_size,
        alpha=0.01,
        power=0.8,
        ratio=1,
    )
)

print(f"{required_n} observations are required for each group.")

10712 observations are required for each group.


# 2.0. Data Cleaning

## 2.1. Data Description

In [40]:
# Summarize the frame: index range, column names, non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 286690 entries, 0 to 294477
Data columns (total 5 columns):
 #   Column        Non-Null Count   Dtype 
---  ------        --------------   ----- 
 0   user_id       286690 non-null  int64 
 1   timestamp     286690 non-null  object
 2   group         286690 non-null  object
 3   landing_page  286690 non-null  object
 4   converted     286690 non-null  int64 
dtypes: int64(2), object(3)
memory usage: 13.1+ MB


In [41]:
# Preview the first rows of the dataset.
df.head()

Unnamed: 0,user_id,timestamp,group,landing_page,converted
0,851104,2017-01-21 22:11:48.556739,control,old_page,0
1,804228,2017-01-12 08:01:45.159739,control,old_page,0
2,661590,2017-01-11 16:55:06.154213,treatment,new_page,0
3,853541,2017-01-08 18:28:03.143765,treatment,new_page,0
4,864975,2017-01-21 01:52:26.210827,control,old_page,1


## 2.2. Data Cleaning

In [42]:
# Cross-tabulate group against landing page: any non-zero off-diagonal cell
# would mean users of a group were shown the other group's page.
pd.crosstab(index=df['group'], columns=df['landing_page'])

landing_page,new_page,old_page
group,Unnamed: 1_level_1,Unnamed: 2_level_1
control,0,143293
treatment,143397,0


In [43]:
# Rows per user_id, most frequent first (the default ordering of value_counts).
session_counts = df['user_id'].value_counts()
# How many users own more than one row.
multi_users = (session_counts > 1).sum()

print(f'There are {multi_users} users that appear multiple times in the dataset')

There are 0 users that appear multiple times in the dataset


In [44]:
# Remove every user that has more than one row, so that nobody can end up
# counted in both groups.
multi_users_list = session_counts.loc[session_counts > 1].index
is_multi_user = df['user_id'].isin(multi_users_list)
df = df.loc[~is_multi_user]

In [45]:
# Same group-by-page table after the cleanup: repeated users are gone now.
pd.crosstab(index=df['group'], columns=df['landing_page'])

landing_page,new_page,old_page
group,Unnamed: 1_level_1,Unnamed: 2_level_1
control,0,143293
treatment,143397,0


## 2.3. Sampling Data

In [46]:
# Draw required_n rows from each arm; the fixed seed keeps the draw repeatable.
control_group = df.loc[df['group'] == "control"].sample(n=required_n, random_state=22)
treatment_group = df.loc[df['group'] == "treatment"].sample(n=required_n, random_state=22)

# Stack control on top of treatment and renumber the rows from zero.
df_sampled = pd.concat([control_group, treatment_group], axis=0).reset_index(drop=True)

In [47]:
# Population and sample conversion rates

# Mean of the 0/1 `converted` flag per arm, over the full cleaned data ...
population_control_conversion = df.loc[df['group'] == "control", "converted"].mean()
population_treatment_conversion = df.loc[df['group'] == "treatment", "converted"].mean()
# ... and over the drawn sample.
sample_control_conversion = df_sampled.loc[df_sampled['group'] == "control", "converted"].mean()
sample_treatment_conversion = df_sampled.loc[df_sampled['group'] == "treatment", "converted"].mean()

# One column per arm, one row per data source.
data = {
    'control': [population_control_conversion, sample_control_conversion],
    'treatment': [population_treatment_conversion, sample_treatment_conversion],
}

conversion_df = pd.DataFrame(data=data, index=['population', 'sample'])
conversion_df.style.format('{:.3f}')

Unnamed: 0,control,treatment
population,0.12,0.119
sample,0.124,0.121


## 3.0. Testing the Hypothesis

In [48]:
# Conversion flags of the sampled rows, split by experiment arm.
control_results = df_sampled.loc[df_sampled['group'] == 'control', 'converted']
treatment_results = df_sampled.loc[df_sampled['group'] == 'treatment', 'converted']

In [49]:
# Non-null observations per arm, and the number of conversions in each;
# both lists are ordered (control, treatment).
n_con, n_treat = control_results.count(), treatment_results.count()
nobs = [n_con, n_treat]
successes = [control_results.sum(), treatment_results.sum()]

In [50]:
# z-test comparing the two conversion proportions.
z_stat, pval = proportions_ztest(successes, nobs=nobs)

# Confidence bounds at alpha=0.05; each returned sequence is ordered
# (control, treatment), matching `successes` and `nobs`.
ci_lower, ci_upper = proportion_confint(successes, nobs=nobs, alpha=0.05)
lower_con, lower_treat = ci_lower
upper_con, upper_treat = ci_upper

report_lines = [
    f'z statistic: {z_stat:.2f}',
    f'p-value: {pval:.3f}',
    f'ci 95% for control group: [{lower_con:.3f}, {upper_con:.3f}]',
    f'ci 95% for treatment group: [{lower_treat:.3f}, {upper_treat:.3f}]',
]
print('\n'.join(report_lines))

z statistic: 0.56
p-value: 0.573
ci 95% for control group: [0.117, 0.130]
ci 95% for treatment group: [0.115, 0.127]


Since our p-value=0.573 is way above our α=0.05 threshold, we cannot reject the null hypothesis H₀, which means that our new design did not perform significantly differently (let alone better) than our old one.