In [None]:
# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.
import kagglehub
# Fetch the 'zhangluyuan/ab-testing' dataset through kagglehub and keep the returned value.
# NOTE(review): presumably this is the local directory that holds the downloaded files — confirm.
zhangluyuan_ab_testing_path = kagglehub.dataset_download('zhangluyuan/ab-testing')

print('Data source import complete.')


# **Imports**

Before we start, I would like to say *Thank you* to the author of [this](https://github.com/renatofillinich/ab_test_guide_in_python/blob/master/AB%20testing%20with%20Python.ipynb) notebook!

In [None]:
import numpy as np
import pandas as pd
import scipy.stats as stats
import statsmodels.stats.api as sms
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
from math import ceil

import warnings
# Silence every warning for the rest of the notebook.
# NOTE(review): this also hides deprecation warnings raised by pandas / seaborn calls below.
warnings.filterwarnings('ignore')

# Render matplotlib figures directly below the cell that creates them.
%matplotlib inline

# Global plot styling: white grid background with the "muted" colour palette,
# black edges on patches (e.g. bars), and a larger font / thicker lines for notebook display.
sns.set_theme(style="whitegrid", palette="muted")
plt.rc('patch', edgecolor='black')
sns.set_context("notebook", font_scale=1.3, rc={"lines.linewidth": 1.5})

# **Downloading and exploring the data**

In [None]:
# Load the raw experiment log.
# Prefer the directory that kagglehub downloaded in the first cell, so the notebook
# also runs outside Kaggle; fall back to the Kaggle input mount used originally.
try:
    df = pd.read_csv(f'{zhangluyuan_ab_testing_path}/ab_data.csv')
except FileNotFoundError:
    df = pd.read_csv('../input/ab-testing/ab_data.csv')

# Preview 10 random rows; the fixed seed shows the same rows on every run.
df.sample(10, random_state=42)

In [None]:
# (number of rows, number of columns) of the raw data.
df.shape

In [None]:
# Number of distinct values in each column.
df.nunique()

In [None]:
# Count of missing values per column.
df.isna().sum()

In [None]:
# Column dtypes, non-null counts and memory usage.
df.info()

In [None]:
# Column names as a plain Python list.
df.columns.tolist()

The dataset has 5 columns:
* '**user_id**' contains the user ids
* '**timestamp**' records when the session took place
* '**group**' takes one of 2 values: control or treatment
* '**landing_page**' shows which version of the site the user saw
* '**converted**' describes the user's behavior: whether the user made a purchase (1) or not (0)

In [None]:
# Row counts for every (group, landing_page) combination.
# NOTE(review): presumably meant to spot users who were shown the page of the other group — confirm.
pd.crosstab(df['group'], df['landing_page'])

# **Power analysis**

* To set up the analysis of the A/B test, **let's assume that the conversion rate (number of purchases / number of visits) in our company is 12%, and that we would like to increase it to 14%.** We therefore run an A/B test to check whether the new version of the site really increases the conversion rate.

* To decide how many users we need in order to reject (or fail to reject) the hypothesis, we have to conduct a power analysis.

In [None]:
# Effect size for comparing two proportions: the assumed 12% baseline
# against the 14% conversion rate we hope to reach.
effect_size = sms.proportion_effectsize(0.12, 0.14)

# Solve for the number of observations needed in each group and round up
# to the next whole user.
required_n = ceil(
    sms.NormalIndPower().solve_power(
        effect_size=effect_size,
        alpha=0.05,  # significance level of the test (not a p-value)
        power=0.9,   # desired power of the test (a common default is 0.8)
        ratio=1,     # both groups have the same size
    )
)

print(required_n)

Therefore, we need **5134 users in control** and **5134 users in treatment** group.

# **Choosing users for each group**

In [None]:
# Distinct user ids and timestamps; fewer distinct user_id values than rows
# means some users appear more than once.
df[['user_id', 'timestamp']].nunique()

In [None]:
# Number of rows per user_id, most frequent first.
df.user_id.value_counts()

Some users occur in the dataset more than once. As we have 294478 rows and need only 10268 of them, let's simply drop every user who appears more than once.

In [None]:
# Rows recorded for each user id.
session_counts = df['user_id'].value_counts()

# Ids of users that occur in more than one row.
double_users = session_counts.loc[session_counts > 1].index

double_users

In [None]:
# How many rows belong to users listed in double_users (True) versus everyone else (False).
df['user_id'].isin(double_users).value_counts()

In [None]:
# Keep only the rows whose user_id is NOT among the repeated users, then show the new size.
is_double_user = df['user_id'].isin(double_users)
df = df.loc[~is_double_user]
df.shape

In [None]:
# Draw `required_n` rows at random from each experiment group.
# A fixed seed makes the draw — and every statistic computed from it in the
# cells below — reproducible across "Restart & Run All".
control_group = df.query('group == "control"').sample(required_n, random_state=42)
treatment_group = df.query('group == "treatment"').sample(required_n, random_state=42)

In [None]:
# First rows of the sampled control group.
control_group.head()

In [None]:
# First rows of the sampled treatment group.
treatment_group.head()

In [None]:
# Stack the two samples into one frame (control rows first) and renumber the
# rows 0..n-1, discarding the index values inherited from `df`.
ab_test = pd.concat([control_group, treatment_group], axis=0, ignore_index=True)

In [None]:
# First rows of the combined sample.
ab_test.head()

In [None]:
# Column dtypes and non-null counts of the combined sample.
ab_test.info()

In [None]:
# Number of rows per group in the combined sample.
ab_test.group.value_counts()

In [None]:
std_p = lambda x: np.std(x, ddof=1)              # Sample std. deviation of the 0/1 outcomes
se_p = lambda x: stats.sem(x, ddof=1)            # Std. error of the proportion (std / sqrt(n))

# Per-group summary of the binary `converted` column.
# Named aggregation labels each output column explicitly instead of renaming
# positionally, and the string 'mean' replaces the `np.mean` callable, which
# pandas >= 2.1 deprecates inside `agg`.
conversion_rates = (
    ab_test
    .groupby('group')['converted']
    .agg(conversion_rate='mean', std_deviation=std_p, std_error=se_p)
)

# Show every figure with three decimals.
conversion_rates.style.format('{:.3f}')

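In this sample, the control group has a conversion rate of **12.1%** and the treatment group a conversion rate of **12.4%**. (The groups are drawn at random, so the exact figures change between runs unless the sampling is seeded.)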

In [None]:
# Mean of the 0/1 `converted` column per group, i.e. the conversion rate.
# `errorbar=None` turns error bars off; the older `ci=False` spelling is
# deprecated in seaborn and is not treated as "no error bars" by recent versions.
ax = sns.barplot(data=ab_test, x='group', y='converted', errorbar=None)

ax.set_ylim(0, 0.15)
ax.set_title('Conversion rate by group', pad=20)
ax.set_xlabel('Group', labelpad=15)
ax.set_ylabel('Converted (proportion)', labelpad=15);

So, we see a slight improvement. Let's check whether this difference is **statistically significant**.

# **Z-test and confidence intervals**

In [None]:
from statsmodels.stats.proportion import proportions_ztest, proportion_confint

In [None]:
# Split the `converted` outcomes of the combined sample by experiment group.
control_conversion = ab_test.loc[ab_test['group'] == 'control', 'converted']
treatment_conversion = ab_test.loc[ab_test['group'] == 'treatment', 'converted']

In [None]:
# Conversions and sample sizes, ordered [control, treatment].
successes = [control_conversion.sum(), treatment_conversion.sum()]
nobs = [control_conversion.count(), treatment_conversion.count()]

# z-test on the two proportions.
z_stat, pval = proportions_ztest(successes, nobs=nobs)

# Confidence interval of each group's conversion rate at alpha = 0.05.
# The call returns (lower bounds, upper bounds), each ordered [control, treatment].
lower_bounds, upper_bounds = proportion_confint(successes, nobs=nobs, alpha=0.05)
lower_con, lower_treat = lower_bounds
upper_con, upper_treat = upper_bounds

print(
    f'z statistic: {z_stat:.2f}',
    f'p-value: {pval:.3f}',
    f'95% Confidence interval for control group: [{lower_con:.3f}, {upper_con:.3f}]',
    f'95% Confidence interval for treatment group: [{lower_treat:.3f}, {upper_treat:.3f}]',
    sep='\n',
)

Although at first sight the conversion rate of the treatment group seemed to be slightly higher, **p-value > 0.05 --> we fail to reject the null hypothesis** (that is, we cannot accept the alternative hypothesis): the observed difference is not statistically significant. Besides, the confidence intervals include our baseline value of a 12% conversion rate.

Therefore, we found no evidence that the **new version of the site increases conversion significantly**.
Based on the results of this A/B test, we conclude that it makes no sense for the company to introduce the new version of the site.