# AB Testing

## Goal:
Determine if a button on a landing page should be updated to a new design.

## Metric:
Click-through rate (CTR). The aim is to increase CTR, which serves as a proxy for (i.e., a measure of) user engagement.

## Business Perspective:
Does updating the button on the landing page lead to higher enrollment numbers?
Increased Engagement -> Increased CTR -> Increased Enrollment

## Practical significance:
For a new button design to be worth the effort to implement, the team is
targeting an increase in CTR of at least 10% with the new design.

In [None]:
# Import libraries
import numpy as np
import pandas as pd
from scipy.stats import norm
import scipy.stats as stats
import seaborn as sns
import matplotlib.pyplot as plt

# Load and review dataset

In [None]:
# Read the click log; the 'timestamp' column is parsed as datetimes
df = pd.read_csv('click_data.csv', parse_dates=['timestamp'])

# First look at the raw rows and at the summary of the int64 columns
print('Head \n', df.head(), '\n')
print('Describe \n', df.describe(include='int64'), '\n')

# Per-group summary: number of distinct users and total number of clicks
group_totals = df.groupby('group').agg({'user_id': 'nunique', 'click': 'sum'})
dfCounts = group_totals.rename(columns={'user_id': 'users'})
print('Counts \n', dfCounts, '\n')


# Per-group click-through rate (CTR) together with its spread
def std_dev(x):
    """Return the standard deviation of x computed with ddof=0."""
    return np.std(x, ddof=0)


def std_err(x):
    """Return the standard error of the mean of x computed with ddof=0."""
    return stats.sem(x, ddof=0)


CTR = df.groupby('group')['click'].agg(['mean', std_dev, std_err])
CTR.columns = ['CTR', 'std_deviation', 'std_error']
print('CTR \n', CTR)

In [None]:
# Plot CTR in Control and Experimental groups
# Colors for the two hue levels of `click` (0 and 1)
palette = {0: 'orange', 1: 'black'}

# Plot click distribution for each group; `order` puts 'con' first (left)
# and 'exp' second (right) on the x axis
plt.figure(figsize=(6, 4))
ax = sns.countplot(x='group', hue='click', data=df,
                   order=['con', 'exp'], palette=palette)
plt.title('Click distributions')
plt.xlabel('Group')
plt.ylabel('Count')
plt.legend(title='Clicked?', labels=['No', 'Yes'])

# Annotate bars with percentages
# Only the first four patches are annotated (2 groups x 2 click values)
for p in ax.patches[0:4]:
    height = p.get_height()
    bar_center = p.get_x() + p.get_width() / 2.
    # Because of `order` above, bars left of x = 0.5 belong to 'con' and bars
    # right of it to 'exp'. The previous mapping was inverted, so each bar was
    # divided by the other group's user count.
    group = 'con' if bar_center < 0.5 else 'exp'
    total = dfCounts.loc[group, 'users']
    # Share of this group's users represented by the bar
    # NOTE(review): assumes one row per user so bar counts and `users` are
    # comparable -- confirm against the dataset.
    percentage = 100 * height / total
    ax.text(bar_center, height + 50, f'{percentage:.0f}%',
            ha='center', color='black', fontsize=10)

# Statistical analysis

In [None]:
# Statistical analysis parameters
# alpha: significance level (5%); delta: minimum detectable effect (10%)
alpha, delta = 0.05, 0.1

for description, value in (
    ('Alpha (the significance level):', alpha),
    ('Delta (the minimum detectable effect; practical significance):', delta),
):
    print(description, value)

## Approach 1

In [None]:
# Clicks (X) and users (N) per group, read from the per-group summary table
X_con, X_exp = (dfCounts.loc[g, 'click'] for g in ('con', 'exp'))
N_con, N_exp = (dfCounts.loc[g, 'users'] for g in ('con', 'exp'))

# Estimated click probability of each group: clicks divided by users
p_con_hat, p_exp_hat = X_con / N_con, X_exp / N_exp

print('Click probability in Control:', p_con_hat)
print('Click probability in Experimental:', p_exp_hat)

# Pooled click probability: all clicks over all users, both groups combined
total_clicks = X_con + X_exp
total_users = N_con + N_exp
p_pooled_hat = total_clicks / total_users
print('Pooled click probability:', p_pooled_hat)

In [None]:
# Pooled variance of the difference in click probabilities:
# p(1 - p) * (1/N_con + 1/N_exp), with p the pooled click probability
bernoulli_variance = p_pooled_hat * (1 - p_pooled_hat)
inverse_sample_sizes = 1 / N_con + 1 / N_exp
pooled_variance = bernoulli_variance * inverse_sample_sizes
print('Pooled variance:', pooled_variance)

In [None]:
# Standard error: square root of the pooled variance
SE = np.sqrt(pooled_variance)
print('Standard error:', SE)

# Z statistic: control minus experimental click probability, in SE units
ctr_difference = p_con_hat - p_exp_hat
Test_stat = ctr_difference / SE
print('Test statistic for Z-test (2 sample):', round(Test_stat, 2))

# Two-sided critical value: the (1 - alpha/2) quantile of the standard normal
upper_quantile = 1-alpha/2
Z_crit = norm.ppf(upper_quantile)
print('Critical Z value for standard normal distribution:', round(Z_crit, 2))

In [None]:
# Two-sided p-value: twice the upper-tail area beyond |Test_stat|
upper_tail_area = norm.sf(abs(Test_stat))
p_value = 2 * upper_tail_area

# Report the p-value and whether it clears the significance level alpha
print(f"P-value of the Z-test (2 sample): {round(p_value, 3)}")

is_significant = p_value <= alpha
verdict = (
    'The experimental results are statistically significant.'
    if is_significant
    else 'The experimental results are NOT statistically significant.'
)
print(verdict)

In [None]:
# Plot results
# Standard normal curve evaluated on 100 points across mu +/- 3 sigma
mu, sigma = 0, 1
x = np.linspace(mu - 3 * sigma, mu + 3 * sigma, num=100)
y = norm.pdf(x, loc=mu, scale=sigma)

# Both tails beyond the critical value make up the rejection region
in_rejection_region = (x > Z_crit) | (x < -Z_crit)

# Curve, shaded rejection region, observed statistic and critical values
plt.plot(x, y, label='Standard Normal Distribution')
plt.fill_between(x, y, where=in_rejection_region, color='red', alpha=0.5, label='Rejection Region')
plt.axvline(Test_stat, color='green', linestyle='dashed', linewidth=2, label=f'Test Statistic= {Test_stat:.2f}')

# Only the positive critical line carries a legend label
critical_line_style = dict(color='blue', linestyle='dashed', linewidth=1)
plt.axvline(Z_crit, label=f'Z-critical = {Z_crit:.2f}', **critical_line_style)
plt.axvline(-Z_crit, **critical_line_style)

plt.xlabel('Z-value')
plt.ylabel('Probability density')
plt.title('Gaussian distribution with rejection region')
plt.legend()
plt.show()

In [None]:
# Confidence interval for a 2-sided Z-test
# Point estimate of the lift (experimental minus control) and its margin
diff_hat = p_exp_hat - p_con_hat
margin = SE * Z_crit

# Keep the unrounded bounds for decisions; round only for display
lower_bound_CI = diff_hat - margin
upper_bound_CI = diff_hat + margin
CI = [round(lower_bound_CI, 3), round(upper_bound_CI, 3)]

print('Confidence interval of the Z-test (2 sample):', CI)

# Return result of practical significance
# Decide on the unrounded lower bound: deciding on the 3-decimal rounded
# value could report a bound just below delta as meeting it.
if lower_bound_CI >= delta:
    print(f'The results are practically significant at delta = {delta}.')
else:
    print(f'The results are NOT practically significant at delta = {delta}.')

## Approach 2

In [None]:
from statsmodels.stats.proportion import proportions_ztest, proportion_confint

# Click outcomes of each group
con_results = df[df['group'] == 'con']['click']
exp_results = df[df['group'] == 'exp']['click']

# Successes (clicks) and observations per group, control first
num_con = con_results.count()
num_exp = exp_results.count()
successes = [con_results.sum(), exp_results.sum()]
nobs = [num_con, num_exp]

# Two-sample Z-test for proportions, plus one confidence interval per group.
# The interval uses the notebook-wide `alpha` rather than a hard-coded 0.05,
# so changing the significance level keeps this section consistent.
z_stat, pval = proportions_ztest(successes, nobs=nobs)
(lower_con, lower_exp), (upper_con, upper_exp) = proportion_confint(successes, nobs=nobs, alpha=alpha)

# Confidence level shown in the labels, derived from alpha (0.05 -> '95%')
confidence_level = f'{1 - alpha:.0%}'

print(f'Z Statistic: {z_stat:.2f}')
print(f'P-Value: {pval:.3f}')
print(f'Confidence interval ({confidence_level}) for control group: [{lower_con:.3f}, {upper_con:.3f}]')
print(f'Confidence interval ({confidence_level}) for experimental group: [{lower_exp:.3f}, {upper_exp:.3f}]')

# Plot confidence intervals
# Each group is drawn as a vertical bar with a short horizontal cap at each end
interval_specs = [
    # (x position, cap x-range, lower bound, upper bound, legend label, color)
    (1, [0.98, 1.02], lower_con, upper_con, 'Control', 'red'),
    (2, [1.98, 2.02], lower_exp, upper_exp, 'Experimental', 'blue'),
]
for x_pos, cap_x, lower, upper, label, color in interval_specs:
    plt.plot([x_pos, x_pos], [lower, upper], label=label, color=color)
    plt.plot(cap_x, [lower, lower], color=color)
    plt.plot(cap_x, [upper, upper], color=color)

# Blank ticks at 0 and 3 pad the axis on either side of the two groups
tick_positions = [0, 1, 2, 3]
tick_labels = ['', 'Control', 'Experimental', '']
plt.xticks(tick_positions, tick_labels)
plt.ylabel('Click through rate')
plt.title('Confidence intervals')
plt.legend(title='Group')
plt.show()

# Determine practical significance
## Gap between the experimental CI's lower bound and the control CI's upper bound
delta_CI = lower_exp - upper_con
meets_delta = delta_CI >= delta
print(
    f'The results are practically significant at delta = {delta}.'
    if meets_delta
    else f'The results are NOT practically significant at delta = {delta}.'
)

## Calculate effect size (Cohen's d)
### Calculate pooled standard deviation
# The pooled standard deviation is the square root of the weighted average of
# the group VARIANCES, so each standard deviation is squared first. Averaging
# the standard deviations themselves (as before) gives a wrong scale for d.
# NOTE(review): CTR['std_deviation'] is computed with ddof=0 in this notebook,
# while the (N - 1) weights assume sample variances; the difference is
# negligible for large groups -- confirm this is acceptable.
var_con = CTR.loc['con', 'std_deviation'] ** 2
var_exp = CTR.loc['exp', 'std_deviation'] ** 2
pooled_std_dev = np.sqrt(((N_con - 1) * var_con + (N_exp - 1) * var_exp) /
                         (N_con + N_exp - 2))

# Signed effect: positive when the experimental CTR is above the control CTR
d = (CTR.loc['exp', 'CTR'] - CTR.loc['con', 'CTR']) / pooled_std_dev

# Determine effect size
# Classify on the magnitude so a negative d still gets a label; previously
# `effect` was never assigned for d < 0 and the print raised NameError.
d_magnitude = abs(d)
if d_magnitude < 0.2:
    effect = 'little or no effect'
elif d_magnitude < 0.5:
    effect = 'small effect size'
elif d_magnitude < 0.8:
    effect = 'medium effect size'
elif d_magnitude >= 0.8:
    effect = 'large effect size'
else:
    # Only reached when d is NaN (e.g. zero pooled standard deviation)
    effect = 'undefined effect size'

print(f"Cohen's d for this experiment is {round(d, 2)}, which describes a {effect}.")
