In [None]:
import numpy as np
import pandas as pd
from scipy.stats import norm
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Read the raw A/B-test click log into a DataFrame.
df = pd.read_csv(filepath_or_buffer="ab_test_click_data.csv")

# Preview the first five rows to sanity-check the columns.
df.head(n=5)

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for the columns
# that DataFrame.describe() covers by default.
df.describe()

In [None]:
# Number of rows (users) in each value of the 'group' column.
df['group'].value_counts()

In [None]:
# Total number of clicks per group.
# Select the 'click' column explicitly: the first positional argument of
# GroupBy.sum() is `numeric_only`, not a column name, so sum('click') would
# sum every numeric column instead of just the clicks.
df.groupby('group')[['click']].sum()

A bar chart of clicks versus no-clicks in each group gives a clear visual comparison of user engagement.

In [None]:
# Plot clicks vs. no-clicks per group and label every bar with its share of
# the group's users.

# Fix the x-axis order explicitly so the bar -> group lookup below does not
# depend on the order in which the groups happen to appear in the data.
group_order = ['exp', 'con']

plt.figure(figsize=(4,3))
ax = sns.countplot(x='group', hue='click', data=df, order=group_order)
plt.title('Click Distribution in Experimental and Control groups')
plt.legend(labels=['No', 'Yes'])

# Users per group: the denominator of each percentage.
group_counts = df.groupby(['group']).size()
# Per-group / per-click counts; not needed by the annotation loop, kept for inspection.
group_click_counts = df.groupby(['group', 'click']).size().reset_index(name='count')

for p in ax.patches:
    height = p.get_height()
    # Skip empty bars and zero-sized helper patches (they would be labelled "0.0%").
    if not height > 0:
        continue
    # Bars of the i-th group are dodged around x == i, so the rounded bar
    # centre is the index of the group on the x-axis.
    center = p.get_x() + p.get_width() / 2.
    group = group_order[int(round(center))]
    total = group_counts.loc[group]
    percentage = 100 * height / total
    ax.text(center, height + 5, f'{percentage:.1f}%', ha="center")

plt.tight_layout()

Parameters of the model from Power Analysis

            β: Probability of Type II Error
            (1-β): Power of the test
            α: Probability of Type I Error, Significance Level
            δ: Minimum Detectable Effect


In [None]:
# Significance level (5%) and minimum detectable effect (10%) for the test.
alpha, delta = 0.05, 0.1

print("Alpha: Sognificance level is:", alpha)
print("Delta: minimum detectavle effect is:", delta)

In [None]:
# Number of users (rows) in each group: the denominator of the click rate.
# Count rows with a boolean mask; DataFrame.count() would return one count
# per column (a Series) instead of a single number.
N_con = int((df["group"] == "con").sum())
N_exp = int((df["group"] == "exp").sum())

# Total number of clicks per group, obtained by summing the 'click' column.
clicks_per_group = df.groupby("group")["click"].sum()
x_con = clicks_per_group.loc["con"]
x_exp = clicks_per_group.loc["exp"]

print("number of clicks in control group:", x_con)
print("number of clicks in experimental group:", x_exp)

Calculating Pooled Estimates for clicks per group

In [None]:
# Estimated click probability of each group: clicks divided by users.
p_con_hat = x_con / N_con
p_exp_hat = x_exp / N_exp

# Pooled estimate: all clicks over all users, ignoring the group split.
p_pooled_hat = (x_con + x_exp) / (N_con + N_exp)

print("click prob. in control group:", p_con_hat)
print("click prob. in experimental group:", p_exp_hat)
print("pooled click probabilty:", p_pooled_hat)

Calculating pooled variance

In [None]:
# Pooled variance of the difference in proportions:
# p(1 - p) scaled by the sum of the inverse group sizes.
pooled_variance = (
    p_pooled_hat * (1 - p_pooled_hat)
    * (1 / N_con + 1 / N_exp)
)

print("p_hat_pooled: ", p_pooled_hat)
print("pooled_variance is:", pooled_variance)

Calculating Standard Error and Test Statistics

In [None]:
# Standard error of the difference: square root of the pooled variance.
SE = np.sqrt(pooled_variance)

# Z statistic, taken as control minus experimental; the sign does not affect
# the two-sided p-value, which is computed from its absolute value.
Test_stat = (p_con_hat - p_exp_hat) / SE

# Two-sided critical value of the standard normal at level alpha.
Z_crit = norm.ppf(1 - alpha / 2)

print("The standard error is:", SE)
print("Test statistics for 2-sample Z_test is:", Test_stat)
print("Z-critical value from standard Normal Distribution: ", Z_crit)

Calculating the p_values of the Z-test

- A low p-value (p <= 0.05 at 5% significance level) indicates strong evidence against the null hypothesis, so we reject the null hypothesis.
- A high p-value (p > 0.05) indicates weak evidence against the null hypothesis, so we fail to reject the null hypothesis

In [None]:
 #calculating the p value
p_value = norm.sf(abs(Test_stat)) * 2  # two-sided: both tails beyond |Test_stat|

#function checking the statistical significance
def is_statistical_significance(p_value, alpha=0.05):
    """
    Print whether a test result is statistically significant.

    Args:
        p_value (float): The p-value result from a statistical test.
        alpha (float, optional): The significance level threshold; the result
            is reported as significant when ``p_value < alpha``. Defaults to 0.05.

    Returns:
        None. The p-value (rounded to 3 decimals) and the assessment of
        statistical significance are printed.
    """
    # Print the rounded p-value of the test
    print("P-value of the 2-sample Z-test: ",np.round(p_value,3))

    # Determine the statistical significance
    if p_value < alpha:
        print("There is stataistical significance, indicating that the observed differences between the groups are unlikely to have occured by chance alone.")
    else:
        print("There is no statistical significance, suggesting that the observed differences between the groups could have occured by chance. ")
 is_statistical_significance(p_value, alpha)

In [None]:
# Confidence interval for the difference in click probability
# (experimental minus control): difference -/+ Z_crit standard errors,
# each bound rounded to 3 decimals.
CI = [
    round((p_exp_hat - p_con_hat) + sign * SE * Z_crit, 3)
    for sign in (-1, 1)
]
print("Confidence Interval of the 2 sample Z-test is: ", CI)

In [None]:
# Visualise the standard normal distribution, its two-sided rejection region
# and where the observed test statistic falls.

# Parameters for the standard normal distribution
mu = 0  # Mean
sigma = 1  # Standard deviation
x = np.linspace(mu - 3*sigma, mu + 3*sigma, 100)
y = norm.pdf(x, mu, sigma)

# Test_stat and Z_crit are the values computed in the earlier cells. They are
# deliberately NOT re-assigned here: pasted literals would overwrite the
# computed results and silently go stale if the data or alpha change.

# Plotting the standard normal distribution
plt.plot(x, y, label='Standard Normal Distribution')

# Shade the rejection region for a two-tailed test
plt.fill_between(x, y, where=(x > Z_crit) | (x < -Z_crit), color='red', alpha=0.5, label='Rejection Region')

# Adding Test Statistic
plt.axvline(Test_stat, color='green', linestyle='dashed', linewidth=2, label=f'Test Statistic = {Test_stat:.2f}')

# Adding Z-critical values
plt.axvline(Z_crit, color='blue', linestyle='dashed', linewidth=1, label=f'Z-critical = {Z_crit:.2f}')
plt.axvline(-Z_crit, color='blue', linestyle='dashed', linewidth=1)

# Adding labels and title
plt.xlabel('Z-value')
plt.ylabel('Probability Density')
plt.title('Gaussian Distribution with Rejection Region \n (A/B Testing for LunarTech CTA button)')
plt.legend()

Testing for practical Significance in A/B testing

In [None]:
def is_practically_significant(delta, CI_95):
    """Assess whether the difference between the Control and Experimental
    groups is practically significant, using the Minimum Detectable Effect (MDE).

    The difference counts as practically significant when the lower bound of
    the confidence interval is at least as large as the MDE.

    Args:
        delta (float): The MDE.
        CI_95 (tuple): Lower and upper bounds of the 95% confidence interval;
            only the lower bound (index 0) is used.

    Returns:
        bool: True if the lower bound of the interval is >= delta, else False.
        The verdict is also printed.
    """

    # Extract lower bound of 95% CI
    lower_bound_CI = CI_95[0]

    # Check if the lower bound is greater than or equal to delta
    # (the original comparison was inverted: delta >= lower bound).
    if lower_bound_CI >= delta:
        print(f"We have practical significance! \nWith MDE of {delta}, The difference between Control and Experimental Group is practically significant")
        return True
    else:
        print("We dont have practical significance! \nThe fiffernce between Control and Experimental Group is not practically significant")
        return False
    

# Assess practical significance with the MDE (`delta`) chosen in the
# parameters cell and the confidence interval (`CI`) computed from the data.
# Re-assigning `delta` or using a placeholder interval here would detach the
# conclusion from the experiment's actual results.
CI_95 = tuple(CI)

#call the function
signifcance = is_practically_significant(delta, CI_95)
print("Lower bound of 95% confidence interval is:", CI_95[0])

There is a statistically significant difference in CTR between the Experimental version (Enroll Now button) and the Control version (Secure Free Trial button) of the product at the 5% significance level.

There is a practically significant difference in CTR between the Experimental version (Enroll Now button) and the Control version (Secure Free Trial button) of the product at a 10% MDE.
