In [1]:
import numpy as np
import pandas as pd
from scipy.stats import norm
import seaborn as sns
import matplotlib.pyplot as plt

### Loading the CTR data for A/B Test

In [2]:
df_ab_test = pd.read_csv('ad_click_dataset.csv')

### EDA

In [3]:
df_ab_test

Unnamed: 0,id,full_name,age,gender,device_type,ad_position,browsing_history,time_of_day,click
0,670,User670,22.0,,Desktop,Top,Shopping,Afternoon,1
1,3044,User3044,,Male,Desktop,Top,,,1
2,5912,User5912,41.0,Non-Binary,,Side,Education,Night,1
3,5418,User5418,34.0,Male,,,Entertainment,Evening,1
4,9452,User9452,39.0,Non-Binary,,,Social Media,Morning,0
...,...,...,...,...,...,...,...,...,...
9995,8510,User8510,,,Mobile,Top,Education,,0
9996,7843,User7843,,Female,Desktop,Bottom,Entertainment,,0
9997,3914,User3914,,Male,Mobile,Side,,Morning,0
9998,7924,User7924,,,Desktop,,Shopping,Morning,1


In [4]:
df_ab_test.head()

Unnamed: 0,id,full_name,age,gender,device_type,ad_position,browsing_history,time_of_day,click
0,670,User670,22.0,,Desktop,Top,Shopping,Afternoon,1
1,3044,User3044,,Male,Desktop,Top,,,1
2,5912,User5912,41.0,Non-Binary,,Side,Education,Night,1
3,5418,User5418,34.0,Male,,,Entertainment,Evening,1
4,9452,User9452,39.0,Non-Binary,,,Social Media,Morning,0


In [5]:
df_ab_test.describe()

Unnamed: 0,id,age,click
count,10000.0,5234.0,10000.0
mean,5060.2114,40.197363,0.65
std,2861.758265,13.12642,0.476993
min,5.0,18.0,0.0
25%,2529.0,29.0,0.0
50%,5218.0,39.5,1.0
75%,7466.0,52.0,1.0
max,10000.0,64.0,1.0


In [6]:
df_ab_test.groupby("time_of_day").sum("click")

Unnamed: 0_level_0,id,age,click
time_of_day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Afternoon,10523567,42071.0,1382
Evening,9555097,40097.0,1233
Morning,10903539,45162.0,1414
Night,9657668,41425.0,1187


In [7]:
df_ab_test.groupby("ad_position").sum("click")

Unnamed: 0_level_0,id,age,click
ad_position,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Bottom,13658598,57794.0,1936
Side,13665886,56515.0,1633
Top,13199343,53966.0,1649


In [9]:
# Checking if any columns in the dataset have nulls

df_ab_test.isnull().sum()

id                     0
full_name              0
age                 4766
gender              4693
device_type         2000
ad_position         2000
browsing_history    4782
time_of_day         2000
click                  0
dtype: int64

In [10]:
# Number of rows in the dataset

df_ab_test.shape[0]

10000

In [11]:
# Looking at records where device_type IS NULL

df_ab_test[df_ab_test['device_type'].isnull()]

Unnamed: 0,id,full_name,age,gender,device_type,ad_position,browsing_history,time_of_day,click
2,5912,User5912,41.0,Non-Binary,,Side,Education,Night,1
3,5418,User5418,34.0,Male,,,Entertainment,Evening,1
4,9452,User9452,39.0,Non-Binary,,,Social Media,Morning,0
5,5942,User5942,,Non-Binary,,Bottom,Social Media,Evening,1
9,4509,User4509,,,,Bottom,Education,Afternoon,1
...,...,...,...,...,...,...,...,...,...
9979,8337,User8337,,Female,,Bottom,Entertainment,Night,0
9984,9422,User9422,21.0,,,Side,Entertainment,Night,1
9990,9540,User9540,64.0,Non-Binary,,Side,Education,,0
9993,503,User503,43.0,,,,,Morning,1


In [12]:
df_ab_test.groupby('device_type', dropna=False)[['id']].count()

Unnamed: 0_level_0,id
device_type,Unnamed: 1_level_1
Desktop,2754
Mobile,2649
Tablet,2597
,2000


### Cleaning the dataset

In [13]:
# Dropping rows where device_type is null

df_ab_test.dropna(subset=['device_type'], axis=0, inplace=True)

df_ab_test

Unnamed: 0,id,full_name,age,gender,device_type,ad_position,browsing_history,time_of_day,click
0,670,User670,22.0,,Desktop,Top,Shopping,Afternoon,1
1,3044,User3044,,Male,Desktop,Top,,,1
6,7808,User7808,26.0,Female,Desktop,Top,,,1
7,5065,User5065,40.0,Male,Mobile,Side,,Evening,0
8,7993,User7993,,Non-Binary,Mobile,Bottom,Social Media,,1
...,...,...,...,...,...,...,...,...,...
9995,8510,User8510,,,Mobile,Top,Education,,0
9996,7843,User7843,,Female,Desktop,Bottom,Entertainment,,0
9997,3914,User3914,,Male,Mobile,Side,,Morning,0
9998,7924,User7924,,,Desktop,,Shopping,Morning,1


In [14]:
# Create a group column to identify groups by device_type

df_ab_test['group'] = None # or '', or dtype='object'

# Assign Desktop to group A, and Mobile to group B

df_ab_test.loc[df_ab_test['device_type'] == 'Desktop', 'group'] = 'group_A'
df_ab_test.loc[df_ab_test['device_type'] == 'Mobile', 'group'] = 'group_B'

df_ab_test

Unnamed: 0,id,full_name,age,gender,device_type,ad_position,browsing_history,time_of_day,click,group
0,670,User670,22.0,,Desktop,Top,Shopping,Afternoon,1,group_A
1,3044,User3044,,Male,Desktop,Top,,,1,group_A
6,7808,User7808,26.0,Female,Desktop,Top,,,1,group_A
7,5065,User5065,40.0,Male,Mobile,Side,,Evening,0,group_B
8,7993,User7993,,Non-Binary,Mobile,Bottom,Social Media,,1,group_B
...,...,...,...,...,...,...,...,...,...,...
9995,8510,User8510,,,Mobile,Top,Education,,0,group_B
9996,7843,User7843,,Female,Desktop,Bottom,Entertainment,,0,group_A
9997,3914,User3914,,Male,Mobile,Side,,Morning,0,group_B
9998,7924,User7924,,,Desktop,,Shopping,Morning,1,group_A


### Calculating CTR for each group

In [15]:
# Calculating the total impressions per group

N_con = df_ab_test[df_ab_test["group"] == "group_A"].shape[0]
N_exp = df_ab_test[df_ab_test["group"] == "group_B"].shape[0]

# Printing total impressions per group

print(f"Total impressions for group A (control group): {N_con}")
print(f"Total impressions for group B (experimental group): {N_exp}")

Total impressions for group A (control group): 2754
Total impressions for group B (experimental group): 2649


In [16]:
# Calculating the total number of clicks per group by summing 1's

X_con = df_ab_test.groupby("group")["click"].sum().loc["group_A"]
X_exp = df_ab_test.groupby("group")["click"].sum().loc["group_B"]

# Printing total clicks per group

print(f"Total clicks for group A (control group): {X_con}")
print(f"Total clicks for group B (experimental group): {X_exp}")

Total clicks for group A (control group): 1829
Total clicks for group B (experimental group): 1678


In [17]:
# Calculating CTR per group

p_con_hat = X_con/N_con
p_exp_hat = X_exp/N_exp

# Printing CTR per group

print(f"The CTR of group_A (control group): {p_con_hat}")
print(f"The CTR of group_B (experimental group): {p_exp_hat}")


The CTR of group_A (control group): 0.6641249092229484
The CTR of group_B (experimental group): 0.633446583616459


In [18]:
# Computing the estimate of pooled click probability (value that describes the control and experimental group)

p_pooled_hat = (X_con + X_exp)/(N_con + N_exp)

print(f"The pooled click probability is {p_pooled_hat}")

The pooled click probability is 0.6490838423098279


## Hypothesis Setup for A/B Test

We are running a two-proportion z-test to determine whether there is a statistically significant difference in click-through rates (CTR) between users on **Desktop (group_A)** and **Mobile (group_B)**.

### Null Hypothesis (H₀)
There is **no difference** in click-through rates between the two groups.

$$
H_0: p_{\text{control}} = p_{\text{experimental}}
$$

This means that device type has **no effect** on the likelihood of a user clicking on an ad.

---

### Alternative Hypothesis (H₁)
There **is a difference** in click-through rates between the two groups.

$$
H_1: p_{\text{control}} \ne p_{\text{experimental}}
$$

This is a **two-tailed test**, so we are testing for any significant difference — regardless of direction (higher or lower).

---

### Test Logic

- We calculate a **pooled click probability** under the assumption that H₀ is true.
- Using this pooled rate, we compute a **standard error** and a **z-statistic** to quantify how different the observed CTRs are.
- We calculate a **p-value** to determine the likelihood of seeing such a difference **by chance**, assuming H₀ is true.
- We compare the p-value to our **significance level (α = 0.05)** to decide whether the observed difference is statistically significant.

---

### Decision Rule

- If **p-value < 0.05** → Reject the null hypothesis (statistically significant difference)
- If **p-value ≥ 0.05** → Fail to reject the null hypothesis (no statistically significant difference)

### Calculating Pooled Variance

In [19]:
# Computing the estimate of pooled variance

pooled_variance = p_pooled_hat * (1 - p_pooled_hat) * (1/N_con + 1/N_exp)

print(f"p^_pooled is {p_pooled_hat}")
print(f"Pooled variance is {pooled_variance}")

p^_pooled is 0.6490838423098279
Pooled variance is 0.0001686915144284361


### Calculating Standard Error and Test Statistics

In [22]:
# Computing standard error of the test

SE = np.sqrt(pooled_variance)

print(f"The standard error is {SE}")



The standard error is 0.012988129750985556


In [21]:
# Calculating the z-statistic

z_stat = (p_exp_hat - p_con_hat) / SE

print(f"The z-statistic is {z_stat}")

The z-statistic is -2.362027958964724


### Calculating p-value of the two-tailed Z-test

In [26]:
# Define significance level

alpha = 0.05

In [None]:
# Calculating p-value

p_value = 2 * (1 - norm.cdf(abs(z_stat)))

The p-value is 0.01817527140725783


In [28]:
# Function to check statistical significance

def is_statistically_significant(p_value, alpha):
    """
    We assess whether there is statistical significance based on the p-value and alpha.

    Arguments:
    - p_value (float): The p-value resulting from a statistical test.
    - alpha (float, optional): The significance level threshold used to determine statistical significance. Defaults to 0.05.

    Returns:
    - Prints the assessment of statistical significance.
    """

    # Print the rounded p-value to 3 decimal places
    print(f"The p-value of the 2-tailed Z-test is {round(p_value, 3)}")

    # Determine statistical significance
    if p_value < alpha:
        print("The result is statistically significant - we reject the null hypothesis.")
    else:
        print("The result is not statistically significant - we fail to reject the null hypothesis.")


is_statistically_significant(p_value, alpha)


The p-value of the 2-tailed Z-test is 0.018
The result is statistically significant - we reject the null hypothesis.
