In [2]:
import numpy as np
import pandas as pd
from statsmodels.stats.proportion import proportions_ztest

Let's read the data from our dataset and load it into a DataFrame, specifying that columns in the CSV are separated by semicolons:

In [3]:
# Load the raw A/B-test export; the file is semicolon-delimited and UTF-8 encoded.
df = pd.read_csv(
    filepath_or_buffer="ab_test.csv",
    sep=";",
    encoding="utf-8",
)

Now we ensure that the relevant fields have the correct data type (integer), and we extract the list of unique clients:

In [4]:
# Cast both date-part columns to integers in a single call.
df = df.astype({"Month": int, "Year": int})

# Distinct client names; one A/B test is run per entry.
clients = df.loc[:, "Client"].unique()

Then we prepare the list to hold the results and set the significance level:

In [5]:
alpha = 0.05      # significance level used for every test
results = list()  # will collect one result dictionary per client

Now let's define our function to perform an A/B test on proportions:

In [6]:
def ab_test_proportions(x_con, N_con, x_exp, N_exp, alpha=0.05, alternative='two-sided'):
    """Compare two proportions (control vs. experimental) with a z-test.

    Parameters
    ----------
    x_con, N_con
        Number of successes and number of observations in the control group.
    x_exp, N_exp
        Number of successes and number of observations in the experimental group.
    alpha
        Significance level; the null hypothesis is rejected when the
        p-value is less than or equal to this value.
    alternative
        Passed through unchanged to ``proportions_ztest``.

    Returns
    -------
    dict
        ``"z_stat"`` (test statistic), ``"p_value"`` and ``"reject_null"``
        (whether ``p_value <= alpha``).

    Notes
    -----
    Counts are passed in the order (control, experimental); per the
    statsmodels two-sample convention the statistic is therefore based on
    the control proportion minus the experimental proportion.
    """
    # statsmodels expects one array of successes and one of sample sizes
    successes = np.array([x_con, x_exp])
    trials = np.array([N_con, N_exp])

    statistic, pvalue = proportions_ztest(
        successes,
        trials,
        alternative=alternative,
        prop_var=False,
    )

    return dict(
        z_stat=statistic,
        p_value=pvalue,
        reject_null=pvalue <= alpha,
    )


Now we loop through each client, extracting their respective data and performing the A/B test. In this case we perform the test on the Conversion Rate (Orders / Sessions):

In [7]:
# Start from an empty list so that re-running this cell does not append
# duplicate rows to a `results` list left over from a previous execution.
results = []

# The comparison is January 2024 (control) vs. January 2025 (experimental).
# NOTE(review): assumes January is encoded as Month == 1 in ab_test.csv — confirm.
comparison_month = 1

for client in clients:
    # Select this client's rows for the comparison month, then split by year
    client_rows = df[(df["Client"] == client) & (df["Month"] == comparison_month)]
    row_2024 = client_rows[client_rows["Year"] == 2024]
    row_2025 = client_rows[client_rows["Year"] == 2025]

    # Fail with an explicit message instead of an opaque IndexError from .iloc[0]
    if row_2024.empty or row_2025.empty:
        raise ValueError(
            f"Client {client!r} has no row for month {comparison_month} "
            "in 2024 and/or 2025"
        )

    # Extract number of orders (x_con) and sessions (N_con) for the control group (2024)
    x_con = int(row_2024["Orders"].iloc[0])
    N_con = int(row_2024["Sessions"].iloc[0])

    # Extract number of orders (x_exp) and sessions (N_exp) for the experimental group (2025)
    x_exp = int(row_2025["Orders"].iloc[0])
    N_exp = int(row_2025["Sessions"].iloc[0])

    # Perform the A/B test on proportions
    result = ab_test_proportions(x_con, N_con, x_exp, N_exp, alpha=alpha, alternative='two-sided')

    # Add the "Client" metadata to the result dictionary of that specific client
    result["Client"] = client

    # Append the result to the results list, which will contain the results for all clients
    results.append(result)

At this point, our results list holds, for each client, the z-statistic, the p-value, the test outcome and the client name. Let's convert it into a DataFrame, reorder the columns for better readability, and print the results:

In [9]:
# Columns to keep in the final table, in display order
final_columns = ["Client", "z_stat", "p_value", "reject_null"]

# Build the table from the per-client dictionaries and keep only those columns
df_results = pd.DataFrame(results).loc[:, final_columns]

# Plain-text rendering without the row index
print("\nConversion Rate A/B Test results:")
print(df_results.to_string(index=False))


Conversion Rate A/B Test results:
     Client    z_stat       p_value  reject_null
  Cool Care -0.006236  9.950247e-01        False
     Extend -0.913478  3.609913e-01        False
 Frank Veil  1.661501  9.661293e-02        False
     Komodo 27.701063 6.779286e-169         True
  Campari90  4.627506  3.700959e-06         True
  Old Angel  3.225563  1.257251e-03         True
Saint Louis  0.626956  5.306880e-01        False


By slightly modifying the last two cells, we can adapt the code to other KPIs and repeat the process for every other metric on which we need to perform A/B testing.