<a href="https://colab.research.google.com/github/helenosss/ab-testing-tool/blob/main/A_B_testing_for_an_online_store.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

The goal of the project is to analyze A/B test results using statistical methods in Python and to create a visualization that demonstrates key conversion metrics.

In [None]:
import pandas as pd
import numpy as np
from statsmodels.stats.proportion import proportions_ztest

In [None]:
# Colab-only step: mount Google Drive at /content/drive so files stored there
# are reachable through the local filesystem.
from google.colab import drive
drive.mount("/content/drive")

Mounted at /content/drive


In [None]:
%cd /content/drive/MyDrive/Files/AB_Testing_Tool

/content/drive/MyDrive/Files/AB_Testing_Tool


In [None]:
# Load the event data. The path is relative, so it is resolved against the
# current working directory.
df = pd.read_csv('events_df.csv')

In [None]:
df.head()

Unnamed: 0,date,country,device,continent,channel,test,test_group,event_name,value
0,2020-11-01,Lithuania,mobile,Europe,Organic Search,2,2,new account,1
1,2020-11-01,El Salvador,desktop,Americas,Social Search,2,1,new account,1
2,2020-11-01,Slovakia,mobile,Europe,Paid Search,2,2,new account,1
3,2020-11-01,Lithuania,desktop,Europe,Paid Search,2,2,new account,1
4,2020-11-02,North Macedonia,desktop,Europe,Direct,2,1,new account,1


In [None]:
# Rename the "new account" event to "new_accounts" so that the column produced
# by pivoting on event_name matches the name used in the metrics list.
df["event_name"] = df["event_name"].replace("new account", "new_accounts")

In [None]:
df.shape

(800996, 9)

In [None]:
# Aggregate the events into one row per (country, device, continent, channel,
# test, test_group) combination, with one column per event name holding the
# summed `value`. Missing combinations are filled with 0 instead of NaN, and
# the grouping keys are turned back into regular columns for the A/B analysis.
table = pd.pivot_table(
    df,
    values='value',
    index=['country', 'device', 'continent', 'channel', 'test', 'test_group'],
    columns='event_name',
    aggfunc='sum',
    fill_value=0,
).reset_index()
table.head()

event_name,country,device,continent,channel,test,test_group,add_payment_info,add_shipping_info,add_to_cart,begin_checkout,...,select_item,select_promotion,session,session with orders,session_start,user_engagement,view_item,view_item_list,view_promotion,view_search_results
0,(not set),desktop,(not set),Direct,1,1,0,0,0,0,...,0,0,15,2,15,30,23,0,6,3
1,(not set),desktop,(not set),Direct,1,2,0,1,0,2,...,0,0,14,1,14,138,69,0,9,2
2,(not set),desktop,(not set),Direct,2,1,0,1,0,2,...,0,0,12,0,12,138,65,0,14,5
3,(not set),desktop,(not set),Direct,2,2,0,0,0,0,...,0,0,19,3,19,36,27,0,3,0
4,(not set),desktop,(not set),Direct,3,1,0,0,0,0,...,0,0,27,2,28,28,7,0,12,1


In [None]:
# Selection of metrics for the analysis:
# event-count columns of the pivot table that are used as conversion numerators.
metrics = ['add_payment_info', 'add_shipping_info', 'begin_checkout', 'new_accounts']
# Name of the pivot-table column used as the conversion denominator.
session_cnt = 'session'

def _compare_groups(numerator_c, denominator_c, numerator_e, denominator_e, alpha=0.05):
    """
    Compare one metric between the control and the experiment group.

    Parameters
    ----------
    numerator_c, denominator_c : number
        Event count and denominator count of the control group.
    numerator_e, denominator_e : number
        Event count and denominator count of the experiment group.
    alpha : float, default 0.05
        Significance level the p-value is compared against.

    Returns
    -------
    dict
        Counts, conversion rates, relative change of the experiment rate versus
        the control rate (in percent), z-statistic, p-value and a plain ``bool``
        significance flag. Values that cannot be computed are ``np.nan`` and the
        flag is ``False`` in that case.
    """
    conversion_c = (numerator_c / denominator_c) if denominator_c != 0 else np.nan
    conversion_e = (numerator_e / denominator_e) if denominator_e != 0 else np.nan

    # The relative change is undefined when either rate is missing or the
    # control rate is zero.
    if pd.notna(conversion_c) and pd.notna(conversion_e) and conversion_c != 0:
        metric_change = ((conversion_e / conversion_c) - 1) * 100
    else:
        metric_change = np.nan

    z_stat, p_value = np.nan, np.nan
    if denominator_c > 0 and denominator_e > 0:
        # The pooled z-test divides by sqrt(p * (1 - p) * (1/n_c + 1/n_e)).
        # A pooled rate of 0 or 1 makes that zero (0/0 or an infinite z), and a
        # pooled rate above 1 (more events than sessions) makes the variance
        # negative. Skip the test in those cases instead of emitting
        # RuntimeWarnings and NaN/inf results.
        pooled_rate = (numerator_c + numerator_e) / (denominator_c + denominator_e)
        if 0 < pooled_rate < 1:
            z_stat, p_value = proportions_ztest(
                [numerator_e, numerator_c],
                [denominator_e, denominator_c],
                alternative='two-sided',
            )

    return {
        "numerator_control": numerator_c,
        "denominator_control": denominator_c,
        "conversion_rate_control": conversion_c,

        "numerator_exp": numerator_e,
        "denominator_exp": denominator_e,
        "conversion_rate_exp": conversion_e,

        "metric_change": metric_change,
        "z_stat": z_stat,
        "p_value": p_value,
        "significant": bool(pd.notna(p_value) and p_value < alpha),
    }


def ab_test(data, metrics, denominator, alpha=0.05):
    """
    Perform A/B test analysis using a two-sided proportions z-test.

    For every value of the ``test`` column the rows with ``test_group == 1``
    (control) are compared with the rows with ``test_group == 2`` (experiment),
    first over all rows ("total") and then separately per country, device,
    continent and channel.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns ``test``, ``test_group``, ``country``,
        ``device``, ``continent``, ``channel``, every column named in
        ``metrics`` and the ``denominator`` column.
    metrics : sequence of str
        Columns whose sums are used as conversion numerators.
    denominator : str
        Column whose sum is used as the conversion denominator. It is also used
        in the ``metric`` label (``"<metric> / <denominator>"``).
    alpha : float, default 0.05
        Significance level for the ``significant`` flag.

    Returns
    -------
    pandas.DataFrame
        One row per (test, segment, metric) with counts, conversion rates,
        relative change in percent, z-statistic, p-value and significance flag.
        Segment values that occur in only one of the two groups are skipped.
        The frame always has the full set of columns, even when it is empty.
    """
    segmentation_levels = ['country', 'device', 'continent', 'channel']
    result_columns = [
        "test_number", "segment_type", "segment_value", "metric",
        "numerator_control", "denominator_control", "conversion_rate_control",
        "numerator_exp", "denominator_exp", "conversion_rate_exp",
        "metric_change", "z_stat", "p_value", "significant",
    ]
    metrics = list(metrics)
    value_columns = metrics + [denominator]
    rows = []

    for test_id in data['test'].unique():
        df_test = data[data['test'] == test_id]

        control = df_test[df_test['test_group'] == 1]
        experiment = df_test[df_test['test_group'] == 2]

        # TOTAL
        denominator_c = control[denominator].sum()
        denominator_e = experiment[denominator].sum()
        for metric in metrics:
            rows.append({
                "test_number": test_id,
                "segment_type": "total",
                "segment_value": "All",
                "metric": f'{metric} / {denominator}',
                **_compare_groups(
                    control[metric].sum(), denominator_c,
                    experiment[metric].sum(), denominator_e,
                    alpha,
                ),
            })

        # SEGMENTS
        for level in segmentation_levels:
            control_segm = control.groupby(level)[value_columns].sum()
            experiment_segm = experiment.groupby(level)[value_columns].sum()

            for seg_value in control_segm.index:
                # A comparison needs both groups; skip one-sided segments.
                if seg_value not in experiment_segm.index:
                    continue

                for metric in metrics:
                    rows.append({
                        "test_number": test_id,
                        "segment_type": level,
                        "segment_value": seg_value,
                        "metric": f'{metric} / {denominator}',
                        **_compare_groups(
                            control_segm.loc[seg_value, metric],
                            control_segm.loc[seg_value, denominator],
                            experiment_segm.loc[seg_value, metric],
                            experiment_segm.loc[seg_value, denominator],
                            alpha,
                        ),
                    })

    return pd.DataFrame(rows, columns=result_columns)

# Run the analysis on the pivoted event counts, using the session count as the
# conversion denominator.
result_df = ab_test(table, metrics, session_cnt)
# Export the results as a tab-separated UTF-8 file without the index column.
result_df.to_csv("ab_test_results.tsv", index=False, sep="\t", encoding="utf-8")

  zstat = value / std
  zstat = value / std
  zstat = value / std
  zstat = value / std
  std_diff = np.sqrt(var_)


In [None]:
result_df.head()

Unnamed: 0,test_number,segment_type,segment_value,metric,numerator_control,denominator_control,conversion_rate_control,numerator_exp,denominator_exp,conversion_rate_exp,metric_change,z_stat,p_value,significant
0,1,total,All,add_payment_info / session,1988,45362,0.043825,2229,45193,0.049322,12.542021,3.924884,8.7e-05,True
1,1,total,All,add_shipping_info / session,3034,45362,0.066884,3221,45193,0.071272,6.560481,2.603571,0.009226,True
2,1,total,All,begin_checkout / session,3784,45362,0.083418,4021,45193,0.088974,6.660587,2.978783,0.002894,True
3,1,total,All,new_accounts / session,3823,45362,0.084278,3681,45193,0.081451,-3.354299,-1.542883,0.122859,False
4,1,country,(not set),add_payment_info / session,16,369,0.04336,19,373,0.050938,17.476542,0.486827,0.626381,False


[Tableau Public Dashboard](https://public.tableau.com/views/ABtest_17617430569810/ABtest?:language=en-US&:sid=&:redirect=auth&:display_count=n&:origin=viz_share_link)

- [TSV file with results](https://drive.google.com/file/d/1fShHu06GlRbX_aWdTFylrzXN_8I-79QJ/view?usp=sharing)