In [2]:
import pandas as pd
from scipy.stats import chi2_contingency
import re
import math

In [1]:
# Significance threshold: the chi-squared p-value is compared against it,
# and it is divided by the number of pairwise comparisons for post-hoc testing.
alpha = 0.05

In [None]:
# Defining function for extracting data from csv
def get_clicks(version):
    """Return the click / no-click counts for one version of the page.

    Reads ``../data/eniac_{version.lower()}.csv``, takes the number of visits
    from the 'Snapshot information' column (row labelled 1, i.e. the second
    data row under ``read_csv``'s default index) and the click count of the
    tested button from the 'No. clicks' column.

    Parameters
    ----------
    version : str
        Version label; it is lower-cased to build the file name.

    Returns
    -------
    tuple
        ``(clicked, didnt_click)`` where ``didnt_click`` is the number of
        visits minus ``clicked``.

    Raises
    ------
    IndexError
        If the snapshot text contains no "<digits> visits" fragment, or if no
        row is named 'SEE DEALS' or 'SHOP NOW'.
    """
    page_df = pd.read_csv(f'../data/eniac_{version.lower()}.csv')
    page_info = page_df['Snapshot information'][1]
    # Extract num visits from snapshot info
    # (raw string: '\d' in a plain literal is an invalid escape sequence)
    visits = re.findall(pattern=r'(\d+) visits', string=page_info)[0]
    num_visits = int(visits)
    # Find element being tested (boolean mask over the rows)
    button = page_df['Name'].isin(['SEE DEALS', 'SHOP NOW'])
    # Get clicked and didn't click; only the first matching row is used
    clicked = page_df.loc[button, 'No. clicks'].iloc[0]
    didnt_click = num_visits - clicked
    return clicked, didnt_click

versions = ['A', 'B', 'C', 'D']

# Collecting the (clicked, didn't click) pair for every version
click_counts = {}
for version in versions:
    click_counts[version] = get_clicks(version)

# Contingency table: one column per version, one row per outcome
observed = pd.DataFrame(
    click_counts,
    index=['Clicked', 'Didn\'t Click']
)

observed

In [None]:
# Calculating statistical significance
# Chi-squared test of independence on the full contingency table; the call
# yields the test statistic, p-value, degrees of freedom and expected counts.
chisq, pvalue, df, expected = chi2_contingency(observed)
# Comparing with threshold
# (True means the p-value is below alpha)
pvalue < alpha

In [None]:
# Displaying the p-value itself, not just the comparison against alpha
pvalue

### Post-Hoc Testing


In [None]:
num_versions = len(versions)
# Getting a count of all the ways they could be paired
# (unordered pairs: "n choose 2")
num_comparisons = math.comb(num_versions, 2)
# Dividing alpha by the number of comparisons
# (Bonferroni correction: each pairwise test is held to a stricter threshold
# so the chance of any false positive across all pairs stays at or below alpha)
adjusted_alpha = alpha/num_comparisons

In [None]:
# Click-through rate per version: every count divided by its column total,
# then only the first row (the clicked counts) is kept
column_totals = observed.sum(axis=0)
ctrs = observed.div(column_totals, axis='columns').iloc[0]
ctrs

In [None]:
# Set of all versions (to remove losers from)
contenders = set(versions)

# Looping through ordered pairs and computing significance
for v1 in versions:
    for v2 in versions:
        # A version is never compared with itself
        if v1 == v2:
            continue

        # Running new chi2 test with each pair of versions
        pair_contingency_table = observed[[v1, v2]]
        # Element 1 of the result is the p-value. It gets its own name so the
        # omnibus chisq / pvalue / df / expected from the earlier cell are not
        # overwritten. Note that for 2x2 tables chi2_contingency applies
        # Yates' continuity correction by default (correction=True).
        pair_pvalue = chi2_contingency(pair_contingency_table)[1]

        # If statistically significant, removing loser from contenders
        # (we only need to check 1 direction because the loop will repeat the same pair with v1 and v2 swapped)
        if pair_pvalue < adjusted_alpha and ctrs.loc[v1] > ctrs.loc[v2]:
            # discard() removes the label as a whole; set(v2) would split a
            # multi-character label into single characters and remove nothing
            contenders.discard(v2)
contenders

In [None]:
# Set of all versions (to remove losers from)
contenders = set(versions)

# Looping through each unordered pair exactly once and computing significance
for idx1 in range(num_versions-1): #(0, 1, 2)
    for idx2 in range(idx1+1, num_versions): #(1, 2, 3), (2, 3), (3)

        # Running new chi2 test with each pair of versions
        pair_contingency_table = observed.iloc[:, [idx1, idx2]]
        # Element 1 of the result is the p-value. Indexing avoids assigning to
        # `_` (IPython's last-output variable) and keeps the omnibus `pvalue`
        # intact. For 2x2 tables chi2_contingency applies Yates' continuity
        # correction by default (correction=True).
        pair_pvalue = chi2_contingency(pair_contingency_table)[1]

        # If statistically significant, removing loser from contenders
        if pair_pvalue < adjusted_alpha:
            # Figuring out which is worse...
            loser = idx2 if ctrs.iloc[idx1] > ctrs.iloc[idx2] else idx1
            # and removing it from the contenders; discard() removes the label
            # as a whole, whereas set(label) would split it into characters
            contenders.discard(versions[loser])

contenders

It looks like A and C are better than the others, but the comparison between the two of them was inconclusive: their click-through rates did not differ significantly at the adjusted significance level. In this case we should lean toward sticking with version A.

