# Imports

In [1]:
import pandas as pd
import numpy as np
from scipy.stats import chi2_contingency
import re

# 1. Hypothesis

* Ho: All versions of buttons have same click rate.
* Ha: All versions of buttons do not have same click rate.

# 2. alpha

In [2]:
alpha = 0.05

# 3. Collect Data

In [3]:
data_folder_path = "/content/drive/MyDrive/Colab Notebooks/WBS - Bootcamp/A B Test/data"

In [4]:
# Defining function for extracting data from csv
def get_clicks(version):
    version_csv_path = data_folder_path + "/" + f"eniac_{version.lower()}.csv"
    page_df = pd.read_csv(version_csv_path)

    page_info = page_df['Snapshot information'][1]
    # Extract num visits from snapshot info
    visits = re.findall(pattern='(\d+) visits', string=page_info)[0]
    num_visits = int(visits)
    # Find element being tested
    button = page_df['Name'].isin(['SEE DEALS', 'SHOP NOW'])
    # Get clicked and didn't click
    clicked = page_df.loc[button, 'No. clicks'].iloc[0]
    didnt_click = num_visits - clicked
    return clicked, didnt_click

versions = ['A', 'B', 'C', 'D']

# Looping through versions, importing csv and gathering data in dict
clicks_data = {}
for version in versions:
    clicks_data[version] = get_clicks(version)

# Converting dict to df
observed = pd.DataFrame(clicks_data, index=['Clicked', 'Didn\'t Click'])
observed

  visits = re.findall(pattern='(\d+) visits', string=page_info)[0]


Unnamed: 0,A,B,C,D
Clicked,512,281,527,193
Didn't Click,24814,24466,24349,25040


# 4. Calculate the Test Results

In [5]:
chisq, pvalue, df, expected = chi2_contingency(observed)
print(f"Chi2: {chisq}\nP-value: {pvalue}")

Chi2: 224.01877488058412
P-value: 2.7161216607868712e-48


# 5. Interpret the Test Results

In [6]:
if pvalue < alpha:
  print("Reject the NULL Hypothesis.")
else:
  print("Fail to Reject NULL Hypothesis.")

Reject the NULL Hypothesis.


# 6. Post-hoc Test

In [7]:
post_hoc_pairs = {"A" : ["B", "C", "D"]}

In [8]:
new_alpha = alpha / sum([len(value) for value in post_hoc_pairs.values()])
new_alpha

0.016666666666666666

In [9]:
pvalue_results = {}

for version_x, check_versions in post_hoc_pairs.items():
  for version_y in check_versions:
    comparision_name = f"{version_x}-{version_y}"

    observed_comparision = observed.loc[:, [version_x, version_y]]
    _, pvalue, _, _ = chi2_contingency(observed_comparision)

    pvalue_results[comparision_name] = float(round(pvalue, 4))

pvalue_results

{'A-B': 0.0, 'A-C': 0.4648, 'A-D': 0.0}

In [10]:
for comparision_name, pvalue in pvalue_results.items():
  if pvalue < new_alpha:
    print(f"Reject the NULL Hypothesis for {comparision_name}")
  else:
    print(f"Fail to Reject the NULL Hypothesis for {comparision_name}")

Reject the NULL Hypothesis for A-B
Fail to Reject the NULL Hypothesis for A-C
Reject the NULL Hypothesis for A-D


# 7. Retest A-B-D Versions

In [11]:
observed_abd = observed.loc[:, ["A", "B", "D"]]

_, pvalue, _, _ = chi2_contingency(observed_abd)

if pvalue < alpha:
  print("Reject the NULL Hypothesis.")
else:
  print("Fail to Reject NULL Hypothesis.")

Reject the NULL Hypothesis.


# 8. After Remove

In [13]:
observed_removed = observed.loc[:, ["A", "C", "D"]]

np.float64(49.0)

In [14]:
drop_off_a = observed_removed.loc["Clicked", "A"] * 9.7 // 100
home_return_a = observed_removed.loc["Clicked", "A"] * 5.3 // 100
total_remove_a = drop_off_a + home_return_a

observed_removed.loc["Clicked", "A"] -= total_remove_a
observed_removed.loc["Didn't Click", "A"] += total_remove_a

In [15]:
drop_off_c = observed_removed.loc["Clicked", "C"] * 12.3 // 100
home_return_c = observed_removed.loc["Clicked", "C"] * 4.6 // 100
total_remove_c = drop_off_c + home_return_c

observed_removed.loc["Clicked", "C"] -= total_remove_c
observed_removed.loc["Didn't Click", "C"] += total_remove_c

In [16]:
drop_off_d = observed_removed.loc["Clicked", "D"] * 9.7 // 100
home_return_d = observed_removed.loc["Clicked", "D"] * 5.3 // 100
total_remove_d = drop_off_d + home_return_d

observed_removed.loc["Clicked", "D"] -= total_remove_d
observed_removed.loc["Didn't Click", "D"] += total_remove_d

In [17]:
observed_removed

Unnamed: 0,A,C,D
Clicked,436,439,165
Didn't Click,24890,24437,25068


In [18]:
_, pvalue, _, _ = chi2_contingency(observed_removed)
pvalue

np.float64(1.4271735052767685e-32)

In [19]:
if pvalue > alpha:
  print("Fail to Reject NULL Hypothesis")
else:
  print("Reject the NULL Hypothesis")

Reject the NULL Hypothesis


# 9. New Post-hoc Test

In [20]:
post_hoc_pairs = {"A" : ["C", "D"]}

In [21]:
new_alpha = alpha / sum([len(value) for value in post_hoc_pairs.values()])
new_alpha

0.025

In [22]:
pvalue_results = {}

for version_x, check_versions in post_hoc_pairs.items():
  for version_y in check_versions:
    comparision_name = f"{version_x}-{version_y}"

    observed_comparision = observed_removed.loc[:, [version_x, version_y]]
    _, pvalue, _, _ = chi2_contingency(observed_comparision)

    pvalue_results[comparision_name] = float(round(pvalue, 4))

pvalue_results

{'A-C': 0.7371, 'A-D': 0.0}