### AB Testing Kit
* [Guideline for AB testing](https://www.kaggle.com/code/ekrembayar/a-b-testing-step-by-step-hypothesis-testing)

### Import libraries

In [None]:
from statsmodels.stats.proportion import proportion_effectsize, proportions_ztest
from statsmodels.stats.power import NormalIndPower, TTestIndPower, zt_ind_solve_power
from scipy.stats import norm, ttest_ind
import numpy as np
import math
import statistics as st
from scipy import stats
import pandas as pd
from tabulate import tabulate
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
import csv

### Data prepping

##### Import data

In [None]:
# Load the raw A/B-test export into `df_ab`, the frame the later cells read from.
# NOTE(review): absolute, machine-specific Windows path - the notebook only
# runs on a machine where this exact file exists.
df_ab = pd.read_csv(
    filepath_or_buffer=r"C:\Users\Master\Documents\data_analytics\globox\ab_test_final.csv"
)

##### Check data

In [None]:
def check_df(dataframe, head):
    """Print a plain-text overview of ``dataframe`` to stdout.

    Sections, in order: per-column info (dtype, non-null count, unique count,
    duplicate count, missing count), row and column counts, ``describe()``
    with the default percentiles, ``describe()`` with custom percentiles, and
    the first ``head`` rows.

    Args:
        dataframe: pandas DataFrame to summarise.
        head: number of leading rows shown in the HEAD section.

    Returns:
        None. The report is only printed.
    """
    def banner(title, fill):
        # Section title centred in a 70-character rule, preceded by a blank line.
        return "\n" + title.center(70, fill)

    print(banner(" DATAFRAME SUMMARY ", '='))

    print(banner(" INFO ", '-'))
    # 'Duplicate' is a single number (how many columns repeat an earlier
    # column) broadcast to every row, not a per-column flag.
    overview = dataframe.dtypes.to_frame(name='Dtype').assign(**{
        'Non-Null': dataframe.notnull().sum(),
        'Unique': dataframe.nunique(),
        'Duplicate': dataframe.T.duplicated().sum(),
        'Missing': dataframe.isnull().sum(),
    })
    print(overview)

    row_count, column_count = dataframe.shape[0], dataframe.shape[1]
    print(f'\nRows: {row_count}')
    print(f'Columns: {column_count}')

    print(banner(" DESCRIBE ", '-'))
    print(dataframe.describe().T)

    print(banner(" PERCENTILES ", '-'))
    print(dataframe.describe(percentiles=[0, 0.05, 0.50, 0.95, 0.99, 1]).T)

    print(banner(" HEAD ", '-'))
    print(dataframe.head(head))
# check_df prints its report and returns None, so it is called directly;
# wrapping the call in display() would only append a stray "None" to the output.
check_df(df_ab, 1)

##### Summary table

In [None]:
# Summary table: one row per (test_group, country_name), one 'Subtotal' row per
# test_group and a single 'Grand Total' row; the result is written to a
# timestamped CSV and displayed.
column_order = ['users', 'conversions', 'conversion_rate', 'total_spend_USD', 'avg_spend_USD']


def _summarise(grouped):
    """Aggregate a df_ab groupby into the summary columns, ordered by column_order."""
    table = grouped.agg(
        conversions=('conversion', 'sum'),
        total_spend_USD=('spend_USD', 'sum'),
        users=('user_id', 'nunique'),
    )
    table['conversion_rate'] = table['conversions'] / table['users']
    table['avg_spend_USD'] = table['total_spend_USD'] / table['users']
    return table[column_order]


# Per test_group and country_name
summary = _summarise(df_ab.groupby(['test_group', 'country_name']))

# Per test_group; relabelled as (test_group, 'Subtotal') so the index lines up with `summary`
sub = _summarise(df_ab.groupby('test_group'))
sub.index = pd.MultiIndex.from_tuples([(group, 'Subtotal') for group in sub.index])

# Whole data set, as a one-row frame indexed ('Grand Total', '')
total = pd.DataFrame(
    {
        'conversions': [df_ab['conversion'].sum()],
        'total_spend_USD': [df_ab['spend_USD'].sum()],
        'users': [df_ab['user_id'].nunique()],
    }
).assign(
    conversion_rate=lambda t: t['conversions'] / t['users'],
    avg_spend_USD=lambda t: t['total_spend_USD'] / t['users'],
)
total.index = pd.MultiIndex.from_tuples([('Grand Total', '')])
total = total[column_order]

# Stack detail rows, subtotals and grand total
result = pd.concat([summary, sub, total])

# Export: a new file per run, named with the current timestamp
# NOTE(review): absolute, machine-specific output directory.
file_path = f"C:\\Users\\Master\\Documents\\data_analytics\\globox\\summary_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.csv"
result.to_csv(file_path)
display(result.head(200))

##### Country table

In [None]:
# Country view: users, conversions and spend per (country_name, test_group).
print("Country View:")
# The helper column 'lifespan_conversion' (lifespan multiplied by the conversion
# value; presumably conversion is a 0/1 flag - confirm) is added with .assign(),
# which works on a temporary copy. df_ab itself is never modified, so a failure
# part-way through this cell cannot leave a stray column behind.
test = (
    df_ab
    .assign(lifespan_conversion=df_ab['user_lifespan_days'] * df_ab['conversion'])
    .groupby(['country_name', 'test_group'])
    .agg(
        users=('user_id', 'count'),
        conversions=('conversion', 'sum'),
        conversion_rate=('conversion', 'mean'),
        total_spend=('spend_USD', 'sum'),
        avg_spend=('spend_USD', 'mean'),
        lifespan=('user_lifespan_days', 'mean'),
        lifespan_conversion=('lifespan_conversion', 'sum'),
    )
)
# Turn the lifespan sum into an average per conversion
test['lifespan_conversion'] = test['lifespan_conversion'] / test['conversions']
test['unconverted'] = test['users'] - test['conversions']
test['avg_spend_conversion'] = test['total_spend'] / test['conversions']
test = test[['users', 'conversions', 'unconverted', 'conversion_rate', 'total_spend', 'avg_spend', 'avg_spend_conversion', 'lifespan', 'lifespan_conversion']]
test = test.sort_values(by='users', ascending=False)
display(test)

print("\nUSA Summary:")
# Rows whose first index level (country_name) is 'USA'; raises KeyError if there are none
usa = test.loc['USA']
display(usa)

##### Average conversion rate and per user $ spend by groups

In [None]:
# General conversions and spend per test_group.
# The helper column 'lifespan_conversion' (lifespan multiplied by the conversion
# value; presumably conversion is a 0/1 flag - confirm) is added with .assign(),
# which works on a temporary copy. df_ab itself is never modified, so a failure
# part-way through this cell cannot leave a stray column behind.
test = (
    df_ab
    .assign(lifespan_conversion=df_ab['user_lifespan_days'] * df_ab['conversion'])
    .groupby(['test_group'])
    .agg(
        users=('user_id', 'count'),
        conversions=('conversion', 'sum'),
        conversion_rate=('conversion', 'mean'),
        total_spend=('spend_USD', 'sum'),
        avg_spend=('spend_USD', 'mean'),
        lifespan=('user_lifespan_days', 'mean'),
        lifespan_conversion=('lifespan_conversion', 'sum'),
    )
)
# Turn the lifespan sum into an average per conversion
test['lifespan_conversion'] = test['lifespan_conversion'] / test['conversions']
test['unconverted'] = test['users'] - test['conversions']
test['avg_spend_conversion'] = test['total_spend'] / test['conversions']
test = test[['users', 'conversions', 'unconverted', 'conversion_rate', 'total_spend', 'avg_spend', 'avg_spend_conversion', 'lifespan', 'lifespan_conversion']]
display(test)

In [None]:
# Device conversions and spend per (device, test_group).
# The helper column 'lifespan_conversion' (lifespan multiplied by the conversion
# value; presumably conversion is a 0/1 flag - confirm) is added with .assign(),
# which works on a temporary copy. df_ab itself is never modified, so a failure
# part-way through this cell cannot leave a stray column behind.
test = (
    df_ab
    .assign(lifespan_conversion=df_ab['user_lifespan_days'] * df_ab['conversion'])
    .groupby(['device', 'test_group'])
    .agg(
        users=('user_id', 'count'),
        conversions=('conversion', 'sum'),
        conversion_rate=('conversion', 'mean'),
        total_spend=('spend_USD', 'sum'),
        avg_spend=('spend_USD', 'mean'),
        lifespan=('user_lifespan_days', 'mean'),
        lifespan_conversion=('lifespan_conversion', 'sum'),
    )
)
# Turn the lifespan sum into an average per conversion
test['lifespan_conversion'] = test['lifespan_conversion'] / test['conversions']
test['unconverted'] = test['users'] - test['conversions']
test['avg_spend_conversion'] = test['total_spend'] / test['conversions']
test = test[['users', 'conversions', 'unconverted', 'conversion_rate', 'total_spend', 'avg_spend', 'avg_spend_conversion', 'lifespan', 'lifespan_conversion']]
display(test)

In [None]:
# Gender conversions and spend per (gender, test_group).
# The helper column 'lifespan_conversion' (lifespan multiplied by the conversion
# value; presumably conversion is a 0/1 flag - confirm) is added with .assign(),
# which works on a temporary copy. df_ab itself is never modified, so a failure
# part-way through this cell cannot leave a stray column behind.
test = (
    df_ab
    .assign(lifespan_conversion=df_ab['user_lifespan_days'] * df_ab['conversion'])
    .groupby(['gender', 'test_group'])
    .agg(
        users=('user_id', 'count'),
        conversions=('conversion', 'sum'),
        conversion_rate=('conversion', 'mean'),
        total_spend=('spend_USD', 'sum'),
        avg_spend=('spend_USD', 'mean'),
        lifespan=('user_lifespan_days', 'mean'),
        lifespan_conversion=('lifespan_conversion', 'sum'),
    )
)
# Turn the lifespan sum into an average per conversion
test['lifespan_conversion'] = test['lifespan_conversion'] / test['conversions']
test['unconverted'] = test['users'] - test['conversions']
test['avg_spend_conversion'] = test['total_spend'] / test['conversions']
test = test[['users', 'conversions', 'unconverted', 'conversion_rate', 'total_spend', 'avg_spend', 'avg_spend_conversion', 'lifespan', 'lifespan_conversion']]
display(test)

In [None]:
# Cohort conversions and spend per (cohort_month, test_group).
# The helper column 'lifespan_conversion' (lifespan multiplied by the conversion
# value; presumably conversion is a 0/1 flag - confirm) is added with .assign(),
# which works on a temporary copy. df_ab itself is never modified, so a failure
# part-way through this cell cannot leave a stray column behind.
test = (
    df_ab
    .assign(lifespan_conversion=df_ab['user_lifespan_days'] * df_ab['conversion'])
    .groupby(['cohort_month', 'test_group'])
    .agg(
        users=('user_id', 'count'),
        conversions=('conversion', 'sum'),
        conversion_rate=('conversion', 'mean'),
        total_spend=('spend_USD', 'sum'),
        avg_spend=('spend_USD', 'mean'),
        lifespan=('user_lifespan_days', 'mean'),
        lifespan_conversion=('lifespan_conversion', 'sum'),
    )
)
# Turn the lifespan sum into an average per conversion
test['lifespan_conversion'] = test['lifespan_conversion'] / test['conversions']
test['unconverted'] = test['users'] - test['conversions']
test['avg_spend_conversion'] = test['total_spend'] / test['conversions']
test = test[['users', 'conversions', 'unconverted', 'conversion_rate', 'total_spend', 'avg_spend', 'avg_spend_conversion', 'lifespan', 'lifespan_conversion']]
display(test)

In [None]:
# Converted users only (conversion == 1): row count, mean spend and mean
# lifespan per (test_group, country_name, gender, device).
convert = (
    df_ab.loc[df_ab['conversion'] == 1]
    .groupby(['test_group', 'country_name', 'gender', 'device'])
    .agg(
        users=('user_id', 'count'),
        avg_spend=('spend_USD', 'mean'),
        lifespan=('user_lifespan_days', 'mean'),
    )
)
# Export: a new file per run, named with the current timestamp
# NOTE(review): absolute, machine-specific output directory.
file_path = f"C:\\Users\\Master\\Documents\\data_analytics\\globox\\convert_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.csv"
convert.to_csv(file_path)
convert.head(200)

### Programs
* Application guidelines
    * MDE (minimum detectable effect) is expressed as a relative change versus control, e.g. 0.129 means a 12.9% change
    * t_test: difference in means
    * z_test: difference in proportions (large sample, > 30 observations)
    * z_test_clt: difference in means, only if the Central Limit Theorem applies (sample size > 30), i.e. a t-test without degrees of freedom - decommissioned, but available in an older version
    * chi_sq_test: difference in proportions (small sample, < 30 observations) - not built
* Sources
    * [Link](https://www.cuemath.com/data/z-test/) pooled proportions se (z-test) 
    * [Link](https://cms.master.school/confidence-interval-and-hypothesis-testing-cheat-sheet) unpooled proportions se (z-test)
    * [Link](https://online.stat.psu.edu/stat500/lesson/7/7.3/7.3.1/7.3.1.1) pooled se & df (t-test)
    * [Link](https://online.stat.psu.edu/stat500/lesson/7/7.3/7.3.1/7.3.1.2) unpooled se (t-test)
    * [Link](https://www.statology.org/satterthwaite-approximation/) unpooled df (t-test)

In [None]:
def z_test(control, treatment, alpha, pooled, full_report, scope_note, target_mde, power):
    """Run a two-sample z-test for a difference in proportions and print a report.

    Parameters
    ----------
    control, treatment : array-like
        Per-observation outcomes of group A and group B. The standard-error
        formulas treat each group mean as a proportion, so the values are
        presumably 0/1 flags - confirm against callers.
    alpha : float
        Significance level.
    pooled : bool
        True uses the pooled-proportion standard error, False the unpooled one.
    full_report : bool
        When True, also print the hypotheses, conclusions, descriptive and
        inferential tables and the sample-size notes.
    scope_note : str
        Label echoed in the report header and in the export table.
    target_mde : float
        Minimum detectable effect, as a relative change applied to the control mean.
    power : float
        Statistical power used for the sample-size calculations.

    Returns
    -------
    None
        Everything is printed to stdout.
    """
    # convert series objects to NumPy arrays
    control = np.asarray(control)
    treatment = np.asarray(treatment)
    # summary stats (np.std is used with its default ddof=0)
    control_mean = np.mean(control)
    control_std = np.std(control)
    control_size = len(control)
    treatment_mean = np.mean(treatment)
    treatment_std = np.std(treatment)
    treatment_size = len(treatment)
    combined_size = control_size + treatment_size
    combined_value = control_size * control_mean + treatment_size * treatment_mean
    p = combined_value / combined_size  # pooled proportion across both groups
    combined_mean = p
    combined_std = np.sqrt(((control_size - 1) * control_std**2 + (treatment_size - 1) * treatment_std**2) / (combined_size - 2))
    # observed difference (treatment - control) and the proportion implied by the target MDE
    sample_stat = treatment_mean - control_mean
    relative_change = sample_stat / control_mean
    pb = control_mean * (1 + target_mde)
    # standard error and Cohen's d depend on the pooling choice
    if pooled:
        se = np.sqrt(p * (1 - p) * (1 / control_size + 1 / treatment_size))
        cohen_d = sample_stat / np.sqrt(p * (1 - p))
    else:
        se = np.sqrt(control_mean * (1 - control_mean) / control_size + treatment_mean * (1 - treatment_mean) / treatment_size)
        cohen_d = sample_stat / np.sqrt((control_std**2 + treatment_std**2) / 2)
    # Required sample sizes. solve_power returns the size of ONE group (nobs1,
    # with ratio=1 the other group is the same size), so the combined size is
    # twice that - for the pooled and the unpooled test alike.
    solver = NormalIndPower()
    effect_size = proportion_effectsize(control_mean, treatment_mean)
    min_combined_sample_size = 2 * solver.solve_power(effect_size=effect_size, alpha=alpha, power=power, ratio=1)
    theoretical_effect_size = proportion_effectsize(control_mean, pb)
    try_combined_sample_size = 2 * solver.solve_power(effect_size=theoretical_effect_size, alpha=alpha, power=power, ratio=1)
    # Cohen's thresholds apply to the magnitude of d: a large negative effect is still large
    d_magnitude = abs(cohen_d)
    practical_significance = "large" if d_magnitude >= 0.8 else 'medium' if d_magnitude >= 0.5 else 'small' if d_magnitude >= 0.2 else "little_effect"
    test_stat = sample_stat / se
    # perform one-tailed test (tail taken in the direction of the observed effect)
    p_value_1tail = stats.norm.sf(abs(test_stat))
    critical_value_1tail = stats.norm.ppf(1 - alpha)
    me_1tail = critical_value_1tail * se
    decision_1tail = "Reject_H0" if p_value_1tail <= alpha else "Fail_to_reject_H0"
    # perform two-tailed test
    p_value_2tail = stats.norm.sf(abs(test_stat)) * 2
    critical_value_2tail = stats.norm.ppf(1 - alpha/2)
    me_2tail = critical_value_2tail * se
    decision_2tail = "Reject_H0" if p_value_2tail <= alpha else "Fail_to_reject_H0"
    # interval limits shared by the report and the export table
    lower_1tail, upper_1tail = sample_stat - me_1tail, sample_stat + me_1tail
    lower_2tail, upper_2tail = sample_stat - me_2tail, sample_stat + me_2tail
    # columns that are identical in the 1-tailed and 2-tailed rows
    group_cols = [control_size, treatment_size, f"{np.sum(control):.5f}", f"{np.sum(treatment):.5f}",
                  f"{control_mean:.5f}", f"{treatment_mean:.5f}", f"{control_std:.5f}", f"{treatment_std:.5f}"]
    # sample sizes are shown per group, i.e. half of the combined size
    effect_cols = [cohen_d, practical_significance,
                   f"{relative_change:.5f}", f"{0.5*min_combined_sample_size:.0f}",
                   target_mde, f"{0.5*try_combined_sample_size:.0f}"]
    # report
    print("Start".center(100, '*'))
    print(f"{'Pooled' if pooled else 'Unpooled'} Z-Test (difference in proportions): {scope_note} scope")
    # create table of descriptive statistics
    desc_head = ["Descriptive (group): ", "Sample size", "Total value", "Mean", "Standard deviation"]
    desc_stat = [["Control (A)", control_size, f"{np.sum(control):.5f}", f"{control_mean:.5f}", f"{control_std:.5f}"],
                 ["Treatment (B)", treatment_size, f"{np.sum(treatment):.5f}", f"{treatment_mean:.5f}", f"{treatment_std:.5f}"],
                 ["Combined", combined_size, f"{combined_value:.5f}", f"{combined_mean:.5f}", f"{combined_std:.5f}"]]
    inf_head = ["Inferential (test): ", "Conclusion", "Significance level (α)", "Test statistic", "P-value", "Confidence interval",
                "Cohen's d", "Practical significance", "Relative change", "Sample size req",
                "Target MDE%", "Equiv. sample size"]
    inf_stat = [["1-tailed", decision_1tail, alpha, f"{test_stat:.15f}", f"{p_value_1tail:.10f}",
                 f"(>{lower_1tail:.5f} or <{upper_1tail:.5f})", *effect_cols],
                ["2-tailed", decision_2tail, alpha, f"{test_stat:.15f}", f"{p_value_2tail:.10f}",
                 f"({lower_2tail:.5f},{upper_2tail:.5f})", *effect_cols]]
    if full_report:
        print()
        print("Null hypothesis (H0): There is no significant effect size difference between Control (A) and Treatment (B)")
        print("Alternative hypothesis (H1): There is significant effect size difference between Control (A) and Treatment (B)\n")
        print(f"Conclusion (1-tail): {decision_1tail}, since p-value ({p_value_1tail:.10f}) {'<=' if p_value_1tail <= alpha else '>'} significance level ({alpha})")
        print(f"Conclusion (2-tail): {decision_2tail}, since p-value ({p_value_2tail:.10f}) {'<=' if p_value_2tail <= alpha else '>'} significance level ({alpha})\n")
        print(tabulate(desc_stat, headers=desc_head))
        print()
        print(tabulate(inf_stat, headers=inf_head))
        print(f"{'Sufficient' if combined_size >=  min_combined_sample_size else 'Insufficient'} sample size for observed relative change, which requires combined sample size of {min_combined_sample_size:.0f}")
        print(f"Combined sample size of {try_combined_sample_size:.0f} required to detect a relative change as small as {target_mde} (at statistical power of {power}, significance level of {alpha})")
    # output for visualization
    headers = ["Scope","Test", "Conclusion", "α", "Z*/T*", "SE", "MOE",
               "sample_stat", "test-stat", "p-value",
               "CI", "Lower_limit", "Upper_limit",
               "A_#", "B_#", "A_value", "B_value",
               "A_x̄", "B_x̄", "A_σ", "B_σ",
               "cohen_d", "practical_sig",
               "relative_change", "min_req_sample_size",
               "target_MDE", "MDE_equiv_sample_size"]
    data = [[scope_note, f"1-tail_z_test_{'pooled' if pooled else 'unpooled'}", decision_1tail, alpha, f"{critical_value_1tail:.6f}", f"{se:.5f}", f"{me_1tail:.5f}",
             f"{sample_stat:.15f}", f"{test_stat:.15f}", f"{p_value_1tail:.10f}",
             f"(>{lower_1tail:.5f}/<{upper_1tail:.5f})", f"{lower_1tail:.5f}", f"{upper_1tail:.5f}",
             *group_cols, *effect_cols],
            [scope_note, f"2-tail_z_test_{'pooled' if pooled else 'unpooled'}", decision_2tail, alpha, f"{critical_value_2tail:.6f}", f"{se:.5f}", f"{me_2tail:.5f}",
             f"{sample_stat:.15f}", f"{test_stat:.15f}", f"{p_value_2tail:.10f}",
             f"({lower_2tail:.5f},{upper_2tail:.5f})", f"{lower_2tail:.5f}", f"{upper_2tail:.5f}",
             *group_cols, *effect_cols]]
    print("\nEXPORT FOR VISUALIZATION:")
    print(tabulate(data, headers=headers))
    print("End".center(100, '*') + "\n")

In [None]:
def t_test(control, treatment, alpha, pooled, full_report, scope_note, target_mde, power):
    """Run a two-sample t-test for a difference in means and print a report.

    Parameters
    ----------
    control, treatment : array-like
        Per-observation values of group A and group B.
    alpha : float
        Significance level.
    pooled : bool
        True runs the equal-variance test with the pooled standard error and
        n1 + n2 - 2 degrees of freedom; False runs the unequal-variance test
        with the unpooled standard error and Satterthwaite degrees of freedom.
    full_report : bool
        When True, also print the hypotheses, conclusions, descriptive and
        inferential tables and the sample-size notes.
    scope_note : str
        Label echoed in the report header and in the export table.
    target_mde : float
        Minimum detectable effect, as a relative change applied to the control mean.
    power : float
        Statistical power used for the sample-size calculations.

    Returns
    -------
    None
        Everything is printed to stdout.
    """
    # convert series objects to NumPy arrays
    control = np.asarray(control)
    treatment = np.asarray(treatment)
    # summary stats; ddof=1 (sample variance) so the standard error, degrees of
    # freedom and intervals agree with the statistic returned by ttest_ind
    control_mean = np.mean(control)
    control_var = np.var(control, ddof=1)
    control_std = np.sqrt(control_var)
    control_size = len(control)
    treatment_mean = np.mean(treatment)
    treatment_var = np.var(treatment, ddof=1)
    treatment_std = np.sqrt(treatment_var)
    treatment_size = len(treatment)
    combined_size = control_size + treatment_size
    combined_value = control_size * control_mean + treatment_size * treatment_mean
    combined_mean = combined_value / combined_size
    pooled_var = ((control_size - 1) * control_var + (treatment_size - 1) * treatment_var) / (combined_size - 2)
    combined_std = np.sqrt(pooled_var)
    # observed difference (treatment - control) and pro-forma treatment array implied by the target MDE
    sample_stat = treatment_mean - control_mean
    relative_change = sample_stat / control_mean
    mb = control * (1 + target_mde)
    # treatment is passed first so the sign of test_stat matches sample_stat
    test_stat, p_value_2tail = ttest_ind(treatment, control, equal_var=pooled)
    # standard error, degrees of freedom and Cohen's d depend on the pooling choice
    if pooled:
        se = np.sqrt(pooled_var * (1 / control_size + 1 / treatment_size))
        dof = combined_size - 2
        cohen_d = sample_stat / np.sqrt(pooled_var)
    else:
        control_term = control_var / control_size
        treatment_term = treatment_var / treatment_size
        se = np.sqrt(control_term + treatment_term)
        # Satterthwaite approximation
        dof = (control_term + treatment_term)**2 / (control_term**2 / (control_size - 1) + treatment_term**2 / (treatment_size - 1))
        cohen_d = sample_stat / np.sqrt((control_var + treatment_var) / 2)
    # Required sample sizes. The power solver expects a standardized effect
    # size (difference in means divided by a standard deviation) and returns
    # the size of ONE group (nobs1, ratio=1), so the combined size is twice that.
    solver = TTestIndPower()
    effect_size = abs(cohen_d)
    min_combined_sample_size = 2 * solver.solve_power(effect_size=effect_size, alpha=alpha, power=power, ratio=1)
    # target MDE: absolute shift control_mean * target_mde, standardized with the
    # average variance of the control and the pro-forma treatment array
    theoretical_effect_size = abs(control_mean * target_mde) / np.sqrt((control_var + np.var(mb, ddof=1)) / 2)
    try_combined_sample_size = 2 * solver.solve_power(effect_size=theoretical_effect_size, alpha=alpha, power=power, ratio=1)
    # Cohen's thresholds apply to the magnitude of d: a large negative effect is still large
    d_magnitude = abs(cohen_d)
    practical_significance = "large" if d_magnitude >= 0.8 else 'medium' if d_magnitude >= 0.5 else 'small' if d_magnitude >= 0.2 else "little_effect"
    # perform one-tailed test (tail taken in the direction of the observed effect)
    p_value_1tail = stats.t.sf(abs(test_stat), dof)
    critical_value_1tail = stats.t.ppf(1 - alpha, dof)
    me_1tail = critical_value_1tail * se
    # '<=' so the decision always agrees with the comparison printed in the conclusion lines
    decision_1tail = "Reject_H0" if p_value_1tail <= alpha else "Fail_to_reject_H0"
    # perform two-tailed test (p-value comes from ttest_ind)
    critical_value_2tail = stats.t.ppf(1 - alpha/2, dof)
    me_2tail = critical_value_2tail * se
    decision_2tail = "Reject_H0" if p_value_2tail <= alpha else "Fail_to_reject_H0"
    # interval limits shared by the report and the export table
    lower_1tail, upper_1tail = sample_stat - me_1tail, sample_stat + me_1tail
    lower_2tail, upper_2tail = sample_stat - me_2tail, sample_stat + me_2tail
    # columns that are identical in the 1-tailed and 2-tailed rows
    group_cols = [control_size, treatment_size, f"{np.sum(control):.5f}", f"{np.sum(treatment):.5f}",
                  f"{control_mean:.5f}", f"{treatment_mean:.5f}", f"{control_std:.5f}", f"{treatment_std:.5f}"]
    # sample sizes are shown per group, i.e. half of the combined size
    effect_cols = [cohen_d, practical_significance,
                   f"{relative_change:.5f}", f"{0.5*min_combined_sample_size:.0f}",
                   target_mde, f"{0.5*try_combined_sample_size:.0f}"]
    # report
    print("Start".center(100, '*'))
    print(f"{'Pooled' if pooled else 'Unpooled'} T-Test (differences in means): {scope_note} scope")
    # create table of descriptive statistics
    desc_head = ["Descriptive (group): ", "Sample size", "Total value", "Mean", "Standard deviation"]
    desc_stat = [["Control (A)", control_size, f"{np.sum(control):.5f}", f"{control_mean:.5f}", f"{control_std:.5f}"],
                 ["Treatment (B)", treatment_size, f"{np.sum(treatment):.5f}", f"{treatment_mean:.5f}", f"{treatment_std:.5f}"],
                 ["Combined", combined_size, f"{combined_value:.5f}", f"{combined_mean:.5f}", f"{combined_std:.5f}"]]
    inf_head = ["Inferential (test): ", "Conclusion", "Significance level (α)", "Test statistic", "P-value", "Confidence interval",
                "Cohen's d", "Practical significance", "Relative change", "Sample size req",
                "Target MDE%", "Equiv. sample size"]
    inf_stat = [["1-tailed", decision_1tail, alpha, f"{test_stat:.15f}", f"{p_value_1tail:.10f}",
                 f"(>{lower_1tail:.5f} or <{upper_1tail:.5f})", *effect_cols],
                ["2-tailed", decision_2tail, alpha, f"{test_stat:.15f}", f"{p_value_2tail:.10f}",
                 f"({lower_2tail:.5f},{upper_2tail:.5f})", *effect_cols]]
    if full_report:
        print()
        print("Null hypothesis (H0): There is no significant effect size difference between Control (A) and Treatment (B)")
        print("Alternative hypothesis (H1): There is significant effect size difference between Control (A) and Treatment (B)\n")
        print(f"Conclusion (1-tail): {decision_1tail}, since p-value ({p_value_1tail:.10f}) {'<=' if p_value_1tail <= alpha else '>'} significance level ({alpha})")
        print(f"Conclusion (2-tail): {decision_2tail}, since p-value ({p_value_2tail:.10f}) {'<=' if p_value_2tail <= alpha else '>'} significance level ({alpha})\n")
        print(tabulate(desc_stat, headers=desc_head))
        print()
        print(tabulate(inf_stat, headers=inf_head))
        print(f"{'Sufficient' if combined_size >=  min_combined_sample_size else 'Insufficient'} sample size for observed relative change, which requires combined sample size of {min_combined_sample_size:.0f}")
        print(f"Combined sample size of {try_combined_sample_size:.0f} required to detect a relative change as small as {target_mde} (at statistical power of {power}, significance level of {alpha})")
    # output for visualization
    headers = ["Scope","Test", "Conclusion", "α", "Z*/T*", "SE", "MOE",
               "sample_stat", "test-stat", "p-value",
               "CI", "Lower_limit", "Upper_limit",
               "A_#", "B_#", "A_value", "B_value",
               "A_x̄", "B_x̄", "A_σ", "B_σ",
               "cohen_d", "practical_sig",
               "relative_change", "min_req_sample_size",
               "target_MDE", "MDE_equiv_sample_size"]
    data = [[scope_note, f"1-tail_t_test_{'pooled' if pooled else 'unpooled'}", decision_1tail, alpha, f"{critical_value_1tail:.6f}", f"{se:.5f}", f"{me_1tail:.5f}",
             f"{sample_stat:.15f}", f"{test_stat:.15f}", f"{p_value_1tail:.10f}",
             f"(>{lower_1tail:.5f}/<{upper_1tail:.5f})", f"{lower_1tail:.5f}", f"{upper_1tail:.5f}",
             *group_cols, *effect_cols],
            [scope_note, f"2-tail_t_test_{'pooled' if pooled else 'unpooled'}", decision_2tail, alpha, f"{critical_value_2tail:.6f}", f"{se:.5f}", f"{me_2tail:.5f}",
             f"{sample_stat:.15f}", f"{test_stat:.15f}", f"{p_value_2tail:.10f}",
             f"({lower_2tail:.5f},{upper_2tail:.5f})", f"{lower_2tail:.5f}", f"{upper_2tail:.5f}",
             *group_cols, *effect_cols]]
    print("\nEXPORT FOR VISUALIZATION:")
    print(tabulate(data, headers=headers))
    print("End".center(100, '*') + "\n")

### Power analysis (resources)
* [Estimate sample size at given power, or power at given sample size](https://www.stat.ubc.ca/~rollin/stats/ssize/b2.html)
* [Estimate sample size for independent proportions effect size z-test at required MDE](https://www.statsig.com/calculator) 
* [Estimate sample size for independent means effect size t-test at required MDE](https://statulator.com/SampleSize/ss2M.html#)

### Data packing & results

* General

In [None]:
def _per_user_mean(group_label, metric):
    """Return the per-user_id mean of `metric` for one test group, as a Series."""
    group_rows = df_ab[df_ab['test_group'] == group_label]
    per_user = group_rows.pivot_table(values=metric, index='user_id', aggfunc='mean', fill_value=0)
    return per_user[metric]


# conversion rate between the two groups
conv_a = _per_user_mean("A: control", 'conversion')
conv_b = _per_user_mean("B: treatment", 'conversion')
# amount spent per user between the two groups
spend_a = _per_user_mean("A: control", 'spend_USD')
spend_b = _per_user_mean("B: treatment", 'spend_USD')

# Unpooled tests with the full report, at alpha 0.05, target MDE 0.129 and power 0.80
z_test(conv_a, conv_b, alpha=0.05, pooled=False, full_report=True,
       scope_note='overall', target_mde=0.129, power=0.80)
t_test(spend_a, spend_b, alpha=0.05, pooled=False, full_report=True,
       scope_note='overall', target_mde=0.129, power=0.80)