### AB Testing Kit
* [Guideline for AB testing](https://www.kaggle.com/code/ekrembayar/a-b-testing-step-by-step-hypothesis-testing)

### Import libraries

In [28]:
from statsmodels.stats.proportion import proportion_effectsize, proportions_ztest
from statsmodels.stats.power import NormalIndPower, TTestIndPower, zt_ind_solve_power
from scipy.stats import norm, ttest_ind
import numpy as np
import math
import statistics as st
from scipy import stats
import pandas as pd
from tabulate import tabulate
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
import csv

### Data prepping

##### Import data

In [29]:
df_ab = pd.read_csv(r"C:\Users\Master\Documents\data_analytics\globox\ab_test_final.csv")

##### Check data

In [30]:
def check_df(dataframe, head):
    print("\n" + " DATAFRAME SUMMARY ".center(70, '=') + "")
    print("\n" + " INFO ".center(70, '-'))
    info_df = dataframe.dtypes.to_frame(name='Dtype')
    info_df['Non-Null'] = dataframe.notnull().sum()
    info_df['Unique'] = dataframe.nunique()
    info_df['Duplicate'] = dataframe.T.duplicated().sum()
    info_df['Missing'] = dataframe.isnull().sum()
    print(info_df)
    print('\nRows: {}'.format(dataframe.shape[0]))
    print('Columns: {}'.format(dataframe.shape[1]))
    print("\n" + " DESCRIBE ".center(70, '-'))
    print(dataframe.describe().T)
    print("\n" + " PERCENTILES ".center(70, '-'))
    print(dataframe.describe([0, 0.05, 0.50, 0.95, 0.99, 1]).T)
    print("\n" + " HEAD ".center(70, '-'))
    print(dataframe.head(head))
display(check_df(df_ab,1))



-------------------------------- INFO --------------------------------
                             Dtype  Non-Null  Unique  Duplicate  Missing
user_id                      int64     48943   48943          0        0
country_name                object     48943      11          0        0
gender                      object     48943       4          0        0
test_group                  object     48943       2          0        0
conversion                   int64     48943       2          0        0
spend_USD                  float64     48943    1948          0        0
device                      object     48943       3          0        0
join_dt                     object     48943      13          0        0
first_active_dt             object     48943      13          0        0
last_active_dt              object     48943      13          0        0
purchase_days                int64     48943       3          0        0
user_lifespan_days           int64     48943       

None

##### Summary table

In [31]:
# Summary table module
# Define column order
column_order = ['users', 'conversions', 'conversion_rate', 'total_spend_USD', 'avg_spend_USD']
# Calculate summary statistics by test_group and country_name
summary = df_ab.groupby(['test_group','country_name']).agg({'conversion': 'sum', 'spend_USD': 'sum', 'user_id': 'nunique'})
summary.columns = ['conversions', 'total_spend_USD', 'users']
summary['conversion_rate'] = summary['conversions'] / summary['users']
summary['avg_spend_USD'] = summary['total_spend_USD'] / summary['users']
summary = summary[column_order]
# Calculate subtotals by test_group
sub = df_ab.groupby('test_group').agg({'conversion': 'sum', 'spend_USD': 'sum', 'user_id': 'nunique'})
sub.columns = ['conversions', 'total_spend_USD', 'users']
sub['conversion_rate'] = sub['conversions'] / sub['users']
sub['avg_spend_USD'] = sub['total_spend_USD'] / sub['users']
sub = sub[column_order]
sub.index = pd.MultiIndex.from_tuples([(x, 'Subtotal') for x in sub.index])
# Calculate grand total
total = pd.DataFrame({
    'conversions': [df_ab['conversion'].sum()],
    'total_spend_USD': [df_ab['spend_USD'].sum()],
    'users': [df_ab['user_id'].nunique()]})
total['conversion_rate'] = total['conversions'] / total['users']
total['avg_spend_USD'] = total['total_spend_USD'] / total['users']
total.index = pd.MultiIndex.from_tuples([('Grand Total', '')])
total = total[column_order]
# Merge summary statistics, subtotals, and grand total and display results
result = pd.concat([summary, sub, total])
# Export summary table module
file_path = f"C:\\Users\\Master\\Documents\\data_analytics\\globox\\summary_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.csv"
# Save result DataFrame to CSV file
result.to_csv(file_path)
display(result.head(200))

Unnamed: 0_level_0,Unnamed: 1_level_0,users,conversions,conversion_rate,total_spend_USD,avg_spend_USD
test_group,country_name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A: control,Australia,608,13,0.021382,1014.38,1.668388
A: control,Brazil,4805,179,0.037253,15442.946812,3.213933
A: control,Canada,767,36,0.046936,2762.660237,3.601904
A: control,France,1536,48,0.03125,4113.213833,2.677874
A: control,Germany,1906,61,0.032004,6481.521714,3.400589
A: control,Mexico,2815,83,0.029485,7915.628884,2.811946
A: control,Spain,997,29,0.029087,2171.86,2.178395
A: control,Turkiye,1849,74,0.040022,6814.259298,3.685375
A: control,USA,7309,374,0.05117,31394.809752,4.295363
A: control,United Kingdom,1455,42,0.028866,3068.22009,2.108742


##### Country table

In [32]:
# Country View
print("Country View:")
df_ab['lifespan_conversion'] = df_ab['user_lifespan_days'] * df_ab['conversion']
test = df_ab.groupby(['country_name', 'test_group']).agg({'user_id': 'count', 'conversion': ['sum', 'mean'], 'spend_USD': ['sum', 'mean'], 'user_lifespan_days': 'mean', 'lifespan_conversion': 'sum'})
test.columns = ['users', 'conversions', 'conversion_rate', 'total_spend', 'avg_spend', 'lifespan', 'lifespan_conversion']
test['lifespan_conversion'] = test['lifespan_conversion'] / test['conversions']
test['unconverted'] = test['users']-test['conversions']
test['avg_spend_conversion'] = test['total_spend']/test['conversions'] 
test = test[['users', 'conversions', 'unconverted', 'conversion_rate', 'total_spend', 'avg_spend', 'avg_spend_conversion', 'lifespan', 'lifespan_conversion']]
test = test.sort_values(by='users', ascending=False)
display(test)
df_ab = df_ab.drop('lifespan_conversion', axis = 1)

print("\nUSA Summary:")
# locate index  = 'USA'
usa = test.loc['USA']
display(usa)

Country View:


Unnamed: 0_level_0,Unnamed: 1_level_0,users,conversions,unconverted,conversion_rate,total_spend,avg_spend,avg_spend_conversion,lifespan,lifespan_conversion
country_name,test_group,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
USA,B: treatment,7463,429,7034,0.057484,30250.912033,4.053452,70.514946,1.040332,1.701632
USA,A: control,7309,374,6935,0.05117,31394.809752,4.295363,83.943342,1.041045,1.802139
Brazil,A: control,4805,179,4626,0.037253,15442.946812,3.213933,86.273446,1.031009,1.832402
Brazil,B: treatment,4629,188,4441,0.040614,14193.055138,3.066117,75.494974,1.016418,1.404255
Mexico,B: treatment,2923,130,2793,0.044475,9778.914089,3.345506,75.222416,1.029764,1.669231
Mexico,A: control,2815,83,2732,0.029485,7915.628884,2.811946,95.369023,1.019183,1.650602
Germany,B: treatment,1948,86,1862,0.044148,5275.35,2.708085,61.341279,1.049281,2.116279
Germany,A: control,1906,61,1845,0.032004,6481.521714,3.400589,106.254454,1.02361,1.737705
Turkiye,B: treatment,1883,67,1816,0.035582,4686.706379,2.488957,69.950841,1.032926,1.925373
Turkiye,A: control,1849,74,1775,0.040022,6814.259298,3.685375,92.084585,1.027042,1.675676



USA Summary:


Unnamed: 0_level_0,users,conversions,unconverted,conversion_rate,total_spend,avg_spend,avg_spend_conversion,lifespan,lifespan_conversion
test_group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
B: treatment,7463,429,7034,0.057484,30250.912033,4.053452,70.514946,1.040332,1.701632
A: control,7309,374,6935,0.05117,31394.809752,4.295363,83.943342,1.041045,1.802139


##### Average conversion rate and per user $ spend by groups

In [33]:
# General conversions and spend 
df_ab['lifespan_conversion'] = df_ab['user_lifespan_days'] * df_ab['conversion']
test = df_ab.groupby(['test_group']).agg({'user_id': 'count', 'conversion': ['sum', 'mean'], 'spend_USD': ['sum', 'mean'], 'user_lifespan_days': 'mean', 'lifespan_conversion': 'sum'})
test.columns = ['users', 'conversions', 'conversion_rate', 'total_spend', 'avg_spend', 'lifespan', 'lifespan_conversion']
test['lifespan_conversion'] = test['lifespan_conversion'] / test['conversions']
test['unconverted'] = test['users']-test['conversions']
test['avg_spend_conversion'] = test['total_spend']/test['conversions'] 
test = test[['users', 'conversions', 'unconverted', 'conversion_rate', 'total_spend', 'avg_spend', 'avg_spend_conversion', 'lifespan', 'lifespan_conversion']]
display(test)
df_ab = df_ab.drop('lifespan_conversion', axis = 1)

Unnamed: 0_level_0,users,conversions,unconverted,conversion_rate,total_spend,avg_spend,avg_spend_conversion,lifespan,lifespan_conversion
test_group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
A: control,24343,955,23388,0.039231,82145.903065,3.374518,86.016652,1.030892,1.787435
B: treatment,24600,1139,23461,0.046301,83415.326869,3.390867,73.235581,1.034634,1.748025


In [34]:
# Device conversions and spend 
df_ab['lifespan_conversion'] = df_ab['user_lifespan_days'] * df_ab['conversion']
test = df_ab.groupby(['device', 'test_group']).agg({'user_id': 'count', 'conversion': ['sum', 'mean'], 'spend_USD': ['sum', 'mean'], 'user_lifespan_days': 'mean', 'lifespan_conversion': 'sum'})
test.columns = ['users', 'conversions', 'conversion_rate', 'total_spend', 'avg_spend', 'lifespan', 'lifespan_conversion']
test['lifespan_conversion'] = test['lifespan_conversion'] / test['conversions']
test['unconverted'] = test['users']-test['conversions']
test['avg_spend_conversion'] = test['total_spend']/test['conversions'] 
test = test[['users', 'conversions', 'unconverted', 'conversion_rate', 'total_spend', 'avg_spend', 'avg_spend_conversion', 'lifespan', 'lifespan_conversion']]
display(test)
df_ab = df_ab.drop('lifespan_conversion', axis = 1)

Unnamed: 0_level_0,Unnamed: 1_level_0,users,conversions,unconverted,conversion_rate,total_spend,avg_spend,avg_spend_conversion,lifespan,lifespan_conversion
device,test_group,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Android,A: control,15054,417,14637,0.0277,34814.470516,2.312639,83.487939,1.021058,1.760192
Android,B: treatment,15235,537,14698,0.035248,37567.903211,2.465895,69.958851,1.024943,1.707635
iOS,A: control,9142,535,8607,0.058521,46370.332548,5.072231,86.673519,1.047583,1.813084
iOS,B: treatment,9218,596,8622,0.064656,45339.320053,4.918564,76.072685,1.050662,1.783557
unknown,A: control,147,3,144,0.020408,961.1,6.538095,320.366667,1.0,1.0
unknown,B: treatment,147,6,141,0.040816,508.103605,3.456487,84.683934,1.034014,1.833333


In [35]:
# Gender conversions and spend 
df_ab['lifespan_conversion'] = df_ab['user_lifespan_days'] * df_ab['conversion']
test = df_ab.groupby(['gender', 'test_group']).agg({'user_id': 'count', 'conversion': ['sum', 'mean'], 'spend_USD': ['sum', 'mean'], 'user_lifespan_days': 'mean', 'lifespan_conversion': 'sum'})
test.columns = ['users', 'conversions', 'conversion_rate', 'total_spend', 'avg_spend', 'lifespan', 'lifespan_conversion']
test['lifespan_conversion'] = test['lifespan_conversion'] / test['conversions']
test['unconverted'] = test['users']-test['conversions']
test['avg_spend_conversion'] = test['total_spend']/test['conversions'] 
test = test[['users', 'conversions', 'unconverted', 'conversion_rate', 'total_spend', 'avg_spend', 'avg_spend_conversion', 'lifespan', 'lifespan_conversion']]
display(test)
df_ab = df_ab.drop('lifespan_conversion', axis = 1)

Unnamed: 0_level_0,Unnamed: 1_level_0,users,conversions,unconverted,conversion_rate,total_spend,avg_spend,avg_spend_conversion,lifespan,lifespan_conversion
gender,test_group,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Female,A: control,10069,518,9551,0.051445,44936.242253,4.462831,86.749502,1.043103,1.837838
Female,B: treatment,10061,547,9514,0.054368,41551.11961,4.129919,75.961827,1.039757,1.731261
Male,A: control,10054,264,9790,0.026258,22616.227926,2.249476,85.66753,1.019694,1.75
Male,B: treatment,10235,388,9847,0.037909,26616.948653,2.600581,68.600383,1.028823,1.760309
Other,A: control,808,26,782,0.032178,2236.009887,2.767339,86.00038,1.014851,1.461538
Other,B: treatment,861,26,835,0.030197,2384.964561,2.769994,91.729406,1.012776,1.423077
unknown,A: control,3412,147,3265,0.043083,12357.422999,3.621754,84.064102,1.031653,1.734694
unknown,B: treatment,3443,178,3265,0.051699,12862.294046,3.735781,72.260079,1.042405,1.820225


In [36]:
# Cohort conversions and spend 
df_ab['lifespan_conversion'] = df_ab['user_lifespan_days'] * df_ab['conversion']
test = df_ab.groupby(['cohort_month', 'test_group']).agg({'user_id': 'count', 'conversion': ['sum', 'mean'], 'spend_USD': ['sum', 'mean'], 'user_lifespan_days': 'mean', 'lifespan_conversion': 'sum'})
test.columns = ['users', 'conversions', 'conversion_rate', 'total_spend', 'avg_spend', 'lifespan', 'lifespan_conversion']
test['lifespan_conversion'] = test['lifespan_conversion'] / test['conversions']
test['unconverted'] = test['users']-test['conversions']
test['avg_spend_conversion'] = test['total_spend']/test['conversions'] 
test = test[['users', 'conversions', 'unconverted', 'conversion_rate', 'total_spend', 'avg_spend', 'avg_spend_conversion', 'lifespan', 'lifespan_conversion']]
display(test)
df_ab = df_ab.drop('lifespan_conversion', axis = 1)

Unnamed: 0_level_0,Unnamed: 1_level_0,users,conversions,unconverted,conversion_rate,total_spend,avg_spend,avg_spend_conversion,lifespan,lifespan_conversion
cohort_month,test_group,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Feb-2023,A: control,4728,174,4554,0.036802,15650.443041,3.310161,89.945075,1.020939,1.568966
Feb-2023,B: treatment,4860,226,4634,0.046502,16416.952567,3.377974,72.641383,1.02428,1.522124
Jan-2023,A: control,19615,781,18834,0.039816,66495.460024,3.390031,85.141434,1.033291,1.836108
Jan-2023,B: treatment,19740,913,18827,0.046251,66998.374302,3.394041,73.382666,1.037183,1.803943


In [37]:
# Convert only
convert = df_ab[df_ab['conversion']==1].groupby(['test_group', 'country_name', 'gender', 'device']).agg({'user_id': 'count', 'spend_USD': 'mean', 'user_lifespan_days': 'mean'})
convert.columns = ['users', 'avg_spend', 'lifespan']
# Export summary table module
file_path = f"C:\\Users\\Master\\Documents\\data_analytics\\globox\\convert_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.csv"
# Save result DataFrame to CSV file
convert.to_csv(file_path)
convert.head(200)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,users,avg_spend,lifespan
test_group,country_name,gender,device,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A: control,Australia,Female,Android,6,72.013333,1.166667
A: control,Australia,Female,iOS,4,95.760000,1.000000
A: control,Australia,Male,iOS,2,83.850000,2.500000
A: control,Australia,unknown,iOS,1,31.560000,1.000000
A: control,Brazil,Female,Android,49,87.696393,1.816327
...,...,...,...,...,...,...
B: treatment,unknown,Female,Android,2,21.100000,1.000000
B: treatment,unknown,Female,iOS,3,105.370476,2.666667
B: treatment,unknown,Male,Android,4,75.832500,3.500000
B: treatment,unknown,Male,iOS,4,99.585000,1.000000


### Programs
* Application guidelines
    * MDE or minimum detectable effect is expressed on relative change basis
    * t_test: difference in means
    * z_test: difference in proportions (large sample, > 30 observations)
    * z_test_clt: difference in means, only if Central Limit Theorem applies (sample size > 30) i.e. t-test sans degrees of freedom - decommissioned but available in older version
    * chi_sq_test: difference in proportions (small sample, < 30 observations) - not built
* Sources
    * [Link](https://www.cuemath.com/data/z-test/) pooled proportions se (z-test) 
    * [Link](https://cms.master.school/confidence-interval-and-hypothesis-testing-cheat-sheet) unpooled proportions se (z-test)
    * [Link](https://online.stat.psu.edu/stat500/lesson/7/7.3/7.3.1/7.3.1.1) pooled se & df (t-test)
    * [Link](https://online.stat.psu.edu/stat500/lesson/7/7.3/7.3.1/7.3.1.2) unpooled se (t-test)
    * [Link](https://www.statology.org/satterthwaite-approximation/) unpooled df (t-test)

In [38]:
def z_test(control, treatment, alpha, pooled, full_report, metric, scope_note, target_mde, power):
    # convert series objects to NumPy arrays
    control = np.asarray(control)
    treatment = np.asarray(treatment)
    # summary stats
    control_mean = np.mean(control)
    control_std = np.std(control)
    control_size = len(control)
    treatment_mean = np.mean(treatment)
    treatment_std = np.std(treatment)
    treatment_size = len(treatment)
    combined_size = control_size + treatment_size
    combined_value = control_size * control_mean + treatment_size * treatment_mean
    p = combined_value / combined_size
    combined_mean = p
    combined_std = np.sqrt(((control_size - 1) * control_std**2 + (treatment_size - 1) * treatment_std**2 ) / (combined_size - 2))
    # sample_stat and theoretical proportion p2 based on MDE
    sample_stat = (treatment_mean - control_mean)
    relative_change = sample_stat / control_mean
    p2 = control_mean * (1 + relative_change)
    pb = control_mean * (1 + target_mde)
    substance_test = "Pass" if relative_change >= target_mde else "Fail"
    # calculate se, cohen_d, ideal MDE-based combined sample size, actual MDE practical significance of cohen's d and test_stat
    if pooled:  
        se = np.sqrt(p * (1 - p) * (1 / control_size + 1 / treatment_size))
        cohen_d = sample_stat / np.sqrt(p * (1 - p))
        effect_size = proportion_effectsize(control_mean, p2)
        min_combined_sample_size = NormalIndPower().solve_power(effect_size=abs(effect_size), alpha=alpha, power=power, ratio=1)
        theoretical_effect_size = proportion_effectsize(control_mean, pb)
        try_combined_sample_size = NormalIndPower().solve_power(effect_size=abs(theoretical_effect_size), alpha=alpha, power=power, ratio=1)
    else:
        se = np.sqrt(control_mean*(1-control_mean)/control_size + treatment_mean*(1-treatment_mean)/treatment_size)
        cohen_d = sample_stat / np.sqrt((control_std**2 + treatment_std**2) / 2)
        effect_size = proportion_effectsize(control_mean, p2)
        n = NormalIndPower().solve_power(effect_size=abs(effect_size), alpha=alpha, power=power)
        min_combined_sample_size = 2 * n
        theoretical_effect_size = proportion_effectsize(control_mean, pb)
        k = NormalIndPower().solve_power(effect_size=abs(theoretical_effect_size), alpha=alpha, power=power)
        try_combined_sample_size = k * 2
    practical_significance = "large" if relative_change >= 0.3 else 'medium'if relative_change >= 0.1 else 'small' if relative_change >= 0.05 else "insignificant"
    test_stat = sample_stat / se
    current_sample = 'Sufficient' if combined_size >=  min_combined_sample_size else 'Insufficient'
    # perform one-tailed test
    p_value_1tail = stats.norm.sf(abs(test_stat))
    critical_value_1tail = stats.norm.ppf(1 - alpha)
    me_1tail = critical_value_1tail * se
    decision_1tail = "Reject_H0" if p_value_1tail <= alpha else "Fail_to_reject_H0"
    # perform two-tailed test
    p_value_2tail = stats.norm.sf(abs(test_stat)) * 2
    critical_value_2tail = stats.norm.ppf(1 - alpha/2)
    me_2tail = critical_value_2tail * se
    decision_2tail = "Reject_H0" if p_value_2tail <= alpha else "Fail_to_reject_H0"
    # report
    print("" + "Start".center(100, '*') + "")
    print(f"{'Pooled' if pooled else 'Unpooled'} Z-Test (difference in proportions): {metric} {scope_note} scope")
    # create table of descriptive statistics
    desc_head = ["Descriptive (group): ", "Sample size", "Total value", "Mean", "Standard deviation"]
    desc_stat = [["Control (A)", control_size, f"{np.sum(control):.5f}", f"{control_mean:.5f}", f"{control_std:.5f}"], 
                 ["Treatment (B)", treatment_size, f"{np.sum(treatment):.5f}", f"{treatment_mean:.5f}", f"{treatment_std:.5f}"],
                 ["Combined", combined_size, f"{combined_value:.5f}", f"{combined_mean:.5f}", f"{combined_std:.5f}"]]
    inf_head = ["Inferential (test): ", "Conclusion", "Significance level (α)", "Test statistic", "P-value", "Confidence interval", 
                "Cohen's d", "Practical significance", "Relative change", "Sample size req",
                "Target MDE%", "Equiv. sample size"]
    inf_stat = [["1-tailed", decision_1tail, alpha, f"{test_stat:.15f}", f"{p_value_1tail:.10f}", f"(>{(sample_stat - me_1tail):.5f} or <{(sample_stat + me_1tail):.5f})", 
                cohen_d, practical_significance, f"{relative_change:.5f}", f"{0.5*min_combined_sample_size:.0f}",
                target_mde, f"{0.5*try_combined_sample_size:.0f}"],
                ["2-tailed", decision_2tail, alpha, f"{test_stat:.15f}", f"{p_value_2tail:.10f}", f"({(sample_stat - me_2tail):.5f},{(sample_stat + me_2tail):.5f})",
                cohen_d, practical_significance, f"{relative_change:.5f}", f"{0.5*min_combined_sample_size:.0f}",
                target_mde, f"{0.5*try_combined_sample_size:.0f}"]]
    if full_report:
        print() 
        print(f"Null hypothesis (H0): There is no significant effect size difference between Control (A) and Treatment (B)")
        print(f"Alternative hypothesis (H1): There is significant effect size difference between Control (A) and Treatment (B)\n")
        print(f"Conclusion (1-tail): {decision_1tail}, since p-value ({p_value_1tail:.10f}) {'<=' if p_value_1tail <= alpha else '>'} significance level ({alpha})")
        print(f"Conclusion (2-tail): {decision_2tail}, since p-value ({p_value_2tail:.10f}) {'<=' if p_value_2tail <= alpha else '>'} significance level ({alpha})\n")
        print(tabulate(desc_stat, headers=desc_head))
        print()
        print(tabulate(inf_stat, headers=inf_head))
    print(f"{'Sufficient' if combined_size >=  min_combined_sample_size else 'Insufficient'} sample size for observed relative change, which requires combined sample size of {min_combined_sample_size:.0f}")
    print(f"Combined sample size of {try_combined_sample_size:.0f} required to detect a relative change as small as {target_mde} (at statistical power of {power}, significance level of {alpha})")
    # output for visualization
    headers = ["Metric", "Scope","Test", "Conclusion", "alpha", "Z*/T*", "SE", "MOE", 
               "sample_stat", "test-stat", "p-value", 
               "CI", "Lower_limit", "Upper_limit", 
               "A_#", "B_#", "A_value", "B_value", 
               "A_mu", "B_mu", "A_std", "B_std", 
               "cohen_d", "substantiveness", 
               "relative_change", "min_req_sample_size", 
               "target_MDE", "MDE_equiv_sample_size", "substance_test", "current_sample"]
    data = [[metric, scope_note, f"1-tail_z_test_{'pooled' if pooled else 'unpooled'}", decision_1tail, alpha, f"{critical_value_1tail:.6f}", f"{se:.5f}", f"{me_1tail:.5f}", 
            f"{sample_stat:.15f}", f"{test_stat:.15f}", f"{p_value_1tail:.15f}", 
            f"(>{(sample_stat - me_1tail):.5f}/<{(sample_stat + me_1tail):.5f})", f"{(sample_stat - me_1tail):.5f}", f"{(sample_stat + me_1tail):.5f}", 
            control_size, treatment_size, f"{np.sum(control):.5f}", f"{np.sum(treatment):.5f}",  
            f"{control_mean:.5f}", f"{treatment_mean:.5f}", f"{control_std:.5f}", f"{treatment_std:.5f}", 
            cohen_d, practical_significance, 
            f"{relative_change:.5f}", f"{0.5*min_combined_sample_size:.0f}", 
            target_mde, f"{0.5*try_combined_sample_size:.0f}", substance_test, current_sample]
             ,
            [metric, scope_note, f"2-tail_z_test_{'pooled' if pooled else 'unpooled'}", decision_2tail, alpha, f"{critical_value_2tail:.6f}", f"{se:.5f}", f"{me_2tail:.5f}", 
            f"{sample_stat:.15f}", f"{test_stat:.15f}", f"{p_value_2tail:.15f}", 
            f"({(sample_stat - me_2tail):.5f},{(sample_stat + me_2tail):.5f})", f"{(sample_stat - me_2tail):.5f}", f"{(sample_stat + me_2tail):.5f}", 
            control_size, treatment_size, f"{np.sum(control):.5f}", f"{np.sum(treatment):.5f}", 
            f"{control_mean:.5f}", f"{treatment_mean:.5f}", f"{control_std:.5f}", f"{treatment_std:.5f}", 
            cohen_d, practical_significance, 
            f"{relative_change:.5f}", f"{0.5*min_combined_sample_size:.0f}", 
            target_mde, f"{0.5*try_combined_sample_size:.0f}", substance_test, current_sample]]
    print("\n""EXPORT FOR VISUALIZATION:")
    print(tabulate(data, headers=headers))
    print("" + "End".center(100, '*') + "\n")

In [39]:
def t_test(control, treatment, alpha, pooled, full_report, metric, scope_note, target_mde, power):
 # convert series objects to NumPy arrays
    control = np.asarray(control)
    treatment = np.asarray(treatment)
    # summary stats
    control_mean = np.mean(control)
    control_std = np.std(control)
    control_size = len(control)
    treatment_mean = np.mean(treatment)
    treatment_std = np.std(treatment)
    treatment_size = len(treatment)
    combined_size = control_size + treatment_size
    combined_value = control_size * control_mean + treatment_size * treatment_mean
    combined_mean = combined_value / combined_size
    pooled_var = ((control_size - 1) * np.var(control) + (treatment_size - 1) * np.var(treatment)) / (combined_size - 2)
    combined_std = np.sqrt(pooled_var)
    # sample_stat and proforma theoretical np Array m2 based on relative MDE 
    sample_stat = (treatment_mean - control_mean)
    relative_change = sample_stat / control_mean
    m2 = control * (1 + relative_change)
    mb = control * (1 + target_mde)
    substance_test = "Pass" if relative_change >= target_mde else "Fail"
    # calculate se, cohen_d, relative MDE-based combined sample size, practical significance of cohen's d and test_stat
    test_stat, p_value_2tail = ttest_ind(control, treatment, equal_var=pooled) 
    if pooled:
        se = np.sqrt(pooled_var * (1 / control_size + 1 / treatment_size))
        df = control_size + treatment_size - 2
        cohen_d = sample_stat / np.sqrt(pooled_var)
        effect_size = relative_change / np.sqrt((np.var(control) + np.var(m2)) / 2)
        min_combined_sample_size = TTestIndPower().solve_power(effect_size=abs(effect_size), alpha=alpha, power=power, ratio=1)
        theoretical_effect_size = target_mde / np.sqrt((np.var(control) + np.var(mb)) / 2)
        try_combined_sample_size = TTestIndPower().solve_power(effect_size=abs(theoretical_effect_size), alpha=alpha, power=power, ratio=1)
    else:
        se = np.sqrt(control_std**2/control_size + treatment_std**2/treatment_size)
        df = (control_std**2/control_size + treatment_std**2/treatment_size)**2 / ((control_std**2/control_size)**2/(control_size-1) + (treatment_std**2/treatment_size)**2/(treatment_size-1))
        cohen_d = sample_stat / np.sqrt((control_std**2 + treatment_std**2) / 2)
        effect_size = relative_change / np.sqrt((np.var(control) / control_size) + (np.var(m2) / control_size))
        n = TTestIndPower().solve_power(effect_size=abs(effect_size), alpha=alpha, power=power)
        min_combined_sample_size = 2 * n
        theoretical_effect_size = target_mde / np.sqrt((np.var(control) / control_size) + (np.var(mb) / control_size))
        k = TTestIndPower().solve_power(effect_size=abs(theoretical_effect_size), alpha=alpha, power=power)
        try_combined_sample_size = k * 2
    practical_significance = "large" if relative_change >= 0.3 else 'medium'if relative_change >= 0.1 else 'small' if relative_change >= 0.05 else "insignificant"
    current_sample = 'Sufficient' if combined_size >=  min_combined_sample_size else 'Insufficient'
    # perform one-tailed test
    p_value_1tail = stats.t.sf(abs(test_stat), df)
    critical_value_1tail = stats.t.ppf(1 - alpha, df)
    me_1tail = critical_value_1tail * se
    decision_1tail = "Reject_H0" if p_value_1tail < alpha else "Fail_to_reject_H0"
    # perform two-tailed test
    critical_value_2tail = stats.t.ppf(1 - alpha/2, df)
    me_2tail = critical_value_2tail * se
    decision_2tail = "Reject_H0" if p_value_2tail < alpha else "Fail_to_reject_H0"
    # report 
    print("" + "Start".center(100, '*') + "")
    print(f"{'Pooled' if pooled else 'Unpooled'} T-Test (differences in means): {metric} {scope_note} scope")
    # create table of descriptive statistics
    desc_head = ["Descriptive (group): ", "Sample size", "Total value", "Mean", "Standard deviation"]
    desc_stat = [["Control (A)", control_size, f"{np.sum(control):.5f}", f"{control_mean:.5f}", f"{control_std:.5f}"], 
                 ["Treatment (B)", treatment_size, f"{np.sum(treatment):.5f}", f"{treatment_mean:.5f}", f"{treatment_std:.5f}"],
                 ["Combined", combined_size, f"{combined_value:.5f}", f"{combined_mean:.5f}", f"{combined_std:.5f}"]]
    inf_head = ["Inferential (test): ", "Conclusion", "Significance level (α)", "Test statistic", "P-value", "Confidence interval", 
                "Cohen's d", "Practical significance", "Relative change", "Sample size req",
                "Target MDE%", "Equiv. sample size", "df"]
    inf_stat = [["1-tailed", decision_1tail, alpha, f"{test_stat:.15f}", f"{p_value_1tail:.10f}", f"(>{(sample_stat - me_1tail):.5f} or <{(sample_stat + me_1tail):.5f})", 
                cohen_d, practical_significance, f"{relative_change:.5f}", f"{0.5*min_combined_sample_size:.0f}",
                target_mde, f"{0.5*try_combined_sample_size:.0f}", f"{df:.0f}"],
                ["2-tailed", decision_2tail, alpha, f"{test_stat:.15f}", f"{p_value_2tail:.10f}", f"({(sample_stat - me_2tail):.5f},{(sample_stat + me_2tail):.5f})", 
                cohen_d, practical_significance, f"{relative_change:.5f}", f"{0.5*min_combined_sample_size:.0f}",
                target_mde, f"{0.5*try_combined_sample_size:.0f}", f"{df:.0f}"]]
    if full_report:
        print() 
        print(f"Null hypothesis (H0): There is no significant effect size difference between Control (A) and Treatment (B)")
        print(f"Alternative hypothesis (H1): There is significant effect size difference between Control (A) and Treatment (B)\n")
        print(f"Conclusion (1-tail): {decision_1tail}, since p-value ({p_value_1tail:.10f}) {'<=' if p_value_1tail <= alpha else '>'} significance level ({alpha})")
        print(f"Conclusion (2-tail): {decision_2tail}, since p-value ({p_value_2tail:.10f}) {'<=' if p_value_2tail <= alpha else '>'} significance level ({alpha})\n")
        print(tabulate(desc_stat, headers=desc_head))
        print()
        print(tabulate(inf_stat, headers=inf_head))
    print(f"{'Sufficient' if combined_size >=  min_combined_sample_size else 'Insufficient'} sample size for observed relative change, which requires combined sample size of {min_combined_sample_size:.0f}")
    print(f"Combined sample size of {try_combined_sample_size:.0f} required to detect a relative change as small as {target_mde} (at statistical power of {power}, significance level of {alpha})")
    # output for visualization
    headers = ["Metric", "Scope","Test", "Conclusion", "alpha", "Z*/T*", "SE", "MOE", 
               "sample_stat", "test-stat", "p-value", 
               "CI", "Lower_limit", "Upper_limit", 
               "A_#", "B_#", "A_value", "B_value", 
               "A_mu", "B_mu", "A_std", "B_std",
               "cohen_d", "substantiveness",
               "relative_change", "min_req_sample_size",  
                "target_MDE", "MDE_equiv_sample_size", "substance_test", "current_sample"]
    data = [[metric, scope_note, f"1-tail_t_test_{'pooled' if pooled else 'unpooled'}", decision_1tail, alpha, f"{critical_value_1tail:.6f}", f"{se:.5f}", f"{me_1tail:.5f}", 
            f"{sample_stat:.15f}", f"{test_stat:.15f}", f"{p_value_1tail:.10f}", 
            f"(>{(sample_stat - me_1tail):.5f}/<{(sample_stat + me_1tail):.5f})", f"{(sample_stat - me_1tail):.5f}", f"{(sample_stat + me_1tail):.5f}", 
            control_size, treatment_size, f"{np.sum(control):.5f}", f"{np.sum(treatment):.5f}",  
            f"{control_mean:.5f}", f"{treatment_mean:.5f}", f"{control_std:.5f}", f"{treatment_std:.5f}", 
            cohen_d, practical_significance, 
            f"{relative_change:.5f}", f"{0.5*min_combined_sample_size:.0f}", 
            target_mde, f"{0.5*try_combined_sample_size:.0f}", substance_test, current_sample]
             ,
            [metric, scope_note, f"2-tail_t_test_{'pooled' if pooled else 'unpooled'}", decision_2tail, alpha, f"{critical_value_2tail:.6f}", f"{se:.5f}", f"{me_2tail:.5f}", 
            f"{sample_stat:.15f}", f"{test_stat:.15f}", f"{p_value_2tail:.10f}", 
            f"({(sample_stat - me_2tail):.5f},{(sample_stat + me_2tail):.5f})", f"{(sample_stat - me_2tail):.5f}", f"{(sample_stat + me_2tail):.5f}", 
            control_size, treatment_size, f"{np.sum(control):.5f}", f"{np.sum(treatment):.5f}", 
            f"{control_mean:.5f}", f"{treatment_mean:.5f}", f"{control_std:.5f}", f"{treatment_std:.5f}", 
            cohen_d, practical_significance, 
            f"{relative_change:.5f}", f"{0.5*min_combined_sample_size:.0f}", 
            target_mde, f"{0.5*try_combined_sample_size:.0f}", substance_test, current_sample]]
    print("\n""EXPORT FOR VISUALIZATION:")
    print(tabulate(data, headers=headers))
    print("" + "End".center(100, '*') + "\n")

### Power analysis (resources)
* [Estimate sample size at given power, or power at given sample size](https://www.stat.ubc.ca/~rollin/stats/ssize/b2.html)
* [Estimate sample size for independent proportions effect size z-test at required MDE](https://www.statsig.com/calculator) 
* [Estimate sample size for independent means  effect size t-test at required MDE](https://statulator.com/SampleSize/ss2M.html#)

### Data packing & results

In [40]:
MDE_low = 0.05
MDE_med = 0.10
MDE_high = 0.30

* General view: overall, $200_or_less

In [41]:
# All test subjects
conv_a = df_ab[df_ab['test_group'] == "A: control"].pivot_table(values='conversion', index='user_id', aggfunc='mean', fill_value=0)['conversion']
conv_b = df_ab[df_ab['test_group'] == "B: treatment"].pivot_table(values='conversion', index='user_id', aggfunc='mean', fill_value=0)['conversion']
spend_a = df_ab[df_ab['test_group'] == "A: control"].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
spend_b = df_ab[df_ab['test_group'] == "B: treatment"].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
cspend_a = df_ab[(df_ab['conversion'] == 1) & (df_ab['test_group'] == "A: control")].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
cspend_b = df_ab[(df_ab['conversion'] == 1) & (df_ab['test_group'] == "B: treatment")].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
# run
z_test(conv_a, conv_b, 0.05, False, True, 'CR', 'overall', MDE_low, 0.80)
t_test(spend_a, spend_b, 0.05, False, True, "$/user", 'overall', MDE_low, 0.80)
t_test(cspend_a, cspend_b, 0.05, False, True, "$/conversion", 'overall', MDE_low, 0.80)

***********************************************Start************************************************
Unpooled Z-Test (difference in proportions): CR overall scope

Null hypothesis (H0): There is no significant effect size difference between Control (A) and Treatment (B)
Alternative hypothesis (H1): There is significant effect size difference between Control (A) and Treatment (B)

Conclusion (1-tail): Reject_H0, since p-value (0.0000552077) <= significance level (0.05)
Conclusion (2-tail): Reject_H0, since p-value (0.0001104154) <= significance level (0.05)

Descriptive (group):       Sample size    Total value     Mean    Standard deviation
-----------------------  -------------  -------------  -------  --------------------
Control (A)                      24343            955  0.03923               0.19414
Treatment (B)                    24600           1139  0.0463                0.21014
Combined                         48943           2094  0.04278               0.20234

Inferentia

In [52]:
# $200_or_less
conv_a_max200 = df_ab[(df_ab['test_group'] == "A: control") & (df_ab['spend_USD_200_plus'] == 'N')].pivot_table(values='conversion', index='user_id', aggfunc='mean', fill_value=0)['conversion']
conv_b_max200 = df_ab[(df_ab['test_group'] == "B: treatment") & (df_ab['spend_USD_200_plus'] == 'N')].pivot_table(values='conversion', index='user_id', aggfunc='mean', fill_value=0)['conversion']
spend_a_max200 = df_ab[(df_ab['test_group'] == "A: control") & (df_ab['spend_USD_200_plus'] == 'N')].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
spend_b_max200 = df_ab[(df_ab['test_group'] == "B: treatment") & (df_ab['spend_USD_200_plus'] == 'N')].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
cspend_a_max200 = df_ab[(df_ab['conversion'] == 1) & (df_ab['test_group'] == "A: control") & (df_ab['spend_USD_200_plus'] == 'N')].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
cspend_b_max200 = df_ab[(df_ab['conversion'] == 1) & (df_ab['test_group'] == "B: treatment") & (df_ab['spend_USD_200_plus'] == 'N')].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
# run
z_test(conv_a_max200, conv_b_max200, 0.05, False, True, 'CR', '$200_or_less', MDE_low, 0.80)
t_test(spend_a_max200, spend_b_max200, 0.05, False, True, "$/user", '$200_or_less', MDE_low, 0.80)
t_test(cspend_a_max200, cspend_b_max200, 0.05, False, True, "$/conversion", '$200_or_less', MDE_low, 0.80)

***********************************************Start************************************************
Unpooled Z-Test (difference in proportions): CR $200_or_less scope

Null hypothesis (H0): There is no significant effect size difference between Control (A) and Treatment (B)
Alternative hypothesis (H1): There is significant effect size difference between Control (A) and Treatment (B)

Conclusion (1-tail): Reject_H0, since p-value (0.0000342185) <= significance level (0.05)
Conclusion (2-tail): Reject_H0, since p-value (0.0000684369) <= significance level (0.05)

Descriptive (group):       Sample size    Total value     Mean    Standard deviation
-----------------------  -------------  -------------  -------  --------------------
Control (A)                      24307            919  0.03781               0.19073
Treatment (B)                    24566           1105  0.04498               0.20726
Combined                         48873           2024  0.04141               0.19921

Infer

* Device: android, iOS
* Gender: male, female

In [43]:
# android
conv_a_android = df_ab[(df_ab['test_group'] == "A: control") & (df_ab['device'] == "Android")].pivot_table(values='conversion', index='user_id', aggfunc='mean', fill_value=0)['conversion']
conv_b_android = df_ab[(df_ab['test_group'] == "B: treatment") & (df_ab['device'] == "Android")].pivot_table(values='conversion', index='user_id', aggfunc='mean', fill_value=0)['conversion']
spend_a_android = df_ab[(df_ab['test_group'] == "A: control") & (df_ab['device'] == "Android")].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
spend_b_android = df_ab[(df_ab['test_group'] == "B: treatment") & (df_ab['device'] == "Android")].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
# ios
conv_a_ios = df_ab[(df_ab['test_group'] == "A: control") & (df_ab['device'] == "iOS")].pivot_table(values='conversion', index='user_id', aggfunc='mean', fill_value=0)['conversion']
conv_b_ios = df_ab[(df_ab['test_group'] == "B: treatment") & (df_ab['device'] == "iOS")].pivot_table(values='conversion', index='user_id', aggfunc='mean', fill_value=0)['conversion']
spend_a_ios = df_ab[(df_ab['test_group'] == "A: control") & (df_ab['device'] == "iOS")].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
spend_b_ios = df_ab[(df_ab['test_group'] == "B: treatment") & (df_ab['device'] == "iOS")].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
# run
z_test(conv_a_android, conv_b_android, 0.05, False, False, 'CR', 'android', MDE_med, 0.80)
t_test(spend_a_android, spend_b_android, 0.05, False, False, "$/user", 'android', MDE_med, 0.80)
z_test(conv_a_ios, conv_b_ios, 0.05, False, False, 'CR', 'iOS', MDE_med, 0.80)
t_test(spend_a_ios, spend_b_ios, 0.05, False, False, "$/user", 'iOS', MDE_med, 0.80)

# male
conv_a_male = df_ab[(df_ab['test_group'] == "A: control") & (df_ab['gender'] == "Male")].pivot_table(values='conversion', index='user_id', aggfunc='mean', fill_value=0)['conversion']
conv_b_male = df_ab[(df_ab['test_group'] == "B: treatment") & (df_ab['gender'] == "Male")].pivot_table(values='conversion', index='user_id', aggfunc='mean', fill_value=0)['conversion']
spend_a_male = df_ab[(df_ab['test_group'] == "A: control") & (df_ab['gender'] == "Male")].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
spend_b_male = df_ab[(df_ab['test_group'] == "B: treatment") & (df_ab['gender'] == "Male")].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
# female
conv_a_female = df_ab[(df_ab['test_group'] == "A: control") & (df_ab['gender'] == "Female")].pivot_table(values='conversion', index='user_id', aggfunc='mean', fill_value=0)['conversion']
conv_b_female = df_ab[(df_ab['test_group'] == "B: treatment") & (df_ab['gender'] == "Female")].pivot_table(values='conversion', index='user_id', aggfunc='mean', fill_value=0)['conversion']
spend_a_female = df_ab[(df_ab['test_group'] == "A: control") & (df_ab['gender'] == "Female")].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
spend_b_female = df_ab[(df_ab['test_group'] == "B: treatment") & (df_ab['gender'] == "Female")].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
# run
z_test(conv_a_male, conv_b_male, 0.05, False, False, 'CR', 'male', MDE_med, 0.80)
t_test(spend_a_male, spend_b_male, 0.05, False, False, "$/user", 'male', MDE_med, 0.80)
z_test(conv_a_female, conv_b_female, 0.05, False, False, 'CR', 'female', MDE_med, 0.80)
t_test(spend_a_female, spend_b_female, 0.05, False, False, "$/user", 'female', MDE_med, 0.80)

***********************************************Start************************************************
Unpooled Z-Test (difference in proportions): CR android scope
Sufficient sample size for observed relative change, which requires combined sample size of 16741
Combined sample size of 115481 required to detect a relative change as small as 0.1 (at statistical power of 0.8, significance level of 0.05)

EXPORT FOR VISUALIZATION:
Metric    Scope    Test                    Conclusion      alpha    Z*/T*       SE      MOE    sample_stat    test-stat      p-value  CI                     Lower_limit    Upper_limit    A_#    B_#    A_value    B_value    A_mu     B_mu    A_std    B_std    cohen_d  substantiveness      relative_change    min_req_sample_size    target_MDE    MDE_equiv_sample_size  substance_test    current_sample
--------  -------  ----------------------  ------------  -------  -------  -------  -------  -------------  -----------  -----------  -------------------  -------------  

* Country: USA, anglophone, non_anglophone

In [44]:
# USA
conv_a_USA = df_ab[(df_ab['test_group'] == "A: control") & (df_ab['country_name'] == 'USA')].pivot_table(values='conversion', index='user_id', aggfunc='mean', fill_value=0)['conversion']
conv_b_USA = df_ab[(df_ab['test_group'] == "B: treatment") & (df_ab['country_name'] == 'USA')].pivot_table(values='conversion', index='user_id', aggfunc='mean', fill_value=0)['conversion']
spend_a_USA = df_ab[(df_ab['test_group'] == "A: control") & (df_ab['country_name'] == 'USA')].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
spend_b_USA = df_ab[(df_ab['test_group'] == "B: treatment") & (df_ab['country_name'] == 'USA')].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
# run
z_test(conv_a_USA, conv_b_USA, 0.05, False, False, 'CR', 'USA', MDE_med, 0.80)
t_test(spend_a_USA, spend_b_USA, 0.05, False, False, "$/user", 'USA', MDE_med, 0.80)

***********************************************Start************************************************
Unpooled Z-Test (difference in proportions): CR USA scope
Insufficient sample size for observed relative change, which requires combined sample size of 40428
Combined sample size of 60928 required to detect a relative change as small as 0.1 (at statistical power of 0.8, significance level of 0.05)

EXPORT FOR VISUALIZATION:
Metric    Scope    Test                    Conclusion           alpha    Z*/T*       SE      MOE    sample_stat    test-stat    p-value  CI                     Lower_limit    Upper_limit    A_#    B_#    A_value    B_value     A_mu     B_mu    A_std    B_std    cohen_d  substantiveness      relative_change    min_req_sample_size    target_MDE    MDE_equiv_sample_size  substance_test    current_sample
--------  -------  ----------------------  -----------------  -------  -------  -------  -------  -------------  -----------  ---------  -------------------  -----------

* Anglo-gender: anglo_male, anglo_female

In [45]:
# anglophone
conv_a_anglo = df_ab[(df_ab['test_group'] == "A: control") & (df_ab['country_name'].isin(['USA', 'United Kingdom', 'Canada', 'Australia']))].pivot_table(values='conversion', index='user_id', aggfunc='mean', fill_value=0)['conversion']
conv_b_anglo = df_ab[(df_ab['test_group'] == "B: treatment") & (df_ab['country_name'].isin(['USA', 'United Kingdom', 'Canada', 'Australia']))].pivot_table(values='conversion', index='user_id', aggfunc='mean', fill_value=0)['conversion']
spend_a_anglo = df_ab[(df_ab['test_group'] == "A: control") & (df_ab['country_name'].isin(['USA', 'United Kingdom', 'Canada', 'Australia']))].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
spend_b_anglo = df_ab[(df_ab['test_group'] == "B: treatment") & (df_ab['country_name'].isin(['USA', 'United Kingdom', 'Canada', 'Australia']))].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
# anglo_male
conv_a_anglo_m = df_ab[(df_ab['test_group'] == "A: control") & (df_ab['gender'] == "Male") & (df_ab['country_name'].isin(['USA', 'United Kingdom', 'Canada', 'Australia']))].pivot_table(values='conversion', index='user_id', aggfunc='mean', fill_value=0)['conversion']
conv_b_anglo_m = df_ab[(df_ab['test_group'] == "B: treatment") & (df_ab['gender'] == "Male") & (df_ab['country_name'].isin(['USA', 'United Kingdom', 'Canada', 'Australia']))].pivot_table(values='conversion', index='user_id', aggfunc='mean', fill_value=0)['conversion']
spend_a_anglo_m = df_ab[(df_ab['test_group'] == "A: control") & (df_ab['gender'] == "Male") & (df_ab['country_name'].isin(['USA', 'United Kingdom', 'Canada', 'Australia']))].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
spend_b_anglo_m = df_ab[(df_ab['test_group'] == "B: treatment") & (df_ab['gender'] == "Male") & (df_ab['country_name'].isin(['USA', 'United Kingdom', 'Canada', 'Australia']))].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
# anglo_female
conv_a_anglo_f = df_ab[(df_ab['test_group'] == "A: control") & (df_ab['gender'] == "Female") & (df_ab['country_name'].isin(['USA', 'United Kingdom', 'Canada', 'Australia']))].pivot_table(values='conversion', index='user_id', aggfunc='mean', fill_value=0)['conversion']
conv_b_anglo_f = df_ab[(df_ab['test_group'] == "B: treatment") & (df_ab['gender'] == "Female") & (df_ab['country_name'].isin(['USA', 'United Kingdom', 'Canada', 'Australia']))].pivot_table(values='conversion', index='user_id', aggfunc='mean', fill_value=0)['conversion']
spend_a_anglo_f = df_ab[(df_ab['test_group'] == "A: control") & (df_ab['gender'] == "Female") & (df_ab['country_name'].isin(['USA', 'United Kingdom', 'Canada', 'Australia']))].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
spend_b_anglo_f = df_ab[(df_ab['test_group'] == "B: treatment") & (df_ab['gender'] == "Female") & (df_ab['country_name'].isin(['USA', 'United Kingdom', 'Canada', 'Australia']))].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
# run
z_test(conv_a_anglo, conv_b_anglo, 0.05, False, False, 'CR', 'anglophone', MDE_med, 0.80)
t_test(spend_a_anglo, spend_b_anglo, 0.05, False, False, "$/user", 'anglophone', MDE_med, 0.80)
z_test(conv_a_anglo_m, conv_b_anglo_m, 0.05, False, False, 'CR', 'anglo_male', MDE_med, 0.80)
t_test(spend_a_anglo_m, spend_b_anglo_m, 0.05, False, False, "$/user", 'anglo_male', MDE_med, 0.80)
z_test(conv_a_anglo_f, conv_b_anglo_f, 0.05, False, False, 'CR', 'anglo_female', MDE_med, 0.80)
t_test(spend_a_anglo_f, spend_b_anglo_f, 0.05, False, False, "$/user", 'anglo_female', MDE_med, 0.80)

***********************************************Start************************************************
Unpooled Z-Test (difference in proportions): CR anglophone scope
Insufficient sample size for observed relative change, which requires combined sample size of 24837
Combined sample size of 68379 required to detect a relative change as small as 0.1 (at statistical power of 0.8, significance level of 0.05)

EXPORT FOR VISUALIZATION:
Metric    Scope       Test                    Conclusion      alpha    Z*/T*       SE      MOE    sample_stat    test-stat     p-value  CI                     Lower_limit    Upper_limit    A_#    B_#    A_value    B_value     A_mu     B_mu    A_std    B_std    cohen_d  substantiveness      relative_change    min_req_sample_size    target_MDE    MDE_equiv_sample_size  substance_test    current_sample
--------  ----------  ----------------------  ------------  -------  -------  -------  -------  -------------  -----------  ----------  -------------------  ------

* Non anglo: non_anglo, non_anglo_male, non_anglo_female

In [46]:
# non anglophone
conv_a_non_anglo = df_ab[(df_ab['test_group'] == "A: control") & (df_ab['country_name'].isin(['Brazil', 'Mexico', 'Germany', 'Turkiye', 'France', 'Spain']))].pivot_table(values='conversion', index='user_id', aggfunc='mean', fill_value=0)['conversion']
conv_b_non_anglo = df_ab[(df_ab['test_group'] == "B: treatment") & (df_ab['country_name'].isin(['Brazil', 'Mexico', 'Germany', 'Turkiye', 'France', 'Spain']))].pivot_table(values='conversion', index='user_id', aggfunc='mean', fill_value=0)['conversion']
spend_a_non_anglo = df_ab[(df_ab['test_group'] == "A: control") & (df_ab['country_name'].isin(['Brazil', 'Mexico', 'Germany', 'Turkiye', 'France', 'Spain']))].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
spend_b_non_anglo = df_ab[(df_ab['test_group'] == "B: treatment") & (df_ab['country_name'].isin(['Brazil', 'Mexico', 'Germany', 'Turkiye', 'France', 'Spain']))].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
# non_anglo_male
conv_a_non_anglo_m = df_ab[(df_ab['test_group'] == "A: control") & (df_ab['gender'] == "Male") & (df_ab['country_name'].isin(['Brazil', 'Mexico', 'Germany', 'Turkiye', 'France', 'Spain']))].pivot_table(values='conversion', index='user_id', aggfunc='mean', fill_value=0)['conversion']
conv_b_non_anglo_m = df_ab[(df_ab['test_group'] == "B: treatment") & (df_ab['gender'] == "Male") & (df_ab['country_name'].isin(['Brazil', 'Mexico', 'Germany', 'Turkiye', 'France', 'Spain']))].pivot_table(values='conversion', index='user_id', aggfunc='mean', fill_value=0)['conversion']
spend_a_non_anglo_m = df_ab[(df_ab['test_group'] == "A: control") & (df_ab['gender'] == "Male") & (df_ab['country_name'].isin(['Brazil', 'Mexico', 'Germany', 'Turkiye', 'France', 'Spain']))].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
spend_b_non_anglo_m = df_ab[(df_ab['test_group'] == "B: treatment") & (df_ab['gender'] == "Male") & (df_ab['country_name'].isin(['Brazil', 'Mexico', 'Germany', 'Turkiye', 'France', 'Spain']))].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
# non_anglo_female
conv_a_non_anglo_f = df_ab[(df_ab['test_group'] == "A: control") & (df_ab['gender'] == "Female") & (df_ab['country_name'].isin(['Brazil', 'Mexico', 'Germany', 'Turkiye', 'France', 'Spain']))].pivot_table(values='conversion', index='user_id', aggfunc='mean', fill_value=0)['conversion']
conv_b_non_anglo_f = df_ab[(df_ab['test_group'] == "B: treatment") & (df_ab['gender'] == "Female") & (df_ab['country_name'].isin(['Brazil', 'Mexico', 'Germany', 'Turkiye', 'France', 'Spain']))].pivot_table(values='conversion', index='user_id', aggfunc='mean', fill_value=0)['conversion']
spend_a_non_anglo_f = df_ab[(df_ab['test_group'] == "A: control") & (df_ab['gender'] == "Female") & (df_ab['country_name'].isin(['Brazil', 'Mexico', 'Germany', 'Turkiye', 'France', 'Spain']))].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
spend_b_non_anglo_f = df_ab[(df_ab['test_group'] == "B: treatment") & (df_ab['gender'] == "Female") & (df_ab['country_name'].isin(['Brazil', 'Mexico', 'Germany', 'Turkiye', 'France', 'Spain']))].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
# run
z_test(conv_a_non_anglo, conv_b_non_anglo, 0.05, False, False, 'CR', 'non_anglophone', MDE_med, 0.80)
t_test(spend_a_non_anglo, spend_b_non_anglo, 0.05, False, False, "$/user", 'non_anglophone', MDE_med, 0.80)
z_test(conv_a_non_anglo_m, conv_b_non_anglo_m, 0.05, False, False, 'CR', 'non_anglo_male', MDE_med, 0.80)
t_test(spend_a_non_anglo_m, spend_b_non_anglo_m, 0.05, False, False, "$/user", 'non_anglo_male', MDE_med, 0.80)
z_test(conv_a_non_anglo_f, conv_b_non_anglo_f, 0.05, False, False, 'CR', 'non_anglo_female', MDE_med, 0.80)
t_test(spend_a_non_anglo_f, spend_b_non_anglo_f, 0.05, False, False, "$/user", 'non_anglo_female', MDE_med, 0.80)

***********************************************Start************************************************
Unpooled Z-Test (difference in proportions): CR non_anglophone scope
Sufficient sample size for observed relative change, which requires combined sample size of 23300
Combined sample size of 93213 required to detect a relative change as small as 0.1 (at statistical power of 0.8, significance level of 0.05)

EXPORT FOR VISUALIZATION:
Metric    Scope           Test                    Conclusion      alpha    Z*/T*       SE      MOE    sample_stat    test-stat     p-value  CI                     Lower_limit    Upper_limit    A_#    B_#    A_value    B_value     A_mu     B_mu    A_std    B_std    cohen_d  substantiveness      relative_change    min_req_sample_size    target_MDE    MDE_equiv_sample_size  substance_test    current_sample
--------  --------------  ----------------------  ------------  -------  -------  -------  -------  -------------  -----------  ----------  -----------------

* Major countries: brazil, mexico, germany, turkiye
* Other countries: france, uk, spain, canada, australia

In [47]:
# brazil
conv_a_bra = df_ab[(df_ab['test_group'] == "A: control") & (df_ab['country_name']=='Brazil')].pivot_table(values='conversion', index='user_id', aggfunc='mean', fill_value=0)['conversion']
conv_b_bra = df_ab[(df_ab['test_group'] == "B: treatment") & (df_ab['country_name']=='Brazil')].pivot_table(values='conversion', index='user_id', aggfunc='mean', fill_value=0)['conversion']
spend_a_bra = df_ab[(df_ab['test_group'] == "A: control") & (df_ab['country_name']=='Brazil')].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
spend_b_bra = df_ab[(df_ab['test_group'] == "B: treatment") & (df_ab['country_name']=='Brazil')].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
# mexico
conv_a_mex = df_ab[(df_ab['test_group'] == "A: control") & (df_ab['country_name']=='Mexico')].pivot_table(values='conversion', index='user_id', aggfunc='mean', fill_value=0)['conversion']
conv_b_mex = df_ab[(df_ab['test_group'] == "B: treatment") & (df_ab['country_name']=='Mexico')].pivot_table(values='conversion', index='user_id', aggfunc='mean', fill_value=0)['conversion']
spend_a_mex = df_ab[(df_ab['test_group'] == "A: control") & (df_ab['country_name']=='Mexico')].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
spend_b_mex = df_ab[(df_ab['test_group'] == "B: treatment") & (df_ab['country_name']=='Mexico')].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
# germany
conv_a_ger = df_ab[(df_ab['test_group'] == "A: control") & (df_ab['country_name']=='Germany')].pivot_table(values='conversion', index='user_id', aggfunc='mean', fill_value=0)['conversion']
conv_b_ger = df_ab[(df_ab['test_group'] == "B: treatment") & (df_ab['country_name']=='Germany')].pivot_table(values='conversion', index='user_id', aggfunc='mean', fill_value=0)['conversion']
spend_a_ger = df_ab[(df_ab['test_group'] == "A: control") & (df_ab['country_name']=='Germany')].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
spend_b_ger = df_ab[(df_ab['test_group'] == "B: treatment") & (df_ab['country_name']=='Germany')].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
# turkiye
conv_a_tur = df_ab[(df_ab['test_group'] == "A: control") & (df_ab['country_name']=='Turkiye')].pivot_table(values='conversion', index='user_id', aggfunc='mean', fill_value=0)['conversion']
conv_b_tur = df_ab[(df_ab['test_group'] == "B: treatment") & (df_ab['country_name']=='Turkiye')].pivot_table(values='conversion', index='user_id', aggfunc='mean', fill_value=0)['conversion']
spend_a_tur = df_ab[(df_ab['test_group'] == "A: control") & (df_ab['country_name']=='Turkiye')].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
spend_b_tur = df_ab[(df_ab['test_group'] == "B: treatment") & (df_ab['country_name']=='Turkiye')].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
# run
z_test(conv_a_bra, conv_b_bra, 0.05, False, False, 'CR', 'brazil', MDE_high, 0.80)
t_test(spend_a_bra, spend_b_bra, 0.05, False, False, "$/user", 'brazil', MDE_high, 0.80)
z_test(conv_a_mex, conv_b_mex, 0.05, False, False, 'CR', 'mexico', MDE_high, 0.80)
t_test(spend_a_mex, spend_b_mex, 0.05, False, False, "$/user", 'mexico', MDE_high, 0.80)
z_test(conv_a_ger, conv_b_ger, 0.05, False, False, 'CR', 'germany', MDE_high, 0.80)
t_test(spend_a_ger, spend_b_ger, 0.05, False, False, "$/user", 'germany', MDE_high, 0.80)
z_test(conv_a_tur, conv_b_tur, 0.05, False, False, 'CR', 'turkiye', MDE_high, 0.80)
t_test(spend_a_tur, spend_b_tur, 0.05, False, False, "$/user", 'turkiye', MDE_high, 0.80)

# france
conv_a_fra = df_ab[(df_ab['test_group'] == "A: control") & (df_ab['country_name']=='France')].pivot_table(values='conversion', index='user_id', aggfunc='mean', fill_value=0)['conversion']
conv_b_fra = df_ab[(df_ab['test_group'] == "B: treatment") & (df_ab['country_name']=='France')].pivot_table(values='conversion', index='user_id', aggfunc='mean', fill_value=0)['conversion']
spend_a_fra = df_ab[(df_ab['test_group'] == "A: control") & (df_ab['country_name']=='France')].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
spend_b_fra = df_ab[(df_ab['test_group'] == "B: treatment") & (df_ab['country_name']=='France')].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
# uk
conv_a_uk = df_ab[(df_ab['test_group'] == "A: control") & (df_ab['country_name']=="United Kingdom")].pivot_table(values='conversion', index='user_id', aggfunc='mean', fill_value=0)['conversion']
conv_b_uk = df_ab[(df_ab['test_group'] == "B: treatment") & (df_ab['country_name']=="United Kingdom")].pivot_table(values='conversion', index='user_id', aggfunc='mean', fill_value=0)['conversion']
spend_a_uk = df_ab[(df_ab['test_group'] == "A: control") & (df_ab['country_name']=="United Kingdom")].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
spend_b_uk = df_ab[(df_ab['test_group'] == "B: treatment") & (df_ab['country_name']=="United Kingdom")].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
# spain
conv_a_esp = df_ab[(df_ab['test_group'] == "A: control") & (df_ab['country_name']=='Spain')].pivot_table(values='conversion', index='user_id', aggfunc='mean', fill_value=0)['conversion']
conv_b_esp = df_ab[(df_ab['test_group'] == "B: treatment") & (df_ab['country_name']=='Spain')].pivot_table(values='conversion', index='user_id', aggfunc='mean', fill_value=0)['conversion']
spend_a_esp = df_ab[(df_ab['test_group'] == "A: control") & (df_ab['country_name']=='Spain')].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
spend_b_esp = df_ab[(df_ab['test_group'] == "B: treatment") & (df_ab['country_name']=='Spain')].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
# canada
conv_a_can = df_ab[(df_ab['test_group'] == "A: control") & (df_ab['country_name']=='Canada')].pivot_table(values='conversion', index='user_id', aggfunc='mean', fill_value=0)['conversion']
conv_b_can = df_ab[(df_ab['test_group'] == "B: treatment") & (df_ab['country_name']=='Canada')].pivot_table(values='conversion', index='user_id', aggfunc='mean', fill_value=0)['conversion']
spend_a_can = df_ab[(df_ab['test_group'] == "A: control") & (df_ab['country_name']=='Canada')].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
spend_b_can = df_ab[(df_ab['test_group'] == "B: treatment") & (df_ab['country_name']=='Canada')].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
# australia
conv_a_aus = df_ab[(df_ab['test_group'] == "A: control") & (df_ab['country_name']=='Australia')].pivot_table(values='conversion', index='user_id', aggfunc='mean', fill_value=0)['conversion']
conv_b_aus = df_ab[(df_ab['test_group'] == "B: treatment") & (df_ab['country_name']=='Australia')].pivot_table(values='conversion', index='user_id', aggfunc='mean', fill_value=0)['conversion']
spend_a_aus = df_ab[(df_ab['test_group'] == "A: control") & (df_ab['country_name']=='Australia')].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
spend_b_aus = df_ab[(df_ab['test_group'] == "B: treatment") & (df_ab['country_name']=='Australia')].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
# run
z_test(conv_a_fra, conv_b_fra, 0.05, False, False, 'CR', 'france', MDE_high, 0.80)
t_test(spend_a_fra, spend_b_fra, 0.05, False, False, "$/user", 'france', MDE_high, 0.80)
z_test(conv_a_uk, conv_b_uk, 0.05, False, False, 'CR', 'uk', MDE_high, 0.80)
t_test(spend_a_uk, spend_b_uk, 0.05, False, False, "$/user", 'uk', MDE_high, 0.80)
z_test(conv_a_esp, conv_b_esp, 0.05, False, False, 'CR', 'spain', MDE_high, 0.80)
t_test(spend_a_esp, spend_b_fra, 0.05, False, False, "$/user", 'spain', MDE_high, 0.80)
z_test(conv_a_can, conv_b_can, 0.05, False, False, 'CR', 'canada', MDE_high, 0.80)
t_test(spend_a_can, spend_b_can, 0.05, False, False, "$/user", 'canada', MDE_high, 0.80)
z_test(conv_a_aus, conv_b_aus, 0.05, False, False, 'CR', 'australia', MDE_high, 0.80)
t_test(spend_a_aus, spend_b_aus, 0.05, False, False, "$/user", 'australia', MDE_high, 0.80)

***********************************************Start************************************************
Unpooled Z-Test (difference in proportions): CR brazil scope
Insufficient sample size for observed relative change, which requires combined sample size of 103966
Combined sample size of 10265 required to detect a relative change as small as 0.3 (at statistical power of 0.8, significance level of 0.05)

EXPORT FOR VISUALIZATION:
Metric    Scope    Test                    Conclusion           alpha    Z*/T*       SE      MOE    sample_stat    test-stat    p-value  CI                      Lower_limit    Upper_limit    A_#    B_#    A_value    B_value     A_mu     B_mu    A_std    B_std    cohen_d  substantiveness      relative_change    min_req_sample_size    target_MDE    MDE_equiv_sample_size  substance_test    current_sample
--------  -------  ----------------------  -----------------  -------  -------  -------  -------  -------------  -----------  ---------  --------------------  -----

* Proportions and average per user: other

* Anglo-device: anglo_Android, anglo_iOS

In [48]:
# anglo_android
conv_a_anglo_and = df_ab[(df_ab['test_group'] == "A: control") & (df_ab['device'] == "Android") & (df_ab['country_name'].isin(['USA', 'United Kingdom', 'Canada', 'Australia']))].pivot_table(values='conversion', index='user_id', aggfunc='mean', fill_value=0)['conversion']
conv_b_anglo_and = df_ab[(df_ab['test_group'] == "B: treatment") & (df_ab['device'] == "Android") & (df_ab['country_name'].isin(['USA', 'United Kingdom', 'Canada', 'Australia']))].pivot_table(values='conversion', index='user_id', aggfunc='mean', fill_value=0)['conversion']
spend_a_anglo_and = df_ab[(df_ab['test_group'] == "A: control") & (df_ab['device'] == "Android") & (df_ab['country_name'].isin(['USA', 'United Kingdom', 'Canada', 'Australia']))].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
spend_b_anglo_and = df_ab[(df_ab['test_group'] == "B: treatment") & (df_ab['device'] == "Android") & (df_ab['country_name'].isin(['USA', 'United Kingdom', 'Canada', 'Australia']))].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
# anglo_ios
conv_a_anglo_ios = df_ab[(df_ab['test_group'] == "A: control") & (df_ab['device'] == "iOS") & (df_ab['country_name'].isin(['USA', 'United Kingdom', 'Canada', 'Australia']))].pivot_table(values='conversion', index='user_id', aggfunc='mean', fill_value=0)['conversion']
conv_b_anglo_ios = df_ab[(df_ab['test_group'] == "B: treatment") & (df_ab['device'] == "iOS") & (df_ab['country_name'].isin(['USA', 'United Kingdom', 'Canada', 'Australia']))].pivot_table(values='conversion', index='user_id', aggfunc='mean', fill_value=0)['conversion']
spend_a_anglo_ios = df_ab[(df_ab['test_group'] == "A: control") & (df_ab['device'] == "iOS") & (df_ab['country_name'].isin(['USA', 'United Kingdom', 'Canada', 'Australia']))].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
spend_b_anglo_ios = df_ab[(df_ab['test_group'] == "B: treatment") & (df_ab['device'] == "iOS") & (df_ab['country_name'].isin(['USA', 'United Kingdom', 'Canada', 'Australia']))].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
# run
z_test(conv_a_anglo_and, conv_b_anglo_and, 0.05, False, False, 'CR', 'anglo_android', MDE_med, 0.80)
t_test(spend_a_anglo_and, spend_b_anglo_and, 0.05, False, False, "$/user", 'anglo_android', MDE_med, 0.80)
z_test(conv_a_anglo_ios, conv_b_anglo_ios, 0.05, False, False, 'CR', 'anglo_ios', MDE_med, 0.80)
t_test(spend_a_anglo_ios, spend_b_anglo_ios, 0.05, False, False, "$/user", 'anglo_ios', MDE_med, 0.80)

***********************************************Start************************************************
Unpooled Z-Test (difference in proportions): CR anglo_android scope
Insufficient sample size for observed relative change, which requires combined sample size of 21297
Combined sample size of 89586 required to detect a relative change as small as 0.1 (at statistical power of 0.8, significance level of 0.05)

EXPORT FOR VISUALIZATION:
Metric    Scope          Test                    Conclusion      alpha    Z*/T*       SE      MOE    sample_stat    test-stat    p-value  CI                     Lower_limit    Upper_limit    A_#    B_#    A_value    B_value     A_mu     B_mu    A_std    B_std    cohen_d  substantiveness      relative_change    min_req_sample_size    target_MDE    MDE_equiv_sample_size  substance_test    current_sample
--------  -------------  ----------------------  ------------  -------  -------  -------  -------  -------------  -----------  ---------  ------------------- 

* Female-device

In [49]:
# female_android
conv_a_female_and = df_ab[(df_ab['test_group'] == "A: control") & (df_ab['device'] == "Android") & (df_ab['gender'] == "Female")].pivot_table(values='conversion', index='user_id', aggfunc='mean', fill_value=0)['conversion']
conv_b_female_and = df_ab[(df_ab['test_group'] == "B: treatment") & (df_ab['device'] == "Android") & (df_ab['gender'] == "Female")].pivot_table(values='conversion', index='user_id', aggfunc='mean', fill_value=0)['conversion']
spend_a_female_and = df_ab[(df_ab['test_group'] == "A: control") & (df_ab['device'] == "Android")  & (df_ab['gender'] == "Female")].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
spend_b_female_and = df_ab[(df_ab['test_group'] == "B: treatment") & (df_ab['device'] == "Android")  & (df_ab['gender'] == "Female")].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
# female_ios
conv_a_female_ios = df_ab[(df_ab['test_group'] == "A: control") & (df_ab['device'] == "iOS") & (df_ab['gender'] == "Female")].pivot_table(values='conversion', index='user_id', aggfunc='mean', fill_value=0)['conversion']
conv_b_female_ios = df_ab[(df_ab['test_group'] == "B: treatment") & (df_ab['device'] == "iOS") & (df_ab['gender'] == "Female")].pivot_table(values='conversion', index='user_id', aggfunc='mean', fill_value=0)['conversion']
spend_a_female_ios = df_ab[(df_ab['test_group'] == "A: control") & (df_ab['device'] == "iOS")  & (df_ab['gender'] == "Female")].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
spend_b_female_ios = df_ab[(df_ab['test_group'] == "B: treatment") & (df_ab['device'] == "iOS")  & (df_ab['gender'] == "Female")].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
# run
z_test(conv_a_female_and, conv_b_female_and, 0.05, False, False, 'CR', 'female_android', MDE_med, 0.80)
t_test(spend_a_female_and, spend_b_female_and, 0.05, False, False, "$/user", 'female_android', MDE_med, 0.80)
z_test(conv_a_female_ios, conv_b_female_ios, 0.05, False, False, 'CR', 'female_ios', MDE_med, 0.80)
t_test(spend_a_female_ios, spend_b_female_ios, 0.05, False, False, "$/user", 'female_ios', MDE_med, 0.80)

***********************************************Start************************************************
Unpooled Z-Test (difference in proportions): CR female_android scope
Insufficient sample size for observed relative change, which requires combined sample size of 63214
Combined sample size of 78160 required to detect a relative change as small as 0.1 (at statistical power of 0.8, significance level of 0.05)

EXPORT FOR VISUALIZATION:
Metric    Scope           Test                    Conclusion           alpha    Z*/T*       SE      MOE    sample_stat    test-stat    p-value  CI                      Lower_limit    Upper_limit    A_#    B_#    A_value    B_value     A_mu     B_mu    A_std    B_std    cohen_d  substantiveness      relative_change    min_req_sample_size    target_MDE    MDE_equiv_sample_size  substance_test    current_sample
--------  --------------  ----------------------  -----------------  -------  -------  -------  -------  -------------  -----------  ---------  ------

* Converted t-test: for output

In [50]:
# android
cspend_a_android = df_ab[(df_ab['conversion'] == 1) & (df_ab['test_group'] == "A: control") & (df_ab['device'] == "Android")].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
cspend_b_android = df_ab[(df_ab['conversion'] == 1) & (df_ab['test_group'] == "B: treatment") & (df_ab['device'] == "Android")].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
# ios
cspend_a_ios = df_ab[(df_ab['conversion'] == 1) & (df_ab['test_group'] == "A: control") & (df_ab['device'] == "iOS")].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
cspend_b_ios = df_ab[(df_ab['conversion'] == 1) & (df_ab['test_group'] == "B: treatment") & (df_ab['device'] == "iOS")].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
# male
cspend_a_male = df_ab[(df_ab['conversion'] == 1) & (df_ab['test_group'] == "A: control") & (df_ab['gender'] == "Male")].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
cspend_b_male = df_ab[(df_ab['conversion'] == 1) & (df_ab['test_group'] == "B: treatment") & (df_ab['gender'] == "Male")].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
# female
cspend_a_female = df_ab[(df_ab['conversion'] == 1) & (df_ab['test_group'] == "A: control") & (df_ab['gender'] == "Female")].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
cspend_b_female = df_ab[(df_ab['conversion'] == 1) & (df_ab['test_group'] == "B: treatment") & (df_ab['gender'] == "Female")].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
# USA
cspend_a_USA = df_ab[(df_ab['conversion'] == 1) & (df_ab['test_group'] == "A: control") & (df_ab['country_name'] == 'USA')].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
cspend_b_USA = df_ab[(df_ab['conversion'] == 1) & (df_ab['test_group'] == "B: treatment") & (df_ab['country_name'] == 'USA')].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']

# anglophone
cspend_a_anglo = df_ab[(df_ab['conversion'] == 1) & (df_ab['test_group'] == "A: control") & (df_ab['country_name'].isin(['USA', 'United Kingdom', 'Canada', 'Australia']))].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
cspend_b_anglo = df_ab[(df_ab['conversion'] == 1) & (df_ab['test_group'] == "B: treatment") & (df_ab['country_name'].isin(['USA', 'United Kingdom', 'Canada', 'Australia']))].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
# anglo_male
cspend_a_anglo_m = df_ab[(df_ab['conversion'] == 1) & (df_ab['test_group'] == "A: control") & (df_ab['gender'] == "Male") & (df_ab['country_name'].isin(['USA', 'United Kingdom', 'Canada', 'Australia']))].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
cspend_b_anglo_m = df_ab[(df_ab['conversion'] == 1) & (df_ab['test_group'] == "B: treatment") & (df_ab['gender'] == "Male") & (df_ab['country_name'].isin(['USA', 'United Kingdom', 'Canada', 'Australia']))].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
# anglo_female
cspend_a_anglo_f = df_ab[(df_ab['conversion'] == 1) & (df_ab['test_group'] == "A: control") & (df_ab['gender'] == "Female") & (df_ab['country_name'].isin(['USA', 'United Kingdom', 'Canada', 'Australia']))].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
cspend_b_anglo_f = df_ab[(df_ab['conversion'] == 1) & (df_ab['test_group'] == "B: treatment") & (df_ab['gender'] == "Female") & (df_ab['country_name'].isin(['USA', 'United Kingdom', 'Canada', 'Australia']))].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']

# non anglophone
cspend_a_non_anglo = df_ab[(df_ab['conversion'] == 1) & (df_ab['test_group'] == "A: control") & (df_ab['country_name'].isin(['Brazil', 'Mexico', 'Germany', 'Turkiye', 'France', 'Spain']))].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
cspend_b_non_anglo = df_ab[(df_ab['conversion'] == 1) & (df_ab['test_group'] == "B: treatment") & (df_ab['country_name'].isin(['Brazil', 'Mexico', 'Germany', 'Turkiye', 'France', 'Spain']))].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
# non_anglo_male
cspend_a_non_anglo_m = df_ab[(df_ab['conversion'] == 1) & (df_ab['test_group'] == "A: control") & (df_ab['gender'] == "Male") & (df_ab['country_name'].isin(['Brazil', 'Mexico', 'Germany', 'Turkiye', 'France', 'Spain']))].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
cspend_b_non_anglo_m = df_ab[(df_ab['conversion'] == 1) & (df_ab['test_group'] == "B: treatment") & (df_ab['gender'] == "Male") & (df_ab['country_name'].isin(['Brazil', 'Mexico', 'Germany', 'Turkiye', 'France', 'Spain']))].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
# non_anglo_female
cspend_a_non_anglo_f = df_ab[(df_ab['conversion'] == 1) & (df_ab['test_group'] == "A: control") & (df_ab['gender'] == "Female") & (df_ab['country_name'].isin(['Brazil', 'Mexico', 'Germany', 'Turkiye', 'France', 'Spain']))].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
cspend_b_non_anglo_f = df_ab[(df_ab['conversion'] == 1) & (df_ab['test_group'] == "B: treatment") & (df_ab['gender'] == "Female") & (df_ab['country_name'].isin(['Brazil', 'Mexico', 'Germany', 'Turkiye', 'France', 'Spain']))].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']

# brazil
cspend_a_bra = df_ab[(df_ab['conversion'] == 1) & (df_ab['test_group'] == "A: control") & (df_ab['country_name']=='Brazil')].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
cspend_b_bra = df_ab[(df_ab['conversion'] == 1) & (df_ab['test_group'] == "B: treatment") & (df_ab['country_name']=='Brazil')].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
# mexico
cspend_a_mex = df_ab[(df_ab['conversion'] == 1) & (df_ab['test_group'] == "A: control") & (df_ab['country_name']=='Mexico')].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
cspend_b_mex = df_ab[(df_ab['conversion'] == 1) & (df_ab['test_group'] == "B: treatment") & (df_ab['country_name']=='Mexico')].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
# germany
cspend_a_ger = df_ab[(df_ab['conversion'] == 1) & (df_ab['test_group'] == "A: control") & (df_ab['country_name']=='Germany')].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
cspend_b_ger = df_ab[(df_ab['conversion'] == 1) & (df_ab['test_group'] == "B: treatment") & (df_ab['country_name']=='Germany')].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
# turkiye
cspend_a_tur = df_ab[(df_ab['conversion'] == 1) & (df_ab['test_group'] == "A: control") & (df_ab['country_name']=='Turkiye')].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
cspend_b_tur = df_ab[(df_ab['conversion'] == 1) & (df_ab['test_group'] == "B: treatment") & (df_ab['country_name']=='Turkiye')].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
# france
cspend_a_fra = df_ab[(df_ab['conversion'] == 1) & (df_ab['test_group'] == "A: control") & (df_ab['country_name']=='France')].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
cspend_b_fra = df_ab[(df_ab['conversion'] == 1) & (df_ab['test_group'] == "B: treatment") & (df_ab['country_name']=='France')].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
# uk
cspend_a_uk = df_ab[(df_ab['conversion'] == 1) & (df_ab['test_group'] == "A: control") & (df_ab['country_name']=="United Kingdom")].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
cspend_b_uk = df_ab[(df_ab['conversion'] == 1) & (df_ab['test_group'] == "B: treatment") & (df_ab['country_name']=="United Kingdom")].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
# spain
cspend_a_esp = df_ab[(df_ab['conversion'] == 1) & (df_ab['test_group'] == "A: control") & (df_ab['country_name']=='Spain')].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
cspend_b_esp = df_ab[(df_ab['conversion'] == 1) & (df_ab['test_group'] == "B: treatment") & (df_ab['country_name']=='Spain')].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
# canada
cspend_a_can = df_ab[(df_ab['conversion'] == 1) & (df_ab['test_group'] == "A: control") & (df_ab['country_name']=='Canada')].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
cspend_b_can = df_ab[(df_ab['conversion'] == 1) & (df_ab['test_group'] == "B: treatment") & (df_ab['country_name']=='Canada')].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
# australia
cspend_a_aus = df_ab[(df_ab['conversion'] == 1) & (df_ab['test_group'] == "A: control") & (df_ab['country_name']=='Australia')].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
cspend_b_aus = df_ab[(df_ab['conversion'] == 1) & (df_ab['test_group'] == "B: treatment") & (df_ab['country_name']=='Australia')].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']

# run
t_test(cspend_a_android, cspend_b_android, 0.05, False, False, "$/conversion", 'android', MDE_med, 0.80)
t_test(cspend_a_ios, cspend_b_ios, 0.05, False, False, "$/conversion", 'iOS', MDE_med, 0.80)
t_test(cspend_a_male, cspend_b_male, 0.05, False, False, "$/conversion", 'male', MDE_med, 0.80)
t_test(cspend_a_female, cspend_b_female, 0.05, False, False, "$/conversion", 'female', MDE_med, 0.80)
t_test(cspend_a_USA, cspend_b_USA, 0.05, False, False, "$/conversion", 'USA', MDE_med, 0.80)

t_test(cspend_a_anglo, cspend_b_anglo, 0.05, False, False, "$/conversion", 'anglophone', MDE_med, 0.80)
t_test(cspend_a_anglo_m, cspend_b_anglo_m, 0.05, False, False, "$/conversion", 'anglo_male', MDE_med, 0.80)
t_test(cspend_a_anglo_f, cspend_b_anglo_f, 0.05, False, False, "$/conversion", 'anglo_female', MDE_med, 0.80)

t_test(cspend_a_non_anglo, cspend_b_non_anglo, 0.05, False, False, "$/conversion", 'non_anglophone', MDE_med, 0.80)
t_test(cspend_a_non_anglo_m, cspend_b_non_anglo_m, 0.05, False, False, "$/conversion", 'non_anglo_male', MDE_med, 0.80)
t_test(cspend_a_non_anglo_f, cspend_b_non_anglo_f, 0.05, False, False, "$/conversion", 'non_anglo_female', MDE_med, 0.80)

t_test(cspend_a_bra, cspend_b_bra, 0.05, False, False, "$/conversion", 'brazil', MDE_high, 0.80)
t_test(cspend_a_mex, cspend_b_mex, 0.05, False, False, "$/conversion", 'mexico', MDE_high, 0.80)
t_test(cspend_a_ger, cspend_b_ger, 0.05, False, False, "$/conversion", 'germany', MDE_high, 0.80)
t_test(cspend_a_tur, cspend_b_tur, 0.05, False, False, "$/conversion", 'turkiye', MDE_high, 0.80)
t_test(cspend_a_fra, cspend_b_fra, 0.05, False, False, "$/conversion", 'france', MDE_high, 0.80)
t_test(cspend_a_uk, cspend_b_uk, 0.05, False, False, "$/conversion", 'uk', MDE_high, 0.80)
t_test(cspend_a_esp, cspend_b_fra, 0.05, False, False, "$/conversion", 'spain', MDE_high, 0.80)
t_test(cspend_a_can, cspend_b_can, 0.05, False, False, "$/conversion", 'canada', MDE_high, 0.80)
t_test(cspend_a_aus, cspend_b_aus, 0.05, False, False, "$/conversion", 'australia', MDE_high, 0.80)





***********************************************Start************************************************
Unpooled T-Test (differences in means): $/conversion android scope
Insufficient sample size for observed relative change, which requires combined sample size of 38416
Combined sample size of 130973 required to detect a relative change as small as 0.1 (at statistical power of 0.8, significance level of 0.05)

EXPORT FOR VISUALIZATION:
Metric        Scope    Test                    Conclusion      alpha    Z*/T*       SE       MOE    sample_stat    test-stat     p-value  CI                        Lower_limit    Upper_limit    A_#    B_#    A_value    B_value     A_mu     B_mu    A_std    B_std    cohen_d  substantiveness      relative_change    min_req_sample_size    target_MDE    MDE_equiv_sample_size  substance_test    current_sample
------------  -------  ----------------------  ------------  -------  -------  -------  --------  -------------  -----------  ----------  -----------------

* Converted t-test: other

In [51]:
# anglo_android
cspend_a_anglo_and = df_ab[(df_ab['conversion'] == 1) & (df_ab['test_group'] == "A: control") & (df_ab['device'] == "Android") & (df_ab['country_name'].isin(['USA', 'United Kingdom', 'Canada', 'Australia']))].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
cspend_b_anglo_and = df_ab[(df_ab['conversion'] == 1) & (df_ab['test_group'] == "B: treatment") & (df_ab['device'] == "Android") & (df_ab['country_name'].isin(['USA', 'United Kingdom', 'Canada', 'Australia']))].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
# anglo_ios
cspend_a_anglo_ios = df_ab[(df_ab['conversion'] == 1) & (df_ab['test_group'] == "A: control") & (df_ab['device'] == "iOS") & (df_ab['country_name'].isin(['USA', 'United Kingdom', 'Canada', 'Australia']))].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
cspend_b_anglo_ios = df_ab[(df_ab['conversion'] == 1) & (df_ab['test_group'] == "B: treatment") & (df_ab['device'] == "iOS") & (df_ab['country_name'].isin(['USA', 'United Kingdom', 'Canada', 'Australia']))].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
# female_android
cspend_a_female_and = df_ab[(df_ab['conversion'] == 1) & (df_ab['test_group'] == "A: control") & (df_ab['device'] == "Android")  & (df_ab['gender'] == "Female")].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
cspend_b_female_and = df_ab[(df_ab['conversion'] == 1) & (df_ab['test_group'] == "B: treatment") & (df_ab['device'] == "Android")  & (df_ab['gender'] == "Female")].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
# female_ios
cspend_a_female_ios = df_ab[(df_ab['conversion'] == 1) & (df_ab['test_group'] == "A: control") & (df_ab['device'] == "iOS")  & (df_ab['gender'] == "Female")].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
cspend_b_female_ios = df_ab[(df_ab['conversion'] == 1) & (df_ab['test_group'] == "B: treatment") & (df_ab['device'] == "iOS")  & (df_ab['gender'] == "Female")].pivot_table(values='spend_USD', index='user_id', aggfunc='mean', fill_value=0)['spend_USD']
# run
t_test(cspend_a_anglo_and, cspend_b_anglo_and, 0.05, False, False, "$/conversion", 'anglo_android', MDE_med, 0.80)
t_test(cspend_a_anglo_ios, cspend_b_anglo_ios, 0.05, False, False, "$/conversion", 'anglo_ios', MDE_med, 0.80)
t_test(cspend_a_female_and, cspend_b_female_and, 0.05, False, False, "$/conversion", 'female_android', MDE_med, 0.80)
t_test(cspend_a_female_ios, cspend_b_female_ios, 0.05, False, False, "$/conversion", 'female_ios', MDE_med, 0.80)

***********************************************Start************************************************
Unpooled T-Test (differences in means): $/conversion anglo_android scope
Insufficient sample size for observed relative change, which requires combined sample size of 123758
Combined sample size of 286697 required to detect a relative change as small as 0.1 (at statistical power of 0.8, significance level of 0.05)

EXPORT FOR VISUALIZATION:
Metric        Scope          Test                    Conclusion           alpha    Z*/T*       SE      MOE    sample_stat    test-stat    p-value  CI                       Lower_limit    Upper_limit    A_#    B_#    A_value    B_value     A_mu     B_mu    A_std    B_std    cohen_d  substantiveness      relative_change    min_req_sample_size    target_MDE    MDE_equiv_sample_size  substance_test    current_sample
------------  -------------  ----------------------  -----------------  -------  -------  -------  -------  -------------  -----------  ----