In [1]:
import pandas as pd
from scipy.stats import ttest_ind_from_stats
from statsmodels.stats.proportion import proportions_ztest

# Read the general KPI results, discard rows labelled 'holdoutgroup_braze',
# and split the audiencecohort label on "_" into 'Targetable' (first part)
# and 'Cohort' (second part) columns before dropping the original label.
sig_df = (
    pd.read_csv("../Data/general_results.csv")
    .loc[lambda frame: frame.audiencecohort != 'holdoutgroup_braze']
    .pipe(
        lambda frame: frame.merge(
            frame.audiencecohort.str.split("_", expand=True)
                 .rename(columns={0: 'Targetable', 1: 'Cohort'}),
            left_index=True,
            right_index=True,
        )
    )
    .drop(columns='audiencecohort')
    .reset_index(drop=True)
)

def audience_cohort_parsing(row):
    """Translate a raw cohort code into its display label.

    'paiddis' maps to 'Paid Display' and 'paidsoc' maps to 'Paid Social';
    every other value (including missing ones) maps to 'All Paid Media'.
    """
    known_codes = (
        ('paiddis', 'Paid Display'),
        ('paidsoc', 'Paid Social'),
    )
    for code, label in known_codes:
        if row == code:
            return label
    # Catch-all: anything that is not an explicitly listed code.
    return 'All Paid Media'

def targetable_bool(row):
    """Return 'Targetable' for the code 'targetablegroup', otherwise 'Holdout'."""
    return 'Targetable' if row == 'targetablegroup' else 'Holdout'

# Replace the raw codes in both columns with their display labels.
sig_df = sig_df.assign(
    Cohort=sig_df['Cohort'].apply(audience_cohort_parsing),
    Targetable=sig_df['Targetable'].apply(targetable_bool),
)

def kpi_ttesting(dataframe, kpi_name):
    """Print a KPI's value for each arm of every cohort.

    Prints ``kpi_name`` once, then for each group of ``dataframe`` keyed by
    the 'Cohort' column prints the cohort name, the first ``kpi_name`` value
    among rows whose 'Targetable' column equals 'Targetable', the first value
    among the remaining rows, and a blank-line separator. No test statistic
    is computed here; the function only prints.
    """
    print(kpi_name)
    for cohort_label, cohort_rows in dataframe.groupby('Cohort'):
        print(cohort_label)
        arm = cohort_rows['Targetable']
        # Targetable arm first, then every row that is not 'Targetable'.
        for arm_mask in (arm == 'Targetable', arm != 'Targetable'):
            print(cohort_rows[arm_mask][kpi_name].values[0])
        print('\n\n')

# Repertoire Significance

In [16]:
# Per cohort, run a two-sample t-test on 'repertoire' from summary statistics
# (mean, standard deviation, cohort size), Targetable arm vs. the other arm.
# NOTE(review): ttest_ind_from_stats is called with its default equal_var
# setting — confirm that is the intended test for these arms.
for name, group in sig_df.groupby('Cohort'):
    print(name)
    targetable_rows = group[group['Targetable'] == 'Targetable']
    other_rows = group[group['Targetable'] != 'Targetable']
    result = ttest_ind_from_stats(
        mean1=targetable_rows['repertoire'].values[0],
        std1=targetable_rows['repertoire_stddev'].values[0],
        nobs1=targetable_rows['cohort_size'].values[0],
        mean2=other_rows['repertoire'].values[0],
        std2=other_rows['repertoire_stddev'].values[0],
        nobs2=other_rows['cohort_size'].values[0],
    )
    print(result)
    print('\n\n')

All Paid Media
Ttest_indResult(statistic=-79.13608200678196, pvalue=0.0)



Paid Display
Ttest_indResult(statistic=-81.28678071833545, pvalue=0.0)



Paid Social
Ttest_indResult(statistic=-79.28019027859482, pvalue=0.0)





# Usage Significance

In [19]:
# Per cohort, run a two-sample t-test on 'usage' from summary statistics
# (mean, standard deviation, cohort size), Targetable arm vs. the other arm.
# NOTE(review): ttest_ind_from_stats is called with its default equal_var
# setting — confirm that is the intended test for these arms.
for name, group in sig_df.groupby('Cohort'):
    print(name)
    targetable_rows = group[group['Targetable'] == 'Targetable']
    other_rows = group[group['Targetable'] != 'Targetable']
    result = ttest_ind_from_stats(
        mean1=targetable_rows['usage'].values[0],
        std1=targetable_rows['usage_stdv'].values[0],
        nobs1=targetable_rows['cohort_size'].values[0],
        mean2=other_rows['usage'].values[0],
        std2=other_rows['usage_stdv'].values[0],
        nobs2=other_rows['cohort_size'].values[0],
    )
    print(result)
    print('\n\n')

All Paid Media
Ttest_indResult(statistic=1.8580595252576375, pvalue=0.06316057543542367)



Paid Display
Ttest_indResult(statistic=-1.3887651715816656, pvalue=0.1649041833062342)



Paid Social
Ttest_indResult(statistic=2.876167574091575, pvalue=0.00402536334480449)





# Save Rate

In [12]:
# Read the first four columns of the save-rate results, drop rows with any
# missing value, discard rows labelled 'holdoutgroup_braze', split the
# audiencecohort label on "_" into 'Targetable' / 'Cohort', and replace the
# raw codes in those two columns with display labels.
save_df = (
    pd.read_csv('../Data/save_rate_results.csv', usecols=[0, 1, 2, 3])
    .dropna()
    .loc[lambda frame: frame.audiencecohort != 'holdoutgroup_braze']
    .pipe(
        lambda frame: frame.merge(
            frame.audiencecohort.str.split("_", expand=True)
                 .rename(columns={0: 'Targetable', 1: 'Cohort'}),
            left_index=True,
            right_index=True,
        )
    )
    .drop(columns='audiencecohort')
    .reset_index(drop=True)
    .assign(
        Cohort=lambda frame: frame['Cohort'].apply(audience_cohort_parsing),
        Targetable=lambda frame: frame['Targetable'].apply(targetable_bool),
    )
)

In [18]:
# Per cohort, run a two-proportion z-test on saves / save_rate_denom,
# Targetable arm vs. the other arm, and print only the p-value.
for name, group in save_df.groupby('Cohort'):
    print(name)
    targetable_rows = group[group['Targetable'] == 'Targetable']
    other_rows = group[group['Targetable'] != 'Targetable']
    save_counts = [targetable_rows.saves.values[0], other_rows.saves.values[0]]
    denominators = [targetable_rows.save_rate_denom.values[0], other_rows.save_rate_denom.values[0]]
    # proportions_ztest returns a pair; index 1 is what gets reported here.
    p_value = proportions_ztest(count=save_counts, nobs=denominators)[1]
    print('p-value:', p_value)


All Paid Media
saves:  [1171050.0, 35841.0]
populations:  [5624357.0, 205301.0]
p-value: 9.511100476883994e-299
Paid Display
saves:  [1208084.0, 37164.0]
populations:  [5830882.0, 207066.0]
p-value: 5.940930098639502e-206
Paid Social
saves:  [1208214.0, 37034.0]
populations:  [5831423.0, 206525.0]
p-value: 7.892945965711378e-208


# Winback Rate

In [19]:
# Read the winback-rate results, discard rows labelled 'holdoutgroup_braze',
# split the audiencecohort label on "_" into 'Targetable' / 'Cohort', and
# replace the raw codes in those two columns with display labels.
winback_df = (
    pd.read_csv("../Data/winback_rate_results.csv")
    .loc[lambda frame: frame.audiencecohort != 'holdoutgroup_braze']
    .pipe(
        lambda frame: frame.merge(
            frame.audiencecohort.str.split("_", expand=True)
                 .rename(columns={0: 'Targetable', 1: 'Cohort'}),
            left_index=True,
            right_index=True,
        )
    )
    .drop(columns='audiencecohort')
    .reset_index(drop=True)
    .assign(
        Cohort=lambda frame: frame['Cohort'].apply(audience_cohort_parsing),
        Targetable=lambda frame: frame['Targetable'].apply(targetable_bool),
    )
)

In [23]:
# Per cohort, print the winback counts and lapsed-user populations for the
# Targetable arm and the other arm, then the p-value of a two-proportion
# z-test on those numbers.
for name, group in winback_df.groupby('Cohort'):
    print(name)
    targetable_rows = group[group['Targetable'] == 'Targetable']
    other_rows = group[group['Targetable'] != 'Targetable']
    winback_counts = [targetable_rows.winback_users.values[0], other_rows.winback_users.values[0]]
    print("winbacks: ", winback_counts)
    lapsed_counts = [targetable_rows.lapsed_users.values[0], other_rows.lapsed_users.values[0]]
    print("populations: ", lapsed_counts)
    # proportions_ztest returns a pair; index 1 is what gets reported here.
    p_value = proportions_ztest(count=winback_counts, nobs=lapsed_counts)[1]
    print('p-value:', p_value)


All Paid Media
winbacks:  [1256518, 21819]
populations:  [7096150, 160300]
p-value: 0.0
Paid Display
winbacks:  [1278823, 22356]
populations:  [7255425, 160033]
p-value: 0.0
Paid Social
winbacks:  [1278874, 22305]
populations:  [7256183, 159275]
p-value: 5.011262883485263e-309
