Check whether the first-transaction results are statistically significant

In [1]:
# import modules 

import os
import sys
import numpy as np
import pandas as pd
import scipy.stats as stats

from statsmodels.stats.proportion import proportions_ztest
from statsmodels.stats.multitest import multipletests

In [2]:
# Raise pandas' display limits (columns, column width, rows) to 500 each so
# rendered frames are not truncated.
pd.set_option("display.max_columns", 500)
pd.set_option("display.max_colwidth", 500)
pd.set_option("display.max_rows", 500)

In [4]:
# Strategy:

# 1) run a chi-squared test of independence across all four segments
# 2) run a two-proportion z-test for each pair of segments
# 3) adjust the pairwise p-values for multiple comparisons (Bonferroni)

In [5]:
# Observed counts per experiment segment. Both lists share the same order,
# which the pairwise tests further down index as 0 = c, 1 = v2, 2 = v3, 3 = v4.
sample_sizes = [
    138538,  # c
    138517,  # v2
    137562,  # v3
    138475,  # v4
]
conversions = [
    130,  # c
    162,  # v2
    169,  # v3
    164,  # v4
]

In [7]:
# Non-converting observations per segment: segment size minus its conversions.
not_conversions = [size - converted for size, converted in zip(sample_sizes, conversions)]

In [8]:
# Display the per-segment non-conversion counts as a quick sanity check.
not_conversions

[138408, 138355, 137393, 138311]

In [9]:
# One (conversions, non-conversions) pair per segment -- these become the rows
# of the contingency table. The variable name's spelling is left as-is because
# the following cell reads it.
contigency = list(zip(conversions, not_conversions))

In [10]:
# Observed-count table as an array: one row per segment,
# columns = (conversions, non-conversions).
T = np.asarray(contigency)

In [11]:
# Display the contingency table (rows = segments, columns = conversions / non-conversions).
T

array([[   130, 138408],
       [   162, 138355],
       [   169, 137393],
       [   164, 138311]])

In [12]:
# Chi-squared test of independence between segment and conversion outcome.
# The displayed tuple is (statistic, p-value, degrees of freedom, expected counts).
# NOTE: `correction` (Yates' continuity correction) is only applied by SciPy
# when the table has 1 degree of freedom; this 4x2 table has 3, so the flag
# has no effect on the result here.
stats.chi2_contingency(observed=T, correction=True)

(6.244363060071456,
 0.10030814068159973,
 3,
 array([[   156.54945289, 138381.45054711],
        [   156.52572266, 138360.47427734],
        [   155.44656224, 137406.55343776],
        [   156.47826221, 138318.52173779]]))

In [13]:
# Same chi-squared test with Yates' continuity correction switched off.
# SciPy only applies that correction when the table has 1 degree of freedom;
# this 4x2 table has 3, so the output is identical to the correction=True call.
stats.chi2_contingency(observed=T, correction=False)

(6.244363060071456,
 0.10030814068159973,
 3,
 array([[   156.54945289, 138381.45054711],
        [   156.52572266, 138360.47427734],
        [   155.44656224, 137406.55343776],
        [   156.47826221, 138318.52173779]]))

With p ≈ 0.10 (above the 0.05 threshold), the chi-squared test suggests that the differences in conversion rate between the segments are NOT statistically significant

In [14]:
def _pairwise_pvalue(first, second):
    """Return the two-sided two-proportion z-test p-value for two segments.

    Parameters
    ----------
    first, second : int
        Positions of the two segments in ``conversions`` / ``sample_sizes``.

    Returns
    -------
    float
        The p-value (second element of the ``proportions_ztest`` result).
    """
    _, p_value = proportions_ztest(
        count=[conversions[first], conversions[second]],
        nobs=[sample_sizes[first], sample_sizes[second]],
        alternative='two-sided',
    )
    return p_value


# All six unordered pairs of the four segments (0 = c, 1 = v2, 2 = v3, 3 = v4).
# Keeping the index pairs in one place avoids the copy-paste index slips that
# six hand-written calls invite.
p_c_vs_v2 = _pairwise_pvalue(0, 1)
p_c_vs_v3 = _pairwise_pvalue(0, 2)
p_c_vs_v4 = _pairwise_pvalue(0, 3)
p_v2_vs_v3 = _pairwise_pvalue(1, 2)
p_v2_vs_v4 = _pairwise_pvalue(1, 3)
p_v3_vs_v4 = _pairwise_pvalue(2, 3)

In [15]:
# Bonferroni-adjust the six pairwise p-values for multiple comparisons.
# The displayed tuple is (reject flags, adjusted p-values, Sidak-corrected alpha,
# Bonferroni-corrected alpha). alpha=0.05 is the library default, spelled out
# here so the significance threshold is visible.
multipletests(
    pvals=[
        p_c_vs_v2,
        p_c_vs_v3,
        p_c_vs_v4,
        p_v2_vs_v3,
        p_v2_vs_v4,
        p_v3_vs_v4,
    ],
    alpha=0.05,
    method='bonferroni',
)

(array([False, False, False, False, False, False]),
 array([0.36480431, 0.12275362, 0.28095308, 1.        , 1.        ,
        1.        ]),
 0.008512444610847103,
 0.008333333333333333)

In [16]:
# None of the six pairwise comparisons is statistically significant after Bonferroni correction (every adjusted p-value is above 0.05)