In [76]:
import sys, json, requests
import numpy as np

sys.path.append('../../scripts/')
from s3_support import *

from datetime import datetime, timedelta

# 1. average amounts and sample sizes

In [2]:
q = '''select
            id, form, org, amount, date
        from transactions
        where
            status='A' and
            isexpresscheckout=True and
            (recurring=0 or recurring_origin=1)'''
trans_ec = redshift_query_read(q, schema='production')

In [3]:
print("{:,} transactions; {:%Y-%m-%d} to {:%Y-%m-%d}".format(len(trans_ec), trans_ec['date'].min(), trans_ec['date'].max()))
print("{:,} forms".format(len(trans_ec['form'].unique())))
print("{:,} orgs".format(len(trans_ec['org'].unique())))
print("${:.2f} mean amount".format(trans_ec['amount'].mean()))
print("${:.2f} median amount".format(trans_ec['amount'].median()))

75,145 transactions; 2023-04-20 to 2024-12-11
2,001 forms
644 orgs
$121.99 mean amount
$51.95 median amount


In [4]:
q = '''select
            count(id) as trans,
            avg(amount) as mean_amount
        from transactions
        where
            status='A' and
            year>=2023 and
            (recurring=0 or recurring_origin=1) and
            source in ('mobile', 'don_form', 'sms') and
            isexpresscheckout=False'''
trans = redshift_query_read(q, schema='production')
q = '''select median(amount) as median_amount 
        from transactions 
        where 
            status='A' and 
            year>=2023 and
            (recurring=0 or recurring_origin=1) and
            source in ('mobile', 'don_form', 'sms') and
            isexpresscheckout=False'''
trans_mdn = redshift_query_read(q, schema='production')

print("{:.2f}% trans used express checkout".format((len(trans_ec) / trans['trans'].iloc[0]) * 100.))
print("${:.2f} mean amount (not express checkout)".format(trans['mean_amount'].iloc[0]))
print("${:.2f} median amount (not express checkout)".format(trans_mdn['median_amount'].iloc[0]))

2.87% trans used express checkout
$202.79 mean amount (not express checkout)
$51.50 median amount (not express checkout)


In [5]:
q = '''select
            date_trunc('month', date) as month,
            count(distinct(case when isexpresscheckout=1 then form else null end)) as ec_forms,
            count(case when isexpresscheckout=1 then id else null end) as ec_trans,
            count(case when isexpresscheckout=0 then id else null end) as nec_trans,
            avg(case when isexpresscheckout=1 then amount else null end) as ec_mean_amount,
            avg(case when isexpresscheckout=0 then amount else null end) as nec_mean_amount
        from transactions
        where
            status='A' and
            year>=2023 and
            (recurring=0 or recurring_origin=1) and
            source in ('mobile', 'don_form', 'sms')
        group by date_trunc('month', date)'''
months = redshift_query_read(q, schema='production')
q = '''select
            date_trunc('month', date) as month,
            median(case when isexpresscheckout=1 then amount else null end) as ec_median_amount
        from transactions
        where
            status='A' and
            year>=2023 and
            (recurring=0 or recurring_origin=1) and
            source in ('mobile', 'don_form', 'sms')
        group by date_trunc('month', date)'''
mdn = redshift_query_read(q, schema='production')
months = months.merge(mdn, on='month')
q = '''select
            date_trunc('month', date) as month,
            median(case when isexpresscheckout=0 then amount else null end) as nec_median_amount
        from transactions
        where
            status='A' and
            year>=2023 and
            (recurring=0 or recurring_origin=1) and
            source in ('mobile', 'don_form', 'sms')
        group by date_trunc('month', date)'''
mdn = redshift_query_read(q, schema='production')
months = months.merge(mdn, on='month')
months['ec_perc'] = months['ec_trans'] / (months['ec_trans'] + months['nec_trans'])
months.sort_values('month', ascending=True)[['month', 'ec_forms', 'ec_perc', 'ec_mean_amount', 'nec_mean_amount', 'ec_median_amount', 'nec_median_amount']]

Unnamed: 0,month,ec_forms,ec_perc,ec_mean_amount,nec_mean_amount,ec_median_amount,nec_median_amount
18,2023-01-01,0,0.0,,193.343099,,51.75
1,2023-02-01,0,0.0,,183.48447,,51.87
10,2023-03-01,0,0.0,,188.937387,,51.5
20,2023-04-01,11,0.00037,85.196757,177.94281,53.0,50.0
19,2023-05-01,40,0.004175,175.525041,186.719385,51.5,50.0
13,2023-06-01,46,0.007506,70.822114,200.994223,30.0,51.5
23,2023-07-01,43,0.003987,149.391754,207.658499,51.5,50.75
6,2023-08-01,53,0.003774,111.356409,222.003082,51.75,52.0
5,2023-09-01,68,0.008631,149.831958,196.604357,51.97,51.5
7,2023-10-01,78,0.019411,102.884668,177.080484,50.0,50.0


# 2. conversion

In [6]:
q = '''select
            form,
            max(week) as max_date_traff,
            sum(views) as pageviews,
            sum(case when devicecategory='mobile' then views else null end) as mobile_pageviews
        from ga4_traffic_weekly_device
        where
            form!=0 and
            week>='2023-04-01' 
        group by form'''
ga4_weekly = redshift_query_read(q, schema='production')
print("ga4_traffic_weekly_device max date: {}".format(ga4_weekly['max_date_traff'].max()))

q = '''select
            form,
            max(date) as max_date_traff,
            sum(views) as pageviews,
            sum(case when devicecategory='mobile' then views else null end) as mobile_pageviews
        from ga
        where
            form!=0 and
            date>='2023-04-01' 
        group by form'''
ga = redshift_query_read(q, schema='production')
print("ga max date: {}".format(ga['max_date_traff'].max()))

ga4_traffic_weekly_device max date: 2024-07-22 00:00:00
ga max date: 2024-12-09 00:00:00


In [7]:
mrgd_traff = ga.merge(ga4_weekly, on='form')
mrgd_traff['pageviews'] = mrgd_traff[[c for c in mrgd_traff.columns if c.startswith('pageviews_')]].max(axis=1)
mrgd_traff['mobile_pageviews'] = mrgd_traff[[c for c in mrgd_traff.columns if c.startswith('mobile_pageviews_')]].max(axis=1)
traff = mrgd_traff[['form', 'pageviews', 'mobile_pageviews']]

In [8]:
print("{:,} rows traffic".format(len(traff)))

10,037 rows traffic


In [9]:
q = '''select
            form,
            max(date) as max_date_trans,
            count(case when recurring=0 or recurring_origin=1 then id else null end) as transactions,
            sum(isexpresscheckout::int) as ec_transactions,
            count(case when platform in ('iPhone', 'Android') then id else null end) as mobile_transactions
        from transactions
        where
            status='A' and
            date>='2023-04-01' and
            source not in ('p2p', 'vt', 'mobilevt')
        group by form'''
trans = redshift_query_read(q, schema='production')

In [10]:
print("{:,} rows transactions".format(len(trans)))
print("{} max date".format(trans['max_date_trans'].max()))

32,445 rows transactions
2024-12-11 00:00:00 max date


In [11]:
mrgd = traff.merge(trans, on='form')

# drop forms w/out page views as they are not relevant to comparison between EC/NEC
mrgd = mrgd[mrgd['pageviews']>0]

# fill NA for mobile pageviews w/ 0
mrgd['mobile_pageviews'] = mrgd['mobile_pageviews'].fillna(0)

mrgd['ec_ratio'] = mrgd['ec_transactions'] / mrgd['transactions']
mrgd['conversion'] = mrgd['transactions'] / mrgd['pageviews']
mrgd['mobile_conv'] = mrgd['mobile_transactions'] / mrgd['mobile_pageviews']

In [12]:
mrgd.isna().sum()

form                     0
pageviews                0
mobile_pageviews         0
max_date_trans           0
transactions             0
ec_transactions          0
mobile_transactions      0
ec_ratio                33
conversion               0
mobile_conv            227
dtype: int64

In [13]:
print("{:,} forms".format(len(mrgd)))
print("Mean express checkout adoption per form: {:.2f}%".format(mrgd[mrgd['ec_transactions']>0]['ec_ratio'].mean() * 100.))
print("Median express checkout adoption per form: {:.2f}%".format(mrgd[mrgd['ec_transactions']>0]['ec_ratio'].median() * 100.))

7,637 forms
Mean express checkout adoption: 35.12%
Median express checkout adoption: 23.73%


In [14]:
# drop observations with > 100% conversion because this is still a thing
mrgd = mrgd[mrgd['conversion']<1.]

In [15]:
ec = mrgd[mrgd['ec_transactions']>0]
ec_conv = (ec['transactions'].sum() / ec['pageviews'].sum()) * 100.
ec_mobile_conv = (ec['mobile_transactions'].sum() / ec['mobile_pageviews'].sum()) * 100.

nec = mrgd[mrgd['ec_transactions']==0]
nec_conv = (nec['transactions'].sum() / nec['pageviews'].sum()) * 100.
nec_mobile_conv = (nec['mobile_transactions'].sum() / nec['mobile_pageviews'].sum()) * 100.

print("Cumulative conversion:")
print("-"*40)
print("Express checkout forms conversion: {:.2f}%; mobile: {:.2f}%".format(ec_conv, ec_mobile_conv))
print("Non express checkout forms conversion: {:.2f}%; mobile: {:.2f}%".format(nec_conv, nec_mobile_conv))

Cumulative conversion:
----------------------------------------
Express checkout forms conversion: 10.12%; mobile: 7.86%
Non express checkout forms conversion: 6.52%; mobile: 5.02%


In [102]:
print("Aggregate conversion:")
print("-"*40)

print("Express checkout forms conversion: {:.2f}% mean; {:.2f}% median".format(mrgd[(mrgd['ec_transactions']>0)&(mrgd['transactions']<mrgd['pageviews'])]['conversion'].replace([np.inf, -np.inf], np.nan).dropna().mean() * 100., mrgd[(mrgd['ec_transactions']>0)&(mrgd['transactions']<mrgd['pageviews'])]['conversion'].median() * 100.))
print("\tmobile: {:.2f}% mean; {:.2f}% median".format(mrgd[(mrgd['ec_transactions']>0)&(mrgd['transactions']<mrgd['pageviews'])]['mobile_conv'].replace([np.inf, -np.inf], np.nan).dropna().mean() * 100., mrgd[(mrgd['ec_transactions']>0)&(mrgd['transactions']<mrgd['pageviews'])]['mobile_conv'].median() * 100.))

print("Non express checkout forms conversion: {:.2f}% mean; {:.2f}% median".format(mrgd[(mrgd['ec_transactions']==0)&(mrgd['transactions']<mrgd['pageviews'])]['conversion'].replace([np.inf, -np.inf], np.nan).dropna().mean() * 100., mrgd[(mrgd['ec_transactions']==0)&(mrgd['transactions']<mrgd['pageviews'])]['conversion'].median() * 100.))
print("\tmobile: {:.2f}% mean; {:.2f}% median".format(mrgd[(mrgd['ec_transactions']==0)&(mrgd['transactions']<mrgd['pageviews'])]['mobile_conv'].replace([np.inf, -np.inf], np.nan).dropna().mean() * 100., mrgd[(mrgd['ec_transactions']==0)&(mrgd['transactions']<mrgd['pageviews'])]['mobile_conv'].median() * 100.))

Aggregate conversion:
----------------------------------------
Express checkout forms conversion: 17.87% mean; 11.08% median
	mobile: 25.12% mean; 13.82% median
Non express checkout forms conversion: 13.69% mean; 7.45% median
	mobile: 17.41% mean; 7.50% median


### limiting to forms w/ 400 pageviews (100 per month)

In [17]:
mrgd_400 = mrgd[mrgd['pageviews']>=400]
len(mrgd_400), len(mrgd), len(mrgd_400) / len(mrgd)

(4619, 7274, 0.6350013747594171)

In [18]:
print("Mean express checkout adoption: {:.2f}%".format(mrgd[mrgd['ec_transactions']>0]['ec_ratio'].mean() * 100.))
print("Median express checkout adoption: {:.2f}%".format(mrgd[mrgd['ec_transactions']>0]['ec_ratio'].median() * 100.))

Mean express checkout adoption: 35.89%
Median express checkout adoption: 23.90%


In [19]:
ec = mrgd_400[mrgd_400['ec_transactions']>0]
ec_conv = (ec['transactions'].sum() / ec['pageviews'].sum()) * 100.
ec_mobile_conv = (ec['mobile_transactions'].sum() / ec['mobile_pageviews'].sum()) * 100.

nec = mrgd_400[mrgd_400['ec_transactions']==0]
nec_conv = (nec['transactions'].sum() / nec['pageviews'].sum()) * 100.
nec_mobile_conv = (nec['mobile_transactions'].sum() / nec['mobile_pageviews'].sum()) * 100.

print("Cumulative conversion:")
print("-"*40)
print("Express checkout forms conversion: {:.2f}%; mobile: {:.2f}%".format(ec_conv, ec_mobile_conv))
print("Non express checkout forms conversion: {:.2f}%; mobile: {:.2f}%".format(nec_conv, nec_mobile_conv))

Cumulative conversion:
----------------------------------------
Express checkout forms conversion: 9.93%; mobile: 7.66%
Non express checkout forms conversion: 6.25%; mobile: 4.79%


In [103]:
print("Aggregate conversion:")
print("-"*40)

print("Express checkout forms conversion: {:.2f}% mean; {:.2f}% median".format(mrgd_400[(mrgd_400['ec_transactions']>0)&(mrgd_400['transactions']<mrgd_400['pageviews'])]['conversion'].mean() * 100., mrgd_400[(mrgd_400['ec_transactions']>0)&(mrgd_400['transactions']<mrgd_400['pageviews'])]['conversion'].median() * 100.))
print("\tmobile: {:.2f}% mean; {:.2f}% median".format(mrgd_400[(mrgd_400['ec_transactions']>0)&(mrgd_400['transactions']<mrgd_400['pageviews'])]['mobile_conv'].replace([np.inf, -np.inf], np.nan).dropna().mean() * 100., mrgd_400[(mrgd_400['ec_transactions']>0)&(mrgd_400['transactions']<mrgd_400['pageviews'])]['mobile_conv'].median() * 100.))

print("Non express checkout forms conversion: {:.2f}% mean; {:.2f}% median".format(mrgd_400[(mrgd_400['ec_transactions']==0)&(mrgd_400['transactions']<mrgd_400['pageviews'])]['conversion'].mean() * 100., mrgd_400[(mrgd_400['ec_transactions']==0)&(mrgd_400['transactions']<mrgd_400['pageviews'])]['conversion'].median() * 100.))
print("\tmobile: {:.2f}% mean; {:.2f}% median".format(mrgd_400[(mrgd_400['ec_transactions']==0)&(mrgd_400['transactions']<mrgd_400['pageviews'])]['mobile_conv'].replace([np.inf, -np.inf], np.nan).dropna().mean() * 100., mrgd_400[(mrgd_400['ec_transactions']==0)&(mrgd_400['transactions']<mrgd_400['pageviews'])]['mobile_conv'].median() * 100.))

Aggregate conversion:
----------------------------------------
Express checkout forms conversion: 14.54% mean; 10.26% median
	mobile: 16.44% mean; 10.24% median
Non express checkout forms conversion: 9.35% mean; 5.34% median
	mobile: 9.00% mean; 4.15% median


### removing top 10%

In [21]:
print("len: {:,}".format(len(mrgd)))

no_top_10 = mrgd[mrgd['transactions'].astype(int)<=150]

print("limited len: {:,}".format(len(no_top_10)))
print("{:,} forms w/ 0 pageviews".format(len(no_top_10[no_top_10['pageviews']==0])))

len: 7,274
limited len: 5,558
0 forms w/ 0 pageviews


In [22]:
print("Mean express checkout adoption: {:.2f}%".format(no_top_10[no_top_10['ec_transactions']>0]['ec_ratio'].mean() * 100.))
print("Median express checkout adoption: {:.2f}%".format(no_top_10[no_top_10['ec_transactions']>0]['ec_ratio'].median() * 100.))

Mean express checkout adoption: 42.22%
Median express checkout adoption: 27.82%


In [23]:
ec = no_top_10[no_top_10['ec_transactions']>0]
ec_conv = (ec['transactions'].sum() / ec['pageviews'].sum()) * 100.
ec_mobile_conv = (ec['mobile_transactions'].sum() / ec['mobile_pageviews'].sum()) * 100.

nec = no_top_10[no_top_10['ec_transactions']==0]
nec_conv = (nec['transactions'].sum() / nec['pageviews'].sum()) * 100.
nec_mobile_conv = (nec['mobile_transactions'].sum() / nec['mobile_pageviews'].sum()) * 100.

print("Cumulative conversion:")
print("-"*40)
print("Express checkout forms conversion: {:.2f}%; mobile: {:.2f}%".format(ec_conv, ec_mobile_conv))
print("Non express checkout forms conversion: {:.2f}%; mobile: {:.2f}%".format(nec_conv, nec_mobile_conv))

Cumulative conversion:
----------------------------------------
Express checkout forms conversion: 4.53%; mobile: 4.53%
Non express checkout forms conversion: 2.67%; mobile: 1.82%


In [104]:
print("Aggregate conversion (per form):")
print("-"*40)

mn = no_top_10[(no_top_10['ec_transactions']>0)&(no_top_10['transactions']<no_top_10['pageviews'])]['conversion'].replace([np.inf, -np.inf], np.nan).dropna().mean()
mdn = no_top_10[(no_top_10['ec_transactions']>0)&(no_top_10['transactions']<no_top_10['pageviews'])]['conversion'].median()
print("Express checkout forms conversion: {:.2f}% mean; {:.2f}% median".format(mn * 100., mdn * 100.))
mobile_mn = no_top_10[(no_top_10['ec_transactions']>0)&(no_top_10['transactions']<no_top_10['pageviews'])]['mobile_conv'].replace([np.inf, -np.inf], np.nan).dropna().mean()
mobile_mdn = no_top_10[(no_top_10['ec_transactions']>0)&(no_top_10['transactions']<no_top_10['pageviews'])]['mobile_conv'].median()
print("\tmobile: {:.2f}% mean; {:.2f}% median".format(mobile_mn * 100., mobile_mdn * 100.))

mn = no_top_10[(no_top_10['ec_transactions']==0)&(no_top_10['transactions']<no_top_10['pageviews'])]['conversion'].mean()
mdn = no_top_10[(no_top_10['ec_transactions']==0)&(no_top_10['transactions']<no_top_10['pageviews'])]['conversion'].median()
print("Non express checkout forms conversion: {:.2f}% mean; {:.2f}% median".format(mn * 100., mdn * 100.))
mobile_mn = no_top_10[(no_top_10['ec_transactions']==0)&(no_top_10['transactions']<no_top_10['pageviews'])]['mobile_conv'].replace([np.inf, -np.inf], np.nan).dropna().mean()
mobile_mdn = no_top_10[(no_top_10['ec_transactions']==0)&(no_top_10['transactions']<no_top_10['pageviews'])]['mobile_conv'].median()
print("\tmobile: {:.2f}% mean; {:.2f}% median".format(mobile_mn * 100., mobile_mdn * 100.))

Aggregate conversion (per form):
----------------------------------------
Express checkout forms conversion: 15.69% mean; 9.34% median
	mobile: 25.18% mean; 14.19% median
Non express checkout forms conversion: 12.08% mean; 6.17% median
	mobile: 17.08% mean; 6.30% median


# channels - 2023

In [25]:
q = '''select
            form,
            count(distinct(id)) as transactions
        from transactions
        where 
            status='A' and
            (recurring=0 or recurring_origin=1) and
            year=2023
        group by form'''
forms = redshift_query_read(q, schema='production')
print("{:,} forms".format(len(forms['form'].unique())))

18,125 forms


In [26]:
q = '''select
            form,
            count(id) as transactions,
            sum(amount) as vol,
            count(case when channel=0 then id else null end) as standard_count,
            sum(case when channel=0 then amount else null end) as standard_vol,
            count(case when channel=1 then id else null end) as applepay_count,
            sum(case when channel=1 then amount else null end) as applepay_vol,
            count(case when channel=2 then id else null end) as googlepay_count,
            sum(case when channel=2 then amount else null end) as googlepay_vol,
            count(case when channel=3 then id else null end) as venmo_count,
            sum(case when channel=3 then amount else null end) as venmo_vol,
            count(case when payment_type='PP' then id else null end) as paypal_count,
            sum(case when payment_type='PP' then amount else null end) as paypal_vol,
            count(case when isexpresscheckout then id else null end) as ec_count,
            sum(case when isexpresscheckout then amount else null end) as ec_vol
        from transactions
        where
            status='A' and
            (recurring=0 or recurring_origin=1) and
            year=2023 and
            source!='vt' and source!='mobilevt'
        group by form'''
trans = redshift_query_read(q, schema='production')

In [27]:
trans.tail(3)

Unnamed: 0,form,transactions,vol,standard_count,standard_vol,applepay_count,applepay_vol,googlepay_count,googlepay_vol,venmo_count,venmo_vol,paypal_count,paypal_vol,ec_count,ec_vol
17405,928412,2,68.5,2,68.5,0,,0,,0,,0,,0,
17406,1004946,1,51.75,1,51.75,0,,0,,0,,0,,0,
17407,983421,1,26.25,1,26.25,0,,0,,0,,0,,0,


In [28]:
print("{:,} forms".format(len(trans['form'])))

17,408 forms


In [29]:
q = '''select 
            form,
            sum(views) as views,
            max(date) as max_date
        from ga4_traffic
        where date>='2023-01-01' 
        group by form'''
traff = redshift_query_read(q, schema='production')
print("{:,} rows traffic; max date {}".format(len(traff), traff['max_date'].max()))

trans = trans.merge(traff.drop('max_date', axis=1), on='form')
trans['conversion'] = trans['transactions'] / trans['views']

52,627 rows traffic; max date 2024-12-11 00:00:00


In [30]:
print("{:,} forms".format(len(trans['form'].unique())))

15,985 forms


In [31]:
print("Only standard")
standard = trans[(trans['venmo_count']==0)&(trans['applepay_count']==0)&(trans['paypal_count']==0)&(trans['googlepay_count']==0)].copy()
conv_mean = standard[standard['conversion']<1.]['conversion'].mean() * 100.
conv_median = standard[standard['conversion']<1.]['conversion'].median() * 100.
conv_cum = standard['transactions'].sum() / standard['views'].sum()

print("forms: {:,}".format(len(standard['form'])))
print("$'s per form: ${:,.2f}".format(standard['standard_vol'].sum() / len(standard['form'])))
print("conversion agg mean: {:.2f}%; median: {:.2f}%".format(conv_mean, conv_median))
print("conversion cumulative: {:.2f}%".format(conv_cum * 100.))

print()
print("Apple pay/venmo/google/paypal but no express checkouts")

no_ep = trans[trans['ec_count']==0]
no_ep_w_other = no_ep[(no_ep['venmo_count']>0)|(no_ep['applepay_count']>0)|(no_ep['googlepay_count']>0)|(no_ep['paypal_count']>0)].copy()
conv_mean = no_ep_w_other[no_ep_w_other['conversion']<1.]['conversion'].mean() * 100.
conv_median = no_ep_w_other[no_ep_w_other['conversion']<1.]['conversion'].median() * 100.
conv_cum = no_ep_w_other['transactions'].sum() / no_ep_w_other['views'].sum()

print("forms: {:,}".format(len(no_ep_w_other['form'])))
print("$'s per form: ${:,.2f}".format(no_ep_w_other['vol'].sum() / len(no_ep_w_other['form'])))
print("conversion agg mean: {:.2f}%; median: {:.2f}%".format(conv_mean, conv_median))
print("conversion cumulative: {:.2f}%".format(conv_cum * 100.))

print()
print("With express checkouts")

ep = trans[trans['ec_count']!=0]
ep['vol'] = ep['standard_vol'] + ep['applepay_vol'] + ep['venmo_vol']
conv_mean = ep[ep['conversion']<1.]['conversion'].mean() * 100.
conv_median = ep[ep['conversion']<1.]['conversion'].median() * 100.
conv_cum = ep['transactions'].sum() / ep['views'].sum()

print("forms: {:,}".format(len(ep['form'])))
print("$'s per form: ${:,.2f}".format(ep['vol'].sum() / len(ep['form'])))
print("conversion agg mean: {:.2f}%; median: {:.2f}%".format(conv_mean, conv_median))
print("conversion cumulative: {:.2f}%".format(conv_cum * 100.))

Only standard
forms: 12,952
$'s per form: $14,766.16
conversion agg mean: 12.23%; median: 5.21%
conversion cumulative: 4.97%

Apple pay/venmo/google/paypal but no express checkouts
forms: 2,687
$'s per form: $33,095.76
conversion agg mean: 12.15%; median: 5.56%
conversion cumulative: 4.68%

With express checkouts
forms: 346
$'s per form: $19,368.54
conversion agg mean: 13.94%; median: 5.53%
conversion cumulative: 3.95%


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


# channels - since 4/2023

In [32]:
q = '''select
            form,
            count(distinct(id)) as transactions,
            sum(amount) as vol,
            count(distinct(case when channel=0 then id else null end)) as standard_count,
            sum(case when channel=0 then amount else null end) as standard_vol,
            count(distinct(case when channel=1 then id else null end)) as applepay_count,
            sum(case when channel=1 then amount else null end) as applepay_vol,
            count(distinct(case when channel=2 then id else null end)) as googlepay_count,
            sum(case when channel=2 then amount else null end) as googlepay_vol,
            count(distinct(case when channel=3 then id else null end)) as venmo_count,
            sum(case when channel=3 then amount else null end) as venmo_vol,
            count(distinct(case when payment_type='PP' then id else null end)) as paypal_count,
            sum(case when payment_type='PP' then amount else null end) as paypal_vol,
            count(distinct(case when isexpresscheckout then id else null end)) as ec_count,
            sum(case when isexpresscheckout then amount else null end) as ec_vol
        from transactions
        where
            status='A' and
            (recurring=0 or recurring_origin=1) and
            date>='2023-04-01' and
            source!='vt' and source!='mobilevt'
        group by form'''
trans = redshift_query_read(q, schema='production')

In [33]:
q = '''select 
            form,
            sum(views) as views,
            max(date) as max_date
        from ga
        where date>='2023-04-01' 
        group by form'''
traff = redshift_query_read(q, schema='production')
print("{:,} rows traffic; max date {}".format(len(traff), traff['max_date'].max()))

trans = trans.merge(traff.drop('max_date', axis=1), on='form')
trans['conversion'] = trans['transactions'] / trans['views']

48,446 rows traffic; max date 2024-12-09 00:00:00


In [34]:
trans.head(2)

Unnamed: 0,form,transactions,vol,standard_count,standard_vol,applepay_count,applepay_vol,googlepay_count,googlepay_vol,venmo_count,venmo_vol,paypal_count,paypal_vol,ec_count,ec_vol,views,conversion
0,996805,106,10609.37,106,10609.37,0,,0,,0,,0,,0,,2854,0.037141
1,958241,1970,116124.69,1970,116124.69,0,,0,,0,,0,,0,,16746,0.11764


In [35]:
len(trans), len(trans[trans['conversion']<1.])

(29591, 26657)

In [36]:
trans = trans[trans['conversion']<1.]

In [37]:
print("Only standard (no Apple pay/venmo/google/paypal)")
standard = trans[(trans['venmo_count']==0)&(trans['applepay_count']==0)&(trans['paypal_count']==0)&(trans['googlepay_count']==0)].copy()
conv_mean = standard[standard['conversion']<1.]['conversion'].mean() * 100.
conv_median = standard[standard['conversion']<1.]['conversion'].median() * 100.
conv_cum = standard['transactions'].sum() / standard['views'].sum()

print("forms: {:,}".format(len(standard['form'])))
print("$'s per form: ${:,.2f}".format(standard['standard_vol'].sum() / len(standard['form'])))
print("conversion agg mean: {:.2f}%; median: {:.2f}%".format(conv_mean, conv_median))
print("conversion cumulative: {:.2f}%".format(conv_cum * 100.))
print("mean trans per form: {:,.2f}".format(standard['transactions'].mean()))
print("median trans per form: {:,.2f}".format(standard['transactions'].median()))

print()
print("Apple pay/venmo/google/paypal but no express checkouts")

no_ep = trans[trans['ec_count']==0]
no_ep_w_other = no_ep[(no_ep['venmo_count']>0)|(no_ep['applepay_count']>0)|(no_ep['googlepay_count']>0)|(no_ep['paypal_count']>0)].copy()
conv_mean = no_ep_w_other[no_ep_w_other['conversion']<1.]['conversion'].mean() * 100.
conv_median = no_ep_w_other[no_ep_w_other['conversion']<1.]['conversion'].median() * 100.
conv_cum = no_ep_w_other['transactions'].sum() / no_ep_w_other['views'].sum()
perc = (no_ep['transactions'].sum() - (no_ep['standard_count'].sum() - no_ep['paypal_count'].sum())) / no_ep['transactions'].sum()

print("forms: {:,}".format(len(no_ep_w_other['form'])))
print("$'s per form: ${:,.2f}".format(no_ep_w_other['vol'].sum() / len(no_ep_w_other['form'])))
print("conversion agg mean: {:.2f}%; median: {:.2f}%".format(conv_mean, conv_median))
print("conversion cumulative: {:.2f}%".format(conv_cum * 100.))
print("Apple pay/venmo/google/paypal transactions: {:.2f}%".format(perc * 100.))
print("mean trans per form: {:,.2f}".format(no_ep_w_other['transactions'].mean()))
print("median trans per form: {:,.2f}".format(no_ep_w_other['transactions'].median()))

print()
print("With express checkouts")

ep = trans[trans['ec_count']!=0]
ep['vol'] = ep['standard_vol'] + ep['applepay_vol'] + ep['venmo_vol']
conv_mean = ep[ep['conversion']<1.]['conversion'].mean() * 100.
conv_median = ep[ep['conversion']<1.]['conversion'].median() * 100.
conv_cum = ep['transactions'].sum() / ep['views'].sum()
perc = (ep['transactions'].sum() - (ep['standard_count'].sum() - ep['paypal_count'].sum())) / ep['transactions'].sum()
perc_ep = ep['ec_count'].sum() / ep['transactions'].sum()

print("forms: {:,}".format(len(ep['form'])))
print("$'s per form: ${:,.2f}".format(ep['vol'].sum() / len(ep['form'])))
print("conversion agg mean: {:.2f}%; median: {:.2f}%".format(conv_mean, conv_median))
print("conversion cumulative: {:.2f}%".format(conv_cum * 100.))
print("Apple pay/venmo/google/paypal transactions: {:.2f}%".format(perc * 100.))
print("Express checkout transactions: {:.2f}%".format(perc_ep * 100.))
print("mean trans per form: {:,.2f}".format(ep['transactions'].mean()))
print("median trans per form: {:,.2f}".format(ep['transactions'].median()))

Only standard (no Apple pay/venmo/google/paypal)
forms: 19,971
$'s per form: $13,921.35
conversion agg mean: 15.83%; median: 9.03%
conversion cumulative: 7.77%
mean trans per form: 82.85
median trans per form: 18.00

Apple pay/venmo/google/paypal but no express checkouts
forms: 5,037
$'s per form: $28,934.14
conversion agg mean: 17.18%; median: 10.42%
conversion cumulative: 8.07%
Apple pay/venmo/google/paypal transactions: 6.32%
mean trans per form: 175.00
median trans per form: 42.00

With express checkouts
forms: 1,649
$'s per form: $12,087.72
conversion agg mean: 19.37%; median: 12.71%
conversion cumulative: 10.26%
Apple pay/venmo/google/paypal transactions: 33.55%
Express checkout transactions: 24.57%
mean trans per form: 153.07
median trans per form: 26.00


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


# channels - since 04/2023, mobile

In [38]:
q = '''select
            form,
            count(distinct(id)) as transactions,
            sum(amount) as vol,
            count(distinct(case when channel=0 then id else null end)) as standard_count,
            sum(case when channel=0 then amount else null end) as standard_vol,
            count(distinct(case when channel=1 then id else null end)) as applepay_count,
            sum(case when channel=1 then amount else null end) as applepay_vol,
            count(distinct(case when channel=2 then id else null end)) as googlepay_count,
            sum(case when channel=2 then amount else null end) as googlepay_vol,
            count(distinct(case when channel=3 then id else null end)) as venmo_count,
            sum(case when channel=3 then amount else null end) as venmo_vol,
            count(distinct(case when payment_type='PP' then id else null end)) as paypal_count,
            sum(case when payment_type='PP' then amount else null end) as paypal_vol,
            count(distinct(case when isexpresscheckout then id else null end)) as ec_count,
            sum(case when isexpresscheckout then amount else null end) as ec_vol
        from transactions
        where
            status='A' and
            (recurring=0 or recurring_origin=1) and
            date>='2023-04-01' and
            source!='vt' and source!='mobilevt' and
            platform in ('iPhone', 'Android', 'iPad')
        group by form'''
trans = redshift_query_read(q, schema='production')

In [39]:
q = '''select 
            form,
            sum(views) as views,
            max(date) as max_date
        from ga
        where 
            date>='2023-04-01' and
            devicecategory in ('mobile', 'tablet')
        group by form'''
traff = redshift_query_read(q, schema='production')
print("{:,} rows traffic; max date {}".format(len(traff), traff['max_date'].max()))

trans = trans.merge(traff.drop('max_date', axis=1), on='form')
trans['conversion'] = trans['transactions'] / trans['views']

32,325 rows traffic; max date 2024-12-09 00:00:00


In [40]:
trans.head(2)

Unnamed: 0,form,transactions,vol,standard_count,standard_vol,applepay_count,applepay_vol,googlepay_count,googlepay_vol,venmo_count,venmo_vol,paypal_count,paypal_vol,ec_count,ec_vol,views,conversion
0,925809,967,131867.0,967,131867.0,0,,0,,0,,0,,0,,7258,0.133232
1,986472,6,234.88,6,234.88,0,,0,,0,,0,,0,,511,0.011742


In [41]:
trans = trans[trans['conversion']<1.]

In [42]:
print("Only standard (no Apple pay/venmo/google/paypal)")
standard = trans[(trans['venmo_count']==0)&(trans['applepay_count']==0)&(trans['paypal_count']==0)&(trans['googlepay_count']==0)].copy()
conv_mean = standard[standard['conversion']<1.]['conversion'].mean() * 100.
conv_median = standard[standard['conversion']<1.]['conversion'].median() * 100.
conv_cum = standard['transactions'].sum() / standard['views'].sum()

print("forms: {:,}".format(len(standard['form'])))
print("$'s per form: ${:,.2f}".format(standard['standard_vol'].sum() / len(standard['form'])))
print("conversion agg mean: {:.2f}%; median: {:.2f}%".format(conv_mean, conv_median))
print("conversion cumulative: {:.2f}%".format(conv_cum * 100.))
print("mean trans per form: {:,.2f}".format(standard['transactions'].mean()))
print("median trans per form: {:,.2f}".format(standard['transactions'].median()))

print()
print("Apple pay/venmo/google/paypal but no express checkouts")

no_ep = trans[trans['ec_count']==0]
no_ep_w_other = no_ep[(no_ep['venmo_count']>0)|(no_ep['applepay_count']>0)|(no_ep['googlepay_count']>0)|(no_ep['paypal_count']>0)].copy()
conv_mean = no_ep_w_other[no_ep_w_other['conversion']<1.]['conversion'].mean() * 100.
conv_median = no_ep_w_other[no_ep_w_other['conversion']<1.]['conversion'].median() * 100.
conv_cum = no_ep_w_other['transactions'].sum() / no_ep_w_other['views'].sum()
perc = (no_ep['transactions'].sum() - (no_ep['standard_count'].sum() - no_ep['paypal_count'].sum())) / no_ep['transactions'].sum()

print("forms: {:,}".format(len(no_ep_w_other['form'])))
print("$'s per form: ${:,.2f}".format(no_ep_w_other['vol'].sum() / len(no_ep_w_other['form'])))
print("conversion agg mean: {:.2f}%; median: {:.2f}%".format(conv_mean, conv_median))
print("conversion cumulative: {:.2f}%".format(conv_cum * 100.))
print("Apple pay/venmo/google/paypal transactions: {:.2f}%".format(perc * 100.))
print("mean trans per form: {:,.2f}".format(no_ep_w_other['transactions'].mean()))
print("median trans per form: {:,.2f}".format(no_ep_w_other['transactions'].median()))

print()
print("With express checkouts")

ep = trans[trans['ec_count']!=0]
ep['vol'] = ep['standard_vol'] + ep['applepay_vol'] + ep['venmo_vol']
conv_mean = ep[ep['conversion']<1.]['conversion'].mean() * 100.
conv_median = ep[ep['conversion']<1.]['conversion'].median() * 100.
conv_cum = ep['transactions'].sum() / ep['views'].sum()
perc = (ep['transactions'].sum() - (ep['standard_count'].sum() - ep['paypal_count'].sum())) / ep['transactions'].sum()
perc_ep = ep['ec_count'].sum() / ep['transactions'].sum()

print("forms: {:,}".format(len(ep['form'])))
print("$'s per form: ${:,.2f}".format(ep['vol'].sum() / len(ep['form'])))
print("conversion agg mean: {:.2f}%; median: {:.2f}%".format(conv_mean, conv_median))
print("conversion cumulative: {:.2f}%".format(conv_cum * 100.))
print("Apple pay/venmo/google/paypal transactions: {:.2f}%".format(perc * 100.))
print("Express checkout transactions: {:.2f}%".format(perc_ep * 100.))
print("mean trans per form: {:,.2f}".format(ep['transactions'].mean()))
print("median trans per form: {:,.2f}".format(ep['transactions'].median()))

Only standard (no Apple pay/venmo/google/paypal)
forms: 14,799
$'s per form: $7,047.75
conversion agg mean: 16.29%; median: 9.55%
conversion cumulative: 6.42%
mean trans per form: 54.69
median trans per form: 11.00

Apple pay/venmo/google/paypal but no express checkouts
forms: 4,206
$'s per form: $12,388.02
conversion agg mean: 17.56%; median: 11.05%
conversion cumulative: 6.92%
Apple pay/venmo/google/paypal transactions: 9.47%
mean trans per form: 104.77
median trans per form: 25.00

With express checkouts
forms: 1,266
$'s per form: $5,992.93
conversion agg mean: 21.58%; median: 15.35%
conversion cumulative: 8.52%
Apple pay/venmo/google/paypal transactions: 47.88%
Express checkout transactions: 36.18%
mean trans per form: 94.28
median trans per form: 19.00


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


# express checkout forms - pre/post feature

## all

In [43]:
q = '''select
            form,
            min(date) as start_date
        from transactions
        where isexpresscheckout=1 and status='A'
        group by form'''
forms_ep_enabled = redshift_query_read(q, schema='production')
start_date = forms_ep_enabled['start_date'].min()

In [44]:
start_date

Timestamp('2023-04-20 00:00:00')

In [45]:
q = '''select
            form,
            id,
            date,
            amount,
            isexpresscheckout,
            recurring
        from transactions
        where
            date>='2022-04-20' and
            (recurring=0 or recurring_origin=1) and
            form in (select distinct(form) from transactions where isexpresscheckout=1 and status='A')'''
trans = redshift_query_read(q, schema='production')

In [46]:
q = '''select 
            form,
            date,
            sum(views) as views
        from ga
        where 
            date>='2022-04-01' and
            form in (select distinct(form) from transactions where isexpresscheckout=1 and status='A')
        group by form, date'''

traff = redshift_query_read(q, schema='production')

In [47]:
ep_pivot_data = []
for form in trans['form'].unique():
    pivot_date = forms_ep_enabled[forms_ep_enabled['form']==form]['start_date'].iloc[0]
    
    pre_trans = trans[(trans['form']==form)&(trans['date']<pivot_date)]
    post_trans = trans[(trans['form']==form)&(trans['date']>=pivot_date)]
    pre_traff = traff[(traff['form']==form)&(traff['date']<pivot_date)]
    post_traff = traff[(traff['form']==form)&(traff['date']>=pivot_date)]
    
    ep_pivot_data.append({
        'form': form,
        'trans_pre_ep': len(pre_trans),
        'trans_post_ep': len(post_trans),
        'traff_pre_ep': pre_traff['views'].sum(),
        'traff_post_ep': post_traff['views'].sum(),
        'median_pre_ep': pre_trans['amount'].median(),
        'median_post_ep': post_trans['amount'].median(),
        'mean_pre_ep': pre_trans['amount'].mean(),
        'mean_post_ep': post_trans['amount'].mean()
    })

In [48]:
ep_pivot = pd.DataFrame(ep_pivot_data)
ep_pivot['conv_pre_ep'] = ep_pivot['trans_pre_ep'] / ep_pivot['traff_pre_ep']
ep_pivot['conv_post_ep'] = ep_pivot['trans_post_ep'] / ep_pivot['traff_post_ep']

In [49]:
print("{:,} forms".format(len(ep_pivot)))
print("{:,} forms w/ page views prior to EP".format(len(ep_pivot[ep_pivot['traff_pre_ep']>0])))

2,001 forms
1,671 forms w/ page views prior to EP


In [78]:
print("Mean conversion pre-ep: {:.2f}%".format(ep_pivot['conv_pre_ep'].replace([np.inf, -np.inf], np.nan).dropna().mean() * 100.))
print("Median conversion pre-ep: {:.2f}%".format(ep_pivot['conv_pre_ep'].median() * 100.))

print("Mean conversion post-ep: {:.2f}%".format(ep_pivot['conv_post_ep'].replace([np.inf, -np.inf], np.nan).dropna().mean() * 100.))
print("Median conversion post-ep: {:.2f}%".format(ep_pivot['conv_post_ep'].median() * 100.))

Mean conversion pre-ep: 37.95%
Median conversion pre-ep: 12.19%
Mean conversion post-ep: 83.33%
Median conversion post-ep: 40.00%


In [51]:
temp = ep_pivot[(ep_pivot['conv_pre_ep']<1.)&(ep_pivot['conv_post_ep']<1.)]
print("Dropping >1 conversion, {} forms:".format(len(temp)))

print("Mean conversion pre-ep: {:.2f}%".format(temp['conv_pre_ep'].mean() * 100.))
print("Median conversion pre-ep: {:.2f}%".format(temp['conv_pre_ep'].median() * 100.))

print("Mean conversion post-ep: {:.2f}%".format(temp['conv_post_ep'].mean() * 100.))
print("Median conversion post-ep: {:.2f}%".format(temp['conv_post_ep'].median() * 100.))

Dropping >1 conversion, 1084 forms:
Mean conversion pre-ep: 8.69%
Median conversion pre-ep: 3.12%
Mean conversion post-ep: 24.84%
Median conversion post-ep: 19.15%


In [79]:
temp = ep_pivot.sort_values('conv_pre_ep', ascending=True).iloc[16:150]
print("Middle 80%, {} forms:".format(len(temp)))

print("Mean conversion pre-ep: {:.2f}%".format(temp['conv_pre_ep'].replace([np.inf, -np.inf], np.nan).dropna().mean() * 100.))
print("Median conversion pre-ep: {:.2f}%".format(temp['conv_pre_ep'].median() * 100.))

print("Mean conversion post-ep: {:.2f}%".format(temp['conv_post_ep'].replace([np.inf, -np.inf], np.nan).dropna().mean() * 100.))
print("Median conversion post-ep: {:.2f}%".format(temp['conv_post_ep'].median() * 100.))

Middle 80%, 134 forms:
Mean conversion pre-ep: 0.00%
Median conversion pre-ep: 0.00%
Mean conversion post-ep: 22.67%
Median conversion post-ep: 20.32%


In [80]:
temp = ep_pivot[ep_pivot['traff_pre_ep']>0]
print("w/ pageview prior to EP, {} forms:".format(len(temp)))

print("Mean conversion pre-ep: {:.2f}%".format(temp['conv_pre_ep'].replace([np.inf, -np.inf], np.nan).dropna().mean() * 100.))
print("Median conversion pre-ep: {:.2f}%".format(temp['conv_pre_ep'].median() * 100.))

print("Mean conversion post-ep: {:.2f}%".format(temp['conv_post_ep'].replace([np.inf, -np.inf], np.nan).dropna().mean() * 100.))
print("Median conversion post-ep: {:.2f}%".format(temp['conv_post_ep'].median() * 100.))

w/ pageview prior to EP, 961 forms:
Mean conversion pre-ep: 37.95%
Median conversion pre-ep: 6.67%
Mean conversion post-ep: 69.55%
Median conversion post-ep: 28.57%


## mobile only

In [54]:
q = '''select
            form,
            id,
            date,
            amount,
            isexpresscheckout,
            recurring
        from transactions
        where
            date>='2022-04-20' and
            (recurring=0 or recurring_origin=1) and
            platform in ('iPhone', 'Android', 'iPad') and
            form in (select distinct(form) from transactions where isexpresscheckout=1 and status='A')'''
trans = redshift_query_read(q, schema='production')

In [55]:
q = '''select 
            form,
            date,
            sum(views) as views
        from ga
        where 
            date>='2022-04-01' and
            devicecategory in ('mobile', 'tablet') and
            form in (select distinct(form) from transactions where isexpresscheckout=1 and status='A')
        group by form, date'''

traff = redshift_query_read(q, schema='production')

print("{} to {}".format(traff['date'].min(), traff['date'].max()))

2022-04-01 00:00:00 to 2024-12-09 00:00:00


In [56]:
ep_pivot_data = []
for form in trans['form'].unique():
    pivot_date = forms_ep_enabled[forms_ep_enabled['form']==form]['start_date'].iloc[0]
    
    pre_trans = trans[(trans['form']==form)&(trans['date']<pivot_date)]
    post_trans = trans[(trans['form']==form)&(trans['date']>=pivot_date)]
    pre_traff = traff[(traff['form']==form)&(traff['date']<pivot_date)]
    post_traff = traff[(traff['form']==form)&(traff['date']>=pivot_date)]
    
    ep_pivot_data.append({
        'form': form,
        'trans_pre_ep': len(pre_trans),
        'trans_post_ep': len(post_trans),
        'traff_pre_ep': pre_traff['views'].sum(),
        'traff_post_ep': post_traff['views'].sum(),
        'median_pre_ep': pre_trans['amount'].median(),
        'median_post_ep': post_trans['amount'].median(),
        'mean_pre_ep': pre_trans['amount'].mean(),
        'mean_post_ep': post_trans['amount'].mean()
    })

In [57]:
ep_pivot = pd.DataFrame(ep_pivot_data)
ep_pivot['conv_pre_ep'] = ep_pivot['trans_pre_ep'] / ep_pivot['traff_pre_ep']
ep_pivot['conv_post_ep'] = ep_pivot['trans_post_ep'] / ep_pivot['traff_post_ep']

In [81]:
print("{} forms:".format(len(ep_pivot)))
print("-"*40)
print("Mean conversion pre-ep: {:.2f}%".format(ep_pivot['conv_pre_ep'].replace([np.inf, -np.inf], np.nan).dropna().mean() * 100.))
print("Median conversion pre-ep: {:.2f}%".format(ep_pivot['conv_pre_ep'].median() * 100.))

print("Mean conversion post-ep: {:.2f}%".format(ep_pivot['conv_post_ep'].replace([np.inf, -np.inf], np.nan).dropna().mean() * 100.))
print("Median conversion post-ep: {:.2f}%".format(ep_pivot['conv_post_ep'].median() * 100.))

print()
print("Mean amount pre-ep: ${:,.2f}".format(ep_pivot['mean_pre_ep'].replace([np.inf, -np.inf], np.nan).dropna().mean()))
print("Median amount pre-ep: ${:,.2f}".format(ep_pivot['median_pre_ep'].mean()))
print("Mean amount post-ep: ${:,.2f}".format(ep_pivot['mean_post_ep'].replace([np.inf, -np.inf], np.nan).dropna().mean()))
print("Median amount post-ep: ${:,.2f}".format(ep_pivot['median_post_ep'].mean()))

1938 forms:
----------------------------------------
Mean conversion pre-ep: 37.95%
Median conversion pre-ep: 12.19%
Mean conversion post-ep: 83.33%
Median conversion post-ep: 40.00%

Mean amount pre-ep: $459.71
Median amount pre-ep: $175.93
Mean amount post-ep: $257.07
Median amount post-ep: $125.42


In [59]:
temp = ep_pivot[(ep_pivot['conv_pre_ep']<1.)&(ep_pivot['conv_post_ep']<1.)]
print("Dropping >1 conversion, {} forms:".format(len(temp)))

print("Mean conversion pre-ep: {:.2f}%".format(temp['conv_pre_ep'].replace([np.inf, -np.inf], np.nan).dropna().mean() * 100.))
print("Median conversion pre-ep: {:.2f}%".format(temp['conv_pre_ep'].median() * 100.))

print("Mean conversion post-ep: {:.2f}%".format(temp['conv_post_ep'].replace([np.inf, -np.inf], np.nan).dropna().mean() * 100.))
print("Median conversion post-ep: {:.2f}%".format(temp['conv_post_ep'].median() * 100.))

print()
print("Mean amount pre-ep: ${:,.2f}".format(temp['mean_pre_ep'].replace([np.inf, -np.inf], np.nan).dropna().mean()))
print("Median amount pre-ep: ${:,.2f}".format(temp['median_pre_ep'].mean()))
print("Mean amount post-ep: ${:,.2f}".format(temp['mean_post_ep'].replace([np.inf, -np.inf], np.nan).dropna().mean()))
print("Median amount post-ep: ${:,.2f}".format(temp['median_post_ep'].mean()))

Dropping >1 conversion, 643 forms:
Mean conversion pre-ep: 10.10%
Median conversion pre-ep: 4.35%
Mean conversion post-ep: 23.20%
Median conversion post-ep: 17.14%

Mean amount pre-ep: $271.55
Median amount pre-ep: $138.51
Mean amount post-ep: $353.31
Median amount post-ep: $105.00


In [60]:
temp = ep_pivot.sort_values('conv_pre_ep', ascending=True).iloc[16:150]
print("Middle 80%, {} forms:".format(len(temp)))

print("Mean conversion pre-ep: {:.2f}%".format(temp['conv_pre_ep'].replace([np.inf, -np.inf], np.nan).dropna().mean() * 100.))
print("Median conversion pre-ep: {:.2f}%".format(temp['conv_pre_ep'].median() * 100.))

print("Mean conversion post-ep: {:.2f}%".format(temp['conv_post_ep'].replace([np.inf, -np.inf], np.nan).dropna().mean() * 100.))
print("Median conversion post-ep: {:.2f}%".format(temp['conv_post_ep'].median() * 100.))

Middle 80%, 134 forms:
Mean conversion pre-ep: 0.00%
Median conversion pre-ep: 0.00%
Mean conversion post-ep: inf%
Median conversion post-ep: 20.32%


In [83]:
temp = ep_pivot[ep_pivot['traff_pre_ep']>0]
print("w/ pageview prior to EP, {} forms:".format(len(temp)))

print("Mean conversion pre-ep: {:.2f}%".format(temp['conv_pre_ep'].replace([np.inf, -np.inf], np.nan).dropna().mean() * 100.))
print("Median conversion pre-ep: {:.2f}%".format(temp['conv_pre_ep'].median() * 100.))

print("Mean conversion post-ep: {:.2f}%".format(temp['conv_post_ep'].replace([np.inf, -np.inf], np.nan).dropna().mean() * 100.))
print("Median conversion post-ep: {:.2f}%".format(temp['conv_post_ep'].median() * 100.))

w/ pageview prior to EP, 961 forms:
Mean conversion pre-ep: 37.95%
Median conversion pre-ep: 6.67%
Mean conversion post-ep: 69.55%
Median conversion post-ep: 28.57%


In [82]:
temp = ep_pivot[ep_pivot['traff_pre_ep']>0].sort_values('conv_pre_ep', ascending=True).iloc[10:90]
print("w/ pageview prior to EP, middle 80%, {} forms:".format(len(temp)))

print("Mean conversion pre-ep: {:.2f}%".format(temp['conv_pre_ep'].replace([np.inf, -np.inf], np.nan).dropna().mean() * 100.))
print("Median conversion pre-ep: {:.2f}%".format(temp['conv_pre_ep'].median() * 100.))

print("Mean conversion post-ep: {:.2f}%".format(temp['conv_post_ep'].replace([np.inf, -np.inf], np.nan).dropna().mean() * 100.))
print("Median conversion post-ep: {:.2f}%".format(temp['conv_post_ep'].median() * 100.))

w/ pageview prior to EP, middle 80%, 80 forms:
Mean conversion pre-ep: 0.00%
Median conversion pre-ep: 0.00%
Mean conversion post-ep: 26.65%
Median conversion post-ep: 19.20%


# digital wallets - pre/post feature

look at YoY for forms using digital wallets (Apple Pay, PayPal, Venmo, Google Pay)

In [63]:
q = '''select
            form,
            id,
            date,
            amount,
            channel,
            source
        from transactions
        where
            date>='2022-04-20' and
            (recurring=0 or recurring_origin=1) and
            platform in ('iPhone', 'Android', 'iPad') and
            form in (select 
                        distinct(form) 
                    from transactions 
                    where 
                        (channel!=0 or source='PP') and 
                        status='A')'''
trans = redshift_query_read(q, schema='production')

In [64]:
trans['date'].min()

Timestamp('2022-04-20 00:00:00')

In [65]:
q = '''select 
            form,
            date,
            sum(views) as views
        from ga
        where 
            date>='2021-04-20' and
            devicecategory in ('mobile', 'tablet') and
            form in (select 
                        distinct(form) 
                    from transactions 
                    where 
                        (channel!=0 or source='PP') and 
                        status='A')
        group by form, date'''

traff = redshift_query_read(q, schema='production')

print("{:,} rows".format(len(traff)))
print("{:,} forms".format(len(traff['form'].unique())))
print("{} to {}".format(traff['date'].min(), traff['date'].max()))

278,334 rows
5,636 forms
2021-04-20 00:00:00 to 2024-12-09 00:00:00


In [66]:
dw_pivot_data = []
for form in trans['form'].unique():
    _df = trans[trans['form']==form]
    pivot_date = _df[(_df['channel']!=0)|(_df['source']=='PP')]['date'].min()
    
    pre_trans = _df[_df['date']<pivot_date]
    post_trans = _df[_df['date']>=pivot_date]
    pre_traff = traff[(traff['form']==form)&(traff['date']<pivot_date)]
    post_traff = traff[(traff['form']==form)&(traff['date']>=pivot_date)]
    
    dw_pivot_data.append({
        'form': form,
        'pivot_date': pivot_date,
        'trans_pre_ep': len(pre_trans),
        'trans_post_ep': len(post_trans),
        'traff_pre_ep': pre_traff['views'].sum(),
        'traff_post_ep': post_traff['views'].sum(),
        'median_pre_ep': pre_trans['amount'].median(),
        'median_post_ep': post_trans['amount'].median(),
        'mean_pre_ep': pre_trans['amount'].mean(),
        'mean_post_ep': post_trans['amount'].mean()
    })

In [67]:
dw_pivot = pd.DataFrame(dw_pivot_data)
dw_pivot['conv_pre_ep'] = dw_pivot['trans_pre_ep'] / dw_pivot['traff_pre_ep']
dw_pivot['conv_post_ep'] = dw_pivot['trans_post_ep'] / dw_pivot['traff_post_ep']

In [68]:
datetime.now()-timedelta(days=6*30)

datetime.datetime(2024, 6, 15, 17, 34, 11, 381660)

In [69]:
print("{:,} forms".format(len(dw_pivot)))
print("{:,} forms, drop NA".format(len(dw_pivot.dropna())))
print("{:,} forms < 100% conv".format(len(dw_pivot[(dw_pivot['conv_pre_ep']<1.)&(dw_pivot['conv_post_ep']<1.)])))

dw_pivot['pivot_date'] = pd.to_datetime(dw_pivot['pivot_date'])
dw_pivot_flt = dw_pivot[dw_pivot['pivot_date']>=datetime.now()-timedelta(days=6*30)]

print("{:,} forms w/ 6 months+ data".format(len(dw_pivot_flt)))
print("{:,} forms w/ 6 months+ data & < 100% conv".format(len(dw_pivot_flt[(dw_pivot_flt['conv_pre_ep']<1.)&(dw_pivot_flt['conv_post_ep']<1.)])))

5,966 forms
3,339 forms, drop NA
2,857 forms < 100% conv
2,485 forms w/ 6 months+ data
1,141 forms w/ 6 months+ data & < 100% conv


In [84]:
print("{} forms:".format(len(dw_pivot)))
print("-"*40)
print("Mean conversion pre-ep: {:.2f}%".format(dw_pivot['conv_pre_ep'].replace([np.inf, -np.inf], np.nan).dropna().mean() * 100.))
print("Median conversion pre-ep: {:.2f}%".format(dw_pivot['conv_pre_ep'].median() * 100.))

print("Mean conversion post-ep: {:.2f}%".format(dw_pivot['conv_post_ep'].replace([np.inf, -np.inf], np.nan).dropna().mean() * 100.))
print("Median conversion post-ep: {:.2f}%".format(dw_pivot['conv_post_ep'].median() * 100.))

print()
print("Mean amount pre-ep: ${:,.2f}".format(dw_pivot['mean_pre_ep'].replace([np.inf, -np.inf], np.nan).dropna().mean()))
print("Median amount pre-ep: ${:,.2f}".format(dw_pivot['median_pre_ep'].mean()))
print("Mean amount post-ep: ${:,.2f}".format(dw_pivot['mean_post_ep'].replace([np.inf, -np.inf], np.nan).dropna().mean()))
print("Median amount post-ep: ${:,.2f}".format(dw_pivot['median_post_ep'].mean()))

5966 forms:
----------------------------------------
Mean conversion pre-ep: 46.86%
Median conversion pre-ep: 7.14%
Mean conversion post-ep: 164.81%
Median conversion post-ep: 21.65%

Mean amount pre-ep: $1,269.29
Median amount pre-ep: $1,023.34
Mean amount post-ep: $441.24
Median amount post-ep: $121.55


In [85]:
_df = dw_pivot[(dw_pivot['conv_pre_ep']<1.)&(dw_pivot['conv_post_ep']<1.)]
print("less 100% conversion, {} forms:".format(len(_df)))
print("-"*40)
print("Mean conversion pre-ep: {:.2f}%".format(_df['conv_pre_ep'].replace([np.inf, -np.inf], np.nan).dropna().mean() * 100.))
print("Median conversion pre-ep: {:.2f}%".format(_df['conv_pre_ep'].median() * 100.))

print("Mean conversion post-ep: {:.2f}%".format(_df['conv_post_ep'].replace([np.inf, -np.inf], np.nan).dropna().mean() * 100.))
print("Median conversion post-ep: {:.2f}%".format(_df['conv_post_ep'].median() * 100.))

print()
print("Mean amount pre-ep: ${:,.2f}".format(_df['mean_pre_ep'].replace([np.inf, -np.inf], np.nan).dropna().mean()))
print("Median amount pre-ep: ${:,.2f}".format(_df['median_pre_ep'].mean()))
print("Mean amount post-ep: ${:,.2f}".format(_df['mean_post_ep'].replace([np.inf, -np.inf], np.nan).dropna().mean()))
print("Median amount post-ep: ${:,.2f}".format(_df['median_post_ep'].mean()))

less 100% conversion, 2857 forms:
----------------------------------------
Mean conversion pre-ep: 8.79%
Median conversion pre-ep: 3.17%
Mean conversion post-ep: 17.42%
Median conversion post-ep: 11.39%

Mean amount pre-ep: $1,851.85
Median amount pre-ep: $1,540.23
Mean amount post-ep: $630.00
Median amount post-ep: $114.06


In [86]:
_df = dw_pivot_flt
print("> 6 months data, {} forms:".format(len(_df)))
print("-"*40)
print("Mean conversion pre-ep: {:.2f}%".format(_df['conv_pre_ep'].replace([np.inf, -np.inf], np.nan).dropna().mean() * 100.))
print("Median conversion pre-ep: {:.2f}%".format(_df['conv_pre_ep'].median() * 100.))

print("Mean conversion post-ep: {:.2f}%".format(_df['conv_post_ep'].replace([np.inf, -np.inf], np.nan).dropna().mean() * 100.))
print("Median conversion post-ep: {:.2f}%".format(_df['conv_post_ep'].median() * 100.))

print()
print("Mean amount pre-ep: ${:,.2f}".format(_df['mean_pre_ep'].replace([np.inf, -np.inf], np.nan).dropna().mean()))
print("Median amount pre-ep: ${:,.2f}".format(_df['median_pre_ep'].mean()))
print("Mean amount post-ep: ${:,.2f}".format(_df['mean_post_ep'].replace([np.inf, -np.inf], np.nan).dropna().mean()))
print("Median amount post-ep: ${:,.2f}".format(_df['median_post_ep'].mean()))

> 6 months data, 2485 forms:
----------------------------------------
Mean conversion pre-ep: 35.87%
Median conversion pre-ep: 11.08%
Mean conversion post-ep: 52.52%
Median conversion post-ep: 27.27%

Mean amount pre-ep: $602.54
Median amount pre-ep: $166.80
Mean amount post-ep: $190.14
Median amount post-ep: $123.42


In [87]:
_df = dw_pivot_flt[(dw_pivot_flt['conv_pre_ep']<1.)&(dw_pivot_flt['conv_post_ep']<1.)]
print("> 6 months digital wallet data, < 100% conversion, {} forms:".format(len(_df)))
print("-"*40)
print("Mean conversion pre-ep: {:.2f}%".format(_df['conv_pre_ep'].replace([np.inf, -np.inf], np.nan).dropna().mean() * 100.))
print("Median conversion pre-ep: {:.2f}%".format(_df['conv_pre_ep'].median() * 100.))

print("Mean conversion post-ep: {:.2f}%".format(_df['conv_post_ep'].replace([np.inf, -np.inf], np.nan).dropna().mean() * 100.))
print("Median conversion post-ep: {:.2f}%".format(_df['conv_post_ep'].median() * 100.))

print()
print("Mean amount pre-ep: ${:,.2f}".format(_df['mean_pre_ep'].replace([np.inf, -np.inf], np.nan).dropna().mean()))
print("Median amount pre-ep: ${:,.2f}".format(_df['median_pre_ep'].mean()))
print("Mean amount post-ep: ${:,.2f}".format(_df['mean_post_ep'].replace([np.inf, -np.inf], np.nan).dropna().mean()))
print("Median amount post-ep: ${:,.2f}".format(_df['median_post_ep'].mean()))

> 6 months digital wallet data, < 100% conversion, 1141 forms:
----------------------------------------
Mean conversion pre-ep: 10.73%
Median conversion pre-ep: 5.48%
Mean conversion post-ep: 19.68%
Median conversion post-ep: 14.53%

Mean amount pre-ep: $780.04
Median amount pre-ep: $164.94
Mean amount post-ep: $187.41
Median amount post-ep: $109.10
