This notebook compares the performance of single-step forms and multi-step forms. Specifically, we look at average conversion rates and transaction amounts.

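Whether a form is single-step or multi-step is indicated by the `appearance` field in the `analyticsqgiv_monthly` table: a value of `1` indicates a single step, and `2` indicates multiple steps. The data also contains rows where `appearance` is `0`; this notebook does not document what that value denotes, so those rows should not be read as either form type.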

In [1]:
import sys
sys.path.insert(1, '../../scripts/')
from s3_support import *

import pandas as pd

# 0. load and prep data

In [2]:
# Earliest date (inclusive) kept by the queries in this notebook, as YYYY-MM-DD.
START_DATE = '2020-01-01'

## analytics data

In [80]:
# Pull every column of the monthly analytics table for rows dated on or after
# START_DATE. The template is kept separate from the formatted query text.
analytics_sql = "select * from analyticsqgiv_monthly where date>='{}'"
q = analytics_sql.format(START_DATE)
analytics = redshift_query_read(q, schema='public')

In [81]:
# Sanity-check the analytics pull: overall size, date span, and the split
# between appearance == 1 (single step) and appearance == 2 (multi step).
# Selecting the 'form' column once per step type gives both the entry count
# (its length) and the distinct-form count (its unique values).
single_step_forms = analytics.loc[analytics['appearance'] == 1, 'form']
multi_step_forms = analytics.loc[analytics['appearance'] == 2, 'form']
analytics_dates = analytics['date']

print("{:,} analytics qgiv rows".format(len(analytics)))
print("{} to {} date range".format(analytics_dates.min(), analytics_dates.max()))
print("{:,} single step entries".format(len(single_step_forms)))
print("{:,} multi step entries".format(len(multi_step_forms)))
print("{:,} unique forms".format(len(analytics['form'].unique())))
print("{:,} unique single step forms".format(len(single_step_forms.unique())))
print("{:,} unique multi step forms".format(len(multi_step_forms.unique())))

2,052,156 analytics qgiv rows
2020-01-01 00:00:00 to 2024-07-01 00:00:00 date range
795,289 single step entries
869,329 multi step entries
74,017 unique forms
46,549 unique single step forms
52,427 unique multi step forms


In [82]:
# Preview the first two analytics rows to eyeball the available columns.
analytics.head(n=2)

Unnamed: 0,date,form,org,pledges_count,events_count,events_priv_count,restrictions,amounts,ded_types,opt_ded_flds,...,reg_count,dl_trans_volume,dl_trans_count,dl_new_rec_count,dl_new_rec_volume,cta_after,cta_before,conditional_fields,appearance,image_size
0,2020-01-01,935895,441412,3,0,0,0,7,0,0,...,0,0.0,0,0,0.0,0,0,0,0,0
1,2020-06-01,940558,76803,0,0,0,0,7,0,0,...,0,0.0,0,0,0.0,0,0,0,0,0


## traffic data

In [83]:
# Total page views per (date, form) from the `ga` table, restricted to rows
# dated on or after START_DATE. Monthly roll-up happens later in pandas.
traffic_sql = '''select
            date,
            form,
            sum(views) as views
        from ga
        where date>='{}' 
        group by date, form'''
q = traffic_sql.format(START_DATE)
traff = redshift_query_read(q, schema='production')

In [84]:
# Sanity-check the traffic pull: row count, date span, and distinct forms.
traffic_summary = [
    "{:,} traffic rows retrieved".format(len(traff)),
    "{} to {} date range".format(traff['date'].min(), traff['date'].max()),
    "{:,} unique forms".format(len(traff['form'].unique())),
]
print("\n".join(traffic_summary))

4,480,543 traffic rows retrieved
2020-01-01 00:00:00 to 2024-09-03 00:00:00 date range
71,704 unique forms


In [85]:
# Roll traffic up to one row per form per calendar month.
# The month label is the 'YYYY-MM' string form of the monthly period; rows
# with form == 0 are excluded before summing views.
traff['month'] = traff['date'].dt.to_period('M').astype(str)
traff = (
    traff.loc[traff['form'] != 0]
    .groupby(['form', 'month'])['views']
    .sum()
    .reset_index()
)

In [86]:
# Relabel the columns positionally (the month key becomes 'date'), then parse
# the month strings into timestamps.
traff.columns = ['form', 'date', 'views']
traff = traff.assign(date=pd.to_datetime(traff['date']))

In [87]:
# Preview the first two rows of the monthly traffic frame.
traff.head(n=2)

Unnamed: 0,form,date,views
0,1,2020-01-01,631
1,1,2020-02-01,986


## transactions

In [97]:
# Monthly transaction counts and volumes per form, limited to rows with
# status 'A' dated on or after START_DATE. The "onetime" columns cover rows
# with recurring=0; the "rec" columns cover rows with recurring_origin=1.
transactions_sql = """select
            date_trunc('month', date) as date,
            form,
            count(id) as trans_count,
            count(case when recurring=0 then id else null end) as trans_onetime_count,
            count(case when recurring_origin=1 then id else null end) as trans_rec_count,
            sum(amount) as trans_vol,
            sum(case when recurring=0 then amount else null end) as trans_onetime_vol,
            sum(case when recurring_origin=1 then amount else null end) as trans_rec_vol
        from transactions
        where 
            status='A' and
            date>='{}' 
        group by date_trunc('month', date), form"""
q = transactions_sql.format(START_DATE)
trans = redshift_query_read(q, schema='production')

In [98]:
# Preview the last three rows of the monthly transactions frame.
trans.tail(n=3)

Unnamed: 0,date,form,trans_count,trans_onetime_count,trans_rec_count,trans_vol,trans_onetime_vol,trans_rec_vol
475569,2022-07-01,981868,1,0,1,350.0,,350.0
475570,2021-03-01,965832,1,0,1,800.0,,800.0
475571,2023-06-01,997644,1,0,1,150.0,,150.0


In [99]:
# 'new transactions' is the one-time count plus the recurring count for the
# month. Afterwards every remaining missing value (e.g. a volume whose SQL sum
# had no matching rows) is replaced with 0.
trans['new transactions'] = trans['trans_onetime_count'].add(trans['trans_rec_count'])
trans = trans.fillna(0)

## merge data

In [100]:
# Normalise the two merge keys ('form', 'date') identically across all three
# frames so the left merges below can match rows.
#
# - 'form' is cast to int in every frame.
# - 'date' is rendered as an explicit 'YYYY-MM-DD' string. A plain astype(str)
#   relies on pandas' implicit datetime rendering, which can differ between
#   frames (e.g. '2020-01-01' vs '2020-01-01 00:00:00' depending on dtype or on
#   whether any value carries a time component). A mismatch would not raise; it
#   would just leave the merged traffic/transaction columns empty.
#
# Re-running this cell is safe: the formatted strings parse back to the same dates.
for frame in (traff, analytics, trans):
    frame['form'] = frame['form'].astype(int)
    frame['date'] = pd.to_datetime(frame['date']).dt.strftime('%Y-%m-%d')

In [102]:
# Attach monthly traffic and then monthly transactions to every analytics row.
# Both are left joins on (date, form), so analytics rows without a match are
# kept and get NaN in the joined columns.
mrgd = (
    analytics
    .merge(traff, on=['date', 'form'], how='left')
    .merge(trans, on=['date', 'form'], how='left')
)

In [103]:
# Report how much of the analytics data survived the merges. The last line
# counts only rows with no missing value in any column.
mrgd_complete = mrgd.dropna()
merge_summary = [
    "{:,} analytics rows; {:,} unique forms".format(len(analytics), len(analytics['form'].unique())),
    "{:,} traffic rows; {:,} unique forms".format(len(traff), len(traff['form'].unique())),
    "",
    "{:,} merged rows; {:,} unique forms".format(len(mrgd), len(mrgd['form'].unique())),
    "{:,} merged rows, drop NAN; {:,} unique forms".format(len(mrgd_complete), len(mrgd_complete['form'].unique())),
]
print("\n".join(merge_summary))

2,052,156 analytics rows; 74,017 unique forms
509,130 traffic rows; 71,703 unique forms

2,052,156 merged rows; 74,017 unique forms
180,906 merged rows, drop NAN; 30,553 unique forms


In [104]:
# Analytics rows with no matching traffic or transactions carry NaN after the
# left merges; treat those as zero views / zero transactions.
mrgd = mrgd.fillna(0)

# Conversion = transactions per view. After the zero-fill, 'views' contains
# zeros, and dividing by them would give +inf (count / 0) or NaN (0 / 0).
# Masking non-positive views to NaN first makes every undefined ratio a plain
# NaN, so no infinite value can leak into a later aggregate that forgets to
# filter on views > 0.
viewed = mrgd['views'].where(mrgd['views'] > 0)
mrgd['conversion'] = mrgd['new transactions'] / viewed
mrgd['conversion onetime'] = mrgd['trans_onetime_count'] / viewed
mrgd['conversion recurring'] = mrgd['trans_rec_count'] / viewed

# 1. analysis

In [107]:
# Key columns plus the three conversion metrics; show the last three rows as a
# quick check of the computed ratios.
cols = [
    'date',
    'form',
    'appearance',
    'conversion',
    'conversion onetime',
    'conversion recurring',
]

mrgd.loc[:, cols].tail(n=3)

Unnamed: 0,date,form,appearance,conversion,conversion onetime,conversion recurring
2052153,2024-07-01,1025216,2,,,
2052154,2024-07-01,1035827,2,,,
2052155,2023-10-01,1015510,2,0.037037,0.037037,0.0


In [117]:
# Mean and median of each conversion metric per `appearance` value.
# Only rows with at least one view and an overall conversion below 1 are
# included; rows at or above 1 report at least as many transactions as views.
conv_cols = ['conversion', 'conversion onetime', 'conversion recurring']
(
    mrgd.loc[(mrgd['views'] > 0) & (mrgd['conversion'] < 1.)]
    .fillna(0)
    .groupby('appearance')[conv_cols]
    .agg(['mean', 'median'])
    .reset_index()
)

Unnamed: 0_level_0,appearance,conversion,conversion,conversion onetime,conversion onetime,conversion recurring,conversion recurring
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,median,mean,median,mean,median
0,0,0.109548,0.05618,0.099332,0.04918,0.010216,0.0
1,1,0.130207,0.053333,0.120541,0.046512,0.009666,0.0
2,2,0.150782,0.070028,0.13913,0.061889,0.011652,0.0
