In [6]:
import sys
sys.path.insert(1, '../../scripts/')
from s3_support import *

import pandas as pd
import numpy as np

The data was built in <research.ipynb>, and the isolated form IDs are stored in <filtered_forms.csv>.

Data was built based upon activity from 2019-01-01 forward.

# load data

In [7]:
# Form IDs isolated by the research notebook; used below to filter the weekly
# analytics tables.
forms = pd.read_csv("filtered_forms.csv")
forms.shape[0]

9564

## analytics

In [15]:
# Pull analytics_weekly one calendar year per query and keep only the rows that
# belong to the filtered forms.
form_ids = forms['form'].tolist()  # loop-invariant, so build it once

yearly_frames = []
for year in [2019, 2020, 2021, 2022, 2023, 2024]:
    print("querying {}".format(year))
    q = '''select * from analytics_weekly where date_part('year', date)={}'''.format(year)
    d = redshift_query_read(q, schema='public')

    d = d[d['form'].isin(form_ids)]
    yearly_frames.append(d)

# Concatenate once instead of re-copying the accumulated frame on every iteration
# (and instead of seeding pd.concat with None). The list is reversed so the most
# recent year comes first, which is the row order the incremental
# pd.concat([d, analytics]) produced.
analytics = pd.concat(yearly_frames[::-1])

querying 2019
querying 2020
querying 2021
querying 2022
querying 2023
querying 2024


In [16]:
# Sanity check on the combined frame: row count, date coverage, distinct forms.
summary_lines = [
    "{:,} entries".format(len(analytics)),
    "{}-{}".format(analytics['date'].min(), analytics['date'].max()),
    "{:,} unique forms".format(len(analytics['form'].unique())),
]
print("\n".join(summary_lines))

analytics.tail(2)

967,395 entries
2019-01-07 00:00:00-2024-02-26 00:00:00
9,562 unique forms


Unnamed: 0,date,org,form,product,vt_trans_count,don_form_trans_count,kiosk_trans_count,p2p_trans_count,mobile_trans_count,mobilevt_trans_count,...,kiosk_trans_vol,p2p_trans_vol,mobile_trans_vol,mobilevt_trans_vol,sms_trans_vol,fb_trans_vol,one_time_trans_vol,one_time_trans_count,rec_trans_vol,rec_trans_count
1235252,2019-10-07,29728,948118,1,0,1,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,135.0,1,0.0,0
1235259,2019-12-02,1643,949702,3,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0


## analytics p2p

In [11]:
# Pull analyticsp2p_weekly one calendar year per query and keep only the rows that
# belong to the filtered forms.
form_ids = forms['form'].tolist()  # loop-invariant, so build it once

yearly_frames = []
for year in [2019, 2020, 2021, 2022, 2023, 2024]:
    print("querying {}".format(year))
    q = '''select * from analyticsp2p_weekly where date_part('year', date)={}'''.format(year)
    d = redshift_query_read(q, schema='public')

    d = d[d['form'].isin(form_ids)]
    yearly_frames.append(d)

# Concatenate once instead of re-copying the accumulated frame on every iteration
# (and instead of seeding pd.concat with None). The list is reversed so the most
# recent year comes first, which is the row order the incremental
# pd.concat([d, analytics_p2p]) produced.
analytics_p2p = pd.concat(yearly_frames[::-1])

querying 2019
querying 2020
querying 2021
querying 2022
querying 2023
querying 2024


In [12]:
# Sanity check on the combined P2P frame: row count, date coverage, distinct forms.
summary_lines = [
    "{:,} entries".format(len(analytics_p2p)),
    "{}-{}".format(analytics_p2p['date'].min(), analytics_p2p['date'].max()),
    "{:,} unique forms".format(len(analytics_p2p['form'].unique())),
]
print("\n".join(summary_lines))

analytics_p2p.tail(2)

312,038 entries
2019-02-11 00:00:00-2024-02-26 00:00:00
2,683 unique forms


Unnamed: 0,date,form,org,reg_count,sub_reg_count,teams_count,reg_volume,don_volume,don_count,class_count,...,allows_social,social_templt_count,social_auto,pcnt_posts,mon_posts,count_posts,date_posts,email_templt_count,sponsors_count,inappr_content
217352,2019-09-16,945730,443576,0,0,0,0.0,0.0,0,1,...,0,0,0,0,0,0,0,0,0,0
217367,2019-08-19,948889,29757,0,0,0,0.0,0.0,0,0,...,0,0,0,0,0,0,0,0,0,0


## analytics qgiv

In [13]:
# Pull analyticsqgiv_weekly one calendar year per query and keep only the rows that
# belong to the filtered forms.
form_ids = forms['form'].tolist()  # loop-invariant, so build it once

yearly_frames = []
for year in [2019, 2020, 2021, 2022, 2023, 2024]:
    print("querying {}".format(year))
    q = '''select * from analyticsqgiv_weekly where date_part('year', date)={}'''.format(year)
    d = redshift_query_read(q, schema='public')

    d = d[d['form'].isin(form_ids)]
    yearly_frames.append(d)

# Concatenate once instead of re-copying the accumulated frame on every iteration
# (and instead of seeding pd.concat with None). The list is reversed so the most
# recent year comes first, which is the row order the incremental
# pd.concat([d, analytics_qgiv]) produced.
analytics_qgiv = pd.concat(yearly_frames[::-1])

querying 2019
querying 2020
querying 2021
querying 2022
querying 2023
querying 2024


In [14]:
# Sanity check on the combined Qgiv frame: row count, date coverage, distinct forms.
summary_lines = [
    "{:,} entries".format(len(analytics_qgiv)),
    "{}-{}".format(analytics_qgiv['date'].min(), analytics_qgiv['date'].max()),
    "{:,} unique forms".format(len(analytics_qgiv['form'].unique())),
]
print("\n".join(summary_lines))

analytics_qgiv.tail(2)

580,503 entries
2019-02-11 00:00:00-2024-02-26 00:00:00
6,253 unique forms


Unnamed: 0,date,form,org,pledges_count,events_count,events_priv_count,restrictions,amounts,ded_types,opt_ded_flds,...,reg_count,dl_trans_volume,dl_trans_count,dl_new_rec_count,dl_new_rec_volume,cta_after,cta_before,conditional_fields,appearance,image_size
911435,2019-04-15,899179,439324,0,0,0,0,4,0,0,...,0,0.0,0,0,0.0,0,0,0,0,0
911501,2019-03-04,942655,442129,0,0,0,0,0,0,0,...,26,0.0,0,0,0.0,0,0,0,0,0


# analysis

In [25]:
# Take the first product value recorded for each form, then count forms per product.
product_by_form = analytics.groupby('form')['product'].first()
product_by_form.value_counts()

1    5914
3    2628
5    1019
2       1
Name: product, dtype: int64

In [None]:
# Descriptive statistics for the filtered forms, one row per metric; the
# identifier and date columns are excluded and the 'count' statistic is dropped.
analytics_metrics = analytics.drop(columns=['org', 'form', 'date'])
ad = analytics_metrics.describe().T.drop(columns='count')
ad['median'] = analytics_metrics.median()

In [53]:
# Baseline for comparison: the average of every transaction metric over all rows of
# analytics_weekly (no form filter) from 2019-01-01 onward, computed in the database.
cols = ('vt_trans_count', 'don_form_trans_count', 'kiosk_trans_count', 'p2p_trans_count', 'mobile_trans_count', 
        'mobilevt_trans_count', 'sms_trans_count', 'fb_trans_count', 'vt_trans_vol', 'don_form_trans_vol',
        'kiosk_trans_vol', 'p2p_trans_vol', 'mobile_trans_vol', 'mobilevt_trans_vol', 'sms_trans_vol',
        'fb_trans_vol', 'one_time_trans_vol', 'one_time_trans_count', 'rec_trans_vol', 'rec_trans_count')
# Cast to float before averaging and alias each average back to its column name.
col_str = ", ".join("avg(cast({0} as float)) as {0}".format(c) for c in cols)
# Compare against an explicit date literal. The earlier `date>=2019` compared the
# date column to a bare integer, leaving the result to whatever implicit type
# coercion the database applies.
q = "select {} from analytics_weekly where date >= '2019-01-01'".format(col_str)
avg_analytics = redshift_query_read(q, schema='public')

In [54]:
# Transpose the averages so they are indexed by metric name and line up with
# ad's rows; metrics without a matching average are left as NaN.
ad['mean all'] = avg_analytics.T

In [56]:
# Show the per-metric summary table, including the 'median' and 'mean all' columns.
ad

Unnamed: 0,mean,std,min,25%,50%,75%,max,median,mean all
product,1.953294,1.267474,1.0,1.0,1.0,3.0,5.0,1.0,
vt_trans_count,0.06416,1.700833,0.0,0.0,0.0,0.0,299.0,0.0,0.028219
don_form_trans_count,0.55492,4.755231,0.0,0.0,0.0,0.0,1040.0,0.0,0.289713
kiosk_trans_count,0.011027,0.57655,0.0,0.0,0.0,0.0,306.0,0.0,0.012031
p2p_trans_count,0.596975,7.546987,0.0,0.0,0.0,0.0,910.0,0.0,0.154816
mobile_trans_count,0.185455,2.540709,0.0,0.0,0.0,0.0,768.0,0.0,0.088685
mobilevt_trans_count,0.015544,0.816352,0.0,0.0,0.0,0.0,202.0,0.0,0.005208
sms_trans_count,0.020708,0.803257,0.0,0.0,0.0,0.0,233.0,0.0,0.007625
fb_trans_count,0.013539,0.694757,0.0,0.0,0.0,0.0,206.0,0.0,0.002753
vt_trans_vol,14.394235,648.833229,0.0,0.0,0.0,0.0,145273.0,0.0,7.40937


## p2p

In [50]:
# Descriptive statistics for the filtered P2P forms, one row per metric; the
# identifier and date columns are excluded and the 'count' statistic is dropped.
p2p_metrics = analytics_p2p.drop(columns=['org', 'form', 'date'])
ap = p2p_metrics.describe().T.drop(columns='count')
ap['median'] = p2p_metrics.median()

In [48]:
# Baseline for comparison: the average of every P2P metric over all rows of
# analyticsp2p_weekly (no form filter) from 2019-01-01 onward, computed in the database.
cols = ('reg_count', 'sub_reg_count', 'teams_count', 'reg_volume', 'don_volume', 'don_count', 'class_count',
        'cat_count', 'promo_count', 'rest_count', 'amt_count', 'ded_count', 'fields', 'opt_fields',
        'req_fields', 'allows_reg_ind', 'allows_teams', 'allows_reg_team_create', 'allows_reg_team_join', 
        'allows_opt_reg_donation', 'allows_sub_reg', 'allows_sub_reg_pfp', 'allows_other_don_amt', 'allows_pfp_off_don',
        'allows_tfp_off_don', 'allows_soc_post_pfp_tcp', 'share_home', 'share_pfp', 'share_tfp', 'share_therm',
        'share_donation', 'allows_social', 'social_templt_count', 'social_auto', 'pcnt_posts', 'mon_posts',
        'count_posts', 'date_posts', 'email_templt_count', 'sponsors_count', 'inappr_content')
# Cast to float before averaging and alias each average back to its column name.
col_str = ", ".join("avg(cast({0} as float)) as {0}".format(c) for c in cols)
# Compare against an explicit date literal. The earlier `date>=2019` compared the
# date column to a bare integer, leaving the result to whatever implicit type
# coercion the database applies.
q = "select {} from analyticsp2p_weekly where date >= '2019-01-01'".format(col_str)
avg_p2p = redshift_query_read(q, schema='public')

In [51]:
# Transpose the averages so they are indexed by metric name and line up with
# ap's rows; metrics without a matching average are left as NaN.
ap['mean all'] = avg_p2p.T

In [52]:
# Show the per-metric P2P summary table, including the 'median' and 'mean all' columns.
ap

Unnamed: 0,mean,std,min,25%,50%,75%,max,median,mean all
reg_count,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
sub_reg_count,0.105353,2.26688,0.0,0.0,0.0,0.0,466.0,0.0,0.043856
teams_count,0.124578,1.081353,0.0,0.0,0.0,0.0,84.0,0.0,0.040946
reg_volume,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
don_volume,131.422017,1225.769957,0.0,0.0,0.0,0.0,263096.0,0.0,72.099187
don_count,1.362026,10.564727,0.0,0.0,0.0,0.0,804.0,0.0,0.680778
class_count,2.126908,7.763552,0.0,0.0,0.0,1.0,116.0,0.0,1.375796
cat_count,2.234286,2.434486,0.0,1.0,2.0,3.0,32.0,2.0,1.733895
promo_count,1.983835,8.520953,0.0,0.0,0.0,1.0,244.0,0.0,0.998044
rest_count,0.08398,1.247812,0.0,0.0,0.0,0.0,37.0,0.0,0.094839


## qgiv

In [None]:
# Descriptive statistics for the filtered Qgiv forms, one row per metric; the
# identifier and date columns are excluded and the 'count' statistic is dropped.
qgiv_metrics = analytics_qgiv.drop(columns=['org', 'form', 'date'])
aq = qgiv_metrics.describe().T.drop(columns='count')
aq['median'] = qgiv_metrics.median()

In [43]:
# Baseline for comparison: the average of every Qgiv metric over all rows of
# analyticsqgiv_weekly (no form filter) from 2019-01-01 onward, computed in the database.
cols = ['pledges_count', 'events_count', 'events_priv_count', 'restrictions', 'amounts', 'ded_types', 'opt_ded_flds', 'req_ded_flds',
        'opt_fields', 'req_fields', 'pledge_active', 'donation_active', 'multirestriction_system', 'min_amount', 'max_amount',
        'permit_anonymous', 'permit_recurring', 'permit_other_amount', 'permit_create_own_pledge', 'collect_company', 'collect_phone', 
        'collect_optin', 'collect_captcha', 'collect_address_mobile', 'enable_donorlogins', 'enable_sms', 'new_rec_volume',
        'new_rec_count', 'reg_count', 'dl_trans_volume', 'dl_trans_count', 'dl_new_rec_count', 'dl_new_rec_volume', 'cta_after',
        'cta_before', 'conditional_fields', 'appearance', 'image_size']
# Cast to float before averaging and alias each average back to its column name.
col_str = ", ".join("avg(cast({0} as float)) as {0}".format(c) for c in cols)
# Compare against an explicit date literal. The earlier `date>=2019` compared the
# date column to a bare integer, leaving the result to whatever implicit type
# coercion the database applies.
q = "select {} from analyticsqgiv_weekly where date >= '2019-01-01'".format(col_str)
avg_qgiv = redshift_query_read(q, schema='public')

In [46]:
# Transpose the averages so they are indexed by metric name and line up with
# aq's rows; metrics without a matching average are left as NaN.
aq['mean all'] = avg_qgiv.T

In [47]:
# Show the per-metric Qgiv summary table, including the 'median' and 'mean all' columns.
aq

Unnamed: 0,mean,std,min,25%,50%,75%,max,median,mean all
pledges_count,0.693282,0.627782,0.0,0.0,1.0,1.0,10.0,1.0,0.521577
events_count,0.596443,1.872823,0.0,0.0,0.0,1.0,140.0,0.0,0.251871
events_priv_count,0.031145,0.513377,0.0,0.0,0.0,0.0,31.0,0.0,0.017945
restrictions,0.627955,2.906409,0.0,0.0,0.0,0.0,121.0,0.0,0.756384
amounts,4.260006,2.350209,0.0,3.0,5.0,5.0,34.0,5.0,4.297034
ded_types,0.438013,0.878539,0.0,0.0,0.0,0.0,6.0,0.0,0.506061
opt_ded_flds,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
req_ded_flds,0.038725,0.326396,0.0,0.0,0.0,0.0,7.0,0.0,0.036627
opt_fields,0.368244,1.140071,0.0,0.0,0.0,0.0,27.0,0.0,0.367848
req_fields,0.25701,1.188785,0.0,0.0,0.0,0.0,38.0,0.0,0.171498


## smart amounts

In [67]:
# Distinct forms that have at least one syslog_logs row whose message contains
# "Smart Amount"; the query applies no date restriction.
q = '''select distinct(form) from syslog_logs where message like '%Smart Amount%' '''
sa_forms = redshift_query_read(q, schema='production')

In [68]:
# Distinct forms with at least one transaction where status='A' and year>='2019';
# the result is stored as `active_forms`.
# NOTE(review): `year` is compared against the string literal '2019' — confirm the
# column type so this is not an unintended string comparison.
q = '''select distinct(form) from transactions where status='A' and year>='2019' '''
active_forms = redshift_query_read(q, schema='production')

In [73]:
# Share of forms with a "Smart Amount" syslog entry: first relative to all active
# forms, then within the filtered form set.
# NOTE(review): sa_forms is not restricted to active_forms or to 2019+, so the first
# ratio assumes every smart-amount form is also an active form — confirm.
len_active_forms = len(active_forms)
len_sa_forms = len(sa_forms)
perc_sa_forms = (len_sa_forms / len_active_forms)

# Build the lookup set and the unique filtered IDs once. The earlier comprehension
# rebuilt sa_forms['form'].tolist() for every filtered form and scanned it linearly.
sa_form_ids = set(sa_forms['form'].tolist())
filtered_form_ids = forms['form'].unique().tolist()
len_sa_forms_fltrd = sum(1 for f in filtered_form_ids if f in sa_form_ids)
perc_sa_forms_fltrd = (len_sa_forms_fltrd / len(filtered_form_ids))

print("forms engageed with smart amounts: {:,} ({:.1f}% of active forms)".format(len_sa_forms, perc_sa_forms * 100.))
print("filtered forms engaged with smart amounts: {:,} ({:.1f}% of filtered forms)".format(len_sa_forms_fltrd, perc_sa_forms_fltrd * 100.))

forms engageed with smart amounts: 522 (1.1% of active forms)
filtered forms engaged with smart amounts: 73 (0.8% of filtered forms)
