In [1]:
import pandas as pd

import sys
sys.path.insert(1, '../../scripts/')
from s3_support import *
%matplotlib inline

# load data

In [8]:
# forms: load every form with type=3, then report how many rows came back
# and how many of them have status == 1.
q = "select * from form where type=3"
forms = redshift_query_read(q, schema='production')
active_forms = forms[forms['status'] == 1]
print(f"{len(forms):,} forms")
print(f"{len(active_forms):,} active")

16,321 forms
3,121 active


In [9]:
# logs: syslog entries whose form has type=3.
q = "select s.* from syslog_logs as s left join form as f on s.form=f.id where f.type=3"
logs = redshift_query_read(q, schema='production')

# Distinct forms / orgs that appear in the log table.
n_log_forms = len(logs['form'].unique())
n_log_orgs = len(logs['org'].unique())

print(f"{len(logs):,} log entries")
print(f"{n_log_forms:,} forms, {n_log_orgs:,} orgs")
print(f"min date: {logs['created'].min()}")

1,405,816 log entries
7,001 forms, 1,468 orgs
min date: 2020-12-09 15:12:38


In [None]:
# transactions: rows with status='A' dated 2020-12-09 or later on type=3 forms.
q = """select 
            t.* 
        from transactions as t 
            left join form as f on t.form=f.id 
        where 
            t.status='A' and
            t.date>='2020-12-09' and 
            f.type=3"""
trans = redshift_query_read(q, schema='production')

# Group once and reuse for both per-form averages.
trans_by_form = trans.groupby('form')
print(f"{len(trans):,} transactions")
print(f"{trans_by_form['id'].count().mean():,.2f} transactions per form")
print(f"${trans_by_form['amount'].sum().mean():,.2f} volume per form")

In [None]:
# analytics
# Query joining type=3 forms to public.analytics (on form id) and to
# public.analyticsp2p (on id_hash), restricted to analytics rows dated
# 2020-12-09 or later.
# NOTE(review): the read on the last line of this cell is commented out, so
# this cell only assigns `q` and never binds `analytics`. Cells that reference
# `analytics` will raise NameError on a fresh kernel unless it is loaded
# somewhere else — confirm before running them.
q = '''select
            a.*,
            ap.*
        from production.form as f
            left join public.analytics as a on f.id=a.form
            left join public.analyticsp2p as ap on ap.id_hash=a.id_hash
        where 
            a.date>='2020-12-09' and
            f.type=3'''
#analytics = redshift_query_read(q, schema='production')

In [None]:
# Row count of the analytics frame; requires `analytics` to already be loaded.
print(f"{len(analytics):,} analytics entries")

In [None]:
# Preview the first five analytics rows.
analytics.head(n=5)

# analysis

## annual totals

In [6]:
# Per-form, per-year transaction count and summed amount for status='A'
# transactions dated 2018-01-01 or later on type=3 forms.
q = """select
            t.form, 
            t.year,
            count(t.id) as trans_count,
            sum(t.amount) as trans_vol
        from transactions as t
            left join form as f on t.form=f.id 
        where 
            t.status='A' and
            t.date>='2018-01-01' and 
            f.type=3
        group by t.form, t.year"""
years = redshift_query_read(q, schema='production')

In [7]:
# Mean and median of the per-form transaction count and volume, by year.
yearly_metrics = ['trans_count', 'trans_vol']
yearly_stats = years.groupby('year')[yearly_metrics].agg(['mean', 'median'])
yearly_stats.reset_index()

Unnamed: 0_level_0,year,trans_count,trans_count,trans_vol,trans_vol
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,median,mean,median
0,2018,197.715647,58.0,13563.792192,3575.0
1,2019,204.865944,58.0,14052.762573,3690.25
2,2020,166.627851,36.0,15229.686463,2885.0
3,2021,181.69762,34.0,16927.979865,3217.0
4,2022,190.722319,40.0,17266.653309,3315.02
5,2023,174.612169,29.0,15756.127,2399.1


## logs & performance

In [None]:
# Per-form transaction count and volume.
trans_aggs = trans.groupby('form')['amount'].agg(['count', 'sum']).reset_index()
# Per-form number of log entries.
logs_aggs = logs.groupby('form')['id'].count().reset_index()

# Keep only forms present in both tables and give the metrics explicit names.
forms_aggs = trans_aggs.merge(logs_aggs, on='form').rename(
    columns={'count': 'trans_count', 'sum': 'trans_vol', 'id': 'logs_count'}
)

In [None]:
# Last row of the correlation matrix, i.e. correlations against the final
# metric column of forms_aggs.
metric_corr = forms_aggs.drop(columns='form').corr()
metric_corr.iloc[-1]

In [None]:
# Average of every per-form metric (the form id itself is excluded).
form_metrics = forms_aggs.drop(columns='form')
form_metrics.mean()

In [None]:
# Quartiles of per-form transaction volume.
quartile_levels = [0.25, 0.5, 0.75]
forms_aggs['trans_vol'].quantile(quartile_levels)

In [None]:
# Column means over forms whose transaction volume exceeds 19602
# (presumably the upper quartile from the preceding cell — TODO confirm).
is_high_volume = forms_aggs['trans_vol'] > 19602
forms_aggs.loc[is_high_volume].mean()

In [None]:
# Ordered (category, substrings) rules. Rules are checked top to bottom and the
# first rule with any substring present in the message wins, so order matters.
_LOG_CATEGORY_RULES = (
    ('Campaign', ('Campaign sent', 'campaign')),
    ('Email', ('email triggered',)),
    ('Purge', ('purged',)),
    ('Unsubscribed', ('Unsubscribed',)),
    ('FailedToSend', ('Failed to send',)),
    ('SocialPost', ('Twitter post',)),
    ('Error', ('SQLSTATE', 'Unauthorized post request')),
    ('CMS', ('Form CMS updated',)),
    ('Classification', ('Classification created', 'Classification deleted')),
    ('Category', ('Category created', 'Category deleted')),
)


def cat_logs(m):
    """Map a log message to a coarse category label.

    Parameters
    ----------
    m : str
        Log message text. Matching is case-sensitive substring search.

    Returns
    -------
    str or None
        The label of the first matching rule in ``_LOG_CATEGORY_RULES``, or
        ``None`` when no rule matches.

    Notes
    -----
    The previous version tested ``'Classification created' in m or
    'Classification deleted'``. The bare string literal is always truthy, so
    every message that reached that branch was labelled 'Classification' and
    the 'Category' and ``None`` outcomes could never occur. Each substring is
    now actually tested against the message.
    """
    for category, needles in _LOG_CATEGORY_RULES:
        if any(needle in m for needle in needles):
            return category
    return None
    
# Label every log entry by passing its message through cat_logs.
logs['category'] = logs['message'].map(cat_logs)

In [None]:
# Relative frequency of each log category.
category_share = logs['category'].value_counts(normalize=True)
category_share

In [None]:
# One row per form, one column per log category (entry counts, 0 where a form
# has no entries in that category), joined to the per-form transaction metrics.
log_cat_counts = (
    logs[['form', 'category']]
    .groupby(['form', 'category'])['category']
    .count()
    .rename('count')
    .reset_index()
    .pivot(index='form', columns='category', values='count')
    .fillna(0)
    .reset_index()
    .merge(forms_aggs, on='form')
)

log_cat_counts.head(3)

In [None]:
# Correlation matrix of the per-category log counts and transaction metrics;
# the form id and the total log count are left out.
category_metrics = log_cat_counts.drop(columns=['form', 'logs_count'])
category_metrics.corr()

We see meaningful correlations with "Email" and "Error", and a very strong correlation (86%) between "FailedToSend" and transaction count. This message is related to failing to send recurring notifications, so it stands to reason that most of the transaction activity is coming from forms with recurring enabled. The correlation with "Email" is interesting, as it might indicate that events emphasizing email communications are performing better than those that do not, but this depends on the reason for the email. If the majority of emails are simply receipts, then a strong correlation is to be expected, as an email would in most cases be sent out with every transaction.

- _Need to look at performance improvement for P2P events w/ and w/out recurring enabled_
- look at P2P performance related to communications (ie, emails, notifications, etc.)

## p2p stores

In [None]:
# Forms with at least one transaction whose purchases amount is non-zero.
has_purchase = trans['purchases_amt'] != 0
forms_w_stores = trans.loc[has_purchase, 'form'].unique()

# Share of all forms seen in `trans` that have such a transaction, in percent.
n_forms_total = len(trans['form'].unique())
perc_w_stores = (len(forms_w_stores) / n_forms_total) * 100.

print(f"events with stores: {len(forms_w_stores):,} ({perc_w_stores:.2f}%)")

In [None]:
def _print_event_stats(subset, with_purchases=False):
    """Print per-form and per-transaction summary statistics for `subset`.

    Parameters
    ----------
    subset : DataFrame
        Transaction rows; reads the columns 'form', 'id', 'amount',
        'donations_amt', 'registrations_amt' and, when `with_purchases`
        is true, 'purchases_amt'.
    with_purchases : bool, default False
        Also print the mean/median non-zero purchases amount.
    """
    by_form = subset.groupby('form')
    funds = by_form['amount'].sum()
    counts = by_form['id'].count()
    print("${:,.2f} mean funds raised".format(funds.mean()))
    print("${:,.2f} median funds raised".format(funds.median()))
    print("{:,.2f} mean transactions".format(counts.mean()))
    print("{:,.2f} median transactions".format(counts.median()))

    # Donations first, then registrations; only non-zero rows are considered.
    for column, label in (('donations_amt', 'donations'),
                          ('registrations_amt', 'registrations')):
        nonzero = subset[subset[column] != 0]
        raised = nonzero.groupby('form')[column].sum()
        print("${:,.2f} mean {} raised".format(raised.mean(), label))
        print("${:,.2f} median {} raised".format(raised.median(), label))
        print("${:,.2f} mean {} amount".format(nonzero[column].mean(), label))
        print("${:,.2f} median {} amount".format(nonzero[column].median(), label))

    if with_purchases:
        purchases = subset[subset['purchases_amt'] != 0]['purchases_amt']
        print("${:,.2f} mean purchases amount".format(purchases.mean()))
        print("${:,.2f} median purchases amount".format(purchases.median()))


print("All events:")
_print_event_stats(trans)

# Rows belonging to forms that have at least one store purchase.
in_store_event = trans['form'].isin(forms_w_stores)
# Share of those rows that are themselves store purchases, in percent.
perc_store_trans = (len(trans[trans['purchases_amt']!=0]) / len(trans[in_store_event])) * 100.
print()
print("Events w/ stores:")
_print_event_stats(trans[in_store_event], with_purchases=True)
print("{:.2f}% of transactions per form are store purchases".format(perc_store_trans))

print()
print("Events w/out stores:")
_print_event_stats(trans[~in_store_event])

We can see that events with stores raise about 31% more by mean and nearly 300% more by median in donations than events without stores. Events with stores also raise nearly 600% more in registrations than events without stores. Clearly events with stores are raising more than events without stores by overall volume.

The mean donations amount does fall with a store, however, by about $15. Store transactions only account for about 10% of all transactions for events with stores, so they do not account for the difference dollar for dollar. It might not prove meaningful, but it is interesting that events with stores raise nearly 4 times as much per registration as events without stores. This may signify a greater investment on the part of the participants in these events than in the others.