In [2]:
import sys
sys.path.insert(1, '../../scripts/')
from s3_support import *

import pandas as pd

For all, calculate 2019, 2020, 2021, 2022, and 2023 (YTD)

Form types: 
- QGIV = 1
- HOBNOB = 3
- AGGREGATOR = 4
- AUCTION = 5

# 0 data load

## 1.transactions

In [38]:
# onetime
q = '''select *
        from transactions
        where
            status='A' and
            recurring=0 and
            year=2023'''
onetime = redshift_query_read(q, schema='production')

In [39]:
print("{:,} onetime; {:%Y-%m-%d} to {:%Y-%m-%d}".format(len(onetime), onetime['date'].min(), onetime['date'].max()))

1,794,558 onetime; 2023-01-01 to 2023-12-31


In [40]:
# recurring: count, mean, median
q = '''select
            t.*
        from transactions as t
        inner join (select
                        recurring,
                        min(date) as min_date
                    from transactions
                    group by recurring) as r on t.recurring=r.recurring and t.date=r.min_date
        where
            t.status='A' and
            t.recurring!=0 and
            date_part('year', r.min_date)=2023
        order by date asc;'''
rec = redshift_query_read(q, schema='production')

In [41]:
rec = rec.groupby('recurring').first()

In [42]:
print("{:,} recurring; {:%Y-%m-%d} to {:%Y-%m-%d}".format(len(rec), rec['date'].min(), rec['date'].max()))

68,031 recurring; 2023-01-01 to 2023-12-31


In [43]:
trans = pd.concat([onetime, rec])
print("{:,} transactions".format(len(trans)))

1,862,589 transactions


In [46]:
rec_origin = trans['recurring_origin'].sum()
rec_perc = (rec_origin / len(trans)) * 100.

print("# recurring: {:,}".format(rec_origin))
print("recurring perc: {:.2f}%".format(rec_perc))

# recurring: 19,403
recurring perc: 1.04%


## 2. new forms extra settings

In [14]:
# new forms extra settings 
q = '''select 
            date as week, form, appearance, 
            conditional_fields, image_size
        from analyticsqgiv_weekly
        where date_part('year', date)=2024
        order by date asc;'''
nuform_extra = redshift_query_read(q, schema="public")

In [15]:
# set week start to trans date
trans['week'] = trans['date'].dt.to_period('w')
nuform_extra['week'] = nuform_extra['week'].dt.to_period('w')

In [16]:
trans = trans.merge(nuform_extra, on=['form', 'week'], how='left')
print("{:,} transactions".format(len(trans)))

198,771 transactions


In [17]:
# conditional fields: count forms, sum on/off, conversion on/off
trans['is_cond_fields'] = trans['conditional_fields']>0

In [18]:
# single/multi step: count forms, sum, conversion
# multistep: appearance==2
trans['is_multistep'] = trans['appearance']==2

In [19]:
# amounts w/ images on/off: count forms, sum
# w/ images: image_size==0
trans['is_amountsimages'] = trans['image_size']!=0

'''
@TODO

amounts w/ images isn't governed from a central setting
but rather by whether or not an amount has images attached
so there isn't a clean way to track this other than
scraping each amount. these settings default to wide (2)
but this will be set whether or not the form is using
images or has ever used images
'''

"\n@TODO\n\namounts w/ images isn't governed from a central setting\nbut rather by whether or not an amount has images attached\nso there isn't a clean way to track this other than\nscraping each amount. these settings default to wide (2)\nbut this will be set whether or not the form is using\nimages or has ever used images\n"

In [20]:
# cleanup new forms extra settings columns
drop_cols = ['appearance', 'conditional_fields', 'image_size']
trans.drop(drop_cols, axis=1, inplace=True)

In [22]:
nuform_extra[nuform_extra['week'].dt.year==2024]['image_size'].value_counts()

2    217803
1     65086
Name: image_size, dtype: int64

## 3. embeds

embeds on/off: sum, mean, median, conversion

In [23]:
q = "select widget as form from embed"
embeds = redshift_query_read(q, schema='production')

trans['is_embed'] = trans['form'].isin(embeds['form'].tolist())

In [24]:
print("{:,} embed forms".format(len(embeds)))
print("{:,} embed transactions".format(len(trans[trans['is_embed']])))

19,287 embed forms
58,741 embed transactions


## 4. smart amounts

count orgs, mean, median, mean & median raised on/off

In [25]:
# log upgrades
q = '''select 
            form, 
            created, 
            message 
        from syslog_logs 
        where message like '%Smart Amount%' '''
logs = redshift_query_read(q, schema='production')

print("{:,} smart amounts log entries; {:,} forms".format(len(logs), len(logs['form'].unique())))
print("ranging from {:%Y-%m-%d} to {:%Y-%m-%d}".format(logs['created'].min(), logs['created'].max()))

639 smart amounts log entries; 442 forms
ranging from 2022-11-07 to 2024-02-12


In [26]:
logs['enabled'] = logs['message'].str.contains('Enabled').fillna(False)
logs['disabled'] = logs['message'].str.contains('Disabled').fillna(False)
entry_counts = logs.groupby('form')[['enabled', 'disabled']].sum().reset_index()

print("{:,} forms have only one entry (enabled)".format(len(entry_counts[(entry_counts['enabled']==1)&(entry_counts['disabled']==0)])))
print("{:,} forms enabled == disabled".format(len(entry_counts[entry_counts['enabled']==entry_counts['disabled']])))

207 forms have only one entry (enabled)
139 forms enabled == disabled


In [27]:
# isolate smart amounts forms to more quickly tag transactions
trans['is_smart_amounts'] = False
trans_sa_forms = trans[(trans['form'].isin(logs['form'].unique()))&(trans['date']>='2022-11-01')].copy()
trans_nonsa_forms = trans[(~trans['form'].isin(logs['form'].unique()))|(trans['date']<'2022-11-01')].copy()

In [28]:
def is_using_smart_amounts(r):
    try:
        return logs[(logs['form']==r['form'])&(logs['created']<=r['date'])].sort_values('created', ascending=True)['enabled'].iloc[0]
    except:
        return False

In [None]:
# tag smart amounts transactions
trans_sa_forms_tagged = None
for f in trans_sa_forms['form'].tolist():
    these_trans = trans_sa_forms[trans_sa_forms['form']==f].copy()
    
    if f in entry_counts[(entry_counts['enabled']==1)&(entry_counts['disabled']==0)]['form'].tolist():
        # forms that were just enabled and never disabled
        activation_date = logs[logs['form']==f]['created'].iloc[0]
        these_trans['is_smart_amounts'] = these_trans['date']>=activation_date
    else:
        # forms that were enabled and disabled, possibly multiple times
        # need to verify transactions fall within active windows
        these_trans['is_smart_amounts'] = these_trans.apply(is_using_smart_amounts, axis=1)
    
    if trans_sa_forms_tagged is None:
        trans_sa_forms_tagged = these_trans
    else:
        trans_sa_forms_tagged = pd.concat([trans_sa_forms_tagged, these_trans])

In [1]:
print("{:,} tagged transactions".format(len(trans_sa_forms_tagged['id'].unique())))
print("{:,} tagged transactions".format(len(trans_sa_forms_tagged)))
print("{:,} transactions that used smart amounts".format(len(trans_sa_forms_tagged[trans_sa_forms_tagged['is_smart_amounts']]['id'].unique())))

NameError: name 'trans_sa_forms_tagged' is not defined

In [None]:
trans = pd.concat([trans_sa_forms_tagged, trans_nonsa_forms]).sort_values('date', ascending=True)
print("{:,} transactions".format(len(trans)))

In [4]:
trans.drop_duplicates(inplace=True)
print("{:,} transactions".format(len(trans)))

163,375 transactions


## 5. traffic

In [36]:
q = '''select
            date,
            form,
            sum(views) as views
        from ga4_traffic
        where
            date>=2024 and
            form!=0
        group by date, form'''
traff = redshift_query_read(q, schema='production')
print("{:,} entries; {} to {}".format(len(traff), traff['date'].min(), traff['date'].max()))

48,731 entries; 2024-01-01 00:00:00 to 2024-02-12 00:00:00


## s3 store/retrieve

In [None]:
save_dataframe_to_file('qgiv-stats-data', 'jess.stats.yoy.csv', trans)

In [3]:
trans = get_dataframe_from_file("qgiv-stats-data", 'jess.stats.csv')

In [9]:
print("Dates: {} to {}".format(trans['date'].min(), trans['date'].max()))

trans['isexpresscheckout'].value_counts()

Dates: 2024-01-01 to 2024-02-05


False    162252
True       1123
Name: isexpresscheckout, dtype: int64

In [None]:
# general processing
q = '''select
            year,
            count(distinct(form)) as active_forms,
            sum(amount) as total_volume,
            count(distinct(case when recurring=0 then id else null end)) as onetime_count,
            avg(case when recurring=0 then amount else null end) as onetime_mean,
            count(distinct(case when recurring_origin=1 then id else null end)) as recurring_count,
            avg(case when recurring_origin=1 then amount else null end) as recurring_mean,
            count(distinct(case when gift_assist_count!=0 then id else null end)) as giftassist_count
        from transactions
        where status='A' and year>=2019 
        group by year'''

# median one time
q = '''select
            year,
            median(amount) as onetime_median
        from transactions
        where
            recurring=0 and
            status='A' and
            year>=2019
        group by year'''

# median recurring
q = '''select
            year,
            median(amount) as recurring_median
        from transactions
        where
            recurring_origin=1 and
            status='A' and
            year>=2019
        group by year'''

# smart amounts datapoints
# conditional fields datapoints
# single step vs multi step datapoints
# amounts w/ images

# embeds datapoints
q = '''select
            year,
            is_embed,
            count(distinct(form)) as forms_count,
            sum(total_volume) as total_raised_sum,
            avg(total_volume) as total_raised_mean
        from (select
                year,
                form,
                case when e.url is null then 0 else 1 end as is_embed,
                sum(amount) as total_volume,
                count(distinct(t.id)) as total_count
            from transactions as t
                left outer join embed as e on t.form=e.widget
            where
                status='A' and
                year>=2019
            group by year, form, url) as embeds
        group by year, is_embed
        order by year, is_embed'''

q = '''select
            year,
            is_embed,
            median(total_volume) as total_raised_median
        from (select
                year,
                form,
                case when e.url is null then 0 else 1 end as is_embed,
                sum(amount) as total_volume
            from transactions as t
                left outer join embed as e on t.form=e.widget
            where
                status='A' and
                year>=2019
            group by year, form, url) as embeds
        group by year, is_embed
        order by year, is_embed'''

'''
Completion rate of embedded forms
Completion rate of non-embedded forms
'''

In [21]:
q = '''select
            date_trunc('month', date) as month,
            form,
            count(distinct(case when recurring=0 or recurring_origin=1 then id else null end)) as trans_count,
            sum(gift_assist_count) as giftassist_count
        from transactions
        where 
            status='A' and
            year>=2024
        group by form, date_trunc('month', date)'''
#giftassist_trans = redshift_query_read(q, schema='production')

q = '''select
            form,
            date_trunc('month', date) as month,
            sum(views) as views
        from ga4_traffic
        where date>=2024
        group by form, date_trunc('month', date)'''
giftassist_traff = redshift_query_read(q, schema='production')

giftassist = giftassist_trans.merge(giftassist_traff, on=['form', 'month'])

In [22]:
len(giftassist_trans), len(giftassist_traff)

(16254, 11236)

In [23]:
giftassist['year'] = giftassist['month'].dt.year
giftassist['giftassist_active'] = giftassist['giftassist_count'] > 0
giftassist['conversion'] = giftassist['trans_count'] / giftassist['views']

In [24]:
len_all = len(giftassist)
print("total observations: {:,}".format(len_all))

len_gt0_views = len(giftassist[giftassist['views']==0])
perc_gt0_views = (len_gt0_views / len_all) * 100.

print("observations w/ 0 views: {:,} ({:.2f}%)".format(len_gt0_views, perc_gt0_views))

len_trans_gt_views = len(giftassist[giftassist['trans_count']>giftassist['views']])
perc_trans_gt_views = (len_trans_gt_views / len_all) * 100.

print("trans > views: {:,} ({:.2f}%)".format(len_trans_gt_views, perc_trans_gt_views))

total observations: 6,322
observations w/ 0 views: 0 (0.00%)
trans > views: 337 (5.33%)


In [25]:
ga_monthly = giftassist[(giftassist['views']>0)&(giftassist['trans_count']<giftassist['views'])].groupby(['giftassist_active', 'year'])['conversion'].agg(['mean', 'median', 'count']).reset_index()
ga_pvt = ga_monthly.pivot(index='year', columns='giftassist_active', values=['mean', 'median', 'count']).reset_index()
ga_pvt.columns = ['year', 'no ga mean conv', 'ga mean conv',
                 'no ga median conv', 'ga median conv',
                 'no ga count', 'ga count']
ga_pvt

Unnamed: 0,year,no ga mean conv,ga mean conv,no ga median conv,ga median conv,no ga count,ga count
0,2024,0.116446,0.120458,0.05,0.052632,2448.0,3490.0


In [26]:
giftassist[(giftassist['views']>0)&(giftassist['trans_count']<giftassist['views'])]

Unnamed: 0,month,form,trans_count,giftassist_count,views,year,giftassist_active,conversion
0,2024-01-01,1888,2182,0,14973,2024,False,0.145729
1,2024-01-01,1015411,253,122,7276,2024,True,0.034772
2,2024-01-01,954929,13,0,159,2024,False,0.081761
3,2024-01-01,349,3542,0,18656,2024,False,0.189858
4,2024-01-01,970771,6,5,50,2024,True,0.120000
...,...,...,...,...,...,...,...,...
6317,2024-02-01,1010183,0,0,315,2024,False,0.000000
6318,2024-01-01,1018973,2,0,72,2024,False,0.027778
6319,2024-01-01,1011143,0,1,4,2024,True,0.000000
6320,2024-01-01,808007,0,0,2,2024,False,0.000000


# 1. donation forms

- Total active donation forms
- Total amount raised
- count of one-time donations
- Avg. one-time gift size
- Median one-time gift size
- count of recurring donations
- Avg. recurring gift size
- Median recurring gift size
- perc of donors who use giftassist
- perc of donors who upgrade to recurring gift when prompted
- count of orgs using Smart Amounts
- Avg raised using Smart Amounts
- Median raised using Smart Amounts
- Avg raised by those not using Smart Amounts
- Median raised by those not using Smart Amounts
- count of forms using conditional fields
- dollarss raised by forms using conditional fields
- dollarss raised by forms not using conditional fields
- Completion rate of forms using conditional fields
- Completion rate of forms not using conditional fields
- count of single step forms
- dollars raised by single step forms
- Completion rate of single step forms
- count of multi-step forms
- dollars raised by multi-step forms
- Completion rate of multi-step forms
- count of forms with donation amounts w/images
- dollars raised by donation amounts w/images
- dollars raised by donation amounts w/o images
- dollars raised by embedded forms
- dollars raised by non-embedded forms
- Avg raised using embedded forms
- Median raised using embedded forms
- Avg raised using non-embedded forms
- Median raised using non-embedded forms
- Completion rate of embedded forms
- Completion rate of non-embedded forms

In [27]:
'''
onetime
- Total active donation forms
- Total amount raised
- # of one-time donations
- Avg. one-time gift size
'''
aggs = ['count', 'sum', 'mean', 'median']

onetime = trans[trans['recurring']==0].groupby('year')['amount'].agg(aggs).reset_index()
onetime = onetime.merge(trans[trans['recurring']==0].groupby('year')['form'].nunique().reset_index(), on='year')
onetime.columns = ['year', 'transactions', 'volume', 'mean', 'median', 'forms']
onetime.transpose()

Unnamed: 0,0
year,2024.0
transactions,154240.0
volume,24554880.0
mean,159.1992
median,50.0
forms,5516.0


In [28]:
'''
recurring
- # of recurring donations
- Avg. recurring gift size
'''
trans[trans['recurring']!=0].groupby('year')['amount'].agg(['count', 'mean', 'median']).reset_index()

Unnamed: 0,year,count,mean,median
0,2024,9135,82.557603,36.05


In [29]:
# gift assist
# perc of donors who use giftassist
ga = trans.groupby('year')[['id', 'gift_assist_count']].agg({'gift_assist_count': 'sum', 'id': 'count'}).reset_index()
ga['perc'] = ga['gift_assist_count'] / ga['id']
ga

Unnamed: 0,year,gift_assist_count,id,perc
0,2024,49822,163375,0.304955


In [30]:
'''
smart amounts
- # of orgs using Smart Amounts
- Avg raised using Smart Amounts
'''
smartamounts = trans[trans['is_smart_amounts']].groupby('year')['org'].nunique().reset_index()

smartamounts_med = trans[trans['is_smart_amounts']].groupby(['year', 'org'])['amount'].sum().reset_index()
smartamounts.merge(smartamounts_med.groupby('year')['amount'].agg(['mean', 'median']).reset_index(), on='year').transpose()

Unnamed: 0,0
year,2024.0
org,96.0
mean,8548.525521
median,1675.625


In [31]:
# not using smart amounts
notsmartamounts = trans[~trans['is_smart_amounts']].groupby(['year', 'org'])['amount'].sum().reset_index()
notsmartamounts.groupby('year')['amount'].agg(['mean', 'median']).reset_index().transpose()

Unnamed: 0,0
year,2024.0
mean,9543.409587
median,1473.225


In [37]:
'''
conditional fields
- # of forms using conditional fields
- $s raised by forms using conditional fields
- $s raised by forms not using conditional fields
- Completion rate of forms using conditional fields
- Completion rate of forms not using conditional fields

this is going to be a weird metric given this is YoY. forms will
test the feature, appear in analytics, then disable the feature, 
but would be counted as having used the feature when less than 1%
of their transactions would have been processed with conditional
fields active. calculate stats weekly?
'''
cond_stats = trans[trans['source']=='don_form'].groupby(['year', 'is_cond_fields'])[['form', 'amount']].agg({'form': 'nunique', 'amount': 'sum'}).reset_index()

condfields_forms = trans[trans['source']=='don_form'].groupby(['date', 'form']).agg({'id': 'count', 'is_cond_fields': 'median'}).reset_index()
condfields_forms = condfields_forms.merge(traff, on=['date', 'form'], how='left').fillna(0)
condfields_forms['year'] = condfields_forms['date'].dt.year
condfields_years = condfields_forms.groupby(['year', 'is_cond_fields'])[['id', 'views']].sum().reset_index()
condfields_years['conversion'] = condfields_years['id'] / condfields_years['views']
condfields_conv = condfields_years.pivot(index='year', columns='is_cond_fields', values='conversion')
condfields_conv = condfields_conv.reset_index()
condfields_conv.columns = ['year', 'no cond fields conv', 'cond fields conv']

cond_stats = cond_stats.merge(condfields_conv, on='year')

cond_stats = cond_stats.pivot(index='year', columns='is_cond_fields', values=['form', 'amount', 'no cond fields conv', 'cond fields conv']).reset_index()

cond_stats.columns = ['year', 'forms not using condfields', 
                      'forms using condfields', 'volume not condfields',
                      'volume condfields', 'not condfields conv',
                      'not condfields conv 2', 'condfields conv',
                      'condfields conv 2']
cond_stats[['year', 'forms not using condfields', 
            'forms using condfields', 'volume not condfields',
            'volume condfields', 'not condfields conv', 
            'condfields conv']].transpose()

ValueError: You are trying to merge on object and datetime64[ns] columns. If you wish to proceed you should use pd.concat

In [None]:
'''
single step vs multi step
- # of single step forms
- $s raised by single step forms
- Completion rate of single step forms
- # of multi-step forms
- $s raised by multi-step forms
- Completion rate of multi-step forms
'''
steps = trans.groupby(['year', 'is_multistep'])[['form', 'amount']].agg({'form': 'nunique', 'amount': 'sum'}).reset_index()
steps = steps.pivot(index='year', columns='is_multistep', values=['form', 'amount']).reset_index()
steps.fillna(0, inplace=True)
steps.columns = ['year', 'singlestep forms', 'multistep forms', 'singlestep volume', 'multistep volume']

multistep_forms = trans[trans['source']=='don_form'].groupby(['date', 'form']).agg({'id': 'count', 'is_multistep': 'first'}).reset_index()
multistep_forms = multistep_forms.merge(traff, on=['date', 'form'], how='left').fillna(0)
multistep_forms['year'] = multistep_forms['date'].dt.year
multistep_years = multistep_forms.groupby(['year', 'is_multistep'])[['id', 'views']].sum().reset_index()
multistep_years['conversion'] = multistep_years['id'] / multistep_years['views']
multistep_conv = multistep_years.pivot(index='year', columns='is_multistep', values='conversion')
multistep_conv = multistep_conv.reset_index()
multistep_conv.columns = ['year', 'single step conv', 'multi step conv']

steps.merge(multistep_conv, on='year').transpose()

In [None]:
'''
embeds
- $$ raised by embedded forms
- $$ raised by non-embedded forms
- Avg raised using embedded forms
- Median raised using embedded forms
- Avg raised using non-embedded forms
- Median raised using non-embedded forms
- Completion rate of embedded forms
- Completion rate of non-embedded forms
'''
yearly_forms = trans.groupby(['year', 'form'])['amount'].agg(['count', 'sum']).reset_index()
embed_forms = trans.groupby(['year', 'form'])['is_embed'].mean().reset_index()
embed_forms = yearly_forms.merge(embed_forms, on=['year', 'form'])

traff['year'] = traff['date'].dt.year
embed_forms = embed_forms.merge(traff.groupby(['year', 'form'])['views'].sum().reset_index(), on=['year', 'form'])
embed_forms['conversion'] = embed_forms['count'] / embed_forms['views']

embed_agg = embed_forms.groupby(['year', 'is_embed'])['sum'].agg(['sum', 'mean', 'median']).reset_index()
embed_agg = embed_agg.merge(embed_forms[embed_forms['count']<=embed_forms['views']].groupby(['year', 'is_embed'])['conversion'].mean(), on=['year', 'is_embed'])
embed_agg = embed_agg.pivot(index='year', columns='is_embed', values=['sum', 'mean', 'median', 'conversion']).reset_index()

embed_agg.columns = ['year', 'nonembed volume', 'embed volume',
                     'non embed mean', 'embed mean', 'non embed median',
                     'embed median', 'non embed conversion', 'embed conversion']
embed_agg.transpose()

In [None]:
print("{:,} trans entries".format(len(trans)))
print("{:,} unique trans entries".format(len(trans['id'].unique())))


In [None]:
# conversion
trans['month'] = trans['date'].dt.to_period('M')
trans_monthly = trans.groupby(['form', 'month'])['id'].count().reset_index()
trans_monthly['trans_count'] = trans_monthly['id']
trans_monthly.drop('id', axis=1, inplace=True)

traff['month'] = traff['date'].dt.to_period('M')
traff_monthly = traff.groupby(['month', 'form'])['views'].sum().reset_index()

forms_monthly = trans_monthly.merge(traff_monthly, on=['month', 'form'])
forms_monthly['conversion'] = forms_monthly['trans_count'] / forms_monthly['views']

forms_monthly[forms_monthly['conversion']<1.].groupby('month')['conversion'].agg(['mean', 'median']).reset_index()

# 2. standard events

- Total \# of standard events
- Total amount raised (reg. + donations)
- Total registration \$
- Total registration transactions
- Total donation (online & offline) \$
- Total donation transactions

In [None]:
events_years = trans[trans['events_count']>0].groupby('year')[['events_amt', 'events_count', 'donations_amt', 'donations_count']].sum().reset_index()
events_years.columns = ['year', 'registrations volume', 'registrations counts',
                       'donations volume', 'donations counts']

# get events from qgiv analytics
q = '''select date, sum(events_count)
        from analyticsqgiv_weekly
        where date>=2019
        group by date;'''
events = redshift_query_read(q, schema='public')
events['year'] = events['date'].dt.year
event_counts_years = events.groupby('year')['sum'].agg(['mean', 'median']).reset_index()
event_counts_years.columns = ['year', 'mean events count', 'median events count']

events_years.merge(event_counts_years).transpose()

# 3. text fundraising

- ~~Total \# of orgs using text fundraising~~
- Total \# of text campaigns
- ~~Total donations dollars~~
- ~~Total donation transactions~~
- ~~Avg. gift size through text fundraising~~
- Total \# of outbound messages
- Av. \# of reminder messages per campaign
- % clicks to form via text
- % of donations completed via text 
- % of donations completed after reminder message

In [None]:
sms = trans[trans['source']=='sms'].groupby('year')['amount'].agg(['count', 'sum', 'mean', 'median']).reset_index()
sms = sms.merge(trans[trans['source']=='sms'].groupby('year')['org'].nunique(), on='year')
sms.columns = ['year', 'transactions', 'volume', 'mean', 'median', 'orgs']

q = '''select 
            date_part('year', created) as year, 
            count(created) as campaigns
        from smscampaign
        group by date_part('year', created)'''
campaigns_yearly = redshift_query_read(q, schema='production')

sms.merge(campaigns_yearly, on='year').transpose()

In [None]:
'''
@TODO
difference between "total raised" and "total donation $"?
'''

# 4. P2P

- ~~Total \# of peer-to-peer events~~
- ~~Total amount raised (reg. + donations)~~
- ~~Total registration dollars~~
- ~~Total registration transactions~~
- ~~Total donation (online & offline) dollars~~
- ~~Total donation transactions~~
- Total number of teams
- Total raised from teams
- Avg emails sent per event
- Avg texts sent per event
- Total \# of orgs using badges
- Total \$ raised by orgs using badges
- Total \$ raised by orgs not using badges

In [None]:
q = '''select
        t.year,
        count(distinct(t.form)) as active_p2p_events,
        sum(t.amount) as p2p_vol,
        count(t.id) as p2p_count,
        sum(t.registrations_count) as p2p_registrations,
        sum(t.registrations_amt) as p2p_registrations_vol,
        sum(t.donations_amt) as p2p_donations_vol,
        sum(t.donations_count) as p2p_donations_count
    from transactions as t
        left join form as f on t.form=f.id
    where
        t.status='A' and
        t.recurring=0 and
        f.type=3 and
        t.year>=2019
    group by t.year'''
df_p2p = redshift_query_read(q, schema='production')

In [None]:
df_p2p.sort_values('year', ascending=True, inplace=True)

In [None]:
# teams
# total teams count
q = '''select
            date,
            sum(teams_count) as teams_count
        from analyticsp2p_weekly
        group by date'''
teams_date = redshift_query_read(q, schema='public')
teams_date['year'] = teams_date['date'].dt.year
teams_yearly = teams_date.groupby('year')['teams_count'].agg(['mean', 'median']).reset_index()
teams_yearly.columns = ['year', 'mean teams', 'median teams']

# total raised from teams
teams_vol = trans[trans['transdonationentitytype']==2].groupby('year')['amount'].sum().reset_index()
teams_vol.columns = ['year', 'volume raised teams']

teams_yearly = teams_yearly.merge(teams_vol, on=['year'])

In [None]:
df_p2p = df_p2p.merge(teams_yearly, on='year')

In [None]:
participants = trans[(trans['transdonationentitytype']==10)&(trans['transdonationentity']!=0)].groupby(['year', 'transdonationentity'])['amount'].sum().reset_index()
participants.groupby('year')['amount'].agg(['sum', 'mean', 'median']).reset_index().transpose()

In [None]:
# emails & texts sent
q = '''select
            date_part('year', created) as year,
            entity as transdonationentity,
            count(id) as emails
        from syslog_logs
        where
            message like '%campaign%' and
            message like '%has been sent for team%' and
            entitytype=2
        group by date_part('year', created), entity'''
team_emails = redshift_query_read(q, schema='production')

team_yearly_trans = trans[trans['transdonationentitytype']==2].groupby(['year', 'transdonationentity'])['amount'].sum().reset_index()
team_emails = team_yearly_trans.merge(team_emails, on=['year', 'transdonationentity'], how='left').fillna(0)

In [None]:
team_emails[team_emails['emails']!=0]

In [None]:
# badges
q = '''select 
            date_part('year', created) as year,
            org
        from logs 
        where 
            message like '%badge%' and
            created>=2019
        group by date_part('year', created), org'''
badge_orgs = redshift_query_read(q, schema='production')

In [None]:
yearly_vol = []
for year in badge_orgs['year'].unique():
    these_orgs = badge_orgs[badge_orgs['year']==year]['org'].tolist()
    
    yearly_vol.append({
        'year': year,
        'orgs badges': len(these_orgs),
        'vol badges': trans[(trans['year']==year)&(trans['org'].isin(these_orgs))&(trans['source']=='p2p')]['amount'].sum(),
        'vol no badges': trans[(trans['year']==year)&(~trans['org'].isin(these_orgs))&(trans['source']=='p2p')]['amount'].sum()
    })

In [None]:
df_p2p.merge(pd.DataFrame(yearly_vol), on='year').transpose()

# 5. auctions

- Total \# of auction events
- Total amount raised (reg. + donations + store + bidding)
- Total registration \$
- Total registration transactions
- Total donation (online & offline) \$
- Total donation transactions
- Total store sales \$
- Total store sales transactions
- Total bids \$
- Total bid transactions
- Avg. \# of bids per attendee
- Avg. \# of bids per item
- Avg price of an auction item
- What are the types of items with most bids (by tag)?
- What are the types of items with highest value (by tag)?
- Total bid \$ placed by Givi 
- Total bid transactions placed by Givi 
- Total bid \$ placed by web browser 
- Total bid transactions placed by web browser 
- Total bid \$ placed by VT
- Total bid transactions placed by VT

@TODO
- export auction item tags

In [None]:
# processing info
q = '''select
        t.year,
        count(distinct(t.form)) as active_auctions,
        sum(t.amount) as auction_vol,
        count(t.id) as auction_count,
        sum(t.registrations_count) as auction_registrations_count,
        sum(t.registrations_amt) as auction_registrations_vol,
        sum(t.donations_count) as auction_donations_count,
        sum(t.donations_amt) as auction_donations_vol,
        sum(t.purchases_count) as auction_purchases_count,
        sum(t.purchases_amt) as auction_purchases_vol
    from transactions as t
        left join form as f on t.form=f.id
    where
        t.status='A' and
        t.recurring=0 and
        f.type=5 and
        t.year>=2019
    group by t.year'''
df_auction = redshift_query_read(q, schema='production')

In [None]:
df_auction.transpose()

In [None]:
# source: givi vs web vs vt
q = '''select
            t.year,
            t.source,
            count(t.id) as transactions,
            sum(t.amount) as volume
        from transactions as t
            left join form as f on t.form=f.id
        where
            t.status='A' and
            t.recurring=0 and
            f.type=5 and
            t.year>=2019
        group by t.year, t.source'''
auction_source = redshift_query_read(q, schema='production')

In [None]:
source_years = auction_source.pivot(index='year', columns='source', values=['transactions', 'volume']).reset_index()
source_years.columns = ['year', 'trans don_form', 'trans_givi',
                       'trans_mobilevt', 'trans_vt', 'vol_don_form',
                       'vol_givi', 'vol_mobilevt', 'vol_vt']
source_years.fillna(0).transpose()