Chris Murphy stats request

https://www.notion.so/bloomerang/Qgiv-Data-Requests-40c97fe2a14d4d3981e64313fd6124df

In [3]:
import sys
sys.path.insert(1, '../../scripts/')
from s3_support import *

import pandas as pd
from datetime import date, timedelta

In [2]:
q = '''select
            date_part('year', created) as year,
            count(id) as entries
        from syslog_logs
        where
            message ilike '%logged in%'
        group by date_part('year', created)
        order by year'''
redshift_query_read(q, schema='production')

Unnamed: 0,year,entries
0,2020.0,31713
1,2021.0,623813
2,2022.0,684741
3,2023.0,816258
4,2024.0,704093


In [337]:
q = '''select * 
        from syslog_logs
        where 
            message ilike '%logged in%' and
            date_part('year', created) >= 2015'''
logins = redshift_query_read(q, schema='production')

In [338]:
print("{:,} login entries".format(len(logins)))
print("{:,} unique forms".format(len(logins['form'].unique())))
print("{:,} entries with 0 form".format(len(logins[logins['form']==0])))
print("{} - {}".format(logins['created'].min(), logins['created'].max()))

2,989,366 login entries
21 unique forms
2,989,346 entries with 0 form
2020-12-09 15:11:02 - 2024-09-28 23:01:33


In [34]:
cols = ['id', 'org', 'form', 'entity', 'entitytype',
        'systemtype', 'created', 'message']
logins[cols].head(3)

Unnamed: 0,id,org,form,entity,entitytype,systemtype,created,message
0,117074997,0,0,1064632,0,2,2023-07-20 14:33:51,1229137d2e153115dfc7a56ac6a46d16fa6ace3b logge...
1,117075333,0,0,1884784,0,2,2023-07-20 14:36:44,d20aae2ec34539b66ae92df609fd9ad375275a56 logge...
2,117076197,0,0,1853312,0,2,2023-07-20 14:41:30,45ac0a7ef4c17b0ec9698037fdabf546a051873d logge...


# 0. data exports

## 1. account ID's data

In [4]:
roles = pd.read_csv("UserRolesDump.csv", low_memory=False)

print("Roles: {:,} entries".format(len(roles)))

cols = ['uid', 'rid', 'domain', 'instanceId']

print("RIDS:")
print(roles['rid'].unique())
print()
roles[cols].head(2)

Roles: 4,143,086 entries
RIDS:
[ 9 25 20  3 19  6 22  7 21  5  8 13  1 12 16 15 18 17  2 14 26]



Unnamed: 0,uid,rid,domain,instanceId
0,326910,9,2,28300
1,995115,25,5,31804


In [6]:
unique_accounts = len(roles[roles['rid'].isin([9, 25, 26])]['uid'].unique())
print("{:,} unique accounts".format(unique_accounts))

1,415,816 unique accounts


In [7]:
1.415/6.2

0.2282258064516129

In [354]:
barnstorm_forms = accounts[accounts['product']==2]['instanceid'].unique()

In [356]:
barnstorm_forms

array([1927.,  364., 1702.,  668., 1780., 1965., 1842., 1751.,  538.,
       1807.])

### logins by role/product type

Roles:

- p2p CONSTITUENT = 9;
- "donor" accounts CONTRIBUTOR = 25;
- API token tied to accounts = 26

Products:

- QGIV = 1;
- BARNSTORM = 2;
- HOBNOB = 3;
- AGGREGATOR = 4;
- AUCTION = 5;
- QGIV_EVENT = 6;

__role.instanceid can be form or org__

In [36]:
roles['rid'] = roles['rid'].astype(int)
roles['instanceid'] = roles['instanceId'].astype(int)
roles.drop('instanceId', axis=1, inplace=True)

In [37]:
# query login logs
q = '''select * 
        from syslog_logs
        where 
            message ilike '%logged in%' and
            date_part('year', created) >= 2020'''
logins = redshift_query_read(q, schema='production')
logins['entity'] = logins['entity'].astype(int)

# merge with user roles
log_cols = ['entity', 'created']
roles_cols = ['uid', 'rid', 'domain', 'instanceid']
logins = logins[log_cols].merge(roles[roles_cols], 
                               left_on='entity',
                               right_on='uid',
                               how='left')

In [38]:
# limit to user roles
logins = logins[logins['rid'].isin([9, 25, 26])]

# query product types
q = '''select 
            id as instanceid, 
            type as product 
        from form'''
form_products = redshift_query_read(q, schema='production')
form_products['instanceid'] = form_products['instanceid'].astype(int)
form_products['product'] = form_products['product'].astype(int)

logins = logins.merge(form_products, 
                      on='instanceid',
                      how='left')

In [39]:
print("login log entries: {:,}".format(len(logins)))
logins.tail(2)

login log entries: 12,853,864


Unnamed: 0,entity,created,uid,rid,domain,instanceid,product
12853862,1526018,2023-07-20 14:31:54,1526018.0,25.0,5.0,0.0,
12853863,1526018,2023-07-20 14:31:54,1526018.0,26.0,7.0,5061721.0,


In [40]:
logins.isna().sum()

entity               0
created              0
uid                  0
rid                  0
domain               0
instanceid           0
product       10000564
dtype: int64

In [50]:
logins[logins['product'].isna()].head(3)

Unnamed: 0,entity,created,uid,rid,domain,instanceid,product
0,1138242,2022-08-16 13:30:56,1138242.0,25.0,5.0,442440.0,
1,1138242,2022-08-16 13:30:56,1138242.0,26.0,7.0,5050272.0,
2,1023199,2022-01-27 11:32:54,1023199.0,25.0,5.0,0.0,


In [42]:
logins['created'] = pd.to_datetime(logins['created'])
logins.groupby([logins['created'].dt.year, logins['rid']])['entity'].count().reset_index()

Unnamed: 0,created,rid,entity
0,2020,9.0,32478
1,2020,25.0,110420
2,2020,26.0,14423
3,2021,9.0,663837
4,2021,25.0,2038349
5,2021,26.0,287221
6,2022,9.0,597777
7,2022,25.0,2067637
8,2022,26.0,385510
9,2023,9.0,627498


Average logins per form by RID



Unnamed: 0,year,p2p mean,qgiv mean,API mean,p2p median,qgiv median,API median
0,2020,18.527096,49.471326,9.545334,8.0,23.5,4.0
1,2021,177.82936,592.543314,86.460265,50.0,147.0,37.0
2,2022,154.105955,512.171662,82.550321,46.0,146.0,36.0
3,2023,157.544062,554.049397,83.661697,39.0,137.0,38.0
4,2024,128.818877,390.665653,44.678841,32.0,95.0,15.0


In [144]:
print("Average logins per form by product")
print()
grpd = logins.groupby([logins['created'].dt.year, logins['product'], logins['instanceid']])['entity'].count().reset_index()
grpd_2 = grpd.groupby(['created', 'product'])['entity'].agg(['mean', 'median']).reset_index()

grpd_2 = grpd_2.pivot(index='created', columns='product', values=['mean', 'median']).reset_index()
grpd_2.columns = ['year', 'qgiv mean', 'bs mean', 'p2p mean',
                  'qgiv median', 'bs median', 'p2p median']
grpd_2.transpose()

Average logins per form by product



Unnamed: 0,0,1,2,3,4
year,2020.0,2021.0,2022.0,2023.0,2024.0
qgiv mean,49.264901,773.666667,664.093923,694.948864,548.590643
bs mean,12.4,191.625,137.555556,181.0,132.0
p2p mean,18.97978,181.708436,156.584901,160.217015,131.03006
qgiv median,20.0,232.0,164.0,191.5,138.0
bs median,11.0,204.0,55.0,166.5,119.0
p2p median,8.0,53.0,47.0,40.0,34.0


## 2. transaccount relationship

In [55]:
transacc = pd.read_csv("TransAccountDump.csv", low_memory=False)
print("TransAccount: {:,} entries".format(len(transacc)))
cols = ['account', 'transaction', 'recurring', 'master']
transacc[cols].head(2)

TransAccount: 3,788,360 entries


Unnamed: 0,account,transaction,recurring,master
0,13472,506447,0.0,0
1,24178,513144,0.0,0


In [65]:
agg = {'transaction': 'count',
       'recurring': 'count'}
transacc_agg = transacc.groupby('account').agg(agg)

transacc_grpd = transacc_agg.merge(form_products, left_on='account', right_on='instanceid')

In [66]:
accounts.groupby('product')['transaction'].agg(['mean', 'median']).reset_index()

Unnamed: 0,product,mean,median
0,1.0,55.295541,14.0
1,2.0,35.5,5.5
2,3.0,17.229955,5.0


In [67]:
accounts.groupby('rid')['transaction'].agg(['mean', 'median']).reset_index()

Unnamed: 0,rid,mean,median
0,9.0,17.231053,5.0
1,25.0,28.518784,4.0
2,26.0,16.863778,4.0


## 3. donor count totals

In [344]:
q = '''select
            count(distinct(email)) as total_donors
        from transactions
        where status='A' '''
total_donors = redshift_query_read(q, schema='production')
print("total unique donors: {:,}".format(total_donors['total_donors'].iloc[0]))

total unique donors: 6,219,004


In [345]:
q = '''select
            f.type,
            count(distinct(t.email)) as total_donors
        from transactions as t
        left join form as f on t.form=f.id
        where t.status='A' 
        group by f.type'''
donors_per_product = redshift_query_read(q, schema='production')

def translate_type(type_id):
    if type_id == 2:
        return 'barnstorm'
    elif type_id == 3:
        return 'p2p'
    elif type_id == 1:
        return 'qgiv'
    elif type_id == 5:
        return 'auction'
    else:
        return 'none'

donors_per_product['type'] = donors_per_product['type'].apply(translate_type)

print("unique donors per product")
print("-"*40)
for _, r in donors_per_product.iterrows():
    print("{}: {:,}".format(r['type'], r['total_donors']))

unique donors per product
----------------------------------------
none: 111
barnstorm: 28,145
auction: 231,602
qgiv: 4,214,685
p2p: 1,984,145


In [None]:
QGIV = 1;
BARNSTORM = 2;
HOBNOB = 3;
AGGREGATOR = 4;
AUCTION = 5;
QGIV_EVENT = 6

# 1. givi

## 1. Highest Priority
- ~~Number of accounts ***(supporter data)***~~
- ~~Number of accounts with at least one login to Givi app over the last 6 months ***(supporter data)***~~
- ~~Average number of logins to Givi app per year for all accounts and for those with at least one login over the last six months ***(supporter data)***~~

In [334]:
for c in givi_logins.columns:
    print("{}: {:,} unique, {} - {}".format(c, len(givi_logins[c].unique()), givi_logins[c].min(), givi_logins[c].max()))

entity: 15,274 unique, 10001 - 2377210
created: 1,850,273 unique, 2020-12-09 15:12:22 - 2024-09-03 22:59:23
uid: 15,274 unique, 10001.0 - 2377210.0
rid: 1 unique, 26.0 - 26.0
domain: 1 unique, 7.0 - 7.0
instanceid: 15,183 unique, 712964.0 - 5109041.0
product: 2 unique, 1.0 - 1.0


In [330]:
givi_logins = logins[logins['rid']==26]
six_months_ago = logins['created'].max() - timedelta(weeks=24)
gl_6m_accounts = givi_logins[givi_logins['created']>=six_months_ago]['entity'].unique()

print("givi accounts: {:,}".format(len(givi_logins['entity'].unique())))
print("(6 months ago: {})".format(six_months_ago))
print("givi accounts w/ logins w/in 6 months: {:,}".format(len(gl_6m_accounts)))

givi accounts: 15,274
(6 months ago: 2024-03-19 22:59:23)
givi accounts w/ logins w/in 6 months: 14,375


In [294]:
gl_6m = givi_logins[givi_logins['entity'].isin(gl_6m_accounts)]

print("average logins per year for accounts w/ logins in past 6 months")
gl_per_year = gl_6m.groupby(gl_6m['created'].dt.year)['entity'].count().reset_index()
gl_per_year.columns = ['year', 'logins last 6 months']

gl_per_year_all = givi_logins.groupby(givi_logins['created'].dt.year)['entity'].count().reset_index()
gl_per_year_all.columns = ['year', 'all logins']

gl_per_year_no6mo_df = givi_logins[~givi_logins['entity'].isin(gl_6m_accounts)]
gl_per_year_no6m = gl_per_year_no6mo_df.groupby(gl_per_year_no6mo_df['created'].dt.year)['entity'].count().reset_index()
gl_per_year_no6m.columns = ['year', 'no logins last 6 months']

gl_per_year.merge(gl_per_year_no6m, on='year', how='outer').merge(gl_per_year_all, on='year', how='outer')

average logins per year for accounts w/ logins in past 6 months


Unnamed: 0,year,logins last 6 months,no logins last 6 months,all logins
0,2020,14119,304,14423
1,2021,281441,5780,287221
2,2022,381205,4305,385510
3,2023,575964,1971,577935
4,2024,650491,301,650792


## 2. Medium Priority
- ~~Average number of bids made through Givi app per year ***(supporter data)***~~
- ~~Average number of purchases (store, fund a need) through Givi app per year ***(supporter data)***~~

## 3. Low Priority
- ~~Average number of transactions and transaction size for supporters through Givi based on form type: ***(supporter data)***~~
    - Auctions
    - General donation (non auction related)
- ~~Number of logins by month~~

In [146]:
q = '''select
            t.year,
            count(distinct(t.form)) as forms,
            count(distinct(t.id)) as transactions,
            sum(t.amount) as transactions_vol,
            avg(t.amount) as avg_trans_amount,
            sum(t.donations_count) as donations,
            sum(t.donations_amt) as donations_vol,
            sum(t.purchases_count) as purchases,
            sum(t.purchases_amt) as purchases_vol,
            sum(t.auctionpurchase_count) as auction_purchases,
            sum(t.auctiondonation_count) as auction_donations
        from transactions as t
        where
            t.status='A' and
            t.source='givi' and
            recurring=0
        group by t.year'''
givi_trans = redshift_query_read(q, schema='production')

In [148]:
givi_trans['avg_purchases_per_form'] = givi_trans['purchases'] / givi_trans['forms']
givi_trans['avg_donations_per_form'] = givi_trans['donations'] / givi_trans['forms']
givi_trans['avg_purchase_amount'] = givi_trans['purchases_vol'] / givi_trans['purchases']
givi_trans['avg_donation_amount'] = givi_trans['donations_vol'] / givi_trans['donations']

givi_trans.sort_values('year', ascending=True).transpose()

Unnamed: 0,5,0,3,4,6,1,2
year,2018.0,2019.0,2020.0,2021.0,2022.0,2023.0,2024.0
forms,79.0,79.0,354.0,597.0,732.0,742.0,580.0
transactions,1521.0,3984.0,12634.0,15087.0,17477.0,22266.0,14979.0
transactions_vol,215365.52,499980.14,1915111.0,2797109.0,2937669.0,3571771.0,2348225.0
avg_trans_amount,141.594688,125.497023,151.5839,185.3986,168.0877,160.4137,156.7678
donations,1891.0,4204.0,8272.0,7222.0,5928.0,6256.0,4477.0
donations_vol,215792.44,427363.09,1041438.0,1087228.0,1028539.0,1010147.0,691206.8
purchases,0.0,3.0,182.0,260.0,251.0,343.0,254.0
purchases_vol,0.0,135.0,12995.0,14097.0,11130.0,21214.0,14311.0
auction_purchases,0.0,568.0,5134.0,9226.0,9847.0,12002.0,8054.0


In [31]:
cols = ['year', 'donations', 'auction_donations',
        'purchases', 'auction_purchases']
givi_trans[cols].sort_values('year', ascending=True)

Unnamed: 0,year,donations,auction_donations,purchases,auction_purchases
6,2018,1891,0,0,0
0,2019,4204,34,3,568
4,2020,8272,362,182,5134
3,2021,7222,845,260,9226
5,2022,5928,981,251,9847
1,2023,6256,1493,343,12002
2,2024,4284,1133,243,7868


In [237]:
bids = pd.read_csv("AuctionBiddersDump.csv")
bids['lastUpdate'] = pd.to_datetime(bids['lastUpdate'])

print("{:,} entries".format(len(bids)))
print("{:,} forms".format(len(bids['form'].unique())))
print("{:,} bidders".format(len(bids['bidderNumber'].unique())))
print("{:%Y-%m-%d} to {:%Y-%m-%d}".format(bids['lastUpdate'].min(), bids['lastUpdate'].max()))

572,657 entries
3,455 forms
2,805 bidders
2020-11-09 to 2024-09-16


In [238]:
print("bids per year")
bids.groupby(bids['lastUpdate'].dt.year)['id'].count().reset_index().sort_values('lastUpdate', ascending=True)

bids per year


Unnamed: 0,lastUpdate,id
0,2020,22663
1,2021,40162
2,2022,52240
3,2023,275486
4,2024,182106


In [303]:
givi_logins.groupby(givi_logins['created'].dt.to_period('M'))['entity'].count().reset_index()

Unnamed: 0,created,entity
0,2020-12,14423
1,2021-01,19081
2,2021-02,19405
3,2021-03,24497
4,2021-04,24475
5,2021-05,23021
6,2021-06,22228
7,2021-07,20342
8,2021-08,25012
9,2021-09,28097


# 2. Qgiv

## 1. Highest Priority
- ~~Number of accounts ***(supporter data)***~~
- ~~Number of accounts with at least one login over the last 6 months ***(supporter data)***~~
- ~~Average number of logins to the portal (not P2P or Auction dashboard) per year for all accounts and for those with at least one login over the last six months ***(supporter data)***~~

In [304]:
print("Total accounts: {:,}".format(len(logins[logins['rid']==25]['entity'].unique())))

Total accounts: 28,971


In [251]:
print("Average logins per form by RID")
print()
grpd = logins.groupby([logins['created'].dt.year, logins['rid'], logins['instanceid']])['entity'].count().reset_index()

grpd_2 = grpd.groupby(['created', 'rid'])['entity'].agg(['mean', 'median']).reset_index()
grpd_2 = grpd_2.pivot(index='created', columns='rid', values=['mean', 'median']).reset_index()
grpd_2.columns = ['year', 'p2p mean', 'qgiv mean',
                  'API mean', 'p2p median', 'qgiv median',
                  'API median']
grpd_2

Average logins per form by RID



Unnamed: 0,year,p2p mean,qgiv mean,API mean,p2p median,qgiv median,API median
0,2020,18.527096,49.471326,9.545334,8.0,23.5,4.0
1,2021,177.82936,592.543314,86.460265,50.0,147.0,37.0
2,2022,154.105955,512.171662,82.550321,46.0,146.0,36.0
3,2023,157.544062,554.049397,83.661697,39.0,137.0,38.0
4,2024,128.818877,390.665653,44.678841,32.0,95.0,15.0


In [307]:
print("Average logins per form by RID w/ login last 6 months")
print()
six_months_ago = logins['created'].max() - timedelta(weeks=24)
last_6mo_acounts = logins[logins['created']>=six_months_ago]['entity'].unique()
grpd = logins[logins['entity'].isin(last_6mo_acounts)].groupby([logins['created'].dt.year, logins['rid'], logins['instanceid']])['entity'].count().reset_index()

grpd_2 = grpd.groupby(['created', 'rid'])['entity'].agg(['mean', 'median']).reset_index()
grpd_2 = grpd_2.pivot(index='created', columns='rid', values=['mean', 'median']).reset_index()
grpd_2.columns = ['year', 'p2p mean', 'qgiv mean',
                  'API mean', 'p2p median', 'qgiv median',
                  'API median']
grpd_2

Average logins per form by RID w/ login last 6 months



Unnamed: 0,year,p2p mean,qgiv mean,API mean,p2p median,qgiv median,API median
0,2020,20.069431,47.767815,9.578697,9.0,33.0,4.0
1,2021,227.317186,610.462762,90.699646,118.0,200.0,41.0
2,2022,213.688016,556.475147,86.914045,115.0,182.0,40.0
3,2023,229.440397,629.963956,88.243297,117.0,222.0,42.0
4,2024,141.991153,407.78471,45.104077,46.0,106.0,16.0


In [248]:
print("logins in the last 6 months by role ID, users and form/org:")
less_6_months = logins['created'].max() - timedelta(days=180)
login_6_months = logins[logins['created']>=less_6_months].groupby('rid').agg({'entity': 'count', 'instanceid': 'nunique'}).reset_index()
login_6_months_ent_un = logins[logins['created']>=less_6_months].groupby('rid')['entity'].nunique().reset_index()

mrgd = login_6_months.merge(login_6_months_ent_un, on='rid')
mrgd.columns = ['role ID', 'logins', 'org/form', 'unique accounts']
mrgd['mean login/account'] = mrgd['logins'] / mrgd['unique accounts']
mrgd

logins in the last 6 months by role ID, users and form/org:


Unnamed: 0,role ID,logins,org/form,unique accounts,mean login/account
0,9.0,324467,3099,2879,112.701285
1,25.0,1436934,4732,11093,129.535202
2,26.0,511281,14449,14404,35.495765


## 2. Medium Priority
- Number of donor portal accounts with specific activity ***(supporter data)***
    - Updated, or cancelled a recurring donation
    - Added or updated personal or billing details
    
## 3. Low Priority
- Average number of transactions and average transaction size by form per year for supporters with accounts vs supporters without accounts: ***(supporter data)***
    - P2P
    - Auctions
    - Event Registrations
    - All other
- Average number of recurring donations per year for supporters with accounts vs supporters without accounts ***(supporter data)***
- Number of logins by month


In [150]:
q = '''select
            date_part('year', date) as year,
            sum(dl_trans_volume) as dl_trans_volume,
            sum(dl_trans_count) as dl_trans_count,
            sum(dl_new_rec_count) as dl_new_rec_count,
            sum(dl_new_rec_volume) as dl_new_rec_volume
        from analyticsqgiv_monthly
        where
            date>='2019-01-01'
        group by date_part('year', date)'''
aq = redshift_query_read(q, schema='public')

In [152]:
aq.sort_values('year', ascending=True).tail(5).transpose()

Unnamed: 0,2,1,0,3,5
year,2020.0,2021.0,2022.0,2023.0,2024.0
dl_trans_volume,21008081.33,30398887.99,25473821.26,55577806.26,38841730.84
dl_trans_count,160331.0,233262.0,202506.0,427345.0,282597.0
dl_new_rec_count,7272.0,9725.0,8984.0,15673.0,12700.0
dl_new_rec_volume,2639939.42,2389541.83,1922836.81,2647851.34,2162292.59


In [312]:
q = '''select
            year,
            count(id) as trans_count,
            sum(amount) as trans_volume,
            sum(recurring_origin) as new_recurring,
            sum(case when recurring_origin=1 then amount else null end) as new_recurring_vol
        from transactions
        where 
            status='A' and
            date>=2020 and
            (source='don_form' or source='mobile') and
            (recurring=0 or recurring_origin=1)
        group by year'''
trans_aq = redshift_query_read(q, schema='production')
trans_aq.sort_values('year', ascending=True)

Unnamed: 0,year,trans_count,trans_volume,new_recurring,new_recurring_vol
2,2020,1032920,181492800.0,50779,3681722.21
4,2021,1037040,199622500.0,54905,5775415.93
3,2022,1067850,212183800.0,52534,6052472.95
1,2023,1155521,239721800.0,46960,5380133.98
0,2024,934578,188831400.0,49822,6242630.01


In [308]:
print("Average logins per form by RID by month")
print()
grpd = logins.groupby([logins['created'].dt.to_period('M'), logins['rid'], logins['instanceid']])['entity'].count().reset_index()

grpd_2 = grpd.groupby(['created', 'rid'])['entity'].agg(['mean', 'median']).reset_index()
grpd_2 = grpd_2.pivot(index='created', columns='rid', values=['mean', 'median']).reset_index()
grpd_2.columns = ['month', 'p2p mean', 'qgiv mean',
                  'API mean', 'p2p median', 'qgiv median',
                  'API median']
grpd_2

Average logins per form by RID by month



Unnamed: 0,month,p2p mean,qgiv mean,API mean,p2p median,qgiv median,API median
0,2020-12,18.527096,49.471326,9.545334,8.0,23.5,4.0
1,2021-01,23.609732,69.360213,10.381393,10.0,29.0,5.0
2,2021-02,25.203883,68.464894,10.738794,12.0,31.0,5.0
3,2021-03,30.358491,80.912075,12.872832,15.0,36.5,5.0
4,2021-04,30.738532,79.977833,12.577081,15.0,42.0,6.0
5,2021-05,27.701064,71.401383,11.656203,14.0,33.0,5.0
6,2021-06,28.44141,69.762616,11.047714,14.0,32.0,5.0
7,2021-07,24.946524,62.120359,10.095285,12.0,27.0,4.0
8,2021-08,28.244769,69.284059,11.753759,15.0,32.0,5.5
9,2021-09,27.694188,66.625757,12.582624,14.0,29.0,6.0


# 3. p2p

## 1. High priority
- ~~Number of accounts ***(supporter data)***~~
- ~~Number of accounts with at least one login through P2P dashboard over the last 6 months ***(supporter data)***~~
- ~~Average number of logins through P2P dashboard per year for all accounts and for those with at least one login over the last six months ***(supporter data)***~~


In [299]:
p2p_logins = logins[logins['rid']==9]
six_mo_ago = logins['created'].max() - timedelta(weeks=24)
p2p_logins_6mo_accounts = p2p_logins[p2p_logins['created']>=six_mo_ago]['entity'].unique()

print("total p2p accounts: {:,}".format(len(p2p_logins['entity'].unique())))
print("p2p accounts w/ logins w/in last 6 months: {:,}".format(len(p2p_logins_6mo_accounts)))

total p2p accounts: 10,396
p2p accounts w/ logins w/in last 6 months: 2,756


In [300]:
p2p_logins_6mo = p2p_logins[p2p_logins['entity'].isin(p2p_logins_6mo_accounts)]
p2p_logins_grp = p2p_logins_6mo.groupby(p2p_logins_6mo['created'].dt.year)['entity'].count().reset_index()
p2p_logins_grp.columns = ['year', 'logins last 6 months']

p2p_logins_no6m_df = p2p_logins[~p2p_logins['entity'].isin(p2p_logins_6mo_accounts)]
p2p_logins_no6m = p2p_logins_no6m_df.groupby(p2p_logins_no6m_df['created'].dt.year)['entity'].count().reset_index()
p2p_logins_no6m.columns = ['year', 'no logins last 6 months']

p2p_logins_all = p2p_logins.groupby(p2p_logins['created'].dt.year)['entity'].count().reset_index()
p2p_logins_all.columns = ['year', 'all']

print("p2p logins w/, w/out logins last 6 months")
p2p_logins_grp.merge(p2p_logins_no6m, on='year').merge(p2p_logins_all, on='year')

p2p logins w/, w/out logins last 6 months


Unnamed: 0,year,logins last 6 months,no logins last 6 months,all
0,2020,20812,11666,32478
1,2021,394168,269669,663837
2,2022,440411,157366,597777
3,2023,554328,73170,627498
4,2024,433357,4756,438113


## 2. Medium Priority
- Number of donor portal accounts with specific activity ***(supporter data)***
    - Created P2P page
    - Made new donation
    - ~~Sent email for P2P campaign~~
    - Added or updated personal or billing details

In [314]:
q = '''select 
            created,
            form,
            userid
        from syslog_logs
        where
            message ilike '%campaign sent%' and
            created>='2020-01-01' '''
df_campaign_sent = redshift_query_read(q, schema='production')

In [316]:
print("Users sending email campaigns by month:")
print("-"*40)
df_campaign_sent['created'] = pd.to_datetime(df_campaign_sent['created'])
df_campaign_sent.groupby(df_campaign_sent['created'].dt.to_period('M'))['userid'].nunique().reset_index().tail(12)

Users sending email campaigns by month:
----------------------------------------


Unnamed: 0,created,userid
34,2023-10,994
35,2023-11,554
36,2023-12,184
37,2024-01,722
38,2024-02,1121
39,2024-03,1009
40,2024-04,1112
41,2024-05,750
42,2024-06,454
43,2024-07,533


## 3. Low Priority
- ~~Average team size for P2P campaign ***(supporter data)***~~
- ~~Average number of transactions and transaction size for supporters through P2P Dashboard based on form type: ***(supporter data)***~~
    - P2P
    - General donation (non P2P related)
- ~~Number of logins by month~~

In [153]:
q = '''select
            date as month,
            sum(reg_count) as registrations,
            sum(sub_reg_count) as subregistrations,
            sum(teams_count) as teams
        from analyticsp2p_monthly
        where date>='2020-01-01' 
        group by date'''
df_analyticsp2p = redshift_query_read(q, schema='public')

In [154]:
df_analyticsp2p.sort_values('month', ascending=True).tail(12)

Unnamed: 0,month,registrations,subregistrations,teams
29,2023-08-01,0,3532,2128
3,2023-09-01,0,5326,2018
23,2023-10-01,0,3571,1493
42,2023-11-01,0,924,732
11,2023-12-01,0,2112,722
48,2024-01-01,0,6191,2752
15,2024-02-01,0,5273,2977
6,2024-03-01,0,5028,3692
40,2024-04-01,0,5466,2682
34,2024-05-01,0,2945,1603


In [318]:
q = '''select
            year,
            count(id) as trans_count,
            sum(amount) as trans_vol,
            sum(registrations_count) as registrations_count,
            sum(registrations_amt) as registrations_vol,
            sum(donations_count) as donations_count,
            sum(donations_amt) as donations_vol,
            count(distinct(email)) as donors
        from transactions
        where
            status='A' and
            source='p2p' and
            registrations_count>0 and
            date>='2020-01-01' and
            (recurring=0 or recurring_origin=1)
        group by year'''
df_p2p_trans_regs = redshift_query_read(q, schema='production')

In [319]:
df_p2p_trans_regs['trans/donors'] = df_p2p_trans_regs['trans_count'] / df_p2p_trans_regs['donors']
df_p2p_trans_regs['vol/donors'] = df_p2p_trans_regs['trans_vol'] / df_p2p_trans_regs['donors']

In [320]:
df_p2p_trans_regs.sort_values('year', ascending=True).tail(13)

Unnamed: 0,year,trans_count,trans_vol,registrations_count,registrations_vol,donations_count,donations_vol,donors,trans/donors,vol/donors
3,2020,86750,3153611.96,100132,1204984.2,21549,1825719.04,83118,1.043697,37.941384
4,2021,88420,3779768.06,105757,1587563.7,22107,2126806.68,82448,1.072434,45.844266
1,2022,113338,5178115.07,150416,2380591.25,27683,2568700.96,106488,1.064326,48.626278
2,2023,143593,6424640.07,196555,3193893.77,34152,3049245.83,134890,1.064519,47.628735
0,2024,121570,5800065.51,168129,2807277.04,28810,2835586.62,115166,1.055607,50.362655


In [90]:
transacc.head(2)

Unnamed: 0,id,account,transaction,recurring,title,master
0,1,13472,506447,0.0,,0
1,2,24178,513144,0.0,,0


In [94]:
len(accounts[accounts['product']==3]['entity'].unique())

9254

_How are there 10k registration donations from the P2P registration transaction data in the past 4 months alone but only 9k trans_account records for P2P?_

In [271]:
teams = pd.read_csv("TeamDump.csv")
print("{:,} teams".format(len(teams)))
print("{} to {}".format(teams['start_date'].min(), teams['end_date'].max()))

161,051 teams
2011-08-12 10:06:48 to 2024-09-18 11:02:24


In [272]:
teams['start_date'] = pd.to_datetime(teams['start_date'])
teams['end_date'] = pd.to_datetime(teams['end_date'])

In [277]:
print("Average team size:")
teams.groupby(teams['start_date'].dt.year)['teammembers'].agg(['mean', 'median']).reset_index().tail(5)

Average team size:


Unnamed: 0,start_date,mean,median
9,2020,5.180548,3.0
10,2021,6.120465,3.0
11,2022,6.686718,4.0
12,2023,6.974904,4.0
13,2024,6.354951,4.0


# 4. auction

1. Highest Priority
    - Number of accounts ***(supporter data)***
    - ~~Number of accounts with at least one login to Auction Dashboard over the last 6 months ***(supporter data)***~~
    - Average number of logins to Auction Dashboard per year for all accounts and for those with at least one login over the last six months ***(supporter data)***
2. Medium Priority
    - ~~Average number of bids made through Auction Dashboard app per year ***(supporter data)***~~
    - ~~Average number of purchases (store, fund a need) through Auction Dashboard per year ***(supporter data)***~~
3. Low Priority
    - ~~Average number of transactions and transaction size for supporters through Auction Dashboard based on form type: ***(supporter data)***~~
        - Auctions
        - General donation (non auction related)
    - Number of logins by month

In [124]:
q = '''select
            t.form,
            t.id as transactions,
            t.amount as volume,
            t.purchases_count as purchase_count,
            t.purchases_amt as purchase_vol,
            t.auctionpurchase_count as auctionpurchase_count,
            t.auctiondonation_count as auctiondonation_count,
            t.email as donor
        from transactions as t
            left join form as f on f.id=t.form
        where
            t.status='A' and
            f.type=5 and
            t.date>='2022-01-01' '''
auction_trans = redshift_query_read(q, schema='production')

In [119]:
print("{:,} forms in sample".format(len(auction_trans['form'].unique())))
print("{:,} transactions".format(len(auction_trans)))
print("{:,} unique transactions".format(len(auction_trans['transactions'].unique())))
print("${:,.2f} total volume".format(auction_trans['volume'].sum()))

2,169 forms in sample
269,777 transactions
269,777 unique transactions
$81,702,172.23 total volume


In [127]:
auction_trans.groupby('form').agg({
    'transactions': 'count',
    'volume': 'mean',
    'purchase_count': 'sum',
    'purchase_vol': 'mean',
    'auctionpurchase_count': 'sum',
    'auctiondonation_count': 'sum',
    'donor': 'nunique'
}).agg(['mean', 'median']).transpose()

Unnamed: 0,mean,median
transactions,124.378515,103.0
volume,336.668318,172.3875
purchase_count,3.869064,0.0
purchase_vol,1.735316,0.0
auctionpurchase_count,52.923467,30.0
auctiondonation_count,14.676349,0.0
donor,87.761641,72.0


In [252]:
q = '''select
            t.form,
            t.email as donor,
            count(t.id) as transactions,
            sum(t.amount) as volume
        from transactions as t
        left join form as f on t.form=f.id
        where
            t.year>=2019 and
            t.status='A' and
            f.type=5
        group by t.form, t.email'''
auction_donors = redshift_query_read(q, schema='production')

In [256]:
print("Average transactions & volume for auction donors, per donor:")

auction_donors['avg per transaction'] = auction_donors['volume'] / auction_donors['transactions']
auction_donors[['transactions', 'volume', 'avg per transaction']].agg(['mean', 'median']).reset_index()

Average transactions & volume for auction donors, per donor:


Unnamed: 0,index,transactions,volume,avg per transaction
0,mean,1.407406,373.667735,252.997312
1,median,1.0,65.0,50.0


# 5. VT app

1. Medium Priority
- ~~Average number of event registrations through VT app per year ***(supporter data)***~~
- Average number of bids through VT app per year ***(supporter data)***
- ~~Average number of purchases (store, fund a need) through VT app per year ***(supporter data)***~~
- Average number of P2P pages created through VT app per year ***(supporter data)***
2. Low Priority
- ~~Average number of transactions and transaction size for supporters through VT app ***(supporter data)***~~

In [324]:
q = '''select
            year,
            count(id) as transactions,
            sum(amount) as volume,
            sum(purchases_count) as purchases,
            sum(auctionpurchase_count) as auction_purchases,
            sum(registrations_count) as registrations
        from transactions
        where
            status='A' and
            (source='vt' or source='mobilevt') and
            date>='2020-01-01'
        group by year'''
redshift_query_read(q, schema='production').sort_values('year', ascending=True)

Unnamed: 0,year,transactions,volume,purchases,auction_purchases,registrations
3,2020,141922,14979408.48,0,6005,6981
4,2021,167738,23378854.26,2,13674,6878
2,2022,229203,33062108.87,7,20671,8836
0,2023,383846,48798332.3,16,31839,10633
1,2024,384778,42609159.67,22,29428,7038


In [322]:
q = '''select
            date_trunc('month', date) as month,
            count(id) as transactions,
            sum(amount) as volume,
            sum(purchases_count) as purchases,
            sum(auctionpurchase_count) as auction_purchases,
            sum(registrations_count) as registrations
        from transactions
        where
            status='A' and
            (source='vt' or source='mobilevt') and
            date>='2020-01-01'
        group by date_trunc('month', date)'''
mobilevt = redshift_query_read(q, schema='production')

In [325]:
mobilevt.sort_values('month').tail(36)

Unnamed: 0,month,transactions,volume,purchases,auction_purchases,registrations
47,2021-10-01,17165,2659262.73,0,2706,857
0,2021-11-01,19120,2603687.57,0,2282,1033
18,2021-12-01,15872,2484659.46,0,836,53
12,2022-01-01,12847,1367180.46,0,82,197
35,2022-02-01,14428,1832824.59,0,986,847
43,2022-03-01,17756,2701566.06,0,2111,685
54,2022-04-01,16975,2837802.67,0,1497,752
28,2022-05-01,18791,2886401.6,1,3890,368
29,2022-06-01,15047,2416934.76,1,1291,476
46,2022-07-01,13347,1399959.09,0,314,216


# 6. Qgiv forms

1. **Low Priority**
- ~~Average number of P2P campaigns created per year ***(customer data)***~~
- ~~Average number of Auctions created per year ***(customer data)***~~
- ~~Average number of Standard Events created per year ***(customer data)***~~
- Average number of event registrations and average transaction size by form per year for supporters with accounts vs supporters without accounts: ***(supporter data)***
    - Standard Events
    - P2P
    - Auctions

## forms created

In [177]:
q = '''select
            date_trunc('year', datecreated) as year,
            type,
            count(id) as forms
        from production.form
        group by date_trunc('year', datecreated), type'''
created_forms = redshift_query_read(q, schema='production')

- QGIV = 1;
- BARNSTORM = 2;
- HOBNOB = 3;
- AGGREGATOR = 4;
- AUCTION = 5;
- QGIV_EVENT = 6;

In [184]:
pvt = created_forms.pivot(index='year', columns='type', values='forms').reset_index()
pvt.columns = ['year', 'qgiv', 'barnstorm', 'p2p',
               'aggregator', 'auction']

pvt.tail(6)

Unnamed: 0,year,qgiv,barnstorm,p2p,aggregator,auction
14,2019-01-01,8232.0,9.0,2087.0,297.0,148.0
15,2020-01-01,10048.0,13.0,2646.0,1025.0,684.0
16,2021-01-01,10641.0,,2498.0,911.0,896.0
17,2022-01-01,11890.0,,2748.0,1254.0,1267.0
18,2023-01-01,13434.0,2.0,2837.0,1276.0,1480.0
19,2024-01-01,18125.0,,2222.0,3573.0,1540.0


## qgiv events

In [203]:
q = '''select
            date as week,
            sum(events_count) as event_count,
            sum(events_priv_count) as event_priv_count
        from analyticsqgiv_weekly
        where date>='2018-01-01' 
        group by date'''
qgiv_events = redshift_query_read(q, schema='public')
qgiv_events.sort_values("week", ascending=True, inplace=True)
qgiv_events['month'] = pd.to_datetime(qgiv_events['week']).dt.to_period('M')
qgiv_events['year'] = pd.to_datetime(qgiv_events['week']).dt.year

In [327]:
qgiv_events.tail()

Unnamed: 0,week,event_count,event_priv_count,month,year
57,2024-07-01,19196,1289,2024-07,2024
9,2024-07-08,19277,1314,2024-07,2024
135,2024-07-15,19398,1330,2024-07,2024
114,2024-07-22,19568,1332,2024-07,2024
188,2024-07-29,19717,1333,2024-07,2024


In [328]:
qgiv_events.groupby('month')[['event_count', 'event_priv_count']].agg(['mean','median']).reset_index().tail(36)

Unnamed: 0_level_0,month,event_count,event_count,event_priv_count,event_priv_count
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,median,mean,median
25,2021-03,9119.8,9134.0,714.4,716.0
26,2021-04,9524.25,9547.0,731.75,726.0
27,2021-05,9654.6,9730.0,754.6,755.0
28,2021-06,9467.5,9471.5,746.5,747.0
29,2021-07,9642.5,9637.5,757.25,758.0
30,2021-08,9869.8,9864.0,764.2,764.0
31,2021-09,10245.25,10245.5,773.0,772.0
32,2021-10,10360.5,10353.5,790.75,790.0
33,2021-11,10493.2,10486.0,808.4,810.0
34,2021-12,10651.5,10659.5,826.5,828.0


In [196]:
qgiv_events.groupby('year')[['event_count', 'event_priv_count']].agg(['mean','median']).reset_index()

Unnamed: 0_level_0,year,event_count,event_count,event_priv_count,event_priv_count
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,median,mean,median
0,2018,0.0,0.0,0.0,0.0
1,2019,0.0,0.0,0.0,0.0
2,2020,30266.5,33502.5,2146.083333,2380.5
3,2021,40337.181818,41442.0,3128.272727,3163.0
4,2022,50157.125,49028.5,3466.625,3416.0
5,2023,65248.363636,63969.0,4545.727273,4415.0
6,2024,79359.0,75304.0,5231.142857,5009.0


## registrations & transactions w/ & w/out accounts

In [228]:
print("transactions by donors with accounts")
accounts.groupby('product')['transaction'].agg(['mean', 'median']).reset_index()

transactions by donors with accounts


Unnamed: 0,product,mean,median
0,1.0,55.295541,14.0
1,2.0,35.5,5.5
2,3.0,17.229955,5.0


In [263]:
q = '''select
            year,
            email,
            count(id) as transactions,
            sum(amount) as volume,
            sum(events_count) as events
        from transactions
        where
            status='A' and
            (source='don_form' or source='mobile') and
            (recurring=0 or recurring_origin=1) and
            year>=2020
        group by email, year'''
donor_qgiv_grp = redshift_query_read(q, schema='production')

In [264]:
print("Transactions per donor - all qgiv")

donor_qgiv_grp['volume avg'] = donor_grp['volume'] / donor_grp['transactions']
donor_qgiv_grp.groupby('year')[['transactions', 'volume avg', 'events']].agg(['mean', 'median']).reset_index()

Transactions per donor - all qgiv


Unnamed: 0_level_0,year,transactions,transactions,volume avg,volume avg,events,events
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,median,mean,median,mean,median
0,2020,1.477751,1.0,198.346649,52.0,0.303605,0.0
1,2021,1.469224,1.0,197.526859,52.0,0.404575,0.0
2,2022,1.456239,1.0,198.909412,52.0,0.465971,0.0
3,2023,1.455443,1.0,198.367622,51.97,0.540298,0.0
4,2024,1.414115,1.0,200.453162,51.5,0.696232,0.0


In [265]:
q = '''select
            year,
            form,
            count(id) as transactions,
            sum(amount) as volume,
            sum(events_count) as events
        from transactions
        where
            status='A' and
            (source='don_form' or source='mobile') and
            (recurring=0 or recurring_origin=1) and
            year>=2020
        group by form, year'''
form_qgiv_grp = redshift_query_read(q, schema='production')

In [266]:
print("Transactions per form - all qgiv")

form_qgiv_grp['volume avg'] = form_qgiv_grp['volume'] / form_qgiv_grp['transactions']
form_qgiv_grp.groupby('year')[['transactions', 'volume avg', 'events']].agg(['mean', 'median']).reset_index()

Transactions per form - all qgiv


Unnamed: 0_level_0,year,transactions,transactions,volume avg,volume avg,events,events
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,median,mean,median,mean,median
0,2020,105.121107,16.0,253.400325,109.224879,21.597191,0.0
1,2021,88.560205,15.0,283.32157,112.577702,24.386507,0.0
2,2022,82.862575,16.0,313.856468,122.815789,26.51455,0.0
3,2023,78.489404,16.0,326.667363,125.0,29.137278,0.0
4,2024,56.044095,12.0,332.816397,106.921944,27.593015,0.0


In [235]:
q = '''select
            year,
            email,
            count(id) as transactions,
            sum(amount) as volume,
            sum(registrations_count) as registrations
        from transactions
        where
            status='A' and
            source='p2p' and
            (recurring=0 or recurring_origin=1) and
            year>=2020
        group by email, year'''
donor_p2p_grp = redshift_query_read(q, schema='production')

In [236]:
print("Transactions per donor - p2p")

donor_p2p_grp['volume avg'] = donor_p2p_grp['volume'] / donor_p2p_grp['transactions']
donor_p2p_grp.groupby('year')[['transactions', 'volume avg', 'registrations']].agg(['mean', 'median']).reset_index()

Transactions per donor - p2p


Unnamed: 0_level_0,year,transactions,transactions,volume avg,volume avg,registrations,registrations
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,median,mean,median,mean,median
0,2020,1.156524,1.0,93.505304,50.0,0.359169,0.0
1,2021,1.161064,1.0,102.277356,50.0,0.378996,0.0
2,2022,1.170027,1.0,95.81812,50.0,0.454188,0.0
3,2023,1.181888,1.0,97.36328,50.0,0.498764,0.0
4,2024,1.17526,1.0,90.015564,50.0,0.508623,0.0


In [269]:
q = '''select
            year,
            form,
            count(id) as transactions,
            sum(amount) as volume,
            sum(registrations_count) as registrations
        from transactions
        where
            status='A' and
            source='p2p' and
            (recurring=0 or recurring_origin=1) and
            year>=2020
        group by form, year'''
form_p2p_grp = redshift_query_read(q, schema='production')

In [270]:
print("Transactions per form - p2p")

form_p2p_grp['volume avg'] = form_p2p_grp['volume'] / form_p2p_grp['transactions']
form_p2p_grp.groupby('year')[['transactions', 'volume avg', 'registrations']].agg(['mean', 'median']).reset_index()

Transactions per form - p2p


Unnamed: 0_level_0,year,transactions,transactions,volume avg,volume avg,registrations,registrations
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,median,mean,median,mean,median
0,2020,160.890719,36.0,131.698522,68.327774,49.966068,5.0
1,2021,162.237857,36.0,157.40667,77.615433,52.957937,4.0
2,2022,184.078385,45.0,156.535333,73.666667,71.456532,7.0
3,2023,203.478812,47.0,156.535597,72.9,85.869375,8.0
4,2024,195.269036,47.0,126.210869,71.218092,84.61269,9.0


# 7. kiosk

1. Highest Priority
    - Number of accounts ***(supporter data)***
    - Number of accounts with at least one login to Kiosk app over the last 6 months ***(supporter data)***
    - Average number of logins to Kiosk app per year for all accounts and for those with at least one login over the last six months ***(supporter data)***
2. Medium Priority
    - ~~Number of event registrations with Kiosk account vs without Kiosk account ***(supporter data)***~~
3. Low Priority
    - ~~Average number of transactions and transaction size for supporters through Kiosk app ***(supporter data)***~~
    - Number of logins by month


In [245]:
q = '''select 
            year,
            count(distinct(form)) as forms,
            count(id) as transactions,
            sum(amount) as volume,
            sum(events_tickets) as tickets,
            sum(events_count) as events_counts,
            avg(amount) as avg_transaction
        from transactions
        where
            source='kiosk' and
            status='A' and
            year>=2020
        group by year'''
kiosk = redshift_query_read(q, schema='production')

In [329]:
kiosk.sort_values('year', ascending=True)

Unnamed: 0,year,forms,transactions,volume,tickets,events_counts,avg_transaction
4,2020,83,26456,2196290.16,7031,3929,83.016713
3,2021,83,25597,3217111.93,5817,3162,125.683163
2,2022,80,32593,4809263.47,9281,4242,147.555103
1,2023,71,35654,6230498.53,13043,5204,174.748935
0,2024,62,21551,3603222.2,9508,4021,167.195128


# Notes

- recent Barnstorm activity? _deprecated, ignore_
- is account login as an API user role the givi app? and is auction rolled into qgiv account user role? _confirmed_

__TODO__

- need to request events data dump from production group by created.year

