Past event data for legacy events and data this year for P2P events using the P2P beta. It would be great if we can find a client that used legacy P2P last year and the redesigned registration this year.

- number of people that start registration and finish registration
    - this might be an analytics pull since we don't have the pending registrations anymore. something like "x" people view registration form and "x" people reached the thank you page
- total event registrations
- total amount raised
- fundraisers vs non-fundraisers
- increase in donations made during the registration process

In [162]:
import pandas as pd
import numpy as np

import sys
sys.path.insert(1, '../../scripts/')
from s3_support import *

# load data

### transactions

In [163]:
# registrations data
q = '''select 
            org, 
            form, 
            date, 
            registrations_count, 
            registrations_amt,
            donations_count,
            donations_amt,
            id
        from transactions
        where 
            status='A' and 
            source='p2p' and 
            date>='2019-01-01';'''
df = redshift_query_read(q, schema='production')

### registrations

In [164]:
df_regs = df[df['registrations_count']>0].copy()
df_regs['year'] = df_regs['date'].dt.year
df_regs = df_regs.groupby(['org', 'form', 'year']).agg({
    'registrations_count': 'sum',
    'registrations_amt': 'sum',
    'donations_count': 'sum',
    'donations_amt': 'sum',
    'id': 'count'
}).reset_index()
df_regs['transactions'] = df_regs['id']
df_regs.drop('id', axis=1, inplace=True)

In [165]:
df_regs.head(3)

Unnamed: 0,org,form,year,registrations_count,registrations_amt,donations_count,donations_amt,transactions
0,9,980699,2021,1,0.0,0,0.0,1
1,9,981561,2021,20,0.0,0,0.0,20
2,13,949649,2019,16,0.0,0,0.0,16


### donations

In [166]:
df_donations = df[df['registrations_count']==0].copy()
df_donations['year'] = df_donations['date'].dt.year
df_donations = df_donations.groupby(['org', 'form', 'year']).agg({
    'registrations_count': 'sum',
    'registrations_amt': 'sum',
    'donations_count': 'sum',
    'donations_amt': 'sum',
    'id': 'count'
}).reset_index()
df_donations['transactions'] = df_donations['id']
df_donations.drop('id', axis=1, inplace=True)

In [167]:
df_donations.head(3)

Unnamed: 0,org,form,year,registrations_count,registrations_amt,donations_count,donations_amt,transactions
0,6,957164,2020,0,0.0,97,4876.89,97
1,13,942347,2019,0,0.0,139,10648.0,142
2,13,945074,2019,0,0.0,58,4413.53,58


# annual breakdown

- total events
- total registrations, registration donations & donations
- mean & median registrations, registration donations & donations per form

## all

In [168]:
annual_data = []
for year in df_donations['year'].unique():
    these_dons = df_donations[df_donations['year']==year]
    these_regs = df_regs[df_regs['year']==year]
    annual_data.append({
        'year': year,
        'events count': len(set(these_dons['form'].unique().tolist() + these_regs['form'].unique().tolist())),
        'total reg count': these_regs['registrations_count'].sum(),
        'total reg vol': these_regs['registrations_amt'].sum(),
        'total reg don count': these_regs['donations_count'].sum(),
        'total reg don vol': these_regs['donations_amt'].sum(),
        'total don count': these_dons['donations_amt'].count(),
        'total don vol': these_dons['donations_amt'].sum(),
        'mean reg count': these_regs.groupby('form')['registrations_count'].sum().mean(),
        'mean reg vol': these_regs.groupby('form')['registrations_amt'].sum().mean(),
        'mean reg don count': these_regs.groupby('form')['donations_count'].sum().mean(),
        'mean reg don vol': these_regs.groupby('form')['donations_amt'].sum().mean(),
        'mean don count': these_dons.groupby('form')['donations_count'].sum().mean(),
        'mean don vol': these_dons.groupby('form')['donations_amt'].sum().mean(),
        'median reg count': these_regs.groupby('form')['registrations_count'].sum().median(),
        'median reg vol': these_regs.groupby('form')['registrations_amt'].sum().median(),
        'median reg don count': these_regs.groupby('form')['donations_count'].sum().median(),
        'median reg don vol': these_regs.groupby('form')['donations_amt'].sum().median(),
        'median don count': these_dons.groupby('form')['donations_count'].sum().median(),
        'median don vol': these_dons.groupby('form')['donations_amt'].sum().median()
    })

In [169]:
for _, r in pd.DataFrame(annual_data).transpose().iterrows():
    if 'vol' in r.name:
        print("| {} | \${:,.2f} | \${:,.2f} | \${:,.2f} |".format(r.name, r[0], r[1], r[2]))
    elif 'mean' in r.name:
        print("| {} | {:,.2f} | {:,.2f} | {:,.2f} |".format(r.name, r[0], r[1], r[2]))
    elif 'count' in r.name:
        print("| {} | {:,.0f} | {:,.0f} | {:,.0f} |".format(r.name, r[0], r[1], r[2]))
    else:
        print("| {} | {} | {} | {} |".format(r.name, r[0], r[1], r[2]))

| year | 2020.0 | 2019.0 | 2021.0 |
| events count | 2,038 | 1,721 | 2,047 |
| total reg count | 100,132 | 159,469 | 104,596 |
| total reg vol | \$1,204,984.20 | \$2,078,153.05 | \$1,566,710.09 |
| total reg don count | 21,549 | 25,462 | 21,942 |
| total reg don vol | \$1,825,719.04 | \$1,718,610.02 | \$2,111,119.68 |
| total don count | 1,867 | 1,574 | 1,900 |
| total don vol | \$26,297,913.82 | \$19,482,425.74 | \$28,852,946.90 |
| mean reg count | 75.40 | 119.81 | 82.88 |
| mean reg vol | \$907.37 | \$1,561.35 | \$1,241.45 |
| mean reg don count | 16.23 | 19.13 | 17.39 |
| mean reg don vol | \$1,374.79 | \$1,291.22 | \$1,672.84 |
| mean don count | 125.60 | 132.97 | 141.01 |
| mean don vol | \$14,085.65 | \$12,377.65 | \$15,185.76 |
| median reg count | 20 | 33 | 21 |
| median reg vol | \$0.00 | \$0.00 | \$0.00 |
| median reg don count | 3 | 4 | 3 |
| median reg don vol | \$210.00 | \$250.00 | \$250.00 |
| median don count | 29 | 40 | 25 |
| median don vol | \$2,908.00 | \$3,528.21 

| year | 2020 | 2019 | 2021 |
|------|--------|--------|--------|
| events count | 2,038 | 1,721 | 2,047 |
| total reg count | 100,132 | 159,469 | 104,596 |
| total reg vol | \$1,204,984.20 | \$2,078,153.05 | \$1,566,710.09 |
| total reg don count | 21,549 | 25,462 | 21,942 |
| total reg don vol | \$1,825,719.04 | \$1,718,610.02 | \$2,111,119.68 |
| total don count | 1,867 | 1,574 | 1,900 |
| total don vol | \$26,297,913.82 | \$19,482,425.74 | \$28,852,946.90 |
| mean reg count | 75.40 | 119.81 | 82.88 |
| mean reg vol | \$907.37 | \$1,561.35 | \$1,241.45 |
| mean reg don count | 16.23 | 19.13 | 17.39 |
| mean reg don vol | \$1,374.79 | \$1,291.22 | \$1,672.84 |
| mean don count | 125.60 | 132.97 | 141.01 |
| mean don vol | \$14,085.65 | \$12,377.65 | \$15,185.76 |
| median reg count | 20 | 33 | 21 |
| median reg vol | \$0.00 | \$0.00 | \$0.00 |
| median reg don count | 3 | 4 | 3 |
| median reg don vol | \$210.00 | \$250.00 | \$250.00 |
| median don count | 29 | 40 | 25 |
| median don vol | \$2,908.00 | \$3,528.21 | \$2,858.25 |

## 2020 & 2021

In [170]:
orgs_2020 = df_donations[df_donations['year']==2020]['org'].tolist()
orgs_2021 = df_donations[df_donations['year']==2021]['org'].tolist()

orgs_20_21 = list(set(orgs_2020) & set(orgs_2021))
len(orgs_2020), len(orgs_2021), len(orgs_20_21)

(1867, 1900, 438)

In [171]:
this_don_df = df_donations[df_donations['org'].isin(orgs_20_21)]
this_reg_df = df_regs[df_regs['org'].isin(orgs_20_21)]

annual_data = []
for year in [2020, 2021]:
    these_dons = this_don_df[this_don_df['year']==year]
    these_regs = this_reg_df[this_reg_df['year']==year]
    annual_data.append({
        'year': year,
        'events count': len(set(these_dons['form'].unique().tolist() + these_regs['form'].unique().tolist())),
        'total reg count': these_regs['registrations_count'].sum(),
        'total reg vol': these_regs['registrations_amt'].sum(),
        'total reg don count': these_regs['donations_count'].sum(),
        'total reg don vol': these_regs['donations_amt'].sum(),
        'total don count': these_dons['donations_amt'].count(),
        'total don vol': these_dons['donations_amt'].sum(),
        'mean reg count': these_regs.groupby('form')['registrations_count'].sum().mean(),
        'mean reg vol': these_regs.groupby('form')['registrations_amt'].sum().mean(),
        'mean reg don count': these_regs.groupby('form')['donations_count'].sum().mean(),
        'mean reg don vol': these_regs.groupby('form')['donations_amt'].sum().mean(),
        'mean don count': these_dons.groupby('form')['donations_count'].sum().mean(),
        'mean don vol': these_dons.groupby('form')['donations_amt'].sum().mean(),
        'median reg count': these_regs.groupby('form')['registrations_count'].sum().median(),
        'median reg vol': these_regs.groupby('form')['registrations_amt'].sum().median(),
        'median reg don count': these_regs.groupby('form')['donations_count'].sum().median(),
        'median reg don vol': these_regs.groupby('form')['donations_amt'].sum().median(),
        'median don count': these_dons.groupby('form')['donations_count'].sum().median(),
        'median don vol': these_dons.groupby('form')['donations_amt'].sum().median()
    })

In [172]:
for _, r in pd.DataFrame(annual_data).transpose().iterrows():
    if 'vol' in r.name:
        print("| {} | \${:,.2f} | \${:,.2f} |".format(r.name, r[0], r[1]))
    elif 'mean' in r.name:
        print("| {} | {:,.2f} | {:,.2f} |".format(r.name, r[0], r[1]))
    elif 'count' in r.name:
        print("| {} | {:,.0f} | {:,.0f} |".format(r.name, r[0], r[1]))
    else:
        print("| {} | {} | {} |".format(r.name, r[0], r[1]))

| year | 2020.0 | 2021.0 |
| events count | 1,776 | 1,769 |
| total reg count | 87,129 | 85,215 |
| total reg vol | \$1,030,107.20 | \$1,407,037.09 |
| total reg don count | 18,072 | 17,788 |
| total reg don vol | \$1,583,387.58 | \$1,561,218.96 |
| total don count | 1,648 | 1,656 |
| total don vol | \$24,426,152.37 | \$23,894,673.94 |
| mean reg count | 78.07 | 83.54 |
| mean reg vol | \$923.04 | \$1,379.45 |
| mean reg don count | 16.19 | 17.44 |
| mean reg don vol | \$1,418.81 | \$1,530.61 |
| mean don count | 131.53 | 135.89 |
| mean don vol | \$14,821.69 | \$14,429.15 |
| median reg count | 21 | 21 |
| median reg vol | \$0.00 | \$0.00 |
| median reg don count | 3 | 3 |
| median reg don vol | \$225.00 | \$224.00 |
| median don count | 28 | 22 |
| median don vol | \$2,914.00 | \$2,277.50 |


| year | 2020 | 2021 |
|------|------|------|
| events count | 1,776 | 1,769 |
| total reg count | 87,129 | 85,215 |
| total reg vol | \$1,030,107.20 | \$1,407,037.09 |
| total reg don count | 18,072 | 17,788 |
| total reg don vol | \$1,583,387.58 | \$1,561,218.96 |
| total don count | 1,648 | 1,656 |
| total don vol | \$24,426,152.37 | \$23,894,673.94 |
| mean reg count | 78.07 | 83.54 |
| mean reg vol | \$923.04 | \$1,379.45 |
| mean reg don count | 16.19 | 17.44 |
| mean reg don vol | \$1,418.81 | \$1,530.61 |
| mean don count | 131.53 | 135.89 |
| mean don vol | \$14,821.69 | \$14,429.15 |
| median reg count | 21 | 21 |
| median reg vol | \$0.00 | \$0.00 |
| median reg don count | 3 | 3 |
| median reg don vol | \$225.00 | \$224.00 |
| median don count | 28 | 22 |
| median don vol | \$2,914.00 | \$2,277.50 |

## 2019, 2020, 2021

In [173]:
orgs_2019 = df_donations[df_donations['year']==2019]['org'].tolist()
orgs_2020 = df_donations[df_donations['year']==2020]['org'].tolist()
orgs_2021 = df_donations[df_donations['year']==2021]['org'].tolist()

orgs_19_20_21 = list(set(orgs_2019) & set(orgs_2020) & set(orgs_2021))
len(orgs_2020), len(orgs_2021), len(orgs_19_20_21)

(1867, 1900, 280)

In [174]:
this_don_df = df_donations[df_donations['org'].isin(orgs_19_20_21)]
this_reg_df = df_regs[df_regs['org'].isin(orgs_19_20_21)]

annual_data = []
for year in df_donations['year'].unique():
    these_dons = this_don_df[this_don_df['year']==year]
    these_regs = this_reg_df[this_reg_df['year']==year]
    annual_data.append({
        'year': year,
        'events count': len(set(these_dons['form'].unique().tolist() + these_regs['form'].unique().tolist())),
        'total reg count': these_regs['registrations_count'].sum(),
        'total reg vol': these_regs['registrations_amt'].sum(),
        'total reg don count': these_regs['donations_count'].sum(),
        'total reg don vol': these_regs['donations_amt'].sum(),
        'total don count': these_dons['donations_amt'].count(),
        'total don vol': these_dons['donations_amt'].sum(),
        'mean reg count': these_regs.groupby('form')['registrations_count'].sum().mean(),
        'mean reg vol': these_regs.groupby('form')['registrations_amt'].sum().mean(),
        'mean reg don count': these_regs.groupby('form')['donations_count'].sum().mean(),
        'mean reg don vol': these_regs.groupby('form')['donations_amt'].sum().mean(),
        'mean don count': these_dons.groupby('form')['donations_count'].sum().mean(),
        'mean don vol': these_dons.groupby('form')['donations_amt'].sum().mean(),
        'median reg count': these_regs.groupby('form')['registrations_count'].sum().median(),
        'median reg vol': these_regs.groupby('form')['registrations_amt'].sum().median(),
        'median reg don count': these_regs.groupby('form')['donations_count'].sum().median(),
        'median reg don vol': these_regs.groupby('form')['donations_amt'].sum().median(),
        'median don count': these_dons.groupby('form')['donations_count'].sum().median(),
        'median don vol': these_dons.groupby('form')['donations_amt'].sum().median()
    })

In [175]:
for _, r in pd.DataFrame(annual_data).transpose().iterrows():
    if 'vol' in r.name:
        print("| {} | \${:,.2f} | \${:,.2f} | \${:,.2f} |".format(r.name, r[0], r[1], r[2]))
    elif 'mean' in r.name:
        print("| {} | {:,.2f} | {:,.2f} | {:,.2f} |".format(r.name, r[0], r[1], r[2]))
    elif 'count' in r.name:
        print("| {} | {:,.0f} | {:,.0f} | {:,.0f} |".format(r.name, r[0], r[1], r[2]))
    else:
        print("| {} | {} | {} | {} |".format(r.name, r[0], r[1], r[2]))

| year | 2020.0 | 2019.0 | 2021.0 |
| events count | 1,390 | 1,320 | 1,170 |
| total reg count | 69,861 | 125,722 | 63,691 |
| total reg vol | \$803,904.70 | \$1,401,589.60 | \$954,653.34 |
| total reg don count | 14,424 | 20,013 | 13,554 |
| total reg don vol | \$1,205,375.22 | \$1,408,589.96 | \$1,160,017.47 |
| total don count | 1,284 | 1,217 | 1,097 |
| total don vol | \$18,503,518.38 | \$15,931,308.53 | \$15,656,587.43 |
| mean reg count | 78.67 | 126.48 | 89.08 |
| mean reg vol | \$905.30 | \$1,410.05 | \$1,335.18 |
| mean reg don count | 16.24 | 20.13 | 18.96 |
| mean reg don vol | \$1,357.40 | \$1,417.09 | \$1,622.40 |
| mean don count | 128.30 | 136.60 | 119.25 |
| mean don vol | \$14,410.84 | \$13,090.64 | \$14,272.19 |
| median reg count | 20 | 32 | 22 |
| median reg vol | \$0.00 | \$0.00 | \$0.00 |
| median reg don count | 3 | 4 | 3 |
| median reg don vol | \$169.50 | \$242.50 | \$240.00 |
| median don count | 26 | 39 | 22 |
| median don vol | \$2,702.75 | \$3,645.00 | \$2,

| year | 2020 | 2019 | 2021 |
|------|------|------|------|
| events count | 1,390 | 1,320 | 1,170 |
| total reg count | 69,861 | 125,722 | 63,691 |
| total reg vol | \$803,904.70 | \$1,401,589.60 | \$954,653.34 |
| total reg don count | 14,424 | 20,013 | 13,554 |
| total reg don vol | \$1,205,375.22 | \$1,408,589.96 | \$1,160,017.47 |
| total don count | 1,284 | 1,217 | 1,097 |
| total don vol | \$18,503,518.38 | \$15,931,308.53 | \$15,656,587.43 |
| mean reg count | 78.67 | 126.48 | 89.08 |
| mean reg vol | \$905.30 | \$1,410.05 | \$1,335.18 |
| mean reg don count | 16.24 | 20.13 | 18.96 |
| mean reg don vol | \$1,357.40 | \$1,417.09 | \$1,622.40 |
| mean don count | 128.30 | 136.60 | 119.25 |
| mean don vol | \$14,410.84 | \$13,090.64 | \$14,272.19 |
| median reg count | 20 | 32 | 22 |
| median reg vol | \$0.00 | \$0.00 | \$0.00 |
| median reg don count | 3 | 4 | 3 |
| median reg don vol | \$169.50 | \$242.50 | \$240.00 |
| median don count | 26 | 39 | 22 |
| median don vol | \$2,702.75 | \$3,645.00 | \$2,375.00 |

# traffic & conversion

In [176]:
q = '''select
            date,
            org,
            form,
            views,
            bounces,
            path
        from googleanalytics_traffic
        where
            date>'2019-01-01' and
            p2p_frontend=1 and
            (path like '%register%' or path like '%donate%') and
            org!=0'''
traff = redshift_query_read(q, schema='production')

In [177]:
traff_grpd = traff.groupby(['org', 'form', 'date', 'path'])[['views', 'bounces']].sum().reset_index()
traff_grpd['is_registration'] = traff_grpd['path'].str.contains('register')
traff_grpd['is_donation'] = traff_grpd['path'].str.contains('donate')
traff_grpd['year'] = traff_grpd['date'].dt.year

In [178]:
def flag_registration_stage(path):
    if 'register' not in path:
        return None
    elif 'billing' in path:
        return 'billing'
    elif 'information' in path:
        return 'info'
    elif 'details' in path:
        return 'details'
    elif 'receipt' in path:
        return 'receipt'
    elif 'store' in path:
        return 'store'
    else:
        return 'start'
traff_grpd['registration_stage'] = traff_grpd['path'].apply(flag_registration_stage)

In [179]:
def flag_donation_stage(path):
    if 'donate' not in path:
        return None
    elif 'receipt' in path:
        return 'receipt'
    else:
        return 'start'
traff_grpd['donation_stage'] = traff_grpd['path'].apply(flag_donation_stage)

In [180]:
traff_grpd = traff_grpd.groupby(['org', 'form', 'year', 'registration_stage'])[['views', 'bounces']].sum().reset_index()
traff_pvt = traff_grpd.pivot(index=['org', 'form', 'year'], columns=['registration_stage'], values=['views', 'bounces']).reset_index()

cols = ['org', 'form', 'year', 'views_billing',
        'views_details', 'views_info', 'views_receipt',
        'views_start', 'views_store', 'bounces_billing',
        'bounces_details', 'bounces_info', 'bounces_receipt',
        'bounces_start', 'bounces_store']
traff_pvt.columns = cols
traff_pvt.fillna(0, inplace=True)

In [181]:
traff_pvt.tail()

Unnamed: 0,org,form,year,views_billing,views_details,views_info,views_receipt,views_start,views_store,bounces_billing,bounces_details,bounces_info,bounces_receipt,bounces_start,bounces_store
5320,447841,980535,2021,4.0,4.0,0.0,4.0,12.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0
5321,447855,980664,2021,2.0,0.0,2.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5322,447858,980729,2021,2.0,0.0,4.0,2.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5323,447858,980730,2021,2.0,2.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5324,447884,981071,2021,2.0,0.0,4.0,2.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### traffic

In [182]:
traff_pvt.groupby('year')[['views_start', 'views_receipt']].agg(['mean', 'median']).reset_index()

Unnamed: 0_level_0,year,views_start,views_start,views_receipt,views_receipt
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,median,mean,median
0,2019,821.045087,87.0,206.132948,19.0
1,2020,391.164303,24.0,82.556738,0.0
2,2021,397.132948,16.0,80.246749,0.0


In [183]:
traff_pvt[traff_pvt['org'].isin(orgs_20_21)].groupby('year')[['views_start', 'views_receipt']].agg(['mean', 'median']).reset_index()

Unnamed: 0_level_0,year,views_start,views_start,views_receipt,views_receipt
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,median,mean,median
0,2019,448.045093,82.0,131.848806,18.0
1,2020,280.034341,14.0,60.951923,0.0
2,2021,358.939811,13.0,79.678418,0.0


In [184]:
traff_pvt[traff_pvt['org'].isin(orgs_19_20_21)].groupby('year')[['views_start', 'views_receipt']].agg(['mean', 'median']).reset_index()

Unnamed: 0_level_0,year,views_start,views_start,views_receipt,views_receipt
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,median,mean,median
0,2019,453.660326,82.0,132.94837,18.5
1,2020,304.396907,14.0,65.702749,0.0
2,2021,336.595291,12.0,73.769058,0.0


### dropoff rates

In [185]:
traff_pvt['start_rec_dropoff'] = (traff_pvt['views_start'] - traff_pvt['views_receipt']) / traff_pvt['views_start']
traff_pvt[traff_pvt['start_rec_dropoff']>0].groupby('year')['start_rec_dropoff'].agg(['mean', 'median']).reset_index()

Unnamed: 0,year,mean,median
0,2019,0.811207,0.838372
1,2020,0.893967,1.0
2,2021,0.88819,1.0


In [186]:
traff_pvt[(traff_pvt['org'].isin(orgs_20_21))&(traff_pvt['start_rec_dropoff']>0)].groupby('year')['start_rec_dropoff'].agg(['mean', 'median']).reset_index()

Unnamed: 0,year,mean,median
0,2019,0.807521,0.838372
1,2020,0.905321,1.0
2,2021,0.903705,1.0


In [187]:
traff_pvt[(traff_pvt['org'].isin(orgs_19_20_21))&(traff_pvt['start_rec_dropoff']>0)].groupby('year')['start_rec_dropoff'].agg(['mean', 'median']).reset_index()

Unnamed: 0,year,mean,median
0,2019,0.808728,0.838633
1,2020,0.903996,1.0
2,2021,0.906544,1.0


## fundraisers vs non-fundraisers

### all

In [188]:
q = "select year, count(distinct transdonationentity) as fundraising_registrants, sum(registrations_count) as registrants from transactions where source='p2p' and year>=2019 group by year;"
fundraising_regs = redshift_query_read(q, schema='production')

In [189]:
fundraising_regs['fundraising_perc'] = fundraising_regs['fundraising_registrants'] / fundraising_regs['registrants']
fundraising_regs.sort_values('year')

Unnamed: 0,year,fundraising_registrants,registrants,fundraising_perc
2,2019,58665,170175,0.344733
0,2020,46712,106466,0.43875
1,2021,42458,112723,0.376658


### year over year, same orgs

In [190]:
q = "select year, org, count(distinct transdonationentity) as fundraising_registrants, sum(registrations_count) as registrants from transactions where source='p2p' and year>=2019 group by org, year;"
fundraising_regs = redshift_query_read(q, schema='production')

In [191]:
regs_orgs_20_21 = fundraising_regs[fundraising_regs['org'].isin(orgs_20_21)]
regs_orgs_20_21 = regs_orgs_20_21.groupby('year')[['fundraising_registrants', 'registrants']].sum()
regs_orgs_20_21['fundraising_perc'] = regs_orgs_20_21['fundraising_registrants'] / regs_orgs_20_21['registrants']
regs_orgs_20_21.sort_values('year')

Unnamed: 0_level_0,fundraising_registrants,registrants,fundraising_perc
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2019,48692,134034,0.363281
2020,41889,92393,0.453379
2021,35278,92115,0.382978


In [193]:
regs_orgs_19_20_21 = fundraising_regs[fundraising_regs['org'].isin(orgs_19_20_21)]
regs_orgs_19_20_21 = regs_orgs_19_20_21.groupby('year')[['fundraising_registrants', 'registrants']].sum()
regs_orgs_19_20_21['fundraising_perc'] = regs_orgs_19_20_21['fundraising_registrants'] / regs_orgs_19_20_21['registrants']
regs_orgs_19_20_21.sort_values('year')

Unnamed: 0_level_0,fundraising_registrants,registrants,fundraising_perc
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2019,48683,133997,0.363314
2020,33568,73984,0.45372
2021,25462,69096,0.368502
