In [1]:
import sys
sys.path.insert(1, '../../scripts/')
from s3_support import *
%matplotlib inline

# data load

### forms by type

In [22]:
# Load every form row with type=3 from the production schema.
forms_sql = "select * from form where type=3"
forms = redshift_query_read(forms_sql, schema='production')

In [23]:
# Report the number of forms loaded, then preview the last three rows.
n_forms = len(forms)
print("{:,} forms".format(n_forms))
forms.iloc[-3:]

13,467 forms


Unnamed: 0,id,org,datecreated,datelive,path,status,type,template,enableenddate,enablestartdate,recurringplanenddaterequired
13464,992626,446458,2022-08-17 10:23:54,2022-08-17 10:23:54,ew78i4,1,3,10,True,False,False
13465,992690,441649,2022-08-17 00:00:00,2022-08-17 00:00:00,jtrivibo2,1,3,10,True,False,False
13466,992722,438484,2022-08-18 00:00:00,2022-08-18 00:00:00,d202heawg5,1,3,10,True,False,False


### processing

In [None]:
# Weekly, per-form transaction aggregates restricted to type=3 forms.
# One output row per (form, week): transaction count/volume plus the
# donation, purchase, registration and event counts and amounts.
#
# The join is written as an INNER join: the `f.type=3` predicate in the
# WHERE clause already rejects transactions without a matching form row
# (f.type would be NULL), so the previous LEFT join could never return
# unmatched rows and only obscured the intent.
# NOTE(review): status 'A' presumably marks approved transactions — confirm.
q = '''select
            t.form,
            date_trunc('week', t.date) as week,
            count(t.id) as trans_count,
            sum(t.amount) as trans_vol,
            sum(t.donations_count) as donations_count,
            sum(t.donations_amt) as donations_vol,
            sum(t.purchases_count) as purchases_count,
            sum(t.purchases_amt) as purchases_vol,
            sum(t.registrations_count) as registrations_count,
            sum(t.registrations_amt) as registrations_vol,
            sum(t.events_amt) as events_vol,
            sum(t.events_count) as events_count
        from transactions as t
            inner join form as f on f.id=t.form
        where
            t.status='A' and
            f.type=3
        group by date_trunc('week', t.date), t.form'''
trans = redshift_query_read(q, schema='production')

In [25]:
# Report the number of (form, week) aggregate rows, then preview the last three.
n_trans_rows = len(trans)
print("{:,} trans entries".format(n_trans_rows))
trans.iloc[-3:]

84,205 trans entries


Unnamed: 0,form,week,trans_count,trans_vol,donations_count,donations_vol,purchases_count,purchases_vol,registrations_count,registrations_vol,events_vol,events_count
84202,984028,2022-04-11,1,5000.0,1,5000.0,0,0.0,0,0.0,0.0,0
84203,988035,2022-05-02,1,0.0,0,0.0,0,0.0,1,0.0,0.0,0
84204,990977,2022-07-11,1,0.0,0,0.0,0,0.0,3,0.0,0.0,0


### p2p stats

In [26]:
# Load the whole analyticsp2p_weekly table from the public schema.
p2p_weekly_sql = "select * from analyticsp2p_weekly"
df = redshift_query_read(p2p_weekly_sql, schema='public')

In [27]:
# Report the size of the weekly p2p table, then preview the last three rows.
n_p2p_rows = len(df)
print("{:,} rows".format(n_p2p_rows))
df.iloc[-3:]

1,395,788 rows


Unnamed: 0,date,form,org,reg_count,sub_reg_count,teams_count,reg_volume,don_volume,don_count,class_count,...,allows_social,social_templt_count,social_auto,pcnt_posts,mon_posts,count_posts,date_posts,email_templt_count,sponsors_count,inappr_content
1395785,2020-03-02,47152,8143,0,0,0,0.0,0.0,0,0,...,0,0,0,0,0,0,0,0,0,0
1395786,2020-06-01,936275,185347,0,0,0,0.0,0.0,0,0,...,0,0,0,0,0,0,0,0,1,0
1395787,2021-06-14,973849,444931,0,0,0,0.0,275.0,2,0,...,0,0,0,0,0,0,0,0,0,0


In [28]:
# Latest value in the `date` column of the weekly p2p table.
latest_date = df['date'].max()
latest_date

Timestamp('2022-08-22 00:00:00')

# identify top performers

Establish metrics for top performers in several categories so that we can make recommendations optimized for different outcomes, e.g., raising as much money as possible, driving donations, or driving registrations.

1. total volume
2. donations
3. registrations (total participants vs total registration vol)
4. purchases (total purchases vs total purchase vol)
5. effective fundraising (total vol / # registrants)

In [31]:
# Collapse the weekly rows into one row of totals per form.
summed_columns = [
    'trans_vol',
    'donations_vol',
    'purchases_count',
    'purchases_vol',
    'registrations_count',
    'registrations_vol',
]
per_form_sums = trans.groupby('form')[summed_columns].sum()
form_totals = per_form_sums.reset_index()

### 1. total volume

In [34]:
# Ten forms with the highest total transaction volume.
ranked_by_trans_vol = form_totals.sort_values(by='trans_vol', ascending=False)
ranked_by_trans_vol.iloc[:10]

Unnamed: 0,form,trans_vol,donations_vol,purchases_count,purchases_vol,registrations_count,registrations_vol
8169,988435,1964286.23,1933912.37,0,0.0,136,0.0
6412,971650,1937942.37,1908113.24,0,0.0,127,0.0
4700,956326,1836820.2,1812864.95,0,0.0,153,0.0
3230,944755,1448886.86,1428326.62,0,0.0,166,0.0
3426,946088,1268614.36,1237148.71,167,4112.85,3858,0.0
1744,931717,1187499.81,1171942.16,0,0.0,175,0.0
7067,977668,1166078.34,1145017.79,0,0.0,3288,0.0
4614,955588,1128316.65,906125.29,1745,200782.0,50,0.0
6457,972054,1128296.74,1139748.34,0,0.0,0,0.0
7619,983168,781010.34,780565.26,0,0.0,4253,0.0


In [45]:
# Distribution of forms by total transaction volume.
# Every bucket is a half-open range [low, high); high=None marks the
# open-ended top bucket. Previously the last entry was a bare int and the
# loop depended on a bare `except:` catching the resulting TypeError, which
# would also have masked any unrelated error raised inside the `try`.
buckets = [(0, 250), (250, 500), (500, 1000), (1000, 2500),
           (2500, 5000), (5000, 10000), (10000, 20000),
           (20000, 50000), (50000, 100000), (100000, None)]
values = form_totals['trans_vol']
for low, high in buckets:
    in_bucket = values >= low
    if high is not None:
        in_bucket = in_bucket & (values < high)
    len_forms = int(in_bucket.sum())

    perc_forms = (len_forms / len(form_totals)) * 100.
    # The open-ended bucket is labelled by its lower bound only.
    label = low if high is None else (low, high)
    print("{}: {:,} forms ({:.2f}%)".format(label, len_forms, perc_forms))

(0, 250): 1,120 forms (13.22%)
(250, 500): 375 forms (4.43%)
(500, 1000): 578 forms (6.82%)
(1000, 2500): 1,102 forms (13.01%)
(2500, 5000): 1,110 forms (13.11%)
(5000, 10000): 1,199 forms (14.16%)
(10000, 20000): 1,221 forms (14.42%)
(20000, 50000): 1,100 forms (12.99%)
(50000, 100000): 428 forms (5.05%)
100000: 237 forms (2.80%)


In [72]:
# top 10%
len_all = len(form_totals)
top_10perc = len_all // 10  # number of forms in the top tenth

by_total_vol = form_totals.sort_values(by='trans_vol', ascending=False)
top_10_total_vol = by_total_vol.iloc[:top_10perc]
top_total_vol = top_10_total_vol['trans_vol']

# Summary statistics of total volume within the top tenth of forms.
print("Top 10% total volume:")
print("{:,} forms".format(len(top_10_total_vol)))
print("${:,.2f} median volume".format(top_total_vol.median()))
print("${:,.2f} mean volume".format(top_total_vol.mean()))
print("Volume range: ${:,.2f} to ${:,.2f}".format(top_total_vol.min(), top_total_vol.max()))

Top 10% total volume:
847 forms
$66,697.22 median volume
$111,086.93 mean volume
Volume range: $41,335.32 to $1,964,286.23


### 2. donations

In [35]:
# Ten forms with the highest total donation volume.
ranked_by_donations_vol = form_totals.sort_values(by='donations_vol', ascending=False)
ranked_by_donations_vol.iloc[:10]

Unnamed: 0,form,trans_vol,donations_vol,purchases_count,purchases_vol,registrations_count,registrations_vol
8169,988435,1964286.23,1933912.37,0,0.0,136,0.0
6412,971650,1937942.37,1908113.24,0,0.0,127,0.0
4700,956326,1836820.2,1812864.95,0,0.0,153,0.0
3230,944755,1448886.86,1428326.62,0,0.0,166,0.0
3426,946088,1268614.36,1237148.71,167,4112.85,3858,0.0
1744,931717,1187499.81,1171942.16,0,0.0,175,0.0
7067,977668,1166078.34,1145017.79,0,0.0,3288,0.0
6457,972054,1128296.74,1139748.34,0,0.0,0,0.0
4614,955588,1128316.65,906125.29,1745,200782.0,50,0.0
7619,983168,781010.34,780565.26,0,0.0,4253,0.0


In [46]:
# Distribution of forms by total donation volume.
# Every bucket is a half-open range [low, high); high=None marks the
# open-ended top bucket. Previously the last entry was a bare int and the
# loop depended on a bare `except:` catching the resulting TypeError, which
# would also have masked any unrelated error raised inside the `try`.
buckets = [(0, 250), (250, 500), (500, 1000), (1000, 2500),
           (2500, 5000), (5000, 10000), (10000, 20000),
           (20000, 50000), (50000, 100000), (100000, None)]
values = form_totals['donations_vol']
for low, high in buckets:
    in_bucket = values >= low
    if high is not None:
        in_bucket = in_bucket & (values < high)
    len_forms = int(in_bucket.sum())

    perc_forms = (len_forms / len(form_totals)) * 100.
    # The open-ended bucket is labelled by its lower bound only.
    label = low if high is None else (low, high)
    print("{}: {:,} forms ({:.2f}%)".format(label, len_forms, perc_forms))

(0, 250): 1,322 forms (15.61%)
(250, 500): 397 forms (4.69%)
(500, 1000): 598 forms (7.06%)
(1000, 2500): 1,140 forms (13.46%)
(2500, 5000): 1,091 forms (12.88%)
(5000, 10000): 1,187 forms (14.01%)
(10000, 20000): 1,176 forms (13.88%)
(20000, 50000): 977 forms (11.53%)
(50000, 100000): 388 forms (4.58%)
100000: 194 forms (2.29%)


In [73]:
# top 10%
len_all = len(form_totals)
top_10perc = len_all // 10  # number of forms in the top tenth

by_donations_vol = form_totals.sort_values(by='donations_vol', ascending=False)
top_10_donations_vol = by_donations_vol.iloc[:top_10perc]
top_donations = top_10_donations_vol['donations_vol']

# Summary statistics of donation volume within the top tenth of forms.
print("Top 10% donations volume:")
print("{:,} forms".format(len(top_10_donations_vol)))
print("${:,.2f} median volume".format(top_donations.median()))
print("${:,.2f} mean volume".format(top_donations.mean()))
print("Volume range: ${:,.2f} to ${:,.2f}".format(top_donations.min(), top_donations.max()))

Top 10% donations volume:
847 forms
$60,907.00 median volume
$102,789.96 mean volume
Volume range: $36,296.00 to $1,933,912.37


### 3. registrations (total participants vs total registration vol)

In [36]:
# Ten forms with the highest total registrations count.
ranked_by_reg_count = form_totals.sort_values(by='registrations_count', ascending=False)
ranked_by_reg_count.iloc[:10]

Unnamed: 0,form,trans_vol,donations_vol,purchases_count,purchases_vol,registrations_count,registrations_vol
1486,930157,470655.92,424047.42,0,0.0,10032,37730.0
6817,975258,307879.86,275833.85,0,0.0,5865,26420.0
7619,983168,781010.34,780565.26,0,0.0,4253,0.0
3426,946088,1268614.36,1237148.71,167,4112.85,3858,0.0
777,856152,388504.62,385362.4,0,0.0,3713,0.0
1273,928529,372229.72,360756.2,0,0.0,3553,8370.0
7067,977668,1166078.34,1145017.79,0,0.0,3288,0.0
2386,938456,377072.64,364593.23,6,584.0,3265,8631.0
869,887898,153246.11,80527.99,0,0.0,3169,71249.0
827,865391,163174.27,152772.07,0,0.0,2996,8400.0


In [53]:
# Distribution of forms by total registrations count.
# Every bucket is a half-open range [low, high); high=None marks the
# open-ended top bucket. Previously the last entry was a bare int and the
# loop depended on a bare `except:` catching the resulting TypeError, which
# would also have masked any unrelated error raised inside the `try`.
buckets = [(0, 10), (10, 25), (25, 50), (50, 100),
           (100, 250), (250, 500), (500, None)]
values = form_totals['registrations_count']
for low, high in buckets:
    in_bucket = values >= low
    if high is not None:
        in_bucket = in_bucket & (values < high)
    len_forms = int(in_bucket.sum())

    perc_forms = (len_forms / len(form_totals)) * 100.
    # The open-ended bucket is labelled by its lower bound only.
    label = low if high is None else (low, high)
    print("{}: {:,} forms ({:.2f}%)".format(label, len_forms, perc_forms))

(0, 10): 3,737 forms (44.12%)
(10, 25): 1,124 forms (13.27%)
(25, 50): 971 forms (11.46%)
(50, 100): 962 forms (11.36%)
(100, 250): 994 forms (11.74%)
(250, 500): 375 forms (4.43%)
500: 307 forms (3.62%)


In [76]:
# top 10%
len_all = len(form_totals)
top_10perc = len_all // 10  # number of forms in the top tenth

by_reg_count = form_totals.sort_values(by='registrations_count', ascending=False)
top_10_reg_count = by_reg_count.iloc[:top_10perc]
top_reg_counts = top_10_reg_count['registrations_count']

# Summary statistics of registrations count within the top tenth of forms.
print("Top 10% registrations count:")
print("{:,} forms".format(len(top_10_reg_count)))
print("{:,.2f} median count".format(top_reg_counts.median()))
print("{:,.2f} mean count".format(top_reg_counts.mean()))
print("Count range: {:,} to {:,}".format(top_reg_counts.min(), top_reg_counts.max()))

Top 10% registrations count:
847 forms
386.00 median count
611.29 mean count
Count range: 202 to 10,032


In [37]:
# Ten forms with the highest total registration volume.
ranked_by_reg_vol = form_totals.sort_values(by='registrations_vol', ascending=False)
ranked_by_reg_vol.iloc[:10]

Unnamed: 0,form,trans_vol,donations_vol,purchases_count,purchases_vol,registrations_count,registrations_vol
2205,937077,138830.22,20417.59,300,6635.0,1385,109839.0
2813,942299,208051.55,130343.23,0,0.0,2072,75798.75
869,887898,153246.11,80527.99,0,0.0,3169,71249.0
95,44075,185407.03,120067.03,0,0.0,2269,65355.0
449,522309,190543.24,125063.24,0,0.0,2518,65305.0
960,910470,194896.94,129596.94,0,0.0,2484,64920.0
6955,976491,288254.15,217054.34,0,0.0,2347,64240.0
1264,928462,197675.13,133830.13,0,0.0,2050,63620.0
6284,970231,471302.23,400859.07,0,0.0,2407,61950.0
338,153676,121539.0,61432.0,0,0.0,1674,60107.0


In [56]:
# Distribution of forms by total registration volume.
# Every bucket is a half-open range [low, high); high=None marks the
# open-ended top bucket. Previously the last entry was a bare int and the
# loop depended on a bare `except:` catching the resulting TypeError, which
# would also have masked any unrelated error raised inside the `try`.
buckets = [(0, 100), (100, 250), (250, 500), (500, 1000),
           (1000, 5000), (5000, 10000), (10000, 20000),
           (20000, 50000), (50000, None)]
values = form_totals['registrations_vol']
for low, high in buckets:
    in_bucket = values >= low
    if high is not None:
        in_bucket = in_bucket & (values < high)
    len_forms = int(in_bucket.sum())

    perc_forms = (len_forms / len(form_totals)) * 100.
    # The open-ended bucket is labelled by its lower bound only.
    label = low if high is None else (low, high)
    print("{}: {:,} forms ({:.2f}%)".format(label, len_forms, perc_forms))

(0, 100): 6,424 forms (75.84%)
(100, 250): 161 forms (1.90%)
(250, 500): 201 forms (2.37%)
(500, 1000): 304 forms (3.59%)
(1000, 5000): 872 forms (10.30%)
(5000, 10000): 262 forms (3.09%)
(10000, 20000): 137 forms (1.62%)
(20000, 50000): 90 forms (1.06%)
50000: 19 forms (0.22%)


In [77]:
# top 10%
len_all = len(form_totals)
top_10perc = len_all // 10  # number of forms in the top tenth

by_reg_vol = form_totals.sort_values(by='registrations_vol', ascending=False)
top_10_reg_vol = by_reg_vol.iloc[:top_10perc]
top_reg_vols = top_10_reg_vol['registrations_vol']

# Summary statistics of registration volume within the top tenth of forms.
print("Top 10% registrations volume:")
print("{:,} forms".format(len(top_10_reg_vol)))
print("${:,.2f} median volume".format(top_reg_vols.median()))
print("${:,.2f} mean volume".format(top_reg_vols.mean()))
print("Volume range: ${:,.2f} to ${:,.2f}".format(top_reg_vols.min(), top_reg_vols.max()))

Top 10% registrations volume:
847 forms
$6,140.00 median volume
$10,714.86 mean volume
Volume range: $2,660.00 to $109,839.00


### 4. purchases (total purchases vs total purchase vol)

In [38]:
# Ten forms with the highest total purchases count.
ranked_by_purchases_count = form_totals.sort_values(by='purchases_count', ascending=False)
ranked_by_purchases_count.iloc[:10]

Unnamed: 0,form,trans_vol,donations_vol,purchases_count,purchases_vol,registrations_count,registrations_vol
4882,958329,40211.0,2910.0,3151,37105.0,0,0.0
4614,955588,1128316.65,906125.29,1745,200782.0,50,0.0
4976,959038,195291.61,168445.62,1262,21899.91,202,0.0
1953,935036,96431.82,47290.47,889,46825.0,34,0.0
5730,965655,716592.82,676210.73,778,24197.16,3,0.0
5270,961896,28055.0,0.0,749,28055.0,0,0.0
939,908382,91258.77,59568.77,656,12380.0,807,19285.0
4929,958749,14546.75,807.0,626,13643.75,0,0.0
6640,973795,177464.55,155965.56,611,17072.23,159,0.0
5684,965257,7654.0,0.0,597,7290.0,0,0.0


In [57]:
# Distribution of forms by total purchases count.
# Every bucket is a half-open range [low, high); high=None marks the
# open-ended top bucket. Previously the last entry was a bare int and the
# loop depended on a bare `except:` catching the resulting TypeError, which
# would also have masked any unrelated error raised inside the `try`.
buckets = [(0, 10), (10, 25), (25, 50), (50, 100),
           (100, 250), (250, 500), (500, None)]
values = form_totals['purchases_count']
for low, high in buckets:
    in_bucket = values >= low
    if high is not None:
        in_bucket = in_bucket & (values < high)
    len_forms = int(in_bucket.sum())

    perc_forms = (len_forms / len(form_totals)) * 100.
    # The open-ended bucket is labelled by its lower bound only.
    label = low if high is None else (low, high)
    print("{}: {:,} forms ({:.2f}%)".format(label, len_forms, perc_forms))

(0, 10): 7,972 forms (94.12%)
(10, 25): 197 forms (2.33%)
(25, 50): 110 forms (1.30%)
(50, 100): 87 forms (1.03%)
(100, 250): 72 forms (0.85%)
(250, 500): 19 forms (0.22%)
500: 13 forms (0.15%)


In [39]:
# Ten forms with the highest total purchase volume.
ranked_by_purchases_vol = form_totals.sort_values(by='purchases_vol', ascending=False)
ranked_by_purchases_vol.iloc[:10]

Unnamed: 0,form,trans_vol,donations_vol,purchases_count,purchases_vol,registrations_count,registrations_vol
4614,955588,1128316.65,906125.29,1745,200782.0,50,0.0
7739,984066,94704.53,9176.0,58,84600.0,33,750.0
5453,963349,72780.0,0.0,450,72780.0,0,0.0
7680,983586,191652.7,118655.0,33,72250.0,2,0.0
6347,970802,72846.04,5185.14,64,60800.0,66,1450.0
7640,983309,62105.04,9125.0,98,52800.0,255,0.0
6961,976571,62190.16,10426.4,90,51700.0,0,0.0
1953,935036,96431.82,47290.47,889,46825.0,34,0.0
4882,958329,40211.0,2910.0,3151,37105.0,0,0.0
5535,963961,48449.48,13265.0,446,34914.0,0,0.0


In [67]:
# Distribution of forms by total purchase volume.
# Every bucket is a half-open range [low, high); high=None marks the
# open-ended top bucket. Previously the last entry was a bare int and the
# loop depended on a bare `except:` catching the resulting TypeError, which
# would also have masked any unrelated error raised inside the `try`.
buckets = [(0, 10), (10, 100), (100, 250), (250, 500),
           (500, 1000), (1000, 5000), (5000, None)]
values = form_totals['purchases_vol']
for low, high in buckets:
    in_bucket = values >= low
    if high is not None:
        in_bucket = in_bucket & (values < high)
    len_forms = int(in_bucket.sum())

    perc_forms = (len_forms / len(form_totals)) * 100.
    # The open-ended bucket is labelled by its lower bound only.
    label = low if high is None else (low, high)
    print("{}: {:,} forms ({:.2f}%)".format(label, len_forms, perc_forms))

(0, 10): 7,594 forms (89.66%)
(10, 100): 157 forms (1.85%)
(100, 250): 129 forms (1.52%)
(250, 500): 135 forms (1.59%)
(500, 1000): 131 forms (1.55%)
(1000, 5000): 214 forms (2.53%)
5000: 110 forms (1.30%)


### 5. effective fundraising (total vol / # registrants)

In [82]:
def calculate_effectiveness(r):
    """Return total volume divided by registrations count for one row.

    `r` must support key lookup for 'trans_vol' and 'registrations_count'.
    A registrations count of zero yields 0 instead of dividing by zero.
    """
    registrants = r['registrations_count']
    return r['trans_vol'] / registrants if registrants != 0 else 0

# Apply calculate_effectiveness to every row and store the result as a new column.
effectiveness_by_form = form_totals.apply(calculate_effectiveness, axis=1)
form_totals['fundraiser_effectiveness'] = effectiveness_by_form

In [83]:
# Ten forms with the highest volume per registration.
ranked_by_effectiveness = form_totals.sort_values(by='fundraiser_effectiveness', ascending=False)
ranked_by_effectiveness.iloc[:10]

Unnamed: 0,form,trans_vol,donations_vol,purchases_count,purchases_vol,registrations_count,registrations_vol,fundraiser_effectiveness
5449,963295,491528.66,486583.85,0,0.0,2,0.0,245764.33
5730,965655,716592.82,676210.73,778,24197.16,3,0.0,238864.273333
7680,983586,191652.7,118655.0,33,72250.0,2,0.0,95826.35
3837,949162,93167.38,91218.5,0,0.0,1,0.0,93167.38
1450,929903,75350.0,75350.0,0,0.0,1,0.0,75350.0
6922,976250,341610.0,329010.0,9,11400.0,5,0.0,68322.0
7276,979811,119914.98,116226.86,0,0.0,2,0.0,59957.49
2536,939944,95012.0,95012.0,0,0.0,2,0.0,47506.0
5494,963685,215342.95,212143.2,0,0.0,5,0.0,43068.59
686,828534,36296.0,36296.0,0,0.0,1,0.0,36296.0


In [86]:
# Distribution of forms by fundraiser effectiveness (volume per registration).
# Every bucket is a half-open range [low, high); high=None marks the
# open-ended top bucket. Previously the last entry was a bare int and the
# loop depended on a bare `except:` catching the resulting TypeError, which
# would also have masked any unrelated error raised inside the `try`.
# Forms with zero registrations are assigned 0 by calculate_effectiveness,
# so they are counted in the first bucket.
buckets = [(0, 5), (5, 50), (50, 100), (100, 250), (250, 500),
           (500, 1000), (1000, None)]
values = form_totals['fundraiser_effectiveness']
for low, high in buckets:
    in_bucket = values >= low
    if high is not None:
        in_bucket = in_bucket & (values < high)
    len_forms = int(in_bucket.sum())

    perc_forms = (len_forms / len(form_totals)) * 100.
    # The open-ended bucket is labelled by its lower bound only.
    label = low if high is None else (low, high)
    print("{}: {:,} forms ({:.2f}%)".format(label, len_forms, perc_forms))

(0, 5): 2,538 forms (29.96%)
(5, 50): 889 forms (10.50%)
(50, 100): 1,140 forms (13.46%)
(100, 250): 1,728 forms (20.40%)
(250, 500): 772 forms (9.11%)
(500, 1000): 638 forms (7.53%)
1000: 765 forms (9.03%)


In [89]:
# top 10%
len_all = len(form_totals)
top_10perc = len_all // 10  # number of forms in the top tenth

by_effectiveness = form_totals.sort_values(by='fundraiser_effectiveness', ascending=False)
top_10_fund_eff = by_effectiveness.iloc[:top_10perc]
top_eff = top_10_fund_eff['fundraiser_effectiveness']

# Summary statistics of volume per registration within the top tenth of forms.
print("Top 10% fundraisers effectiveness ($ per fundraiser):")
print("{:,} forms".format(len(top_10_fund_eff)))
print("${:,.2f}/fundraiser median volume".format(top_eff.median()))
print("${:,.2f}/fundraiser mean volume".format(top_eff.mean()))
print("Volume range: ${:,.2f}/fundraiser to ${:,.2f}/fundraiser".format(top_eff.min(), top_eff.max()))

Top 10% fundraisers effectiveness ($ per fundraiser):
847 forms
$1,688.50/fundraiser median volume
$4,532.03/fundraiser mean volume
Volume range: $897.22/fundraiser to $245,764.33/fundraiser
