* ~~All-time number of donations made in our system (we publicly say we were founded in 2007, so I guess we start there)~~
* If we know from which countries those donations originated, could you tell us how many countries we’ve processed donations from?
* ~~What areas of the US did highest donations come from? For example, did donors in Florida, New York, and California give the most money per donation?~~
* ~~What types of organizations have seen the highest percentage of total donations on our platform (I’m guessing faith-based is #1)?~~
* ~~A comparison of percentage of donations made by text to donate vs. web (bonus stat requested by Heidi/Jess)~~
* ~~And just in case we do need it... can we get $ amount totals for donations each year 2007-2020?~~

In [3]:
import pandas as pd
import sys
sys.path.insert(1, '../../scripts/')
from s3_support import *

In [14]:
# lifetime totals
q = "select count(id) as count, sum(amount) as vol from transactions where status='A'"
total = redshift_query_read(q)
print("lifetime totals: {:,} transactions, ${:,} total raised".format(total['count'].iloc[0], total['vol'].iloc[0]))

lifetime totals: 8,006,802 transactions, $1,233,534,917.75067 total raised


In [7]:
# count & vol by year
q = '''select 
            date_trunc('year', date) as year, sum(amount) as vol, count(id) as count 
        from transactions 
        where status='A'
        group by date_trunc('year', date)'''
annual = redshift_query_read(q)

In [55]:
print("| year | volume | count |")
print("|------|------|------|")
for _, r in annual.sort_values('year', ascending=False).iterrows():
    print("| {} | ${:,} | {:,} |".format(r['year'], r['vol'], r['count']))

| year | volume | count |
|------|------|------|
| 2020-01-01 00:00:00 | $347,203,494.939985 | 1,307,322 |
| 2019-01-01 00:00:00 | $192,140,433.999998 | 1,645,130 |
| 2018-01-01 00:00:00 | $162,543,032.439998 | 1,292,738 |
| 2017-01-01 00:00:00 | $131,511,448.099997 | 980,762 |
| 2016-01-01 00:00:00 | $104,217,801.369998 | 734,457 |
| 2015-01-01 00:00:00 | $96,674,817.0600013 | 660,841 |
| 2014-01-01 00:00:00 | $72,019,085.4800001 | 501,972 |
| 2013-01-01 00:00:00 | $50,111,817.3099997 | 364,066 |
| 2012-01-01 00:00:00 | $31,639,310.6199999 | 219,660 |
| 2011-01-01 00:00:00 | $19,947,747.9800001 | 135,252 |
| 2010-01-01 00:00:00 | $14,214,071.11 | 90,805 |
| 2009-01-01 00:00:00 | $9,640,914.20000002 | 63,361 |
| 2008-01-01 00:00:00 | $1,670,943.14 | 10,365 |
| 2007-01-01 00:00:00 | $0.0 | 71 |


In [25]:
# national source
q = "select sum(amount) as vol, count(id) as count, state from transactions where status='A' group by state"
geo_source = redshift_query_read(q)
geo_source['avg_vol'] = geo_source['vol'] / geo_source['count']

In [60]:
print("| state | volume | count | per transaction |")
print("|-------|--------|-------|--------|")
for _, r in geo_source[geo_source['state']!='nan'].sort_values('vol', ascending=False).head(10).iterrows():
    print("| {} | ${:,} | {:,} | ${:,} |".format(r['state'], r['vol'], r['count'], r['avg_vol']))

| state | volume | count | per transaction |
|-------|--------|-------|--------|
| FL | $213,752,134.439999 | 809,020 | $264.21118691750394 |
| NY | $137,718,679.229999 | 324,043 | $425.00124745789606 |
| CA | $113,375,337.729997 | 835,175 | $135.75039689884994 |
| TX | $64,543,420.5299959 | 461,885 | $139.73915699794517 |
| CO | $39,327,435.3700001 | 255,304 | $154.04159500047044 |
| WA | $33,781,407.5599997 | 223,748 | $150.97970734933807 |
| PA | $32,007,927.8399998 | 289,931 | $110.3984321786901 |
| IL | $29,777,839.2499999 | 243,589 | $122.24623956746774 |
| OH  | $27,522,680.4699996 | 200,002 | $137.6120262297357 |
| GA | $25,817,724.2499998 | 180,777 | $142.815315277938 |


In [61]:
print("| state | volume | count | per transaction |")
print("|-------|--------|-------|--------|")
for _, r in geo_source[geo_source['state']!='nan'].sort_values('count', ascending=False).head(10).iterrows():
    print("| {} | ${:,} | {:,} | ${:,} |".format(r['state'], r['vol'], r['count'], r['avg_vol']))

| state | volume | count | per transaction |
|-------|--------|-------|--------|
| CA | $113,375,337.729997 | 835,175 | $135.75039689884994 |
| FL | $213,752,134.439999 | 809,020 | $264.21118691750394 |
| TX | $64,543,420.5299959 | 461,885 | $139.73915699794517 |
| NY | $137,718,679.229999 | 324,043 | $425.00124745789606 |
| PA | $32,007,927.8399998 | 289,931 | $110.3984321786901 |
| CO | $39,327,435.3700001 | 255,304 | $154.04159500047044 |
| IL | $29,777,839.2499999 | 243,589 | $122.24623956746774 |
| WA | $33,781,407.5599997 | 223,748 | $150.97970734933807 |
| OH  | $27,522,680.4699996 | 200,002 | $137.6120262297357 |
| VA | $22,752,701.49 | 195,432 | $116.42259962544516 |


In [68]:
print("| state | volume | count | per transaction |")
print("|-------|--------|-------|--------|")
for _, r in geo_source[geo_source['count']>4000].sort_values('avg_vol', ascending=False).head(12).iterrows():
    print("| {} | ${:,} | {:,} | ${:,} |".format(r['state'], r['vol'], r['count'], r['avg_vol']))

| state | volume | count | per transaction |
|-------|--------|-------|--------|
| NY | $137,718,679.229999 | 324,043 | $425.00124745789606 |
| FL | $213,752,134.439999 | 809,020 | $264.21118691750394 |
| DC | $1,645,230.75 | 9,742 | $168.88018374050503 |
| KS | $10,050,336.54 | 61,604 | $163.14422018050774 |
| AR | $4,915,395.34 | 31,778 | $154.67919126439674 |
| CO | $39,327,435.3700001 | 255,304 | $154.04159500047044 |
| LA | $7,341,863.20000003 | 48,511 | $151.3442971697147 |
| WA | $33,781,407.5599997 | 223,748 | $150.97970734933807 |
| MS | $2,869,671.48 | 19,183 | $149.5945097221498 |
| Dist | $5,792,354.90000003 | 39,196 | $147.77923512603405 |
| OK | $14,103,514.5099997 | 96,554 | $146.06867152059675 |
| OR | $19,564,261.5499997 | 134,619 | $145.33061120643964 |


In [33]:
# transaction source (frontend, sms, p2p)
q = "select sum(amount) as vol, count(id) as count, source from transactions where status='A' group by source"
trans_source = redshift_query_read(q)
trans_source['avg_vol'] = trans_source['vol'] / trans_source['count']

In [69]:
trans_source['vol_f'] = trans_source['vol'].apply(lambda x: "${:,}".format(x))

print("| source | volume | count | per transaction |")
print("|-------|--------|-------|--------|")
for _, r in trans_source.sort_values('vol', ascending=False).iterrows():
    print("| {} | ${:,} | {:,} | ${:,} |".format(r['source'], r['vol'], r['count'], r['avg_vol']))

| source | volume | count | per transaction |
|-------|--------|-------|--------|
| don_form | $920,518,010.040167 | 4,965,805 | $185.37135671661835 |
| vt | $104,655,081.369996 | 784,376 | $133.42463483073934 |
| p2p | $77,736,702.2399969 | 1,108,679 | $70.11651004483434 |
| mobile | $71,898,712.7099981 | 616,510 | $116.62213542359102 |
| kiosk | $40,426,606.4900005 | 368,272 | $109.77377180453713 |
| mobilevt | $9,076,305.1 | 62,277 | $145.74088507795815 |
| sms | $6,643,355.34000058 | 75,616 | $87.85647667161156 |
| givi | $1,798,475.04 | 13,469 | $133.52699086791893 |
| fb | $781,669.420000001 | 11,798 | $66.25440074588921 |


In [46]:
# org segment
df_orgs = get_dataframe_from_file("qgiv-stats-data", "organizations.names.csv")
df_orgs = df_orgs[['id', 'segment']]

q = "select org, sum(amount) as vol, count(id) as count from transactions where status='A' group by org"
trans_org = redshift_query_read(q)

In [70]:
segment = df_orgs.merge(trans_org, left_on="id", right_on="org")

print("| segment | volume | count |")
print("|-------|--------|-------|")
for _, r in segment.groupby('segment')[['vol', 'count']].sum().reset_index().sort_values('vol', ascending=False).iterrows():
    print("| {} | ${:,} | {:,} |".format(r['segment'], r['vol'], r['count']))

| segment | volume | count |
|-------|--------|-------|
| X - Religion, Spiritual Development  | $260,640,098.62000024 | 1,761,956 |
| Y - Mutual/Membership Benefit Organizations | $201,785,019.94000003 | 10,346 |
| P - Human Services  | $156,282,880.51999986 | 1,207,273 |
| B - Educational Institutions  | $106,306,067.94999999 | 647,799 |
| O - Youth Development  | $67,575,607.10000005 | 611,571 |
| Q - International, Foreign Affairs, and National Security  | $51,326,693.31999999 | 526,870 |
| L - Housing, Shelter  | $43,509,793.19999998 | 347,283 |
| T - Philanthropy, Voluntarism, and Grantmaking  | $41,039,839.6099999 | 234,858 |
| C - Environmental Advocacy and Protection | $30,610,024.710002203 | 255,302 |
| G - Disease, Disorders, Medical Disciplines  | $27,807,099.500000272 | 297,146 |
| E - Health, General and Rehabilitative  | $27,742,610.100000016 | 203,231 |
| A - Arts, Culture, and Humanities  | $25,062,932.780000005 | 224,648 |
| Z - Unknown  | $24,929,202.160000023 | 194,