In [1]:
import sys, datetime
sys.path.append("../../scripts/")
from s3_support import *

import pandas as pd
import numpy as np

# load data

## performance & YoY

In [100]:
q = '''select
            org,
            date_trunc('month', date) as month_year,
            sum(amount) as vol,
            count(id) as count
        from transactions
        where status='A' 
        group by org, date_trunc('month', date)'''
perf = redshift_query_read(q, schema='production')

In [101]:
perf.tail(3)

Unnamed: 0,org,month_year,vol,count
195350,444602,2022-01-01,262.5,1
195351,447803,2022-01-01,1.0,1
195352,448242,2022-02-01,1.0,1


In [102]:
org_perf = None
for org in perf['org'].unique():
    this_org = perf[perf['org']==org].copy()
    if len(this_org) > 12:
        this_org['month'] = this_org['month_year'].dt.month
        this_org['year'] = this_org['month_year'].dt.year
        
        this_org = this_org.pivot(index='year', columns=['month'], values=['vol']).pct_change().reset_index()
        this_org['org'] = org
        
        if org_perf is None:
            org_perf = this_org
        else:
            org_perf = org_perf.append(this_org)

In [103]:
org_perf.columns = ['org', '1', '2', '3', '4', '5',
                   '6', '7', '8', '9', '10', '11', 
                    '12', 'year']
org_perf.tail()

Unnamed: 0,org,1,2,3,4,5,6,7,8,9,10,11,12,year
1,444951,,,,,,,,,,,,0.0,2020
2,444951,0.0,0.0,0.0,0.0,0.0,1.828,,-0.991121,2.178431,1.998889,,0.0,2021
3,444951,2.354978,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,2022
0,446410,,,,,,,,,,,,,2021
1,446410,20.0,1.377778,9.599333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2022


## segment

In [2]:
# segments
org_export = get_dataframe_from_file('qgiv-stats-data', 'org_download.csv')
org_export = org_export[['Id', 'Segment', 'Pricing Package']]
org_export.columns = ['org', 'segment', 'pricing_package']
org_export['pricing_package'] = org_export['pricing_package'].apply(lambda x: str(x).replace('Package Pricing - ', ''))
org_export.head(3)

Unnamed: 0,org,segment,pricing_package
0,441789,Z - Unknown,Start
1,443044,,Start
2,1045,,Legacy Pricing


In [10]:
org_export['pricing_package'] = org_export['pricing_package'].apply(lambda x: str(x).replace('Package Pricing - ', ''))

max_seg = 0
for s in org_export['segment'].unique():
    if len(str(s)) > max_seg:
        max_seg = len(s)
max_pack = 0
for p in org_export['pricing_package'].unique():
    if len(str(p)) > max_pack:
        max_pack = len(p)
        
max_seg, max_pack

(92, 74)

## merge

In [105]:
seg_counts = seg_perf.groupby('segment')['org'].nunique().reset_index()
seg_counts.columns = ['segment', 'org_count']

In [106]:
seg_perf = org_perf.merge(org_export[['org', 'segment']], on='org')

seg_perf_growth = seg_perf.replace([np.inf], np.nan).dropna().drop('org', axis=1).groupby('segment').agg(['mean', 'median', 'std']).reset_index()

In [107]:
seg_perf_growth = seg_perf_growth.drop('year', axis=1).merge(seg_counts, on='segment')

  obj = obj._drop_axis(labels, axis, level=level, errors=errors)
  validate=validate,


In [109]:
#seg_perf_growth[seg_perf_growth['org_count']>25].to_csv("seg_perf.csv", index=False)
#!rm seg_perf.csv

# exploration

## client growth by segment

In [164]:
seg_perf.groupby(['year', 'segment'])['org'].count().reset_index().pivot(index='segment', columns='year', values='org').fillna(0)

year,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022
segment,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
"A - Arts, Culture, and Humanities",0.0,2.0,8.0,15.0,22.0,30.0,36.0,55.0,66.0,70.0,80.0,107.0,137.0,147.0,161.0,160.0,140.0
B - Educational Institutions,0.0,1.0,13.0,27.0,46.0,69.0,80.0,116.0,138.0,151.0,173.0,196.0,248.0,274.0,297.0,301.0,254.0
C - Environmental Advocacy and Protection,0.0,0.0,2.0,6.0,8.0,11.0,16.0,22.0,34.0,40.0,43.0,51.0,67.0,77.0,88.0,85.0,76.0
D - Animal,0.0,0.0,0.0,2.0,8.0,11.0,15.0,22.0,26.0,38.0,41.0,54.0,68.0,78.0,84.0,81.0,73.0
"E - Health, General and Rehabilitative",0.0,2.0,10.0,15.0,23.0,28.0,33.0,41.0,55.0,65.0,66.0,82.0,104.0,115.0,133.0,136.0,117.0
F - Mental Health and Crisis Intervention,0.0,0.0,3.0,6.0,8.0,13.0,15.0,23.0,28.0,30.0,42.0,51.0,59.0,73.0,78.0,74.0,69.0
"G - Disease, Disorders, Medical Disciplines",0.0,1.0,3.0,11.0,16.0,19.0,25.0,36.0,48.0,53.0,62.0,63.0,81.0,87.0,84.0,82.0,66.0
H - Medical Research,0.0,0.0,0.0,2.0,2.0,3.0,7.0,8.0,9.0,8.0,11.0,9.0,13.0,13.0,15.0,15.0,14.0
"I - Crime, Legal Related",0.0,0.0,1.0,5.0,6.0,9.0,11.0,17.0,17.0,18.0,22.0,27.0,33.0,39.0,39.0,37.0,31.0
"J - Employment, Job Related",0.0,0.0,0.0,0.0,1.0,2.0,3.0,5.0,6.0,5.0,6.0,8.0,15.0,20.0,19.0,21.0,17.0


We saw a decrease or stagnant growth in processing clients in nearly every segment from 2020 to 2021. There are several exceptional years in which individual segments decreased YoY, but 2020 is the only year in which we have seen widespread reduction in active clients.

__We saw very moderate growth in only 5 segments of 1 to 4 clients and decrease or stagnation in 24 segments.__

Despite the decrease in processing clients, we saw a decrease in YoY segment processing volume in only 4 sectors (agriculture, civil rights, social science, and unknown). This leads me to believe that the lost clients were likely to be struggling, low volume clients that didn't have the existing support to survive the shifting spending priorities of donors during the 2020 pandemic lockdowns.

_The civil rights sector went from 1 processing client to 0, and social science went from 9 to 7. In both cases, they very small segments to begin with so the decrease in fundraising is not meaningful. Additionally, the unknown sector has historically experienced extreme volatility in YoY processing volume growth and decreasing processing client count with 2021 being the first increase since 2016._

## YoY volume growth by segment

In [167]:
q = "select org, date_trunc('year', date) as year, sum(amount) from transactions where status='A' group by org, date_trunc('year', date)"
df_ann = redshift_query_read(q, schema='production')

In [170]:
df_ann = df_ann.merge(org_export[['org', 'segment']], on='org')
agg_ann = df_ann.groupby(['year', 'segment'])['sum'].sum().reset_index()
agg_ann['year'] = agg_ann['year'].dt.year

In [174]:
ann_seg_growth = None

for seg in agg_ann['segment'].unique().tolist():
    _this_seg = agg_ann[agg_ann['segment']==seg].sort_values('year').copy()
    _this_seg['growth'] = _this_seg['sum'].pct_change()
    
    if ann_seg_growth is None:
        ann_seg_growth = _this_seg
    else:
        ann_seg_growth = ann_seg_growth.append(_this_seg)

In [177]:
ann_seg_growth.dropna().pivot(index='segment', columns='year', values='growth')

year,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022
segment,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
"A - Arts, Culture, and Humanities",,8.129329,1.360504,0.774094,0.463743,0.834438,0.256337,0.399068,0.280129,-0.04212,0.232224,0.292611,0.230627,0.310814,0.313256,-0.744597
B - Educational Institutions,,56046.8125,5.04344,0.471987,0.462621,0.250129,0.482641,0.440683,0.37242,0.184921,0.033472,0.236198,0.130251,0.244819,0.668315,-0.756196
C - Environmental Advocacy and Protection,,,6.258628,0.624837,0.893233,0.275071,4.229208,1.285034,1.933851,-0.811998,-0.356866,0.157372,0.298106,0.153637,0.41405,-0.801
D - Animal,,,,1.849525,0.735637,0.676706,0.723339,0.510662,0.344751,0.359606,0.488363,0.679563,0.740532,0.107678,0.129862,-0.752508
"E - Health, General and Rehabilitative",,16.801642,0.518791,2.024441,-0.094273,0.169508,1.321615,0.163811,0.185161,0.22519,0.312095,0.376695,0.255261,0.045182,0.49368,-0.786892
F - Mental Health and Crisis Intervention,,,10.992793,0.957387,0.276718,0.925959,0.531916,0.417404,0.086478,0.693348,0.816715,0.145683,0.088606,0.330218,0.126174,-0.702704
"G - Disease, Disorders, Medical Disciplines",,10.379482,10.33628,0.866379,1.386759,-0.07944,0.917014,0.362761,0.593416,0.211976,0.372802,0.431194,0.315422,-0.183602,0.352099,-0.848077
H - Medical Research,,,,2.598507,0.412664,1.541447,0.72204,0.030535,0.052677,0.056032,0.420833,0.432926,0.511529,0.434226,0.166076,-0.81757
"I - Crime, Legal Related",,,1.538143,0.245036,0.365842,0.831556,0.378689,0.525033,0.006959,-0.056402,0.096147,2.473251,0.295008,0.543317,0.055685,-0.901082
"J - Employment, Job Related",,,,,0.531794,2.882185,0.523918,0.370669,0.118533,0.256427,-0.304525,0.713843,0.656628,0.601079,0.313817,-0.765523


## YoY monthly growth variance 

In [113]:
seg_perf_growth = seg_perf_growth[seg_perf_growth['org_count']>25]

In [128]:
cols = ['segment', 'segment_x', '1 mean', '1 median',
       '1 std', '2 mean', '2 median', '2 std',
       '3 mean', '3 median', '3 std', '4 mean',
       '4 median', '4 std', '5 mean', '5 median',
       '5 std', '6 mean', '6 median', '6 std', '7 mean',
       '7 median', '7 std', '8 mean', '8 median', '8 std',
       '9 mean', '9 median', '9 std', '10 mean',
       '10 median', '10 std', '11 mean', '11 median',
       '11 std', '12 mean', '12 median', '12 std',
       'org_count']

seg_perf_growth.columns = cols

In [134]:
seg_perf_growth.drop('segment_x', axis=1, inplace=True)
for i in range(0, 12):
    k = str(i + 1)
    seg_perf_growth["{} std ratio".format(k)] = seg_perf_growth["{} std".format(k)] / seg_perf_growth["{} mean".format(k)]

In [139]:
cols = [c for c in seg_perf_growth.columns if 'ratio' in c]
seg_perf_growth[['segment'] + cols]

Unnamed: 0,segment,1 std ratio,2 std ratio,3 std ratio,4 std ratio,5 std ratio,6 std ratio,7 std ratio,8 std ratio,9 std ratio,10 std ratio,11 std ratio,12 std ratio
0,"A - Arts, Culture, and Humanities",8.730967,17.369548,11.700362,7.867356,11.29625,7.010456,7.537804,12.410637,6.707316,12.239668,7.310259,9.699627
1,B - Educational Institutions,15.048054,15.86478,17.818942,8.246417,6.249659,16.5377,22.257483,17.090118,8.108593,14.155018,12.821819,8.55777
2,C - Environmental Advocacy and Protection,5.645543,5.531196,9.16623,7.590098,7.185436,5.594973,7.761953,8.76808,3.967966,3.955315,6.313316,6.7008
3,D - Animal,17.97518,4.476885,6.41426,5.206676,8.471098,4.621288,4.2919,9.239749,10.233214,8.252196,6.627622,12.28712
4,"E - Health, General and Rehabilitative",7.555182,22.356762,5.782638,6.492267,7.484883,9.444761,7.406784,11.884553,6.179892,6.825691,8.955534,11.145131
5,F - Mental Health and Crisis Intervention,6.269623,8.250858,6.371697,6.652166,6.202695,5.069944,11.498562,7.59858,12.48913,7.717786,6.736881,6.259933
6,"G - Disease, Disorders, Medical Disciplines",6.499843,6.803748,6.944474,8.772215,8.112298,4.268577,5.281413,9.226145,7.201736,6.208737,4.625176,6.048639
8,"I - Crime, Legal Related",9.665112,7.569549,7.745695,6.140628,8.984107,3.577258,5.724444,4.110522,4.416835,3.941901,9.992024,10.189306
10,"K - Agriculture, Food, Nutrition",7.208685,5.372888,5.024004,4.365603,9.406938,4.38583,4.760773,4.70962,8.946244,4.885938,7.450999,6.412126
11,"L - Housing, Shelter",4.182132,5.934186,10.041445,8.277455,7.851395,12.83306,5.320232,7.860633,4.604334,4.616319,5.465742,11.125866


In [140]:
seg_perf_growth[cols].mean()

1 std ratio     11.726032
2 std ratio      9.627251
3 std ratio     10.557700
4 std ratio      8.381615
5 std ratio      8.642326
6 std ratio      7.406420
7 std ratio      8.543963
8 std ratio      8.402188
9 std ratio      7.454115
10 std ratio     8.160549
11 std ratio     8.455347
12 std ratio     8.217386
dtype: float64

## Agriculture monthly growth

It appears that agg/food is the most consistent segment and primarily during harvest season (6 - 10). We will look individual year monthly growth.

In [149]:
agg_orgs = org_export[org_export['segment']=='K - Agriculture, Food, Nutrition']['org'].tolist()
org_perf[org_perf['org'].isin(agg_orgs)].replace(np.inf, np.nan).dropna().groupby('year').mean()

Unnamed: 0_level_0,org,1,2,3,4,5,6,7,8,9,10,11,12
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2011,79.0,-0.970991,-0.626364,-0.307159,3.130841,0.34058,6.762712,0.358382,9.306407,0.364103,4.870588,2.183636,4.183784
2012,479.5,2.54954,2.22476,1.927448,0.078964,4.458859,0.434309,0.957329,-0.055562,5.630962,1.238269,-0.112165,-0.216373
2013,479.5,0.76922,0.222561,1.675124,6.826695,1.138191,0.312571,2.150023,0.406437,2.937384,1.014302,0.328677,0.746744
2014,2458.0,-0.400507,1.626267,-0.05473,-0.417843,5.553564,0.123615,-0.400424,0.001059,-0.302151,-0.182601,1.691217,-0.176166
2015,10509.166667,1.562527,0.258642,-0.198903,-0.13935,0.231497,0.462141,9.984004,6.343889,-0.01696,0.021509,0.99287,-0.017538
2016,51185.375,1.934084,-0.055632,2.722854,6.017123,0.253727,1.852663,1.31304,0.574932,0.071781,0.784797,-0.132491,1.508993
2017,113040.9,1.312633,6.790532,4.057973,3.572973,0.406267,-0.174117,4.537346,0.328028,7.551072,0.439646,-0.008876,-0.264577
2018,204953.571429,0.778997,1.494972,7.581062,1.776368,0.253277,0.249362,0.386762,0.360278,2.359478,1.397579,8.893382,-0.181318
2019,279043.857143,0.895507,2.230294,0.580745,0.067041,0.023073,0.58708,0.341387,0.678809,0.241122,0.93202,0.016277,0.322194
2020,320023.357143,21.808139,6.612699,4.65025,8.399348,31.840376,4.837354,2.009589,2.459818,16.924966,1.867603,2.475703,2.521264


In [150]:
org_perf[org_perf['org'].isin(agg_orgs)].replace(np.inf, np.nan).dropna().groupby('year')['org'].count()

year
2011     1
2012     2
2013     2
2014     5
2015     6
2016     8
2017    10
2018    14
2019    21
2020    28
2021    37
2022    50
Name: org, dtype: int64

The growth during harvest season is consistently low, accounting for the low variance. There are a few spikes (7/2015 at nearly 10x, for instance) but research does not support any decrease in food supply or harvest yield to support a rally in public support.

* [Cornell food prices 2013-2015](https://downloads.usda.library.cornell.edu/usda-esmis/files/k35694332/2227ms228/6h440w05z/CropValuSu-02-24-2016.pdf)

2020 is a notable exception to the pattern with consistent high growth of greater than 200%. Attribution to the pandemic lockdowns is not a sufficient explanation, however, as the second strongest growth month of 2020 was January (2,180%, the 2nd greatest growth month of all time), before the lockdowns began. The peak months were May, January, and September. These months are also the strongest growth months within this segment in all time of Qgiv processing by a wide margin. Furthermore, we did not see negative growth in any of these months for 2021 despite maintaining active client count from 2020 to 2021, so I am doubtful that the pandemic is the sole - or possibly even a strong - determining factor.

Agriculture saw a 435% YoY growth in processing volume from 2019 to 2020; -33% from 2020 to 2021; 2018 and 2019 saw back to back years of around 24% growth.

## Religious

Religious has some of the most unstable growth rates of any segment and coincidentally the largest org sample size

In [154]:
rel_orgs = org_export[org_export['segment']=='X - Religion, Spiritual Development ']['org'].tolist()
org_perf[org_perf['org'].isin(rel_orgs)].replace(np.inf, np.nan).dropna().groupby('year').mean()

Unnamed: 0_level_0,org,1,2,3,4,5,6,7,8,9,10,11,12
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2009,45.666667,-0.192806,1.306644,-0.273833,0.043756,-0.270401,0.106925,0.01832,0.205501,0.404492,0.354421,0.315669,0.705966
2010,150.5,16.756554,0.043767,2.475246,2.279575,0.321114,-0.029533,0.314457,-0.210622,0.064871,10.237972,5.092089,0.662715
2011,351.52,0.289994,0.604519,2.500467,1.42809,7.079065,10.553157,3.46876,5.467996,3.596488,1.306226,1.502536,1.212946
2012,526.195652,25.22744,3.500787,0.765971,1.450025,2.321497,1.534101,1.264541,1.045223,1.797836,27.485654,0.730317,1.18295
2013,762.287879,1.751829,0.80862,0.658882,1.612033,1.049541,1.925888,0.55452,0.407407,0.315735,0.516568,0.802349,0.243204
2014,2894.474747,33.306999,1.018156,1.06177,0.709695,1.79397,0.669552,1.498599,1.88437,1.14742,1.489998,2.733085,1.13541
2015,14414.626506,96.708471,2.232464,2.379128,1.763853,1.860506,0.582183,0.622564,0.572084,0.689416,0.80478,0.332465,0.283101
2016,36187.236364,6.525755,3.092398,0.540134,0.886643,0.595097,0.67857,6.753539,0.458996,0.886448,0.280221,0.397378,0.450777
2017,80956.611336,0.682583,1.308136,0.888259,1.157971,0.989447,0.896348,0.530158,0.584268,0.519224,1.28965,0.236626,0.550468
2018,127817.703846,2.080442,0.627448,0.619589,0.976255,0.4846,1.042969,0.498686,0.774542,0.442272,0.4966,0.661066,0.208833


In [155]:
org_perf[org_perf['org'].isin(rel_orgs)].replace(np.inf, np.nan).dropna().groupby('year')['org'].count()

year
2009      3
2010      8
2011     25
2012     46
2013     66
2014     99
2015    166
2016    220
2017    247
2018    260
2019    273
2020    288
2021    297
2022    303
Name: org, dtype: int64