In [2]:
import pandas as pd
import numpy as np

import sys
sys.path.insert(1, '../../../scripts/')
from s3_support import *

# Load data

In [21]:
# Per-organization monthly totals (row count and summed amount) for
# transactions whose status is 'A', ordered newest month first.
q = '''select 
        org, 
        date_trunc('month', date) as month, 
        count(id) as count, 
        sum(amount) as volume
    from transactions
        where status='A'
        group by org, date_trunc('month', date)
        order by date_trunc('month', date) desc'''

# Parse the month column to datetimes and derive the calendar month number
# (1-12) used later to compare like-for-like months across years.
df = (
    redshift_query_read(q)
    .assign(month=lambda monthly: pd.to_datetime(monthly['month']))
    .assign(month_cat=lambda monthly: monthly['month'].dt.month)
)

In [22]:
# Organization lookup table; later cells read its 'id' and 'segment' columns.
# NOTE(review): the two arguments are presumably an S3 bucket and object key —
# confirm against s3_support.get_dataframe_from_file.
orgs = get_dataframe_from_file("qgiv-stats-data", "organizations.names.csv")

In [23]:
def get_segment_for_org(org, orgs_df=None):
    """Return the segment recorded for an organization id.

    Parameters
    ----------
    org : scalar
        Organization id, compared against the ``id`` column.
    orgs_df : pandas.DataFrame, optional
        Lookup table with ``id`` and ``segment`` columns. Defaults to the
        notebook-level ``orgs`` frame, so existing single-argument calls
        (e.g. ``Series.apply(get_segment_for_org)``) keep working.

    Returns
    -------
    The ``segment`` value of the first row whose ``id`` equals ``org``,
    or ``None`` when no row matches.
    """
    lookup = orgs if orgs_df is None else orgs_df
    # Build the boolean mask once instead of once for the length check and
    # again for the value fetch.
    matches = lookup.loc[lookup['id'] == org, 'segment']
    if matches.empty:
        return None
    return matches.iloc[0]
    
# Resolve each distinct org once, then broadcast the result to every row.
# df has one row per (org, month), so calling the lookup per row repeats the
# same scan of `orgs` many times for each organization.
segment_by_org = {org_id: get_segment_for_org(org_id) for org_id in df['org'].unique()}
df['segment'] = df['org'].map(segment_by_org)

In [81]:
# Number of (org, month) rows returned by the query.
len(df)

138311

In [82]:
# Number of rows in `orgs` per segment, most frequent first.
orgs['segment'].value_counts().reset_index()

Unnamed: 0,index,segment
0,P - Human Services,703
1,"X - Religion, Spiritual Development",647
2,B - Educational Institutions,571
3,Z - Unknown,460
4,O - Youth Development,342
5,"A - Arts, Culture, and Humanities",321
6,"E - Health, General and Rehabilitative",211
7,"T - Philanthropy, Voluntarism, and Grantmaking",194
8,"S - Community Improvement, Capacity Building",164
9,"G - Disease, Disorders, Medical Disciplines",163


# Segment-wide monthly performance, year over year (YoY)

In [25]:
# Roll the org-level rows up to one row per (segment, month), summing both
# the volume and the transaction count.
segment_grpd = (
    df.groupby(['segment', 'month'], as_index=False)[['volume', 'count']]
    .sum()
)

In [28]:
# Tag every row with its calendar month number, then keep only the rows
# that recorded a positive volume.
segment_grpd = segment_grpd.assign(month_cat=segment_grpd['month'].dt.month)
segment_grpd = segment_grpd.loc[segment_grpd['volume'].gt(0)]

In [29]:
# Peek at the first rows of the per-segment monthly table.
segment_grpd.head(3)

Unnamed: 0,segment,month,volume,count,month_cat
9,"A - Arts, Culture, and Humanities",2008-09-01,9986.0,193,9
10,"A - Arts, Culture, and Humanities",2008-10-01,8189.42,118,10
11,"A - Arts, Culture, and Humanities",2008-11-01,8431.42,138,11


## Volume

In [124]:
# Year-over-year VOLUME growth per segment: for January-April, compare the
# 2020 growth of each calendar month against the mean growth that the same
# calendar month showed in earlier years.
segment_frames = []
for segment in segment_grpd['segment'].unique():
    # slice non-zero segment observations
    this_segment = segment_grpd[(segment_grpd['segment']==segment)&(segment_grpd['volume']>0)].copy()

    # Growth relative to the previous observation of the same calendar month.
    # GroupBy.pct_change returns a Series on the original row index, so the
    # assignment aligns regardless of how groupby().apply() labels its output.
    this_segment['month_pct_change'] = this_segment.groupby('month_cat')['volume'].pct_change()
    # The first observation of each calendar month has no predecessor -> NaN.
    this_segment = this_segment.dropna()

    # isolate January - April
    this_segment = this_segment[this_segment['month_cat'].isin([1, 2, 3, 4])]

    years = this_segment['month'].dt.year

    # Historical baseline: mean growth per calendar month, excluding 2020.
    # NOTE(review): the count section of this notebook uses years > 2012 for
    # its baseline while this one uses > 2014 — confirm the difference is
    # intentional.
    history = (
        this_segment[(years > 2014) & (years != 2020)]
        .groupby('month_cat')['month_pct_change']
        .mean()
        .rename('historical_mean_growth')
        .reset_index()
    )

    # 2020 values, labelled by name rather than by column position.
    latest = (
        this_segment.loc[years == 2020, ['month_cat', 'month_pct_change']]
        .rename(columns={'month_pct_change': '2020_growth'})
    )

    # Inner join: only months present in both the baseline and 2020 survive.
    segment_an = history.merge(latest, on="month_cat")
    segment_an['segment'] = segment
    segment_frames.append(segment_an)

# Concatenate once at the end: DataFrame.append was removed in pandas 2.0 and
# re-copied the accumulated frame on every iteration. Row labels are kept
# as-is (they repeat per segment), matching the previous behaviour.
segmented_data = pd.concat(segment_frames) if segment_frames else None

In [125]:
# Inspect the last rows of the per-segment volume growth table.
segmented_data.tail()

Unnamed: 0,month_cat,historical_mean_growth,2020_growth,segment
3,4.0,0.140134,0.092246,Y - Mutual/Membership Benefit Organizations
0,1.0,-0.161501,-0.213468,Z - Unknown
1,2.0,-0.148962,-0.195902,Z - Unknown
2,3.0,-0.097561,-0.22107,Z - Unknown
3,4.0,-0.140008,1.351812,Z - Unknown


In [126]:
# Keep the volume growth table under its own name: `segmented_data` is
# rebuilt for transaction counts further down, which would otherwise
# overwrite these results.
segmented_monthly = segmented_data.copy()

In [108]:
# Gap between 2020 growth and the historical mean, scaled by 100
# (i.e. expressed in percentage points).
segmented_data['2020_growth_diff'] = (
    segmented_data['2020_growth']
    .sub(segmented_data['historical_mean_growth'])
    .mul(100)
)

In [109]:
# Reshape to one row per segment with one column per calendar month number,
# each cell holding that month's growth gap.
pvt = (
    segmented_data
    .pivot(index='segment', columns='month_cat', values="2020_growth_diff")
    .reset_index()
)

In [110]:
# Positional relabel: relies on the pivot having produced exactly the month
# columns 1-4, in that order, after 'segment'. A missing month raises a
# length-mismatch error here.
pvt.columns = ['segment', 'jan', 'feb', 'mar', 'apr']

In [111]:
# Attach the number of organizations in each segment to the monthly gaps.
# The columns of the counts table are named explicitly because the labels
# that value_counts().reset_index() produces depend on the pandas version
# ('index'/'segment' before 2.0, 'segment'/'count' from 2.0 on), which made
# the previous right_on="index" join raise KeyError on current pandas.
segment_sizes = (
    orgs['segment']
    .value_counts()
    .rename_axis('segment')
    .reset_index(name='count')
)
# Inner join on the shared 'segment' column; resulting column order is
# segment, jan, feb, mar, apr, count.
wcounts = pvt.merge(segment_sizes, on='segment')
wcounts

Unnamed: 0,segment,jan,feb,mar,apr,count
0,"A - Arts, Culture, and Humanities",28.64615,74.780825,-2.86067,-31.005603,321
1,B - Educational Institutions,-21.232749,4.938779,-27.510901,-47.805313,571
2,C - Environmental Advocacy and Protection,-45.623082,27.301017,-49.006677,-43.679515,125
3,D - Animal,243.933325,2.807322,-42.246739,-23.369846,128
4,"E - Health, General and Rehabilitative",-24.463958,-46.689132,-67.323373,-81.913409,211
5,F - Mental Health and Crisis Intervention,2.364785,6.51075,-13.296864,-44.624931,125
6,"G - Disease, Disorders, Medical Disciplines",-33.112721,-22.190752,-140.598629,-103.643103,163
7,H - Medical Research,1.592758,-29.79298,-77.889869,-66.822585,31
8,"I - Crime, Legal Related",19.912921,17.059952,-74.994062,38.207685,67
9,"J - Employment, Job Related",65.403423,82.947975,-130.603875,-97.669287,38


## Count

In [127]:
# Year-over-year transaction COUNT growth per segment: for January-April,
# compare the 2020 growth of each calendar month against the mean growth that
# the same calendar month showed in earlier years.
segment_frames = []
for segment in segment_grpd['segment'].unique():
    # slice non-zero segment observations
    this_segment = segment_grpd[(segment_grpd['segment']==segment)&(segment_grpd['count']>0)].copy()

    # Growth relative to the previous observation of the same calendar month.
    # GroupBy.pct_change returns a Series on the original row index, so the
    # assignment aligns regardless of how groupby().apply() labels its output.
    this_segment['month_pct_change'] = this_segment.groupby('month_cat')['count'].pct_change()
    # The first observation of each calendar month has no predecessor -> NaN.
    this_segment = this_segment.dropna()

    # isolate January - April
    this_segment = this_segment[this_segment['month_cat'].isin([1, 2, 3, 4])]

    years = this_segment['month'].dt.year

    # Historical baseline: mean growth per calendar month, excluding 2020.
    # NOTE(review): the volume section of this notebook uses years > 2014 for
    # its baseline while this one uses > 2012 — confirm the difference is
    # intentional.
    history = (
        this_segment[(years > 2012) & (years != 2020)]
        .groupby('month_cat')['month_pct_change']
        .mean()
        .rename('historical_mean_growth')
        .reset_index()
    )

    # 2020 values, labelled by name rather than by column position.
    latest = (
        this_segment.loc[years == 2020, ['month_cat', 'month_pct_change']]
        .rename(columns={'month_pct_change': '2020_growth'})
    )

    # Inner join: only months present in both the baseline and 2020 survive.
    segment_an = history.merge(latest, on="month_cat")
    segment_an['segment'] = segment
    segment_frames.append(segment_an)

# Concatenate once at the end: DataFrame.append was removed in pandas 2.0 and
# re-copied the accumulated frame on every iteration. Row labels are kept
# as-is (they repeat per segment), matching the previous behaviour.
segmented_data = pd.concat(segment_frames) if segment_frames else None

In [128]:
# Inspect the last rows of the per-segment count growth table.
segmented_data.tail()

Unnamed: 0,month_cat,historical_mean_growth,2020_growth,segment
3,4.0,1.153361,0.762791,Y - Mutual/Membership Benefit Organizations
0,1.0,0.493192,-0.143825,Z - Unknown
1,2.0,0.33613,0.228705,Z - Unknown
2,3.0,0.535859,-0.514674,Z - Unknown
3,4.0,0.496664,-0.320887,Z - Unknown


In [129]:
# Combine the volume growth table (segmented_monthly) with the count growth
# table (segmented_data) into one row per (segment, month).
# Columns are renamed by name instead of by overwriting `.columns`
# positionally, so a change in column order or an extra column (such as
# '2020_growth_diff') cannot silently mislabel a metric.
volume_growth = segmented_monthly.rename(columns={
    'historical_mean_growth': 'mean_volume_growth',
    '2020_growth': '2020_volume_growth',
})
count_growth = (
    segmented_data[['month_cat', 'segment', 'historical_mean_growth', '2020_growth']]
    .rename(columns={
        'historical_mean_growth': 'mean_count_growth',
        '2020_growth': '2020_count_growth',
    })
)

# Inner join: only (segment, month) pairs present in both tables are kept.
segmented_monthly = (
    volume_growth
    .merge(count_growth, on=['month_cat', 'segment'])
    .rename(columns={'month_cat': 'month'})
    [['segment', 'month', 'mean_volume_growth', '2020_volume_growth', 'mean_count_growth', '2020_count_growth']]
)
segmented_monthly.head()

Unnamed: 0,segment,month,mean_volume_growth,2020_volume_growth,mean_count_growth,2020_count_growth
0,"A - Arts, Culture, and Humanities",1.0,0.148668,0.435129,0.40394,-0.344016
1,"A - Arts, Culture, and Humanities",2.0,0.185996,0.933805,0.34616,0.26181
2,"A - Arts, Culture, and Humanities",3.0,0.232048,0.203441,0.387324,-0.058195
3,"A - Arts, Culture, and Humanities",4.0,0.177452,-0.132604,0.31517,0.045358
4,B - Educational Institutions,1.0,0.429313,0.216986,0.323325,0.409008


In [130]:
# Write the combined volume/count growth table to a CSV file (path is
# relative to the current working directory), without the row index.
segmented_monthly.to_csv("q1_2020.csv", index=False)

In [104]:
# Count analogue of the volume comparison above: gap between 2020 growth and
# the historical mean, scaled by 100 (percentage points).
segmented_data['2020_growth_diff'] = (segmented_data['2020_growth'] - segmented_data['historical_mean_growth']) * 100

# One row per segment, one column per calendar month number.
pvt = segmented_data.pivot(index='segment', columns='month_cat', values="2020_growth_diff").reset_index()
# Positional relabel: relies on the pivot having produced exactly the month
# columns 1-4, in that order, after 'segment'.
pvt.columns = ['segment', 'jan', 'feb', 'mar', 'apr']

# Attach the number of organizations in each segment. The columns of the
# counts table are named explicitly because the labels that
# value_counts().reset_index() produces depend on the pandas version
# ('index'/'segment' before 2.0, 'segment'/'count' from 2.0 on), which made
# the previous right_on="index" join raise KeyError on current pandas.
segment_sizes = (
    orgs['segment']
    .value_counts()
    .rename_axis('segment')
    .reset_index(name='count')
)
# Inner join on the shared 'segment' column; resulting column order is
# segment, jan, feb, mar, apr, count.
wcounts = pvt.merge(segment_sizes, on='segment')
wcounts

Unnamed: 0,segment,jan,feb,mar,apr,count
0,"A - Arts, Culture, and Humanities",-74.795688,-8.434993,-44.551879,-26.981239,321
1,B - Educational Institutions,8.568291,3.024386,-61.420011,-85.301326,571
2,C - Environmental Advocacy and Protection,-77.175644,-24.68598,-127.453685,-150.066172,125
3,D - Animal,216.243954,-24.309848,-50.692024,-6.161769,128
4,"E - Health, General and Rehabilitative",-0.4501,-13.83107,-73.390825,-112.649574,211
5,F - Mental Health and Crisis Intervention,-17.646042,-23.264634,-36.339458,-47.035618,125
6,"G - Disease, Disorders, Medical Disciplines",-31.138466,-72.608199,-154.164565,-123.449217,163
7,H - Medical Research,-82.511532,-28.965118,-138.768528,-105.939432,31
8,"I - Crime, Legal Related",-7.849189,-90.426402,-129.759771,-81.075039,67
9,"J - Employment, Job Related",360.889604,113.627915,-199.428068,-104.183113,38
