In [2]:
import pandas as pd

import sys
sys.path.insert(1, '../../scripts/')
from s3_support import *

# 2023 vs 2024

We want to look at how the organizations that participated in 2023 performed in 2024. To do this, we will look at the average funds raised per organization in 2023 and then we will look at the same metrics for the subset of 2024 organizations that were also active during 2023.

In [1]:
dt_2023 = "(date='2023-11-28' or (date='2023-11-29' and hour<5))"
dt_2024 = "(date='2024-12-03' or (date='2024-12-04' and hour<=5))"

In [4]:
q = '''select
            org,
            sum(amount) as trans_vol,
            count(id) as trans_count
        from transactions
        where
            status='A' and
            (recurring=0 or recurring_origin=1) and
            {}
        group by org'''.format(dt_2023)
df_2023 = redshift_query_read(q, schema='production')

orgs_2023 = df_2023['org'].unique()

In [7]:
q = '''select
            org,
            sum(amount) as trans_vol,
            count(id) as trans_count
        from transactions
        where
            status='A' and
            (recurring=0 or recurring_origin=1) and
            {} and
            org in (select
                        org
                    from transactions
                    where
                        status='A' and
                        (recurring=0 or recurring_origin=1) and
                        {}
            )
        group by org'''.format(dt_2024, dt_2023)
df_2024 = redshift_query_read(q, schema='production')

In [11]:
print("2023:")
print("-"*40)
print("{:,} orgs".format(len(orgs_2023)))
print("${:,.2f}/org mean".format(df_2023['trans_vol'].sum() / len(orgs_2023)))
print("${:,.2f}/org median".format(df_2023['trans_vol'].median()))

print()
print("2024:")
print("-"*40)
perc_retained = (len(df_2024) / len(df_2023))
print("{:,} orgs, {:.2f}% retained from 2023".format(len(df_2024), perc_retained * 100.))
print("${:,.2f}/org".format(df_2024['trans_vol'].sum() / len(df_2024)))
print("${:,.2f}/org median".format(df_2024['trans_vol'].median()))

2023:
----------------------------------------
1,786 orgs
$3,798.49/org mean
$669.65/org median

2024:
----------------------------------------
1,314 orgs, 73.57% retained from 2023
$5,119.92/org
$1,099.10/org median


# 2019 vs 2020

In [5]:
# giving tuesday 2020
gt_2020 = '2020-12-01'
q = "select sum(amount), count(id), count(distinct(id)) from transactions where date='{}' and status='A'".format(gt_2020)
df_2020 = redshift_query_read(q, schema='production')

In [6]:
df_2020

Unnamed: 0,sum,count,count.1
0,6275752.41,40318,40318


In [10]:
# giving tuesday 2021
gt_2021 = '2021-11-30'
q = "select sum(amount), max(amount), avg(amount), count(id), count(distinct(id)) as count_distinct from transactions where date='{}' and status='A'".format(gt_2021)
df_2021 = redshift_query_read(q, schema='production')

In [11]:
df_2021

Unnamed: 0,sum,max,avg,count,count_distinct
0,1437489.12,30000.0,133.91924,10734,10734


# 2019 v 2020 by hour

In [1]:
DATE_2019 = '2019-12-03'
DATE_2020 = '2020-12-01'

In [9]:
q = "select hour, sum(amount), count(distinct(id)) from transactions where date='{}' and status='A' group by hour".format(DATE_2019)
df_2019 = redshift_query_read(q, schema='production')
df_2019.sort_values("hour", inplace=True)
df_2019['cumsum'] = df_2019['sum'].cumsum()

In [10]:
q = "select hour, sum(amount), count(distinct(id)) from transactions where date='{}' and status='A' group by hour".format(DATE_2020)
df_2020 = redshift_query_read(q, schema='production')
df_2020.sort_values('hour', inplace=True)
df_2020['cumsum'] = df_2020['sum'].cumsum()

In [11]:
df_2019

Unnamed: 0,hour,sum,count,cumsum
2,0,25177.08,79,25177.08
19,1,4380.34,46,29557.42
13,2,3416.05,37,32973.47
11,3,5716.35,99,38689.82
15,4,6680.37,46,45370.19
1,5,7975.81,54,53346.0
3,6,48784.16,793,102130.16
5,7,106381.83,1069,208511.99
10,8,120240.66,760,328752.65
12,9,170128.0,1051,498880.65


In [12]:
df_2020

Unnamed: 0,hour,sum,count,cumsum
17,0,21868.48,144,21868.48
14,1,11815.85,87,33684.33
0,2,6046.16,55,39730.49
11,3,9116.86,38,48847.35
2,4,3338.63,35,52185.98
22,5,7278.42,63,59464.4
23,6,71737.1,941,131201.5
6,7,292694.98,3978,423896.48
10,8,255473.48,2371,679369.96
1,9,346868.42,1901,1026238.38


### 2021 hourly

In [17]:
gt_2020 = '2021-11-30'
q = '''select 
            date,
            hour,
            sum(amount), 
            count(distinct(id)) 
        from transactions 
        where 
            (date='{}' or (date='{}' and hour<=3)) and 
            status='A'
        group by date, hour
        '''.format(gt_2020, '2021-12-01')
hourly_2020 = redshift_query_read(q, schema='production')

In [19]:
hourly_2020.sort_values(['date', 'hour'], inplace=True)
hourly_2020['cumsum'] = hourly_2020['sum'].cumsum()
hourly_2020

Unnamed: 0,date,hour,sum,count,cumsum
4,2021-11-30,0,18964.64,107,18964.64
26,2021-11-30,1,21899.21,62,40863.85
6,2021-11-30,2,6146.12,54,47009.97
1,2021-11-30,3,28917.43,39,75927.4
7,2021-11-30,4,3147.53,36,79074.93
14,2021-11-30,5,16205.49,101,95280.42
12,2021-11-30,6,82949.82,1030,178230.24
2,2021-11-30,7,253692.31,3663,431922.55
21,2021-11-30,8,409389.34,4163,841311.89
5,2021-11-30,9,403416.55,2829,1244728.44


# final numbers request

- % increase in dollars raised compared to GT 2020
- % increase in the number of donations made compared to GT 2020
- the largest donation
- average donation amount 
- % incrase in the number of orgs that processed donations this GT copmared to last
- % increase in recurring originated
- the most successful sectors/subsets

In [22]:
gt_2020 = '2020-12-01'
q = '''select 
            count(distinct(org)) as orgs,
            sum(amount) as volume, 
            count(distinct(id)) as count,
            max(amount) as max_amount,
            avg(amount) as avg_amount
        from transactions 
        where 
            (date='{}' or (date='{}' and hour<=3)) and 
            status='A'
        '''.format(gt_2020, '2020-12-02')
df_2020 = redshift_query_read(q, schema='production')

In [23]:
df_2020

Unnamed: 0,orgs,volume,count,max_amount,avg_amount
0,1714,6487582.91,41364,100000.0,156.841285


In [20]:
gt_2021 = '2021-11-30'
q = '''select 
            count(distinct(org)) as orgs,
            sum(amount) as volume, 
            count(distinct(id)) as count,
            max(amount) as max_amount,
            avg(amount) as avg_amount
        from transactions 
        where 
            (date='{}' or (date='{}' and hour<=3)) and 
            status='A'
        '''.format(gt_2020, '2021-12-01')
df_2021 = redshift_query_read(q, schema='production')

In [21]:
df_2021

Unnamed: 0,orgs,volume,count,max_amount,avg_amount
0,1814,6581471.05,41350,46350.0,159.164959


In [25]:
df_2020.append(df_2021).pct_change()

Unnamed: 0,orgs,volume,count,max_amount,avg_amount
0,,,,,
0,0.058343,0.014472,-0.000338,-0.5365,0.014815


### recurring

In [26]:
q = '''select 
            id, 
            recurring, 
            amount,
            date,
            hour
        from transactions 
        where 
            recurring!=0 and
            status='A' and 
            date>='2020-01-01' '''
rec = redshift_query_read(q, schema='production')
rec.drop_duplicates('recurring', keep='first', inplace=True)

In [29]:
rec_2020 = rec[(rec['date']=='2020-12-01')|((rec['date']=='2020-12-02')&(rec['hour']<=3))]
len(rec_2020)

985

In [30]:
rec_2021 = rec[(rec['date']=='2021-11-30')|((rec['date']=='2021-12-01')&(rec['hour']<=3))]
len(rec_2021)

746

### segments

In [5]:
q = '''select
            id,
            segment
        from organization'''
orgs = redshift_query_read(q, schema='production')

In [11]:
q = '''select
            org,
            amount
        from transactions
        where
            status='A' and
            (date='2023-11-28' or (date='2023-11-29' and hour<3))'''
df_2023 = redshift_query_read(q, schema='production')
df_2023['segment'] = df_2023['org'].apply(lambda x: orgs[orgs['id']==x]['segment'].iloc[0] if len(orgs[orgs['id']==x]['segment']) > 0 else None)

seg_data = []
for segment in df_2023['segment'].unique():
    new_segment = df_2023[df_2023['segment']==segment]
    seg_data.append({
        'segment': segment,
        'count': new_segment['amount'].count(),
        'sum': new_segment['amount'].sum(),
        'mean': new_segment['amount'].mean(),
        'median': new_segment['amount'].median()
    })

2023


In [21]:
len(df_2023[df_2023['segment'].isna()]), len(df_2023)

(7991, 35742)

In [18]:
df_2023['amount'].sum(), df_2023[df_2023['segment'].isna()]['amount'].sum(), pd.DataFrame(seg_data)['sum'].sum()

(6828754.13, 1510258.52, 5318495.61)

In [13]:
pd.DataFrame(seg_data).sort_values('segment')

Unnamed: 0,segment,count,sum,mean,median
9,A - Arts; Culture; and Humanities,657,106252.4,161.723592,53.0
0,B - Educational Institutions,2640,529982.86,200.751083,53.0
8,C - Environmental Advocacy and Protection,561,124081.94,221.179929,78.0
2,D - Animal,3146,342564.53,108.888916,51.75
16,E - Health; General and Rehabilitative,1015,207027.94,203.968414,52.22
18,F - Mental Health and Crisis Intervention,417,91029.73,218.296715,53.0
12,G - Disease; Disorders; Medical Disciplines,1010,187720.59,185.86197,79.375
11,H - Medical Research,390,78269.68,200.691487,100.0
4,I - Crime; Legal Related,90,35657.56,396.195111,102.95
25,J - Employment; Job Related,44,14387.92,326.998182,54.0


In [40]:
gt_2020 = '2020-12-01'
q = '''select 
            org,
            amount
        from transactions 
        where 
            (date='{}' or (date='{}' and hour<=3)) and 
            status='A'
        '''.format(gt_2020, '2020-12-02')
q = '''select 
            org,
            amount
        from transactions 
        where 
            date='{}' and 
            status='A'
        '''.format(gt_2020)
df_2020 = redshift_query_read(q, schema='production')
df_2020['segment'] = df_2020['org'].apply(lambda x: orgs[orgs['Id']==x]['Segment'].iloc[0])

In [41]:
df_2020.groupby('segment')['amount'].agg(['count', 'sum', 'mean', 'median']).reset_index()

Unnamed: 0,segment,count,sum,mean,median
0,"A - Arts, Culture, and Humanities",1046,188813.3,180.509847,51.5
1,B - Educational Institutions,3221,533819.69,165.731043,52.0
2,"B - Educational Institutions , O - Youth Devel...",2,75.0,37.5,37.5
3,C - Environmental Advocacy and Protection,633,71483.83,112.928641,50.0
4,D - Animal,3074,273249.63,88.890576,50.0
5,"E - Health, General and Rehabilitative",809,144644.97,178.794771,51.5
6,F - Mental Health and Crisis Intervention,480,70046.63,145.930479,52.5
7,"G - Disease, Disorders, Medical Disciplines",638,90282.88,141.509216,52.5
8,H - Medical Research,350,171415.35,489.758143,53.0
9,"I - Crime, Legal Related",260,43244.25,166.324038,100.0


In [44]:
gt_2021 = '2021-11-30'
q = '''select 
            org,
            amount
        from transactions 
        where 
            (date='{}' or (date='{}' and hour<=3)) and 
            status='A'
        '''.format(gt_2020, '2021-12-01')
q = '''select 
            org,
            amount
        from transactions 
        where 
            date='{}' and 
            status='A'
        '''.format(gt_2021)
df_2021 = redshift_query_read(q, schema='production')
df_2021['segment'] = df_2021['org'].apply(lambda x: orgs[orgs['Id']==x]['Segment'].iloc[0])

In [45]:
df_2021.groupby('segment')['amount'].agg(['count', 'sum', 'mean', 'median']).reset_index()

Unnamed: 0,segment,count,sum,mean,median
0,"A - Arts, Culture, and Humanities",1240,208985.29,168.536524,51.5
1,B - Educational Institutions,3098,622826.57,201.041501,53.0
2,C - Environmental Advocacy and Protection,697,123094.96,176.606829,52.0
3,D - Animal,4278,382216.17,89.344593,50.0
4,"E - Health, General and Rehabilitative",1024,196963.29,192.346963,52.5
5,F - Mental Health and Crisis Intervention,577,94440.43,163.674922,82.5
6,"G - Disease, Disorders, Medical Disciplines",874,128504.34,147.030137,52.5
7,H - Medical Research,627,124546.25,198.638357,100.0
8,"I - Crime, Legal Related",278,72627.7,261.250719,100.0
9,"J - Employment, Job Related",66,23328.7,353.465152,104.0
