In [1]:
import pandas as pd
import numpy as np

import sys
sys.path.insert(1, '../../scripts/')
from s3_support import *

# load data

In [2]:
# Pull one row per (form, year): transaction count and volume alongside the
# summed GiftAssist count and amount.
# NOTE(review): status='A' presumably restricts to approved/accepted
# transactions -- confirm against the transactions schema.
q = '''select
            year,
            form,
            count(id) as trans_count,
            sum(amount) as trans_vol,
            sum(gift_assist_count) as ga_count,
            sum(gift_assist_amt) as ga_vol
        from transactions
        where
            status='A' 
        group by form, year'''
# redshift_query_read is presumably provided by the s3_support star import;
# its result is used as a pandas DataFrame by the cells below.
df = redshift_query_read(q, schema='production')

In [3]:
# Flag every form-year row that collected any GiftAssist volume.
df['used_ga'] = df['ga_vol'].gt(0)

# analysis

### 1. average form processing

In [4]:
# Mean and median yearly volume / transaction count per form-year row, split
# by whether the row had GiftAssist volume.
# The columns are selected with a list: indexing a GroupBy with a bare tuple
# of column names was deprecated in pandas 1.0 (it emits a FutureWarning) and
# raises in pandas 2.0+.
df.groupby('used_ga')[['trans_vol', 'trans_count']].agg(['mean', 'median']).reset_index()

  """Entry point for launching an IPython kernel.


Unnamed: 0_level_0,used_ga,trans_vol,trans_vol,trans_count,trans_count
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,median,mean,median
0,False,18408.956892,1500.0,146.453989,17.0
1,True,22268.708957,3948.65,175.027498,35.0


### 2. form/org adoption rate

In [5]:
# A form counts as having adopted GiftAssist if any of its yearly rows is
# flagged used_ga; the result is the share of forms in each group.
print("adoption rate:", "-"*20, sep="\n")
form_ever_used_ga = df.groupby('form')['used_ga'].max()
form_ever_used_ga.value_counts(normalize=True)

adoption rate:
--------------------


False    0.508318
True     0.491682
Name: used_ga, dtype: float64

### 2. donor utilization rate

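On donation forms with GiftAssist enabled (approximated here as form-years with GiftAssist volume greater than zero), donors elected to help offset processing fees X% of the time, where X is the `ratio` column in the table below (`ga_count / trans_count`, computed per year).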

In [6]:
# Yearly totals over the form-year rows flagged used_ga, plus
# ratio = ga_count / trans_count for each year.
ga_rows = df.loc[df['used_ga']]
annual = ga_rows.groupby('year')[['trans_count', 'ga_count']].sum().reset_index()
annual = annual.assign(ratio=annual['ga_count'] / annual['trans_count'])
annual

Unnamed: 0,year,trans_count,ga_count,ratio
0,2016,111054,24876,0.223999
1,2017,279739,85357,0.305131
2,2018,499959,177625,0.355279
3,2019,776627,287433,0.370104
4,2020,1132452,520039,0.459215
5,2021,1491059,694528,0.465795
6,2022,1843359,819524,0.444582
7,2023,1293918,538052,0.415832


### 3. average donation amount between groups

In [7]:
# Earliest year in which any form-year row had GiftAssist volume.
df.loc[df['used_ga'], 'year'].min()

2016

In [8]:
# Per-year donation-amount statistics, split by whether the transaction had a
# GiftAssist contribution (gift_assist_count > 0). The mean and median queries
# differ only in the aggregate function and its output alias, so both are
# rendered from one template instead of two copy-pasted SQL strings. The
# rendered SQL text is identical to the previous hand-written queries.
# NOTE(review): recurring=0 presumably keeps one-time gifts only, and
# year>2016 excludes 2016 even though GiftAssist rows start that year --
# confirm both filters are intentional.
amount_stat_sql = '''select
            case when gift_assist_count>0 then 1 else 0 end as used_ga,
            year,
            {agg}(amount) as {alias}
        from transactions
        where status='A' and recurring=0 and year>2016
        group by year, used_ga'''

q = amount_stat_sql.format(agg='avg', alias='mean_amount')
mean_df = redshift_query_read(q, schema='production')

q = amount_stat_sql.format(agg='median', alias='median_amount')
median_df = redshift_query_read(q, schema='production')

In [9]:
# Join the mean and median tables on (used_ga, year), then spread used_ga
# into columns so that each year occupies a single row.
avgs = pd.merge(mean_df, median_df, on=['used_ga', 'year'])
by_year = avgs.pivot(index='year', columns='used_ga', values=['mean_amount', 'median_amount'])
by_year.reset_index()

Unnamed: 0_level_0,year,mean_amount,mean_amount,median_amount,median_amount
used_ga,Unnamed: 1_level_1,0,1,0,1
0,2017,170.390845,118.434185,50.0,51.97
1,2018,163.196996,114.117636,50.0,51.97
2,2019,153.554133,115.172078,50.0,51.95
3,2020,167.656064,131.947188,50.0,52.5
4,2021,189.206366,145.788241,50.0,52.5
5,2022,189.145582,148.340466,50.0,52.5
6,2023,177.160358,151.964685,50.0,52.5
