1. Mean & median transaction value for 2019 vs 2020.
2. Broken down by product (2019 vs 2020)
    1. donation forms (excluding one-time donations to events if possible)
    2. P2P donations (to any entity)
    3. Donations made through Givi
        - during an auction
        - outside of an auction to an org
    4. kiosk

In [26]:
import pandas as pd
import sys
sys.path.insert(1, '../../scripts/')
from s3_support import *

### All transactions

In [37]:
# Yearly count, median and mean of transaction amounts across every source.
# The query keeps only rows with status 'A' and drops amounts equal to 0 or
# 99999999.99 (presumably a placeholder value -- TODO confirm).
q = '''select 
            count(id), 
            median(amount), 
            avg(amount) as mean, 
            date_trunc('year', date) as year 
        from transactions 
        where 
            status='A' and
            amount!=99999999.99 and amount!=0.0
        group by date_trunc('year', date)'''
all_trans = (
    redshift_query_read(q, schema='public')
    # Reduce the truncated date to a plain integer year.
    .assign(year=lambda frame: pd.to_datetime(frame['year']).dt.year)
    # Only 2019 and 2020 are compared in this notebook.
    .loc[lambda frame: frame['year'].isin([2019, 2020])]
    # The query has no ORDER BY, so the row order it returns is not guaranteed;
    # sort explicitly so 2019 is always listed before 2020.
    .sort_values('year')
)
all_trans

Unnamed: 0,count,median,mean,year
10,1502733,50.0,127.860661,2019
14,2021372,50.0,133.307458,2020


In [28]:
# Print the all-transactions yearly mean/median as a markdown table.
for header_line in ("All transactions", "", "| year | mean | median |", "|------|------|--------|"):
    print(header_line)

for year, mean, median in zip(all_trans['year'], all_trans['mean'], all_trans['median']):
    print("| {:.0f} | {:.2f} | {:.2f} |".format(year, mean, median))

All transactions

| year | mean | median |
|------|------|--------|
| 2019 | 127.86 | 50.00 |
| 2020 | 133.31 | 50.00 |


### Segmented by product

In [29]:
# Yearly count, median and mean of transaction amounts for donation forms.
# The query keeps rows whose source is 'don_form' or 'mobile', with status 'A'
# and events_amt equal to 0, and drops amounts equal to 0 or 99999999.99
# (presumably a placeholder value -- TODO confirm).
q = '''select 
            count(id), 
            median(amount), 
            avg(amount) as mean, 
            date_trunc('year', date) as year 
        from transactions 
        where 
            (source='don_form' or source='mobile') and 
            status='A' and 
            events_amt=0 and 
            amount!=99999999.99 and amount!=0
        group by date_trunc('year', date)'''
df_donform = (
    redshift_query_read(q, schema='public')
    # Reduce the truncated date to a plain integer year.
    .assign(year=lambda frame: pd.to_datetime(frame['year']).dt.year)
    # Only 2019 and 2020 are compared in this notebook.
    .loc[lambda frame: frame['year'].isin([2019, 2020])]
    # The query has no ORDER BY, so the row order it returns is not guaranteed;
    # sort explicitly so 2019 is always listed before 2020.
    .sort_values('year')
)
df_donform

Unnamed: 0,count,median,mean,year
10,883299,40.0,134.648902,2019
14,1442482,50.0,140.68976,2020


In [30]:
# Print the donation-form yearly mean/median as a markdown table.
for header_line in ("Donation forms", "", "| year | mean | median |", "|------|------|--------|"):
    print(header_line)

donform_rows = df_donform[df_donform['year'].isin([2019, 2020])]
for year, mean, median in zip(donform_rows['year'], donform_rows['mean'], donform_rows['median']):
    print("| {:.0f} | {:.2f} | {:.2f} |".format(year, mean, median))

Donation forms

| year | mean | median |
|------|------|--------|
| 2019 | 134.65 | 40.00 |
| 2020 | 140.69 | 50.00 |


In [31]:
# Yearly count, median and mean of transaction amounts for source 'p2p'.
# The query keeps only rows with status 'A' and drops amounts equal to 0 or
# 99999999.99 (presumably a placeholder value -- TODO confirm).
q = '''select 
            count(id), 
            median(amount), 
            avg(amount) as mean, 
            date_trunc('year', date) as year 
        from transactions 
        where 
            source='p2p' and 
            status='A' and
            amount!=99999999.99 and amount!=0
        group by date_trunc('year', date)'''
df_p2p = (
    redshift_query_read(q, schema='public')
    # Reduce the truncated date to a plain integer year.
    .assign(year=lambda frame: pd.to_datetime(frame['year']).dt.year)
    # Only 2019 and 2020 are compared in this notebook.
    .loc[lambda frame: frame['year'].isin([2019, 2020])]
    # The query has no ORDER BY, so the row order it returns is not guaranteed;
    # sort explicitly so 2019 is always listed before 2020.
    .sort_values('year')
)
df_p2p

Unnamed: 0,count,median,mean,year
6,272502,50.0,87.458264,2019
9,283458,50.0,108.622503,2020


In [32]:
# Print the P2P yearly mean/median as a markdown table.
for header_line in ("P2P", "", "| year | mean | median |", "|------|------|--------|"):
    print(header_line)

p2p_rows = df_p2p[df_p2p['year'].isin([2019, 2020])]
for year, mean, median in zip(p2p_rows['year'], p2p_rows['mean'], p2p_rows['median']):
    print("| {:.0f} | {:.2f} | {:.2f} |".format(year, mean, median))

P2P

| year | mean | median |
|------|------|--------|
| 2019 | 87.46 | 50.00 |
| 2020 | 108.62 | 50.00 |


In [33]:
# Yearly count, median and mean of transaction amounts for source 'givi'.
# The query keeps only rows with status 'A' and drops amounts equal to 0 or
# 99999999.99 (presumably a placeholder value -- TODO confirm).
q = '''select 
            count(id), 
            median(amount), 
            avg(amount) as mean, 
            date_trunc('year', date) as year 
        from transactions 
        where 
            source='givi' and 
            status='A' and 
            amount!=99999999.99 and amount!=0
        group by date_trunc('year', date)'''
df_givi = (
    redshift_query_read(q, schema='public')
    # Reduce the truncated date to a plain integer year.
    .assign(year=lambda frame: pd.to_datetime(frame['year']).dt.year)
    # Only 2019 and 2020 are compared in this notebook.
    .loc[lambda frame: frame['year'].isin([2019, 2020])]
    # The query has no ORDER BY, so the row order it returns is not guaranteed;
    # sort explicitly so 2019 is always listed before 2020.
    .sort_values('year')
)
df_givi

Unnamed: 0,count,median,mean,year
1,4393,70.0,120.36949,2019
2,11156,100.0,177.999744,2020


In [34]:
# Print the Givi yearly mean/median as a markdown table.
for header_line in ("Givi", "", "| year | mean | median |", "|------|------|--------|"):
    print(header_line)

givi_rows = df_givi[df_givi['year'].isin([2019, 2020])]
for year, mean, median in zip(givi_rows['year'], givi_rows['mean'], givi_rows['median']):
    print("| {:.0f} | {:.2f} | {:.2f} |".format(year, mean, median))

Givi

| year | mean | median |
|------|------|--------|
| 2019 | 120.37 | 70.00 |
| 2020 | 178.00 | 100.00 |


In [35]:
# Yearly count, median and mean of transaction amounts for source 'kiosk'.
# The query keeps only rows with status 'A' and drops amounts equal to 0 or
# 99999999.99 (presumably a placeholder value -- TODO confirm).
q = '''select 
            count(id), 
            median(amount), 
            avg(amount) as mean, 
            date_trunc('year', date) as year 
        from transactions 
        where 
            source='kiosk' and 
            status='A' and
            amount!=99999999.99 and amount!=0 
        group by date_trunc('year', date)'''
df_kiosk = (
    redshift_query_read(q, schema='public')
    # Reduce the truncated date to a plain integer year.
    .assign(year=lambda frame: pd.to_datetime(frame['year']).dt.year)
    # The query has no ORDER BY, so the row order it returns is not guaranteed;
    # sort explicitly so years always appear in ascending order.
    .sort_values('year')
)
# df_kiosk itself keeps every year returned by the query; only the view
# displayed here is restricted to 2019 and 2020.
df_kiosk[df_kiosk['year'].isin([2019, 2020])]

Unnamed: 0,count,median,mean,year
3,57592,50.0,110.954333,2019
8,26637,30.0,83.190835,2020


In [36]:
# Print the kiosk yearly mean/median as a markdown table.
for header_line in ("Kiosk", "", "| year | mean | median |", "|------|------|--------|"):
    print(header_line)

# df_kiosk holds every year, so restrict it to the two years being compared.
kiosk_rows = df_kiosk[df_kiosk['year'].isin([2019, 2020])]
for year, mean, median in zip(kiosk_rows['year'], kiosk_rows['mean'], kiosk_rows['median']):
    print("| {:.0f} | {:.2f} | {:.2f} |".format(year, mean, median))


Kiosk

| year | mean | median |
|------|------|--------|
| 2019 | 110.95 | 50.00 |
| 2020 | 83.19 | 30.00 |
