In [20]:
import pandas as pd
import sys
sys.path.insert(1, '../../scripts/')
from s3_support import *

# org comparison

In [21]:
def build_and_execute_query(q, where_clause=[], parameters=[], joins=[], group_on=None):
    """Assemble a SQL statement from its parts and run it via redshift_query_read.

    q            -- base statement text; may carry positional '{}' placeholders
    where_clause -- conditions, glued together with ' and ' after ' where '
    parameters   -- values substituted positionally into the '{}' placeholders
    joins        -- join targets, each emitted as ' left join <target>'
    group_on     -- text placed after ' group by', or None for no grouping

    Returns the result of redshift_query_read for the rendered statement
    (invoked with schema='production'). The list defaults are only read,
    never modified.
    """
    fragments = [q]

    # every join target gets its own ' left join ' prefix
    fragments.extend(' left join ' + target for target in joins)

    if where_clause:
        fragments.append(' where ' + ' and '.join(where_clause))

    if group_on is not None:
        fragments.append(' group by {}'.format(group_on))

    # placeholders are filled only after the whole statement has been assembled
    statement = ''.join(fragments).format(*parameters)
    return redshift_query_read(statement, schema='production')

In [24]:
# start from three independent, empty query-part lists
joins, where_clause, params = [], [], []

In [25]:
# only keep transactions dated on or after 2023-01-01;
# the '{}' placeholder is filled positionally from params
where_clause += ["t.date>='{}'"]
params += ['2023-01-01']

In [26]:
# query data: per-org, per-form-type transaction volume and distinct transaction count
q = '''select 
            t.org as org,
            f.type as type,
            sum(t.amount) as volume,
            count(distinct(t.id)) as count
        from production.transactions as t'''
form_join = "production.form as f on t.form=f.id"
# guard keeps the cell re-runnable: joining alias f a second time would produce invalid SQL
if form_join not in joins:
    joins.append(form_join)
group_on = 'f.type, t.org'

df = build_and_execute_query(q, where_clause=where_clause, parameters=params, joins=joins, group_on=group_on)

In [27]:
# data prep
def interpret_form_type(form_type):
    """Translate a numeric form type code into its label.

    1 -> 'qgiv', 2 -> 'barnstorm', 3 -> 'p2p', 5 -> 'auction'.
    Any other value yields None.
    """
    known_types = ((1, 'qgiv'), (2, 'barnstorm'), (3, 'p2p'), (5, 'auction'))
    for code, label in known_types:
        if form_type == code:
            return label
    return None

# swap numeric form types for labels, then keep only rows whose type was recognised
df['type'] = df['type'].apply(interpret_form_type)
df = df[~df['type'].isna()]

# one row per org, one volume column per form type; missing combinations become 0
df_pivot = (
    df.pivot(index='org', columns='type', values='volume')
    .reset_index()
    .fillna(0)
)

In [32]:
# quick sanity check: show the last two rows of the pivoted frame
df_pivot.tail(2)

type,org,auction,p2p,qgiv
4567,451306,0.0,0.0,2.0
4568,451325,0.0,0.0,1.0


In [33]:
# compute return stats
# Only the raw per-type volume columns feed the percentages. Skipping columns that already
# end in '_percentage' keeps this cell idempotent: re-running it no longer folds earlier
# percentage columns into the denominator or creates '*_percentage_percentage' columns.
type_cols = [c for c in df_pivot.columns if c != 'org' and not str(c).endswith('_percentage')]

# the per-org total does not change inside the loop, so compute it once
row_totals = df_pivot[type_cols].sum(axis=1)
for c in type_cols:
    df_pivot['{}_percentage'.format(c)] = df_pivot[c] / row_totals

# mean and median of every stat column (volumes and percentages) across the sample
data = {
    'sample_size': len(df_pivot),
    'data': df_pivot[[c for c in df_pivot.columns if c != 'org']].agg(['mean', 'median']).to_dict()
}

In [45]:
# show the comparison org's full row (org id plus every stat column) as a dict
# NOTE(review): comparison_org is only assigned in a later cell of this notebook, so this
# cell relies on out-of-order execution
df_pivot[df_pivot['org']==comparison_org].iloc[0].to_dict()

{'org': 451254.0,
 'auction': 0.0,
 'p2p': 0.0,
 'qgiv': 14214.8,
 'auction_percentage': 0.0,
 'p2p_percentage': 0.0,
 'qgiv_percentage': 1.0}

In [40]:
# scratch check: put one stat column next to this_org_data's entry for the same key
# NOTE(review): k and this_org_data are left over from the comparison cell further down,
# so this cell depends on that cell having been executed first
df_pivot[k], this_org_data[k]

(0            0.0
 1       168146.8
 2            0.0
 3        42205.0
 4            0.0
           ...   
 4564         0.0
 4565         0.0
 4566         0.0
 4567         0.0
 4568         0.0
 Name: auction, Length: 4569, dtype: float64,
 {4554: 0.0})

In [48]:
comparison_org = 451254

# add comparison stats if comparison_org provided...
if comparison_org is not None:
    stat_cols = [c for c in df_pivot.columns if c != 'org']
    org_rows = df_pivot.loc[df_pivot['org'] == comparison_org, stat_cols]

    if org_rows.empty:
        # the org has no row in the sample, so there is nothing to compare against;
        # report that explicitly instead of failing with IndexError on .iloc[0]
        this_org_data = None
        comparison_data = None
    else:
        this_org_data = org_rows.iloc[0].to_dict()

        comparison_data = {}
        for k, org_value in this_org_data.items():
            below = int((df_pivot[k] < org_value).sum())
            active = int((df_pivot[k] > 0).sum())
            # NOTE(review): `below` counts every org (including zero-valued ones) while
            # `active` counts only orgs with a positive value, so the ratio can exceed 1 —
            # confirm which population is intended.
            # None marks "undefined" when no org has a positive value (previously a
            # ZeroDivisionError).
            comparison_data[k] = below / active if active > 0 else None

    data['comparison_org'] = {
        'data': this_org_data,
        'better_than': comparison_data
    }

In [49]:
# display the assembled stats payload (sample size, aggregate stats, comparison-org section)
data

{'sample_size': 4569,
 'data': {'auction': {'mean': 15383.484808492, 'median': 0.0},
  'p2p': {'mean': 314209.9746377761, 'median': 0.0},
  'qgiv': {'mean': 431843.7161129345, 'median': 11902.0},
  'auction_percentage': {'mean': 0.08829225721113682, 'median': 0.0},
  'p2p_percentage': {'mean': 0.10303529969188324, 'median': 0.0},
  'qgiv_percentage': {'mean': 0.8086724430969783, 'median': 1.0}},
 'comparison_org': {'data': {'auction': 0.0,
   'p2p': 0.0,
   'qgiv': 14214.8,
   'auction_percentage': 0.0,
   'p2p_percentage': 0.0,
   'qgiv_percentage': 1.0},
  'better_than': {'auction': 0.0,
   'p2p': 0.0,
   'qgiv': 0.5579389134996503,
   'auction_percentage': 0.0,
   'p2p_percentage': 0.0,
   'qgiv_percentage': 0.2828165073443693}}}

# full build

In [1]:
def build_and_execute_query(q, where_clause=None, parameters=None, joins=None, group_on=None):
    """Assemble a SQL template and its rendered form; nothing is executed here.

    q            -- base statement text; may carry positional '{}' placeholders
    where_clause -- conditions, glued together with ' and ' after ' where '
    parameters   -- values substituted positionally into the '{}' placeholders
    joins        -- join targets, each emitted as ' left join <target>'; exact repeats
                    are emitted once
    group_on     -- text placed after ' group by', or None for no grouping

    Returns a tuple (template, parameters, rendered) where template still contains the
    placeholders and rendered is template.format(*parameters).
    """
    where_clause = [] if where_clause is None else where_clause
    parameters = [] if parameters is None else parameters

    # dict.fromkeys drops exact repeats while keeping first-seen order; repeating a join
    # would declare the same table alias twice, which is invalid SQL
    unique_joins = list(dict.fromkeys(joins or []))

    if len(unique_joins) > 0:
        q += ' left join ' + ' left join '.join(unique_joins)

    if len(where_clause) > 0:
        q += ' where ' + ' and '.join(where_clause)

    if group_on is not None:
        q += ' group by {}'.format(group_on)

    return q, parameters, q.format(*parameters)

In [17]:
def get_benchmarks_data(_input):
    """Translate a benchmark request into query parts and pass them to build_and_execute_query.

    _input is a dict with:
      'insight' (required) -- which benchmark to build: 'form_type', 'payment_method',
          'contribution_source', 'trans_count' / 'trans_average' / 'trans_sum',
          'onetime_v_recurring', 'donation_count' / 'registration_count', 'giftassist',
          'form_conversion', 'donor_lifetime_value', 'churn' / 'retention' / 'new_donors'.
          The historical misspellings 'oentime_v_recurring' and 'donor_liftetime_value'
          are still accepted.
      'filters' (optional) -- list of dicts such as {"state": "FL"}. Recognised keys:
          org, state, volume (not implemented, ignored), date_min, date_max,
          trailing (number of days), ntee, tag, source, formtype. Other keys are skipped.

    Returns whatever build_and_execute_query returns for a recognised insight; for an
    unrecognised insight returns the placeholder tuple (0, None).

    Raises KeyError when 'insight' is missing, and ValueError/TypeError when an 'org' or
    'trailing' filter value cannot be converted to int.
    """
    org_join = 'production.organization as o on t.org=o.id'
    form_join = 'production.form as f on t.form=f.id'
    form_type_ids = {'p2p': 3, 'yearround': 1, 'auction': 5}  # 'yearround' is the qgiv type

    where_clause = []
    params = []
    joins = []
    data = {'sample_size': 0, 'data': None}

    def add_join(join, first=False):
        """Register a join once; first=True places it ahead of joins that use its alias."""
        if join in joins:
            return
        if first:
            joins.insert(0, join)
        else:
            joins.append(join)

    # NOTE(review): filter values end up interpolated into the SQL text through
    # str.format in the query builder and string values are not escaped here — they must
    # come from a trusted or validated source.
    for f in _input.get('filters') or []:
        for k, v in f.items():
            if k == 'org':
                where_clause.append('t.org={}')
                params.append(int(v))
            elif k == 'state':
                where_clause.append("o.state='{}'")
                params.append(v)
                add_join(org_join)
            elif k == 'volume':
                # org size
                # TODO: buckets are not defined yet, so this filter is currently ignored
                pass
            elif k == 'date_min':
                where_clause.append("t.date>='{}'")
                params.append(v)
            elif k == 'date_max':
                where_clause.append("t.date<='{}'")
                params.append(v)
            elif k == 'trailing':
                # trailing 30 / 180 / 365 days etc.; the date arithmetic must stay outside
                # quotes so the database evaluates it instead of comparing to a string
                where_clause.append("t.date>=dateadd(day, -{}, current_date)")
                params.append(int(v))
            elif k == 'ntee':
                where_clause.append("o.ntee LIKE '%{}%'")
                params.append(v)
                add_join(org_join)
            elif k == 'tag':
                where_clause.append("o.tags LIKE '%{}%'")
                params.append(v)
                add_join(org_join)
            elif k == 'source':
                if v in ('don_form', 'mobile', 'p2p'):
                    where_clause.append("t.source='{}'")
                    params.append(v)
            elif k == 'formtype':
                add_join(form_join)

                type_id = form_type_ids.get(v) if isinstance(v, str) else None
                if type_id is not None:
                    where_clause.append("f.type={}")
                    params.append(type_id)

    insight = _input['insight']
    group_on = None

    if insight == 'form_type':  # contributions by form type
        q = '''select 
                    t.org as org,
                    f.type as type,
                    sum(t.amount) as volume,
                    count(distinct(t.id)) as count
                from production.transactions as t'''
        add_join(form_join)
        group_on = 'f.type, t.org'
    elif insight == 'payment_method':  # contributions by payment method
        q = '''select 
                    t.org as org,
                    t.payment_type as payment_type,
                    sum(t.amount) as volume,
                    count(distinct(t.id)) as count
                from production.transactions as t'''
        group_on = 't.org, t.payment_type'
    elif insight == 'contribution_source':  # contributions by source
        q = '''select 
                    t.org as org,
                    t.source as source,
                    sum(t.amount) as volume,
                    count(distinct(t.id)) as count
                from production.transactions as t'''
        group_on = 't.org, t.source'
    elif insight in ['trans_count', 'trans_average', 'trans_sum']:
        # contribution count, average contribution amount, amount raised
        q = '''select 
                    t.org as org,
                    median(t.amount) as trans_median,
                    count(t.id) as trans_count,
                    sum(t.amount) as trans_volume
                from production.transactions as t'''
        group_on = 't.org'
    elif insight in ['onetime_v_recurring', 'oentime_v_recurring']:
        # contributions by type (one time v recurring)
        # NOTE(review): one-time rows test t.recurring while recurring rows test
        # t.recurring_origin — confirm the two conditions are meant to differ
        q = '''select
                    t.org as org,
                    sum(case when t.recurring=0 then 1 else 0 end) as onetime_count,
                    sum(case when t.recurring=0 then t.amount else 0 end) as onetime_volume,
                    sum(case when t.recurring_origin!=0 then 1 else 0 end) as recurring_count,
                    sum(case when t.recurring_origin!=0 then t.amount else 0 end) as recurring_volume
                from production.transactions as t'''
        group_on = 't.org'
    elif insight in ['donation_count', 'registration_count']:
        # donation count, registration count
        q = '''select
                    t.org as org,
                    sum(t.donations_count) as donations_count,
                    sum(t.donations_amt) as donations_volume,
                    sum(t.registrations_count) as registrations_count,
                    sum(t.registrations_amt) as registrations_volume
                from production.transactions as t'''
        group_on = 't.org'
    elif insight == 'giftassist':
        # total gift assist, gift assist success rate
        q = '''select
                    t.org as org,
                    count(distinct(t.id)) as trans_count,
                    sum(t.gift_assist_count) as giftassist_count,
                    sum(t.amount) as volume,
                    sum(t.gift_assist_amt) as giftassist_volume
                from production.transactions as t'''
        group_on = 't.org'
    elif insight == 'form_conversion':
        q = '''select
                    t.form as form,
                    sum(fa.pageviews) as pageviews,
                    sum(fa.trans_onetime_count) + sum(fa.trans_rec_count) as transactions
                from production.transactions as t'''
        # the aggregates table goes through the join list so it gets its 'left join' keyword
        add_join('public.form_aggregates as fa on t.form=fa.form')

        # date filters are applied to the aggregates table instead of the transactions table
        where_clause = [w.replace('t.date', 'fa.date') if 't.date' in w else w for w in where_clause]
        # t.form is selected next to aggregates, so the query has to group on it
        group_on = 't.form'
    elif insight in ['donor_lifetime_value', 'donor_liftetime_value']:
        # public.donors_orgs, volume
        q = '''select
                    count(distinct(d.org)) as sample_size,
                    avg(d.volume) as average_lifetime_value
                from public.donors_orgs as d'''
        if len(where_clause) > 0:
            # filters reference alias t (directly or through o / f), so it is joined first
            add_join('production.transactions as t on t.form=d.form', first=True)
    elif insight in ['churn', 'retention', 'new_donors']:
        # public.org_retention
        # org, retention, churn, new_donors
        q = '''select
                    count(distinct(r.org)) as sample_size,
                    avg(r.retention) as retention,
                    avg(r.churn) as churn,
                    avg(r.new_donors) as new_donors
                from public.org_retention as r'''
        if len(where_clause) > 0:
            # filters reference alias t (directly or through o / f), so it is joined first
            add_join('production.transactions as t on t.form=r.form', first=True)
    else:
        # unrecognised insight: hand back the empty placeholder result
        return data['sample_size'], data['data']

    return build_and_execute_query(q, where_clause=where_clause, parameters=params, joins=joins, group_on=group_on)

In [18]:
# sample request: Florida orgs, dates 2019-01-01 through 2020-01-01, form-type insight
i = dict(
    filters=[
        dict(state="FL"),
        dict(date_min="2019-01-01"),
        dict(date_max="2020-01-01"),
    ],
    insight="form_type",
)

In [19]:
# run the sample request; the output shown below is the query template, the parameter
# list and the rendered SQL
get_benchmarks_data(i)

("select \n                    t.org as org,\n                    f.type as type,\n                    sum(t.amount) as volume,\n                    count(distinct(t.id)) as count\n                from production.transactions as t left join production.organization as o on t.org=o.id left join production.form as f on t.form=f.id where o.state='{}' and t.date>='{}' and t.date<='{}' group by f.type, t.org",
 ['FL', '2019-01-01', '2020-01-01'],
 "select \n                    t.org as org,\n                    f.type as type,\n                    sum(t.amount) as volume,\n                    count(distinct(t.id)) as count\n                from production.transactions as t left join production.organization as o on t.org=o.id left join production.form as f on t.form=f.id where o.state='FL' and t.date>='2019-01-01' and t.date<='2020-01-01' group by f.type, t.org")

In [16]:
# echo every filter of the sample request as "key value"
filters = i.get('filters', [])
for f in filters:
    # each filter is a dict; only its first entry is used
    k, v = list(f.items())[0]

    print(k, v)

state FL
date_min 2019-01-01
date_max 2020-01-01


In [15]:
-- query template for the form_type insight; each '{}' is a positional placeholder
select
    t.org as org,
    f.type as type,
    sum(t.amount) as volume,
    count(distinct(t.id)) as count
from production.transactions as t 
    left join production.organization as o on t.org=o.id 
    left join production.form as f on t.form=f.id 
where o.state='{}' and t.date>='{}' and t.date<='{}' 
group by f.type, t.org

-- the same query with 'FL', '2019-01-01' and '2020-01-01' substituted for the placeholders
select
    t.org as org,
    f.type as type,
    sum(t.amount) as volume,
    count(distinct(t.id)) as count
from production.transactions as t 
    left join production.organization as o on t.org=o.id 
    left join production.form as f on t.form=f.id 
where 
    o.state='FL' and 
    t.date>='2019-01-01' and 
    t.date<='2020-01-01' 
group by f.type, t.org

'state'