In [2]:
import pandas as pd
import numpy as np
import pickle as pkl

import sys
sys.path.insert(1, '../../../scripts/')
from s3_support import *

In [147]:
DATA_PATH = "adoption_data.pkl"

In [148]:
adoption_data = pkl.load(open(DATA_PATH, "rb"))

In [149]:
all_keys = list(adoption_data.keys())

# Building transaction data

In [3]:
q = "select * from transactions where status='A'"
trans = redshift_query_read(q)

In [150]:
trans_src_form = trans.groupby(['form', 'source'])['id'].count().reset_index().pivot(index='form', columns='source', values='id').reset_index().fillna(0)
trans_src_org = trans.groupby(['org', 'source'])['id'].count().reset_index().pivot(index='org', columns='source', values='id').reset_index().fillna(0)

In [151]:
len_forms = len(trans['form'].unique())
len_orgs = len(trans['org'].unique())

In [152]:
all_sources = list(trans['source'].unique())
trans_src_form['all'] = trans_src_form[all_sources].sum(axis=1)
trans_src_org['all'] = trans_src_org[all_sources].sum(axis=1)
for source in all_sources:
    trans_src_form["{}_perc".format(source)] = (trans_src_form[source] / trans_src_form['all']) * 100
    trans_src_org["{}_perc".format(source)] = (trans_src_org[source] / trans_src_org['all']) * 100
trans_src_form.head(3)

source,form,don_form,fb,givi,kiosk,mobile,mobilevt,p2p,sms,vt,all,don_form_perc,vt_perc,p2p_perc,kiosk_perc,mobile_perc,mobilevt_perc,sms_perc,fb_perc,givi_perc
0,0,7.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1,1605.0,0.0,0.0,458.0,717.0,329.0,0.0,4.0,3009.0,6122.0,26.216923,49.150604,0.0,7.481215,11.711859,5.374061,0.065338,0.0,0.0
2,2,445.0,0.0,0.0,42.0,4.0,3.0,3.0,0.0,89.0,586.0,75.938567,15.187713,0.511945,7.167235,0.682594,0.511945,0.0,0.0,0.0


In [153]:
source_data = {}

for source in all_sources:
    trans_msk = (trans['source']==source)&(trans['status']=='A')
    mean_trans_amt = trans[trans_msk]['amount'].mean()
    
    source_data[source] = {
        'avg_perc_forms': trans_src_form["{}_perc".format(source)].mean(),
        'avg_count_forms': trans_src_form[source].mean(),
        'avg_perc_orgs': trans_src_org["{}_perc".format(source)].mean(),
        'avg_count_orgs': trans_src_org[source].mean(),
        'mean_value': mean_trans_amt
    }
    for threshold in [0., 10., 25., 50., 75., 90.]:
        source_data[source]['forms_gt_{}'.format(threshold)] = len(trans_src_form[trans_src_form["{}_perc".format(source)]>threshold])
        source_data[source]['orgs_gt_{}'.format(threshold)] = len(trans_src_org[trans_src_org["{}_perc".format(source)]>threshold])

In [154]:
trans.columns

Index(['amount', 'amount_std', 'creatingTransactionFor', 'date', 'day',
       'donations_amt', 'donations_count', 'email', 'events_amt',
       'events_count', 'events_tickets', 'form', 'form_amount_mean_diff',
       'form_day_mean_diff', 'form_hour_mean_diff', 'hour', 'id', 'is_fraud',
       'is_recurring', 'month', 'org', 'purchases_amt', 'purchases_count',
       'purchases_quantity', 'recurring', 'recurring_creatingTransaction',
       'registrations_amt', 'registrations_count', 'source', 'state', 'status',
       'transDonationEntity', 'transDonationEntityType', 'year', 'zip'],
      dtype='object')

In [155]:
# adding store purchases
trans['is_purchase'] = trans['purchases_count']>0

purch_forms = trans.groupby(['form', 'is_purchase'])['id'].count().reset_index().pivot(index='form', columns='is_purchase', values='id').reset_index().fillna(0)
purch_forms['perc'] = (purch_forms[True] / (purch_forms[True] + purch_forms[False])) * 100.
purch_orgs = trans.groupby(['org', 'is_purchase'])['id'].count().reset_index().pivot(index='org', columns='is_purchase', values='id').reset_index().fillna(0)
purch_orgs['perc'] = (purch_orgs[True] / (purch_orgs[True] + purch_orgs[False])) * 100.

source_data['store_purchases'] = {
    'avg_perc_forms': purch_forms['perc'].mean(),
    'avg_count_forms': purch_forms[True].mean(),
    'avg_perc_orgs': purch_orgs['perc'].mean(),
    'avg_count_orgs': purch_orgs[True].mean(),
    'mean_value': trans[trans['purchases_count']>0]['purchases_amt'].mean()
}
for threshold in [0., 10., 25., 50., 75., 90.]:
    source_data['store_purchases']['forms_gt_{}'.format(threshold)] = len(purch_forms[purch_forms['perc']>threshold])
    source_data['store_purchases']['orgs_gt_{}'.format(threshold)] = len(purch_orgs[purch_orgs['perc']>threshold])

In [156]:
# adding recurring
rec_forms = trans.groupby(['form', 'is_recurring'])['id'].count().reset_index().pivot(index='form', columns='is_recurring', values='id').reset_index().fillna(0)
rec_forms['perc'] = (rec_forms[True] / (rec_forms[True] + rec_forms[False])) * 100.
rec_orgs = trans.groupby(['org', 'is_recurring'])['id'].count().reset_index().pivot(index='org', columns='is_recurring', values='id').reset_index().fillna(0)
rec_orgs['perc'] = (rec_orgs[True] / rec_orgs[True] + rec_orgs[False]) * 100.

source_data['recurring'] = {
    'avg_perc_forms': rec_forms['perc'].mean(),
    'avg_count_forms': rec_forms[True].mean(),
    'avg_perc_orgs': rec_orgs['perc'].mean(),
    'avg_count_orgs': rec_orgs[True].mean(),
    'mean_value': trans[trans['is_recurring'].fillna(False)]['amount'].mean()
}
for threshold in [0., 10., 25., 50., 75., 90.]:
    source_data['recurring']['forms_gt_{}'.format(threshold)] = len(rec_forms[rec_forms['perc']>threshold])
    source_data['recurring']['orgs_gt_{}'.format(threshold)] = len(rec_orgs[rec_orgs['perc']>threshold])

In [157]:
# adding P2P registrations
trans['is_registration'] = trans['registrations_count']>0

reg_forms = trans.groupby(['form', 'is_registration'])['id'].count().reset_index().pivot(index='form', columns='is_registration', values='id').reset_index().fillna(0)
reg_forms['perc'] = (reg_forms[True] / (reg_forms[True] + reg_forms[False])) * 100.
reg_orgs = trans.groupby(['org', 'is_registration'])['id'].count().reset_index().pivot(index='org', columns='is_registration', values='id').reset_index().fillna(0)
reg_orgs['perc'] = (reg_orgs[True] / (reg_orgs[True] + reg_orgs[False])) * 100.

source_data['p2p_registrations'] = {
    'avg_perc_forms': purch_forms['perc'].mean(),
    'avg_count_forms': purch_forms[True].mean(),
    'avg_perc_orgs': purch_orgs['perc'].mean(),
    'avg_count_orgs': purch_orgs[True].mean(),
    'mean_value': trans[trans['registrations_count']>0]['registrations_amt'].mean()
}
for threshold in [0., 10., 25., 50., 75., 90.]:
    source_data['p2p_registrations']['forms_gt_{}'.format(threshold)] = len(reg_forms[reg_forms['perc']>threshold])
    source_data['p2p_registrations']['orgs_gt_{}'.format(threshold)] = len(reg_orgs[reg_orgs['perc']>threshold])

In [158]:
source_data['p2p_registrations']

{'avg_perc_forms': 0.3039882789335012,
 'avg_count_forms': 0.5240717973936563,
 'avg_perc_orgs': 0.14102903896630342,
 'avg_count_orgs': 2.1314,
 'mean_value': 16.722878309475107,
 'forms_gt_0.0': 3404,
 'orgs_gt_0.0': 721,
 'forms_gt_10.0': 3076,
 'orgs_gt_10.0': 513,
 'forms_gt_25.0': 2459,
 'orgs_gt_25.0': 345,
 'forms_gt_50.0': 1309,
 'orgs_gt_50.0': 120,
 'forms_gt_75.0': 605,
 'orgs_gt_75.0': 27,
 'forms_gt_90.0': 402,
 'orgs_gt_90.0': 9}

In [159]:
pkl.dump(source_data, open('trans_adoption_data.pkl', 'wb'))

# Merging datasets

In [160]:
adoption_data['recurring']

{'implemented': {'value': '98.13%', 'note': '7567 orgs; 3.48 per org'},
 'bidirectional': {'value': '69.08%'},
 'institutional': {'value': '69.08%'}}

In [162]:
for source in source_data:
    source_text = source
    if source in ['don_form', 'vt', 'p2p', 'kiosk', 'mobile', 'mobilevt', 'sms', 'fb', 'givi']:
        source_text = "{}_source".format(source)
    
    bidirectional_perc = "{:.2f}%".format((float(source_data[source]['orgs_gt_0.0']) / float(len_orgs)) * 100.)
    institutional_perc = "{:.2f}%".format((float(source_data[source]['orgs_gt_25.0']) / float(len_orgs)) * 100.)
    
    if source == 'recurring':
        adoption_data['recurring']['bidirectional'] = {'value': bidirectional_perc}
        adoption_data['recurring']['institutional'] = {'value': institutional_perc}
    else:
        adoption_data[source_text] = {
            'implemented': {'value': 0, 'note': ''},
            'bidirectional': {'value': bidirectional_perc},
            'institutional': {'value': institutional_perc}
        }
    
    meta_data = source_data[source]
    meta_data['len_forms'] = len_forms
    meta_data['len_orgs'] = len_orgs
    adoption_data[source_text]['meta'] = meta_data

In [163]:
adoption_data.keys()

dict_keys(['fielddata', 'event_promo', 'registrations_nonfundraising', 'events_donations', 'events_fields', 'nonfundraising_participants', 'events', 'classifications', 'custom_reports', 'aggregator_page', 'recurring', 'pledges', 'merchant_accounts', 'forms_with_notifications', 'teams', 'donor_logins', 'service_integrations', 'thermometers', 'custom_fields', 'giftassist_transactions', 'sms', 'dl_onetime', 'embeds', 'pledge', 'category_fields', 'product_fields', 'purchases_shipping', 'promises', 'registration_store', 'events_fielddata', 'dl_recurring', 'purchases', 'categories', 'events_promo', 'registrations_subregistrants', 'orgs_with_notifications', 'restrictions', 'shipping_address', 'don_form_source', 'vt_source', 'p2p_source', 'kiosk_source', 'mobile_source', 'mobilevt_source', 'sms_source', 'fb_source', 'givi_source', 'store_purchases', 'p2p_registrations'])

In [164]:
#pkl.dump(adoption_data, open('adoption_data.pkl', 'wb'))

In [2]:
import pandas as pd
cols = ['org', 'date']
pd.DataFrame(columns=cols)

Unnamed: 0,org,date
