# Feature adoption dashboard

Purpose is to develop a script that will automatically update adoption data for a deshboard presentation. the resulting data should provide:

1. reasonably up to date data
2. require no further processing from the dashboard
3. include all data used for the dashboard in either (1) a single data source or (2) consistent formats such that independent processing is not required for various data points

The data sources that will be required for this are:

1. transactions data (from Redshift)
2. adoption data export (from production)
3. table data export (from production)

The adoption data export will be used as the foundation of the singular datasource as it contains the most readily available and formatted data.

## Process

1. Update data sources
2. Integrate table data into adoption export data (implementation)
3. Integrate transactions data into adoption export data (bidirectional & institutional)

## Notes

*Tables Needed*

- qgiv events
- transregistration

## data basic prep

In [1]:
import pandas as pd
import numpy as np
import pickle as pkl

import sys, requests, json
sys.path.insert(1, '../../../scripts/')
from s3_support import *

In [34]:
orgs = get_dataframe_from_file("qgiv-stats-data", "org.exp.csv")
forms = get_dataframe_from_file("qgiv-stats-data", "form.exp.csv")
org_transaction_count = redshift_query_read("select org, count(id) as count from transactions where status='A' group by org")

"{} orgs, {} forms".format(len(orgs), len(forms))

'8034 orgs, 39096 forms'

In [32]:
org_transaction_count.head(3)

Unnamed: 0,org,count
0,131,1729
1,150,3725
2,138,45


In [35]:
forms.head(3)

Unnamed: 0,dateCreated,dateLive,form,org,path,status
0,2006-05-18 00:00:00,2006-05-18 00:00:00,1,6,cfsun,1
1,2007-08-01 00:00:00,2009-04-28 00:00:00,2,9,cipherbeta,9
2,2006-08-22 00:00:00,2006-08-22 00:00:00,3,13,viste,1


## 1. Data updates

### update adoption data

In [2]:
# update adoption data as foundation of data build
URL = "https://secure.qgiv.com/admin/qgivadmin/statistics/export_feature_adoption.php"
KEY = "DSQR59VwyFhw21PKDF4K"

rsp = requests.post(URL, data={'key': KEY})
data = json.loads(rsp.text)

In [5]:
data[0]['implementation_data'].keys()

dict_keys(['forms_with_notifications', 'orgs_with_notifications', 'forms_with_matchinggifts', 'service_integrations', 'custom_reports', 'merchant_accounts', 'embeds', 'promises', 'restrictions', 'custom_fields', 'pledges', 'recurring', 'events', 'events_donations', 'events_fields', 'events_promo', 'sms', 'donor_logins', 'thermometers', 'classifications', 'categories', 'registration_store', 'product_fields', 'shipping_address', 'nonfundraising_participants', 'teams', 'aggregator_page', 'category_fields'])

In [20]:
def get_org_for_form(form):
    if form != 0 and form is not None and len(forms[forms['form']==int(form)]) > 0:
        return forms[forms['form']==int(form)]['org'].iloc[0]
    else:
        return 0

In [21]:
normalized_agg_data = {
    'implementation_data': {}, 
    'bidirectional_adoption_data': {},
    'institutional_adoption_data': {}
}
for data_key in ['implementation_data', 'bidirectional_adoption_data']:
    for element_key in data[0][data_key]:
        if data[0][data_key][element_key] is None or data[0][data_key][element_key] == '0' or element_key == 'merchant_accounts' or element_key == 'events_promo':
            continue
        elif type(data[0][data_key][element_key][0]) == str:
            # list of ID's
            if 'org' in element_key:
                cols = ['org']
            else:
                cols = ['form']
            df = pd.DataFrame(data[0][data_key][element_key], columns=cols)
        else:
            df = pd.DataFrame(data[0][data_key][element_key])
            
        # add org if we don't yet have it
        if 'org' not in df.columns:
            df['org'] = df['form'].apply(get_org_for_form)
        
        # normalize count column name
        for c in df.columns:
            if 'count' in c:
                df['count'] = df[c]
                df.drop(c, axis=1, inplace=True)
        
        # type casting
        df['org'] = df['org'].fillna(0).astype(int)
        if 'count' in df.columns:
            df['count'] = df['count'].fillna(0).astype(int)
        
        data_entry = {
            'adoption_rate': float(len(df['org'].unique())) / float(len(orgs)),
            'meta': {
                'count_orgs': len(df['org'].unique())
            }
        }
        
        if 'form' in df.columns:
            data_entry['meta']['count_forms'] = len(df['form'].unique())
            if 'count' in df.columns:
                data_entry['meta']['per_form'] = df['count'].mean()
                data_entry['meta']['per_org'] = df[df['org']!=0].groupby("org")['count'].sum().mean()
        else:
            if 'count' in df.columns:
                data_entry['meta']['per_org'] = df['count'].mean()
            
        normalized_agg_data[data_key][element_key] = data_entry
        
        if data_key == 'bidirectional_adoption_data':
            df_orgs = df[df['org']!=0].groupby("org")['count'].sum().reset_index()
            df_orgs = df_orgs[df_orgs['org'].isin(org_transaction_count['org'].tolist())]
            
            df_orgs['len_trans_org'] = df_orgs['org'].apply(lambda x: org_transaction_count[org_transaction_count['org']==x]['count'].iloc[0])
            df_orgs['perc_all_trans'] = df_orgs['count'] / df_orgs['len_trans_org']
            
            normalized_agg_data['institutional_adoption_data'][element_key] = {
                'adoption_rate': float(len(df_orgs[df_orgs['perc_all_trans']>0.15])) / float(len(orgs)),
                'meta': {
                    'count_orgs': len(df_orgs[df_orgs['perc_all_trans']>0.15])
                }
            }

In [36]:
for k in normalized_agg_data.keys():
    if 'events_donatiosn' in normalized_agg_data[k]:
        normalized_agg_data[k]['events_donations'] = normalized_agg_data[k]['events_donatiosn']

In [37]:
print(normalized_agg_data.keys())
print(normalized_agg_data['institutional_adoption_data'])

dict_keys(['implementation_data', 'bidirectional_adoption_data', 'institutional_adoption_data'])
{'giftassist_transactions': {'adoption_rate': 0.18963992419456727, 'meta': {'count_orgs': 1501}}, 'restrictions': {'adoption_rate': 0.1861023373341756, 'meta': {'count_orgs': 1473}}, 'fielddata': {'adoption_rate': 0.05824384080859128, 'meta': {'count_orgs': 461}}, 'pledge': {'adoption_rate': 0.00012634238787113077, 'meta': {'count_orgs': 1}}, 'recurring': {'adoption_rate': 0.23133291219204044, 'meta': {'count_orgs': 1831}}, 'events': {'adoption_rate': 0.1602021478205938, 'meta': {'count_orgs': 1268}}, 'events_donatiosn': {'adoption_rate': 0.6207201516108655, 'meta': {'count_orgs': 4913}}, 'events_fielddata': {'adoption_rate': 0.10764371446620341, 'meta': {'count_orgs': 852}}, 'event_promo': {'adoption_rate': 0.00012634238787113077, 'meta': {'count_orgs': 1}}, 'dl_onetime': {'adoption_rate': 0.08237523689197726, 'meta': {'count_orgs': 652}}, 'dl_recurring': {'adoption_rate': 0.03436512950094

In [38]:
pkl.dump(normalized_agg_data, open("adoption_data.pkl", 'wb'))

### update table data

In [39]:
# update table data
TABLES = ['org', 'form', 'embed', 'thermometers', 'facebookfundraiser', 'givi', 
          'badges', 'emailcampaign', 'smscampaign', 'smspledgereminders', 'lists']

In [40]:
def fetch_table(table):
    url = 'https://secure.qgiv.com/admin/qgivadmin/utilities/export_tables.php'
    payload = {'key': 'DSQR59VwyFhw21PKDF4K', 'table': table}

    rsp = requests.post(url, data=payload)
    json_data = json.loads(rsp.content)

    return json_data[0]

In [41]:
for table in TABLES:
    print("fetching {}".format(table))
    data = fetch_table(table)
    
    if table != 'facebookfundraiser': 
        df = pd.DataFrame(data)

        if len(df) > 0:
            filename = "{}.exp.csv".format(table)
            save_dataframe_to_file("qgiv-stats-data", filename, df)
    else:
        fundraisers = pd.DataFrame(data['facebook_fundraisers'])
        settings = pd.DataFrame(data['social_settings'])

        if len(fundraisers) > 0:
            filename = "facebook_fundraisers.exp.csv"
            save_dataframe_to_file("qgiv-stats-data", filename, fundraisers)

            filename = "social_settings.exp.csv"
            save_dataframe_to_file("qgiv-stats-data", filename, settings)

fetching org
uploading to S3
Done
fetching form
uploading to S3
Done
fetching embed
uploading to S3
Done
fetching thermometers
uploading to S3
Done
fetching facebookfundraiser
uploading to S3
Done
uploading to S3
Done
fetching givi
uploading to S3
Done
fetching badges
uploading to S3
Done
fetching emailcampaign
uploading to S3
Done
fetching smscampaign
uploading to S3
Done
fetching smspledgereminders
uploading to S3
Done
fetching lists
uploading to S3
Done


### update transactions

In [42]:
q = "select * from transactions where status='A'"
trans = redshift_query_read(q)

trans_src_form = trans.groupby(['form', 'source'])['id'].count().reset_index().pivot(index='form', columns='source', values='id').reset_index().fillna(0)
trans_src_org = trans.groupby(['org', 'source'])['id'].count().reset_index().pivot(index='org', columns='source', values='id').reset_index().fillna(0)

len_forms = len(trans['form'].unique())
len_orgs = len(trans['org'].unique())

In [43]:
all_sources = list(trans['source'].unique())

trans_src_form['all'] = trans_src_form[all_sources].sum(axis=1)
trans_src_org['all'] = trans_src_org[all_sources].sum(axis=1)
for source in all_sources:
    trans_src_form["{}_perc".format(source)] = (trans_src_form[source] / trans_src_form['all']) * 100
    trans_src_org["{}_perc".format(source)] = (trans_src_org[source] / trans_src_org['all']) * 100
trans_src_form.head(3)

source,form,don_form,fb,givi,kiosk,mobile,mobilevt,p2p,sms,vt,all,vt_perc,don_form_perc,p2p_perc,kiosk_perc,mobile_perc,mobilevt_perc,sms_perc,fb_perc,givi_perc
0,0,7.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1,1522.0,0.0,0.0,458.0,759.0,329.0,0.0,4.0,2980.0,6052.0,49.239921,25.148711,0.0,7.567746,12.541309,5.436219,0.066094,0.0,0.0
2,2,445.0,0.0,0.0,42.0,4.0,3.0,3.0,0.0,89.0,586.0,15.187713,75.938567,0.511945,7.167235,0.682594,0.511945,0.0,0.0,0.0


In [44]:
source_data = {}

for source in all_sources:
    trans_msk = (trans['source']==source)
    mean_trans_amt = trans[trans_msk]['amount'].mean()
    
    source_data[source] = {
        'average_percentage_per_form': trans_src_form["{}_perc".format(source)].mean(),
        'average_count_per_form': trans_src_form[source].mean(),
        'average_percentage_per_org': trans_src_org["{}_perc".format(source)].mean(),
        'average_count_per_org': trans_src_org[source].mean(),
        'mean_transaction_value': mean_trans_amt,
        'bidirectional_adoption_rate': float(len(trans[trans_msk]['org'].unique())) / float(len_orgs),
        'institutional_adoption_rate': float(len(trans_src_org[trans_src_org["{}_perc".format(source)]>0.15]['org'].unique().tolist())) / float(len_orgs)
    }
    for threshold in [0., 10., 25., 50., 75., 90.]:
        source_data[source]['forms_gt_{}'.format(threshold)] = len(trans_src_form[trans_src_form["{}_perc".format(source)]>threshold])
        source_data[source]['orgs_gt_{}'.format(threshold)] = len(trans_src_org[trans_src_org["{}_perc".format(source)]>threshold])

In [45]:
# adding store purchases
trans['is_purchase'] = trans['purchases_count']>0

purch_forms = trans.groupby(['form', 'is_purchase'])['id'].count().reset_index().pivot(index='form', columns='is_purchase', values='id').reset_index().fillna(0)
purch_forms['perc'] = (purch_forms[True] / (purch_forms[True] + purch_forms[False])) * 100.
purch_orgs = trans.groupby(['org', 'is_purchase'])['id'].count().reset_index().pivot(index='org', columns='is_purchase', values='id').reset_index().fillna(0)
purch_orgs['perc'] = (purch_orgs[True] / (purch_orgs[True] + purch_orgs[False])) * 100.

source_data['store_purchases'] = {
    'average_percentage_per_form': purch_forms['perc'].mean(),
    'average_count_per_form': purch_forms[True].mean(),
    'average_percentage_per_org': purch_orgs['perc'].mean(),
    'average_count_per_org': purch_orgs[True].mean(),
    'mean_transaction_value': trans[trans['purchases_count']>0]['purchases_amt'].mean(),
    'bidirectional_adoption_rate': float(len(purch_orgs[purch_orgs[True]>0.]['org'].unique())) / float(len_orgs),
    'institutional_adoption_rate': float(len(purch_orgs[purch_orgs['perc']>0.15]['org'].unique().tolist())) / float(len_orgs)
}
for threshold in [0., 10., 25., 50., 75., 90.]:
    source_data['store_purchases']['forms_gt_{}'.format(threshold)] = len(purch_forms[purch_forms['perc']>threshold])
    source_data['store_purchases']['orgs_gt_{}'.format(threshold)] = len(purch_orgs[purch_orgs['perc']>threshold])

In [46]:
# adding recurring
trans['is_recurring'] = trans['recurring']!=0

rec_forms = trans.groupby(['form', 'is_recurring'])['id'].count().reset_index().pivot(index='form', columns='is_recurring', values='id').reset_index().fillna(0)
rec_forms['perc'] = (rec_forms[True] / (rec_forms[True] + rec_forms[False])) * 100.
rec_orgs = trans.groupby(['org', 'is_recurring'])['id'].count().reset_index().pivot(index='org', columns='is_recurring', values='id').reset_index().fillna(0)
rec_orgs['perc'] = (rec_orgs[True] / rec_orgs[True] + rec_orgs[False]) * 100.

source_data['recurring'] = {
    'average_percentage_per_form': rec_forms['perc'].mean(),
    'average_count_per_form': rec_forms[True].mean(),
    'average_percentage_per_org': rec_orgs['perc'].mean(),
    'average_count_per_org': rec_orgs[True].mean(),
    'mean_transaction_value': trans[trans['is_recurring'].fillna(False)]['amount'].mean(),
    'bidirectional_adoption_rate': float(len(rec_orgs[rec_orgs[True]>0.]['org'].unique())) / float(len_orgs),
    'institutional_adoption_rate': float(len(rec_orgs[rec_orgs['perc']>0.15]['org'].unique().tolist())) / float(len_orgs)
}
for threshold in [0., 10., 25., 50., 75., 90.]:
    source_data['recurring']['forms_gt_{}'.format(threshold)] = len(rec_forms[rec_forms['perc']>threshold])
    source_data['recurring']['orgs_gt_{}'.format(threshold)] = len(rec_orgs[rec_orgs['perc']>threshold])

In [47]:
# adding P2P registrations
trans['is_registration'] = trans['registrations_count']>0

reg_forms = trans.groupby(['form', 'is_registration'])['id'].count().reset_index().pivot(index='form', columns='is_registration', values='id').reset_index().fillna(0)
reg_forms['perc'] = (reg_forms[True] / (reg_forms[True] + reg_forms[False])) * 100.
reg_orgs = trans.groupby(['org', 'is_registration'])['id'].count().reset_index().pivot(index='org', columns='is_registration', values='id').reset_index().fillna(0)
reg_orgs['perc'] = (reg_orgs[True] / (reg_orgs[True] + reg_orgs[False])) * 100.

source_data['p2p_registrations'] = {
    'average_percentage_per_form': reg_forms['perc'].mean(),
    'average_count_per_form': reg_forms[True].mean(),
    'average_percentage_per_org': reg_orgs['perc'].mean(),
    'average_count_per_org': reg_orgs[True].mean(),
    'mean_transaction_value': trans[trans['registrations_count']>0]['registrations_amt'].mean(),
    'bidirectional_adoption_rate': float(len(reg_orgs[reg_orgs[True]>0.]['org'].unique())) / float(len_orgs),
    'institutional_adoption_rate': float(len(reg_orgs[reg_orgs['perc']>0.15]['org'].unique().tolist())) / float(len_orgs)
}
for threshold in [0., 10., 25., 50., 75., 90.]:
    source_data['p2p_registrations']['forms_gt_{}'.format(threshold)] = len(reg_forms[reg_forms['perc']>threshold])
    source_data['p2p_registrations']['orgs_gt_{}'.format(threshold)] = len(reg_orgs[reg_orgs['perc']>threshold])

In [48]:
source_data

{'vt': {'average_percentage_per_form': 6.667099980646715,
  'average_count_per_form': 34.421935116135465,
  'average_percentage_per_org': 9.164216673562702,
  'average_count_per_org': 142.76694247438928,
  'mean_transaction_value': 136.76648859849456,
  'bidirectional_adoption_rate': 0.6790780141843972,
  'institutional_adoption_rate': 0.6213553979511426,
  'forms_gt_0.0': 6524,
  'orgs_gt_0.0': 3447,
  'forms_gt_10.0': 2746,
  'orgs_gt_10.0': 1084,
  'forms_gt_25.0': 1779,
  'orgs_gt_25.0': 619,
  'forms_gt_50.0': 979,
  'orgs_gt_50.0': 238,
  'forms_gt_75.0': 635,
  'orgs_gt_75.0': 114,
  'forms_gt_90.0': 528,
  'orgs_gt_90.0': 91},
 'don_form': {'average_percentage_per_form': 59.49686594145226,
  'average_count_per_form': 204.37790338669075,
  'average_percentage_per_org': 64.58538492666025,
  'average_count_per_org': 847.6690307328605,
  'mean_transaction_value': 148.05529930511568,
  'bidirectional_adoption_rate': 0.9446414499605988,
  'institutional_adoption_rate': 0.931442080378

In [49]:
pkl.dump(source_data, open('adoption_data.trans.pkl', 'wb'))

## 2. Integrate table data into adoption export data

In [50]:
# load adoption data
DATA_ADOPTION = "adoption_data.pkl"
adoption_data = pkl.load(open(DATA_ADOPTION, "rb"))

In [51]:
# load table data
table_agg_files = list_files("qgiv-stats-data", search_key=".agg")

table_data = {}
for f in table_agg_files:
    table_data[f.replace(".agg.csv", "")] = get_dataframe_from_file("qgiv-stats-data", f)

badges.agg.csv (0MB)
emailcampaigns.agg.csv (0MB)
embeds.agg.csv (0MB)
lists.agg.csv (0MB)
smscampaigns.agg.csv (0MB)
smspledges.agg.csv (0MB)
thermometers.agg.csv (0MB)
--------------------------------------------------
Matched files: 7 files (0.0GB)
Bucket qgiv-stats-data contains 32 files (0.8GB)


In [52]:
refined_table_data = {}
for e in table_data.keys():
    cols = table_data[e].columns
    data = {
        'adoption_rate': 0,
        'meta': {}
    }
    
    if 'org' in cols and 'form' in cols:
        # org & form available
        data['meta']['average_count_per_org'] = table_data[e].groupby('org')['count'].sum().mean()
        data['meta']['average_count_per_form'] = table_data[e]['count'].mean()
        data['meta']['count_forms'] = len(table_data[e])
        data['meta']['count_orgs'] = len(table_data[e]['org'].unique())
        data['meta']['adoption_rate_forms'] = float(data['meta']['count_forms']) / float(len(forms))
        
        data['adoption_rate'] = float(data['meta']['count_orgs']) / float(len(orgs))
    elif 'org' in cols:
        # only org level available
        data['meta']['average_count_per_org'] = table_data[e]['count'].mean()
        data['meta']['count_orgs'] = len(table_data[e])
        
        data['adoption_rate'] = float(data['meta']['count_orgs']) / float(len(orgs))
    elif 'form' in cols:
        # only form level available
        data['meta']['average_count_per_form'] = table_data[e]['count'].mean()
        data['meta']['count_forms'] = len(table_data[e])
        
        data['adoption_rate'] = float(data['meta']['count_forms']) / float(len(forms))
        
    refined_table_data[e] = data

In [53]:
refined_table_data

{'badges': {'adoption_rate': 0.174857864813645,
  'meta': {'average_count_per_org': 38.05274566473989,
   'average_count_per_form': 8.656311637080869,
   'count_forms': 6084,
   'count_orgs': 1384,
   'adoption_rate_forms': 0.16031620553359685}},
 'emailcampaigns': {'adoption_rate': 0.03790271636133923,
  'meta': {'average_count_per_org': 9.153333333333334,
   'average_count_per_form': 4.173252279635259,
   'count_forms': 658,
   'count_orgs': 300,
   'adoption_rate_forms': 0.017338603425559946}},
 'embeds': {'adoption_rate': 0.2886923562855338,
  'meta': {'average_count_per_org': 3.803501094091904, 'count_orgs': 2285}},
 'lists': {'adoption_rate': 0.01794061907770057,
  'meta': {'average_count_per_org': 3.8098591549295775, 'count_orgs': 142}},
 'smscampaigns': {'adoption_rate': 0.011749842072015161,
  'meta': {'average_count_per_org': 8.580645161290322, 'count_orgs': 93}},
 'smspledges': {'adoption_rate': 0.049273531269740996,
  'meta': {'average_count_per_org': 3.9846153846153847,
  

In [54]:
for table_key in refined_table_data.keys():
    adoption_data['implementation_data'][table_key] = refined_table_data[table_key]

In [55]:
pkl.dump(adoption_data, open("adoption_data.pkl", 'wb'))

## 3. integrate transaction data

In [56]:
# load adoption data
DATA_ADOPTION = "adoption_data.pkl"
adoption_data = pkl.load(open(DATA_ADOPTION, "rb"))

TRANS_ADOPTION = 'adoption_data.trans.pkl'
source_data = pkl.load(open(TRANS_ADOPTION, 'rb'))

In [57]:
for source_key in source_data.keys():
    adoption_data['bidirectional_adoption_data'][source_key] = {
        'adoption_rate': source_data[source_key]['bidirectional_adoption_rate'],
        'meta': source_data[source_key]
    }
    adoption_data['institutional_adoption_data'][source_key] = {
        'adoption_rate': source_data[source_key]['institutional_adoption_rate']
    }

In [58]:
for k in adoption_data.keys():
    print(k)
    print("\t" + str(adoption_data[k].keys()))
    print()

implementation_data
	dict_keys(['orgs_with_notifications', 'service_integrations', 'custom_reports', 'embeds', 'promises', 'restrictions', 'custom_fields', 'pledges', 'recurring', 'events', 'events_donations', 'events_fields', 'sms', 'donor_logins', 'thermometers', 'classifications', 'categories', 'registration_store', 'product_fields', 'shipping_address', 'nonfundraising_participants', 'teams', 'category_fields', 'badges', 'emailcampaigns', 'lists', 'smscampaigns', 'smspledges'])

bidirectional_adoption_data
	dict_keys(['giftassist_transactions', 'restrictions', 'fielddata', 'pledge', 'recurring', 'events', 'events_donatiosn', 'events_fielddata', 'event_promo', 'dl_onetime', 'dl_recurring', 'classifications', 'categories', 'purchases', 'purchases_shipping', 'registrations_nonfundraising', 'registrations_subregistrants', 'teams', 'events_donations', 'vt', 'don_form', 'p2p', 'kiosk', 'mobile', 'mobilevt', 'sms', 'fb', 'givi', 'store_purchases', 'p2p_registrations'])

institutional_adopt