In [1]:
import pandas as pd
import datetime, os

import sys, os, requests, json
sys.path.insert(1, '../../../scripts/')
from s3_support import *

import warnings
warnings.filterwarnings('ignore')



In [2]:
print("Load data")
q = '''select
            org,
            source,
            date_trunc('month', date) as month,
            sum(amount) as amount,
            sum(donations_amt) as donations_amt,
            sum(events_amt) as events_amt,
            sum(purchases_amt) as purchases_amt,
            sum(registrations_amt) as registrations_amt,
            sum(case when recurring!=0 then amount else 0 end) as recurring_amt
        from transactions
            where status='A'
            group by org, date_trunc('month', date), source'''
df_trans = redshift_query_read(q)

Load data


In [3]:
print("\tprepping analytics features - getting donor logins activation setting")
df_dl_trans = get_dataframe_from_file("qgiv-stats-data", "dl_transactions.csv")
org_dl_count = df_dl_trans.groupby('org')['count'].sum().reset_index()

	prepping analytics features - getting donor logins activation setting


In [4]:
print("\tprepping trans features")
print("\t\tdate handling")
df_trans['date'] = pd.to_datetime(df_trans['month'])

	prepping trans features
		date handling


In [6]:
print("\t\tcalculating transaction source & type percentages")
trans_types = ['amount', 'donations_amt', 'events_amt', 'purchases_amt', 'registrations_amt', 'recurring_amt']
trans_type_agg = df_trans.groupby(['org', 'month'])[trans_types].sum().reset_index()
for col in trans_types:
    if col != "amount":
        trans_type_agg["{}_perc".format(col)] = trans_type_agg[col] / trans_type_agg['amount']

		calculating transaction source & type percentages


In [7]:
trans_source_agg = df_trans[['org', 'source', 'amount']].groupby(['org', 'source'])['amount'].sum().reset_index().pivot(index='org', columns='source', values='amount').reset_index().fillna(0.)
sources = ['don_form', 'fb', 'givi', 'kiosk', 'mobile', 'mobilevt', 'p2p', 'sms', 'vt']
trans_source_agg['total'] = trans_source_agg[sources].sum(axis=1)
for s in sources:
    trans_source_agg[s+'_perc'] = trans_source_agg[s] / trans_source_agg['total']

In [8]:
print("Find designated groups")
print("\tlow donor logins")
orgs_low_dl = org_dl_count[org_dl_count['count']<10]['org'].tolist()

Find designated groups
	low donor logins


In [9]:
print("\tlow transaction source rates")
# recurring
orgs_low_recurring = trans_type_agg[trans_type_agg["recurring_amt_perc"]<.1]['org'].tolist()
# events
orgs_high_events = trans_type_agg[trans_type_agg["events_amt_perc"]>.5]['org'].tolist()
# sms
orgs_high_sms = trans_source_agg[trans_source_agg['sms_perc']>.5]['org'].tolist()
# p2p
orgs_only_p2p = trans_source_agg[trans_source_agg['p2p_perc']>.9]['org'].tolist()

	low transaction source rates


In [10]:
print("Output list of orgs with groups")
unique_orgs = list(set(orgs_low_recurring + orgs_high_events + orgs_high_sms + orgs_only_p2p + orgs_low_dl))
org_list = pd.DataFrame(unique_orgs, columns=['org'])
org_list['low_recurring'] = org_list['org'].apply(lambda x: True if x in orgs_low_recurring else False)
org_list['high_events'] = org_list['org'].apply(lambda x: True if x in orgs_high_events else False)
org_list['high_sms'] = org_list['org'].apply(lambda x: True if x in orgs_high_sms else False)
org_list['only_p2p'] = org_list['org'].apply(lambda x: True if x in orgs_only_p2p else False)
org_list['low_dl'] = org_list['org'].apply(lambda x: True if x in orgs_low_dl else False)
org_list['total'] = org_list[['low_recurring', 'low_dl', 'high_events', 'high_sms', 'only_p2p']].sum(axis=1)
org_list.sort_values('total', ascending=False, inplace=True)

Output list of orgs with groups


In [14]:
# remove previously recommended orgs
try:
    df_prior_preds = get_dataframe_from_file("qiv-stats-data", "preds.growth.csv")
    prior_preds = df_prior_preds['org'].tolist()
except:
    df_prior_preds = None
    prior_preds = []

In [19]:
orgs = redshift_query_read("select id, status from organization", schema="production")
active_orgs = orgs[orgs['status']==1]['id'].tolist()
org_names = get_dataframe_from_file("qgiv-stats-data", "organizations.names.csv")

In [21]:
# filter prior preds & not active orgs from preds
these_preds = org_list[(~org_list['org'].isin(prior_preds))&(org_list['org'].isin(active_orgs))].head(20).copy()
these_preds['date'] = datetime.datetime.today()

In [22]:
# store to prior preds
if df_prior_preds is not None:
    save_dataframe_to_file("qgiv-stats-data", "preds.growth.csv", df_prior_preds.append(these_preds))
else:
    save_dataframe_to_file("qgiv-stats-data", "preds.growth.csv", these_preds)

uploading to S3
Done


In [25]:
# print report
#these_preds.drop('date', axis=1, inplace=True)
these_preds['name'] = these_preds['org'].apply(lambda x: org_names[org_names['id']==x]['org_name'].iloc[0])
print(these_preds)

         org  low_recurring  high_events  high_sms  only_p2p  low_dl  total  \
2317  441647           True         True     False      True   False      3   
3733  443938           True         True      True     False   False      3   
4753   27639           True         True     False      True   False      3   
1623    9139           True         True     False      True   False      3   
3563  443613           True         True     False      True   False      3   
2262  441545           True         True     False      True   False      3   
2943  442623           True         True     False      True   False      3   
3663  443803           True         True     False      True   False      3   
3039  442777           True         True      True     False   False      3   
2481  441940           True         True     False      True   False      3   
3752  443969           True         True      True     False   False      3   
3840  444130           True         True     False  