In [1]:
import sys
import pandas as pd
import numpy as np

sys.path.insert(1, '../../../../scripts/')
from s3_support import *

# Load transactions data for performance indicator

In [2]:
trans = redshift_query_read("select count(id) as count, sum(amount) as volume, org, form, date_trunc('month', date) as month from transactions where status='A' group by org, form, date_trunc('month', date)")

In [3]:
trans_agg = []
for form in trans['form'].unique():
    _trans = trans[trans['form']==form].copy()
    
    _trans.sort_values('month', ascending=True, inplace=True)
    _trans['growth_vol'] = _trans['volume'].diff() / _trans['volume'].shift()
    _trans['growth_count'] = _trans['count'].diff() / _trans['count'].shift()
    
    trans_agg.append({
        'form': form,
        'org': _trans['org'].iloc[0],
        'volume': _trans['growth_vol'].tail(12).mean(),
        'count': _trans['growth_count'].tail(12).mean(),
        'last_12_available': len(_trans.tail(12))
    })

In [4]:
trans_agg = pd.DataFrame(trans_agg)
trans_agg.head()

Unnamed: 0,count,form,last_12_available,org,volume
0,-0.47561,112,3,138,-0.65247
1,0.414418,7,12,25,2.212562
2,0.026137,1,12,6,-0.020075
3,0.190199,117,12,143,1.464255
4,-0.036596,26,12,52,-0.000772


In [5]:
# dropping forms w/ less than 3 months data
less_than_3_months = len(trans_agg[trans_agg['last_12_available']<=3])

print("{:.2f}% of forms have less than 3 months performance history".format((less_than_3_months / len(trans_agg)) * 100.))
print("{} forms left after dropping those with less than 3 months history".format(len(trans_agg[trans_agg['last_12_available']>3])))

trans_agg = trans_agg[trans_agg['last_12_available']>3]

52.37% of forms have less than 3 months performance history
10502 forms left after dropping those with less than 3 months history


In [6]:
print("System wide means:")
print("Volume: {:.2f}".format(trans_agg['volume'].replace([np.inf, -np.inf], np.nan).dropna().mean()))
print("Count: {:.2f}".format(trans_agg['count'].replace([np.inf, -np.inf], np.nan).dropna().mean()))

System wide means:
Volume: 38.17
Count: 1.72


# Load table data

In [7]:
ENTITY_TYPES = {
    '3': 'organization',
    '4': 'form',
    '10': 'registration'
}

In [13]:
tables = ['badges', 'emailcampaign', 'embed', 'facebookfundraisers', 'lists', 
          'reminders', 'smscampaign', 'smspledge', 'socialsettings', 'thermometers']
data = {}

for table in tables:
    data[table] = redshift_query_read("select * from {} where date=(select max(date) from {})".format(table, table), schema='production')

## email campaigns

In [14]:
emailcampaigns = data['emailcampaign'][data['emailcampaign']['entitytype']==4].groupby('entity')['entitytype'].count().reset_index()
emailcampaigns.columns = ['form', 'count']

In [15]:
len_ec = len(emailcampaigns)
len_transagg = len(trans_agg)

mrgd = emailcampaigns.merge(trans_agg[['form', 'volume', 'count']], on='form')
mrgd.columns = ['form', 'campaigns', 'volume', 'count']

print("{} email campaigns; {} trans agg; {} merged".format(len_ec, len_transagg, len(mrgd)))

print("Means:")
for c in mrgd.columns:
    if c != 'form':
        print("{}: {:.2f}".format(c, mrgd[c].replace([np.inf, -np.inf], np.nan).dropna().mean()))

723 email campaigns; 10502 trans agg; 413 merged
Means:
campaigns: 5.27
volume: 23.52
count: 6.13


In [16]:
mrgd[['campaigns', 'count', 'volume']].corr()

Unnamed: 0,campaigns,count,volume
campaigns,1.0,-0.033697,-0.028222
count,-0.033697,1.0,0.245359
volume,-0.028222,0.245359,1.0


In [18]:
forms_with_emailcampaigns = emailcampaigns['form'].unique().tolist()

len_with_emailcampaigns = len(forms_with_emailcampaigns)
perc_with_emailcampaigns = (float(len_with_emailcampaigns) / float(len(trans_agg['form'].unique()))) * 100.

print("Forms with email campaigns:")
print("\t{} forms ({:.2f}%)".format(len_with_emailcampaigns, perc_with_emailcampaigns))
print("\tVolume: {:.2f}".format(trans_agg[trans_agg['form'].isin(forms_with_emailcampaigns)]['volume'].replace([np.inf, -np.inf], np.nan).dropna().mean()))
print("\tCount: {:.2f}".format(trans_agg[trans_agg['form'].isin(forms_with_emailcampaigns)]['count'].replace([np.inf, -np.inf], np.nan).dropna().mean()))

print("Forms without email campaigns:")
print("\tVolume: {:.2f}".format(trans_agg[~trans_agg['form'].isin(forms_with_emailcampaigns)]['volume'].replace([np.inf, -np.inf], np.nan).dropna().mean()))
print("\tCount: {:.2f}".format(trans_agg[~trans_agg['form'].isin(forms_with_emailcampaigns)]['count'].replace([np.inf, -np.inf], np.nan).dropna().mean()))

Forms with email campaigns:
	723 forms (6.88%)
	Volume: 23.52
	Count: 6.13
Forms without email campaigns:
	Volume: 38.64
	Count: 1.54


The sample size is rather small but it does not appear that forms using email campaigns are raising more money than forms not using them.

## badges

In [19]:
badges = data['badges'].groupby('entity')['entitytype'].count().reset_index()
badges.columns = ['form', 'badges']
badges.head(3)

Unnamed: 0,form,badges
0,61496,8
1,389189,8
2,413762,8


In [20]:
print("{} badge entries".format(len(badges)))
print("{:.2f} badges per form".format(badges['badges'].mean()))

6473 badge entries
8.66 badges per form


In [21]:
mrgd = badges.merge(trans_agg[['form', 'volume', 'count']], on='form')
print("{} merged entries".format(len(mrgd)))

print("{:.2f} badges per form".format(mrgd['badges'].mean()))
print("{:.2f} mean volume growth".format(mrgd['volume'].replace([np.inf, -np.inf], np.nan).dropna().mean()))
print("{:.2f} mean count growth".format(mrgd['count'].mean()))

1674 merged entries
8.96 badges per form
21.58 mean volume growth
4.54 mean count growth


In [22]:
mrgd[['badges', 'volume', 'count']].corr()

Unnamed: 0,badges,volume,count
badges,1.0,-0.011574,-0.000359
volume,-0.011574,1.0,0.154073
count,-0.000359,0.154073,1.0


In [24]:
forms_with_badges = mrgd['form'].unique().tolist()

len_with_badges = len(forms_with_badges)
perc_with_badges = (float(len_with_badges) / float(len(trans_agg['form'].unique()))) * 100.

print("Forms with badges:")
print("\t{} forms ({:.2f}%)".format(len_with_badges, perc_with_badges))
print("\tVolume: {:.2f}".format(trans_agg[trans_agg['form'].isin(forms_with_badges)]['volume'].replace([np.inf, -np.inf], np.nan).dropna().mean()))
print("\tCount: {:.2f}".format(trans_agg[trans_agg['form'].isin(forms_with_badges)]['count'].replace([np.inf, -np.inf], np.nan).dropna().mean()))

print("Forms without badges:")
print("\tVolume: {:.2f}".format(trans_agg[~trans_agg['form'].isin(forms_with_badges)]['volume'].replace([np.inf, -np.inf], np.nan).dropna().mean()))
print("\tCount: {:.2f}".format(trans_agg[~trans_agg['form'].isin(forms_with_badges)]['count'].replace([np.inf, -np.inf], np.nan).dropna().mean()))

Forms with badges:
	1674 forms (15.94%)
	Volume: 21.58
	Count: 4.54
Forms without badges:
	Volume: 40.41
	Count: 1.18


Again we see that a feature does not correlate to greater growth. However, badges are a P2P only feature so it stands to reason that we cannot draw too much of a conclusion from this data point. The difference is sufficiently strong that I will not pursue further developing this (ie, isolating P2P forms to calculate a precise figure) due to it's being nearly half of the opposing subset growth rates.

## embed

In [16]:
embeds = data['embed'].groupby('entity')['entitytype'].count().reset_index()
embeds.columns = ['org', 'embeds']
embeds.head(3)

Unnamed: 0,org,embeds
0,6,1
1,9,3
2,13,2


In [17]:
print("{} embed entries".format(len(embeds)))
print("{:.2f} embeds per org".format(embeds['embeds'].mean()))

2342 embed entries
3.79 embeds per org


In [18]:
trans_org_agg = trans_agg.groupby('org')[['volume', 'count']].mean().reset_index()
trans_org_agg = trans_org_agg.replace([np.inf, -np.inf], np.nan)
trans_org_agg.dropna(inplace=True)

mrgd = embeds.merge(trans_org_agg[['org', 'volume', 'count']], on='org')

print("{} merged entries".format(len(mrgd)))
print("{:.2f} mean volume".format(mrgd['volume'].replace([np.inf, -np.inf], np.nan).dropna().mean()))
print("{:.2f} mean count".format(mrgd['count'].mean()))

1604 merged entries
61.23 mean volume
1.07 mean count


In [19]:
mrgd[['embeds', 'volume', 'count']].corr()

Unnamed: 0,embeds,volume,count
embeds,1.0,-0.020082,-0.014775
volume,-0.020082,1.0,0.148196
count,-0.014775,0.148196,1.0


## lists

In [20]:
lists = data['lists'].groupby("creatingentity")['creatingentitytype'].count().reset_index()
lists.columns = ['org', 'lists']
lists.head(3)

Unnamed: 0,org,lists
0,9,1
1,243,1
2,295,1


In [21]:
print("{} lists entries".format(len(lists)))
print("{:.2f} lists per org".format(lists['lists'].mean()))

147 lists entries
3.95 lists per org


In [22]:
mrgd = lists.merge(trans_org_agg[['org', 'volume', 'count']], on='org')

print("{} merged entries".format(len(mrgd)))
print("{:.2f} mean volume".format(mrgd['volume'].replace([np.inf, -np.inf], np.nan).dropna().mean()))
print("{:.2f} mean count".format(mrgd['count'].mean()))

47 merged entries
197.31 mean volume
4.68 mean count


In [23]:
mrgd[['lists', 'volume', 'count']].corr()

Unnamed: 0,lists,volume,count
lists,1.0,0.131103,-0.061716
volume,0.131103,1.0,0.152059
count,-0.061716,0.152059,1.0


## smscampaign

In [24]:
smscamp = data['smscampaign'].groupby("entity")['entitytype'].count().reset_index()
smscamp.columns = ['org', 'smscampaigns']
smscamp.head(3)

Unnamed: 0,org,smscampaigns
0,9,12
1,295,1
2,314,1


In [25]:
print("{} smscampaigns entries".format(len(smscamp)))
print("{:.2f} smscampaigns per org".format(smscamp['smscampaigns'].mean()))

98 smscampaigns entries
8.87 smscampaigns per org


In [26]:
mrgd = smscamp.merge(trans_org_agg[['org', 'volume', 'count']], on='org')

print("{} merged entries".format(len(mrgd)))
print("{:.2f} mean volume".format(mrgd['volume'].replace([np.inf, -np.inf], np.nan).dropna().mean()))
print("{:.2f} mean count".format(mrgd['count'].mean()))

31 merged entries
219.41 mean volume
2.13 mean count


In [27]:
mrgd[['smscampaigns', 'volume', 'count']].corr()

Unnamed: 0,smscampaigns,volume,count
smscampaigns,1.0,-0.035307,-0.186278
volume,-0.035307,1.0,0.69428
count,-0.186278,0.69428,1.0


## smspledge

In [26]:
smspled = data['smspledge'].groupby('form')['created'].count().reset_index()
smspled.columns = ['form', 'smspledges']
smspled.head(3)

Unnamed: 0,form,smspledges
0,36,1
1,38,10
2,147,1


In [27]:
print("{} smspledges entries".format(len(smspled)))
print("{:.2f} smspledges per form".format(smspled['smspledges'].mean()))

897 smspledges entries
2.07 smspledges per form


In [28]:
mrgd = smspled.merge(trans_agg[['form', 'volume', 'count']], on='form')

print("{} merged entries".format(len(mrgd)))
print("{:.2f} mean volume".format(mrgd['volume'].replace([np.inf, -np.inf], np.nan).dropna().mean()))
print("{:.2f} mean count".format(mrgd['count'].mean()))

343 merged entries
69.63 mean volume
2.55 mean count


In [29]:
mrgd[['smspledges', 'volume', 'count']].corr()

Unnamed: 0,smspledges,volume,count
smspledges,1.0,-0.000414,0.001226
volume,-0.000414,1.0,0.314184
count,0.001226,0.314184,1.0


In [30]:
forms_with_smspledges = mrgd['form'].unique().tolist()

len_with_smspledges = len(forms_with_smspledges)
perc_with_smspledges = (float(len_with_smspledges) / float(len(trans_agg['form'].unique()))) * 100.

print("Forms with SMS Pledges:")
print("\t{} forms ({:.2f}%)".format(len_with_smspledges, perc_with_smspledges))
print("\tVolume: {:.2f}".format(trans_agg[trans_agg['form'].isin(forms_with_smspledges)]['volume'].replace([np.inf, -np.inf], np.nan).dropna().mean()))
print("\tCount: {:.2f}".format(trans_agg[trans_agg['form'].isin(forms_with_smspledges)]['count'].replace([np.inf, -np.inf], np.nan).dropna().mean()))

print("Forms without SMS Pledges:")
print("\tVolume: {:.2f}".format(trans_agg[~trans_agg['form'].isin(forms_with_smspledges)]['volume'].replace([np.inf, -np.inf], np.nan).dropna().mean()))
print("\tCount: {:.2f}".format(trans_agg[~trans_agg['form'].isin(forms_with_smspledges)]['count'].replace([np.inf, -np.inf], np.nan).dropna().mean()))

Forms with SMS Pledges:
	343 forms (3.27%)
	Volume: 69.63
	Count: 2.55
Forms without SMS Pledges:
	Volume: 37.05
	Count: 1.69


The sample size is incredibly small at 3.27% but the growth is very strong. Nearly double the average growth rate of forms not using SMS pledges. Given the small sample size, I'm not sure how much weight we can put in it but the absolute number (343) is not insignificant.

## thermometers

In [31]:
therms = data['thermometers'].groupby("form")['targetentity'].count().reset_index()
therms.columns = ['form', 'thermometers']
therms.head(3)

Unnamed: 0,form,thermometers
0,1,1
1,2,1
2,28,1


In [32]:
print("{} thermometers entries".format(len(therms)))
print("{:.2f} thermometers per form".format(therms['thermometers'].mean()))

1974 thermometers entries
1.20 thermometers per form


In [33]:
mrgd = therms.merge(trans_agg[['form', 'volume', 'count']], on='form')

print("{} merged entries".format(len(mrgd)))
print("{:.2f} mean volume".format(mrgd['volume'].replace([np.inf, -np.inf], np.nan).dropna().mean()))
print("{:.2f} mean count".format(mrgd['count'].mean()))

702 merged entries
56.21 mean volume
1.64 mean count


In [34]:
mrgd[['thermometers', 'volume', 'count']].corr()

Unnamed: 0,thermometers,volume,count
thermometers,1.0,0.007408,0.003017
volume,0.007408,1.0,0.368754
count,0.003017,0.368754,1.0


In [36]:
forms_with_therm = mrgd['form'].unique().tolist()

len_with_therm = len(forms_with_therm)
perc_with_therm = (float(len_with_therm) / float(len(trans_agg['form'].unique()))) * 100.

print("Forms with thermometers:")
print("\t{} forms ({:.2f}%)".format(len_with_therm, perc_with_therm))
print("\tVolume: {:.2f}".format(trans_agg[trans_agg['form'].isin(forms_with_smspledges)]['volume'].replace([np.inf, -np.inf], np.nan).dropna().mean()))
print("\tCount: {:.2f}".format(trans_agg[trans_agg['form'].isin(forms_with_smspledges)]['count'].replace([np.inf, -np.inf], np.nan).dropna().mean()))

print("Forms without thermometers:")
print("\tVolume: {:.2f}".format(trans_agg[~trans_agg['form'].isin(forms_with_smspledges)]['volume'].replace([np.inf, -np.inf], np.nan).dropna().mean()))
print("\tCount: {:.2f}".format(trans_agg[~trans_agg['form'].isin(forms_with_smspledges)]['count'].replace([np.inf, -np.inf], np.nan).dropna().mean()))

Forms with thermometers:
	702 forms (6.68%)
	Volume: 69.63
	Count: 2.55
Forms without thermometers:
	Volume: 37.05
	Count: 1.69


Seeing similar numbers here as to SMS pledges. Nearly double the average growth rate of forms not using thermometers but with a relatively small sample size. The sample size is double that of SMS pledges however with almost 7% (700 forms).