In [2]:
import sys
sys.path.insert(1, '../../scripts/')
from s3_support import *
%matplotlib inline

# load data

In [3]:
q = '''select * from form where type=3'''
forms_table = redshift_query_read(q, schema='production')

In [4]:
forms_table.head(2)

Unnamed: 0,id,org,datecreated,datelive,path,status,type,template,enableenddate,enablestartdate,recurringplanenddaterequired,categorization,isvirtual
0,1637,1648,NaT,2011-11-10 11:27:42,andhob,11,3,9,True,False,False,0,0
1,1782,53,2012-01-13 09:44:43,2007-09-10 00:00:00,nubawh,11,3,9,True,False,False,0,0


## processing

In [5]:
q = '''select
            t.org,
            t.form,
            date_trunc('week', t.date) as week,
            count(t.id) as trans_count,
            sum(t.amount) as trans_vol,
            sum(t.donations_count) as donations_count,
            sum(t.donations_amt) as donations_vol,
            sum(t.purchases_count) as purchases_count,
            sum(t.purchases_amt) as purchases_vol,
            sum(t.registrations_count) as registrations_count,
            sum(t.registrations_amt) as registrations_vol,
            sum(t.events_amt) as events_vol,
            sum(t.events_count) as events_count
        from transactions as t
            left join form as f on f.id=t.form
        where
            t.status='A' and
            f.type=3
        group by date_trunc('week', t.date), t.form, t.org'''
trans = redshift_query_read(q, schema='production')

## p2p

In [6]:
q = "select * from analyticsp2p_weekly"
df = redshift_query_read(q, schema='public')

## merging

In [7]:
# $41k floor for top 10 volume
top_10_vol_floor = 41000

form_totals = trans.groupby(['form', 'org'])[['trans_vol', 'donations_vol', 'purchases_count', 'purchases_vol', 'registrations_count', 'registrations_vol']].sum().reset_index()
form_totals['is_top_performer'] = form_totals['trans_vol']>top_10_vol_floor
top_performers = form_totals[form_totals['is_top_performer']]['form'].tolist()

In [8]:
trans['is_top_performer'] = trans['form'].isin(top_performers)
df['is_top_performer'] = df['form'].isin(top_performers)

## top & bottom performers

In [9]:
# top 10%
len_all = len(form_totals)
top_10perc = int(len_all/10)

top_10_total_vol = form_totals.sort_values('trans_vol', ascending=False).head(top_10perc)
form_totals['is_top_performer'] = form_totals['form'].isin(top_10_total_vol['form'].tolist())

In [10]:
# bottom 10%
len_all = len(form_totals)
bottom_10perc = int(len_all/10)

bottom_10_total_vol = form_totals.sort_values('trans_vol', ascending=True).head(bottom_10perc)
form_totals['is_bottom_performer'] = form_totals['form'].isin(bottom_10_total_vol['form'].tolist())
df['is_bottom_performer'] = df['form'].isin(bottom_10_total_vol['form'].tolist())

# analysis

In [11]:
settings = ['share_tfp', 'share_therm', 'share_donation', 
            'allows_social', 'social_templt_count', 
            'social_auto', 'pcnt_posts', 'mon_posts', 
            'count_posts', 'date_posts']

In [12]:
top = df[df['is_top_performer']][settings].agg(['mean', 'median']).transpose()
mid = df[~df['is_top_performer']&~df['is_bottom_performer']][settings].agg(['mean', 'median']).transpose()
bot = df[df['is_bottom_performer']][settings].agg(['mean', 'median']).transpose()

top.columns = ['top mean', 'top median']
mid.columns = ['mid mean', 'mid median']
bot.columns = ['bottom mean', 'bottom median']

means = top.merge(mid, left_index=True, right_index=True).merge(bot, left_index=True, right_index=True)

In [13]:
means

Unnamed: 0,top mean,top median,mid mean,mid median,bottom mean,bottom median
share_tfp,0.0,0.0,0.0,0.0,0.0,0.0
share_therm,0.0,0.0,0.0,0.0,0.0,0.0
share_donation,0.0,0.0,0.0,0.0,0.0,0.0
allows_social,0.04503,0.0,0.02388,0.0,0.007207,0.0
social_templt_count,0.0,0.0,0.0,0.0,0.0,0.0
social_auto,0.198266,0.0,0.014728,0.0,0.000219,0.0
pcnt_posts,0.087267,0.0,0.005965,0.0,0.000219,0.0
mon_posts,0.0,0.0,0.0,0.0,0.0,0.0
count_posts,0.071941,0.0,0.006537,0.0,0.0,0.0
date_posts,0.012204,0.0,0.000908,0.0,0.0,0.0


In [14]:
means[[c for c in means.columns if 'mean' in c]]

Unnamed: 0,top mean,mid mean,bottom mean
share_tfp,0.0,0.0,0.0
share_therm,0.0,0.0,0.0
share_donation,0.0,0.0,0.0
allows_social,0.04503,0.02388,0.007207
social_templt_count,0.0,0.0,0.0
social_auto,0.198266,0.014728,0.000219
pcnt_posts,0.087267,0.005965,0.000219
mon_posts,0.0,0.0,0.0
count_posts,0.071941,0.006537,0.0
date_posts,0.012204,0.000908,0.0


## social auto fundraising differences

In [20]:
socials = df.groupby('form').agg({'don_volume': 'sum', 'social_auto': 'mean', 'allows_social': 'mean'}).reset_index()
socials.tail(3)

Unnamed: 0,form,don_volume,social_auto,allows_social
11990,992953,0.0,0.0,0.0
11991,992968,0.0,0.0,0.0
11992,992973,0.0,0.0,0.0


In [22]:
socials.groupby(['allows_social'])['don_volume'].mean()

allows_social
0.0    8784.819777
1.0       0.000000
Name: don_volume, dtype: float64

In [29]:
print("Social auto counts:")
print("0: ${:,.2f} ({:,})".format(socials[socials['social_auto']==0]['don_volume'].mean(), len(socials[socials['social_auto']==0])))
print("1-2: ${:,.2f} ({:,})".format(socials[(socials['social_auto']==1)|(socials['social_auto']==2)]['don_volume'].mean(), len(socials[(socials['social_auto']==1)|(socials['social_auto']==2)])))
print("3-5: ${:,.2f} ({:,})".format(socials[(socials['social_auto']>=3)&(socials['social_auto']<=5)]['don_volume'].mean(), len(socials[(socials['social_auto']>=3)&(socials['social_auto']<=5)])))
print("6+: ${:,.2f} ({:,})".format(socials[socials['social_auto']>=6]['don_volume'].mean(), len(socials[socials['social_auto']>=6])))

Social auto counts:
0: $6,909.31 (11,498)
1-2: $3,591.18 (19)
3-5: $19,802.11 (5)
6+: $296,315.48 (3)


In [28]:
print("0: ${:,.2f} ({:,})".format(socials[socials['social_auto']==0]['don_volume'].mean(), len(socials[socials['social_auto']==0])))
print("!0: ${:,.2f} ({:,})".format(socials[socials['social_auto']!=0]['don_volume'].mean(), len(socials[socials['social_auto']!=0])))

0: $6,909.31 (11,498)
!0: $49,013.34 (495)


# teams + sharing

sharing presumably can increase participation by third parties and, when engaged with, might be further encouraged by the ability of others to join the same team as the friend that shared

In [41]:
q = '''select
            t.form,
            count(t.id) as transactions,
            sum(t.registrations_amt) as registrations_amt,
            sum(t.registrations_count) as registrations_count
        from transactions as t
            left join form as f on f.id=t.form
        where
            t.status='A' and
            f.type=3
        group by t.form'''
regs = redshift_query_read(q, schema='production')

In [49]:
teams_settings = df.groupby('form').agg({
    'allows_teams': 'max',
    'allows_reg_team_join': 'mean',
    'don_volume': 'sum'
}).reset_index()
teams = teams_settings.merge(regs, on='form').merge(socials.drop('don_volume', axis=1), on='form')

In [55]:
teams.groupby(['allows_teams', 'allows_social'])[['registrations_count', 'registrations_amt']].mean().reset_index()

Unnamed: 0,allows_teams,allows_social,registrations_count,registrations_amt
0,0,0.0,20.722996,439.150177
1,1,0.0,130.421196,1685.155439
2,1,1.0,199.694444,3752.136204
3,2,0.0,95.241935,1228.806452
4,3,0.0,211.263158,4458.157895
5,4,0.0,96.5,777.5
6,5,0.0,95.702079,1103.310647
7,5,1.0,154.317073,4477.97561


Here we can see that __events that allow teams see about 50% more registrations when they allow social sharing__. We also see a 100% and
400% increase in registration volume in the two sample groups.

In [58]:
teams.groupby('allows_social')[['registrations_count', 'registrations_amt']].mean().reset_index()

Unnamed: 0,allows_social,registrations_count,registrations_amt
0,0.0,93.434144,1258.95791
1,1.0,187.208054,3951.863826


Allowing social sharing presents a 100% increase in registrations and more than 300% increase in registrations volume.