In [1]:
import sys
sys.path.insert(1, '../../scripts/')
from s3_support import *
%matplotlib inline

# load data

In [2]:
# Load every column of the `form` table for rows with type=3 from the
# production schema; the id / categorization / isvirtual columns of this
# frame are used by the lookups further down.
# NOTE(review): type=3 presumably denotes peer-to-peer forms — confirm.
q = '''select * from form where type=3'''
forms_table = redshift_query_read(q, schema='production')

In [3]:
# Weekly transaction rollup: one row per (org, form, week) with transaction
# counts and summed amounts for donations, purchases, registrations and
# events. Only transactions with status 'A' on forms of type 3 are included.
# NOTE(review): because `f.type=3` is tested in the WHERE clause, rows for
# which the left join finds no form are discarded, so the join behaves like
# an inner join.
q = '''select
            t.org,
            t.form,
            date_trunc('week', t.date) as week,
            count(t.id) as trans_count,
            sum(t.amount) as trans_vol,
            sum(t.donations_count) as donations_count,
            sum(t.donations_amt) as donations_vol,
            sum(t.purchases_count) as purchases_count,
            sum(t.purchases_amt) as purchases_vol,
            sum(t.registrations_count) as registrations_count,
            sum(t.registrations_amt) as registrations_vol,
            sum(t.events_amt) as events_vol,
            sum(t.events_count) as events_count
        from transactions as t
            left join form as f on f.id=t.form
        where
            t.status='A' and
            f.type=3
        group by date_trunc('week', t.date), t.form, t.org'''
trans = redshift_query_read(q, schema='production')

In [4]:
# Load the full analyticsp2p_weekly table from the public schema. Later cells
# read its `form` and `teams_count` columns.
q = "select * from analyticsp2p_weekly"
df = redshift_query_read(q, schema='public')

### merging

In [5]:
# Lifetime transaction volume a form must exceed to be a top performer.
# $41k is the floor intended to isolate roughly the top 10% of forms by volume.
top_10_vol_floor = 41000

# Collapse the weekly rows into one lifetime-total row per (form, org) pair.
form_totals = (
    trans
    .groupby(['form', 'org'])[[
        'trans_vol', 'donations_vol',
        'purchases_count', 'purchases_vol',
        'registrations_count', 'registrations_vol',
    ]]
    .sum()
    .reset_index()
)

# Strictly greater than the floor: a form sitting exactly on it is not "top".
form_totals['is_top_performer'] = form_totals['trans_vol'].gt(top_10_vol_floor)
top_performers = form_totals.loc[form_totals['is_top_performer'], 'form'].tolist()

In [6]:
# Every form that is not a top performer is treated as a bottom performer
# (the remaining ~90% of forms).
len_all = form_totals.shape[0]
bottom_90perc = len_all - len(top_performers)

# Rank forms by lifetime volume, highest first, and keep only the lowest
# `bottom_90perc` rows of that ranking.
ranked_by_vol = form_totals.sort_values(by='trans_vol', ascending=False)
not_top_10_total_vol = ranked_by_vol.tail(bottom_90perc)
bottom_performers = not_top_10_total_vol['form'].tolist()

In [7]:
# Tag the weekly transaction rows and the weekly analytics rows with both
# performer flags, based on membership of their form id in each list.
for perf_frame in (trans, df):
    perf_frame['is_top_performer'] = perf_frame['form'].isin(top_performers)
    perf_frame['is_bottom_performer'] = perf_frame['form'].isin(bottom_performers)

# form_totals already carries is_top_performer, so only the bottom flag is
# added here.
form_totals['is_bottom_performer'] = form_totals['form'].isin(bottom_performers)

In [8]:
def form_cat(form):
    """Return the categorization stored in ``forms_table`` for a form id.

    Parameters
    ----------
    form
        Form id to look up; compared against ``forms_table['id']``.

    Returns
    -------
    The ``categorization`` value of the first ``forms_table`` row whose
    ``id`` equals ``form``, or ``0`` when no row matches.
    """
    # Single vectorised pass over the id column, instead of materialising all
    # ids as a Python list for a membership test on every call and then
    # filtering the frame a second time.
    matches = forms_table.loc[forms_table['id'] == form, 'categorization']
    if matches.empty:
        # NOTE(review): 0 doubles as the "not found" marker, so it cannot be
        # told apart from a stored categorization of 0.
        return 0
    return matches.iloc[0]

# Resolve each form id to its category, one lookup per row via form_cat.
form_totals['category'] = form_totals['form'].map(form_cat)

In [9]:
def form_isvirtual(form):
    """Return the ``isvirtual`` value stored in ``forms_table`` for a form id.

    Parameters
    ----------
    form
        Form id to look up; compared against ``forms_table['id']``.

    Returns
    -------
    The ``isvirtual`` value of the first ``forms_table`` row whose ``id``
    equals ``form``, or ``0`` when no row matches.
    """
    # Single vectorised pass over the id column, instead of materialising all
    # ids as a Python list for a membership test on every call and then
    # filtering the frame a second time.
    matches = forms_table.loc[forms_table['id'] == form, 'isvirtual']
    if matches.empty:
        # NOTE(review): an unknown form is reported as 0, the same value a
        # stored isvirtual of 0 would give.
        return 0
    return matches.iloc[0]

# Resolve each form id to its isvirtual value, one lookup per row via
# form_isvirtual.
form_totals['isvirtual'] = form_totals['form'].map(form_isvirtual)

In [10]:
# Attach the largest teams_count seen for each form in the weekly data.
# Any teams_count column left behind by an earlier run of this cell is
# dropped first, so re-running the cell does not produce
# teams_count_x / teams_count_y columns.
# NOTE(review): the default inner join drops forms that have no rows in df —
# pass how='left' if those forms should stay in the analysis.
teams_per_form = df.groupby('form')['teams_count'].max().reset_index()
form_totals = (
    form_totals
    .drop(columns='teams_count', errors='ignore')
    .merge(teams_per_form, on='form')
)

# analysis

## isvirtual

In [13]:
# Median and mean of the volume / registration metrics and the performer
# flags, split by isvirtual. The mean of a boolean flag is the share of forms
# carrying it. Transposed so each isvirtual value becomes a column.
(
    form_totals
    .groupby('isvirtual')[[
        'trans_vol', 'donations_vol', 'registrations_vol',
        'registrations_count', 'is_top_performer', 'is_bottom_performer',
    ]]
    .agg(['median', 'mean'])
    .T
)

Unnamed: 0,isvirtual,0,1
trans_vol,median,4991.75,7189.24
trans_vol,mean,18351.280086,20675.059533
donations_vol,median,4226.92,6680.0
donations_vol,mean,16494.766453,19657.608879
registrations_vol,median,0.0,0.0
registrations_vol,mean,1286.218519,439.115265
registrations_count,median,16.0,15.0
registrations_count,mean,93.611877,58.190031
is_top_performer,median,0.0,0.0
is_top_performer,mean,0.102746,0.11838


## category

In [18]:
# Number of form_totals rows (non-null form ids) in each category.
form_totals.groupby('category')['form'].count()

category
0     6136
1      346
2       60
3      115
4       72
5      245
6      449
7       69
8      489
9      201
10      11
11      73
12      15
13      18
14      22
15      49
Name: form, dtype: int64

In [15]:
# Median and mean of the volume / registration metrics and the performer
# flags for each category. The mean of a boolean flag is the share of forms
# in that category carrying it.
(
    form_totals
    .groupby('category')[[
        'trans_vol', 'donations_vol', 'registrations_vol',
        'registrations_count', 'is_top_performer', 'is_bottom_performer',
    ]]
    .agg(['median', 'mean'])
)

Unnamed: 0_level_0,trans_vol,trans_vol,donations_vol,donations_vol,registrations_vol,registrations_vol,registrations_count,registrations_count,is_top_performer,is_top_performer,is_bottom_performer,is_bottom_performer
Unnamed: 0_level_1,median,mean,median,mean,median,mean,median,mean,median,mean,median,mean
category,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
0,4883.2,16204.147513,4223.0,14603.645104,0.0,1219.032044,16.0,92.025587,0.0,0.092405,1.0,0.907595
1,7756.375,18573.435954,6651.995,16925.160491,0.0,894.187023,41.5,112.014451,0.0,0.118497,1.0,0.881503
2,10484.315,20720.6205,10237.285,19552.847167,0.0,794.15,22.5,72.066667,0.0,0.083333,1.0,0.916667
3,7524.3,29116.15113,6304.75,27442.394435,0.0,844.808696,23.0,85.113043,0.0,0.13913,1.0,0.86087
4,8488.6,37575.469306,5877.345,28942.541528,0.0,2397.916667,3.5,82.458333,0.0,0.277778,1.0,0.722222
5,12579.02,34858.804,9888.0,29034.836939,1330.0,4755.946,88.0,272.371429,0.0,0.236735,1.0,0.763265
6,6750.0,26197.122272,4883.0,22599.514744,0.0,2226.711047,34.0,96.267261,0.0,0.14922,1.0,0.85078
7,2375.0,12545.40913,2375.0,12304.131884,0.0,22.028986,1.0,11.492754,0.0,0.086957,1.0,0.913043
8,2056.0,19388.929346,1950.0,18846.990859,0.0,98.267894,1.0,40.609407,0.0,0.067485,1.0,0.932515
9,7239.33,30337.87005,6594.25,28527.324328,0.0,582.238806,2.0,54.427861,0.0,0.154229,1.0,0.845771
