In [2]:
import sys, datetime
sys.path.append("../../scripts/")
from s3_support import *

import pandas as pd
import numpy as np

%matplotlib inline

Looking at P2P templates. 10 is the new registrations system, 9 is the legacy system.

The donation redesign is flagged under `form.isStandardDonationForm`

# all time

## load data

In [10]:
q = "select id as form, template from form"
forms = redshift_query_read(q, schema='production')

In [88]:
q = '''select 
            form, 
            count(id) as count_trans, 
            sum(amount) as vol_trans,
            sum(registrations_count) as count_reg,
            sum(donations_count) as count_don,
            sum(purchases_count) as count_purch
        from transactions 
        where status='A' and source='p2p' 
        group by form'''
trans = redshift_query_read(q, schema='production')

In [89]:
q = '''select
            form,
            sum(views) as views,
            sum(bounces) as bounces
        from googleanalytics_traffic
        group by form'''
traf = redshift_query_read(q, schema='production')

In [90]:
df = forms.merge(trans, on='form').merge(traf, on='form')

In [91]:
df['conversion'] = df['count_trans'] / df['views']
df['conversion_reg'] = df['count_reg'] / df['views']
df['conversion_don'] = df['count_don'] / df['views']
df['bounce_rate'] = df['bounces'] / df['views']
df['purch_rate'] = df['count_purch'] / df['count_trans']

df = df[df['template']!=8]

In [92]:
df.tail(3)

Unnamed: 0,form,template,count_trans,vol_trans,count_reg,count_don,count_purch,views,bounces,conversion,conversion_reg,conversion_don,bounce_rate,purch_rate
3162,982742,9,60,4276.0,30,39,0,1025,242,0.058537,0.029268,0.038049,0.236098,0.0
3163,982774,9,24,3240.0,5,20,0,2,0,12.0,2.5,10.0,0.0,0.0
3164,983398,10,5,3705.0,9,0,0,4,0,1.25,2.25,0.0,0.0,0.0


## exploration

In [93]:
agg_cols = ['conversion', 'conversion_reg', 
            'conversion_don', 'bounce_rate', 
            'purch_rate', 'vol_trans']
df.groupby('template')[agg_cols].agg(['mean', 'median']).transpose()

Unnamed: 0,template,6,9,10
conversion,mean,27.835043,19.993399,52.413567
conversion,median,0.082243,0.5,0.5
conversion_reg,mean,6.803667,8.071001,13.137105
conversion_reg,median,0.036001,0.031604,0.218543
conversion_don,mean,22.693964,14.51774,44.343812
conversion_don,median,0.036438,0.25,0.170213
bounce_rate,mean,0.210122,0.321668,0.171047
bounce_rate,median,0.152685,0.23979,0.035714
purch_rate,mean,0.138889,0.037872,0.033563
purch_rate,median,0.0,0.0,0.0


# annually

## load data

In [97]:
q = '''select 
            form, 
            date_trunc('year', date) as year,
            count(id) as count_trans, 
            sum(amount) as vol_trans,
            sum(registrations_count) as count_reg,
            sum(donations_count) as count_don,
            sum(purchases_count) as count_purch
        from transactions 
        where status='A' and source='p2p' 
        group by form, date_trunc('year', date)'''
trans = redshift_query_read(q, schema='production')

In [98]:
q = '''select
            form,
            date_trunc('year', date) as year,
            sum(views) as views,
            sum(bounces) as bounces
        from googleanalytics_traffic
        group by form, date_trunc('year', date)'''
traf = redshift_query_read(q, schema='production')

In [99]:
df = trans.merge(traf, on=['form', 'year'])
df = df.merge(forms, on='form')

In [100]:
df['conversion'] = df['count_trans'] / df['views']
df['conversion_reg'] = df['count_reg'] / df['views']
df['conversion_don'] = df['count_don'] / df['views']
df['bounce_rate'] = df['bounces'] / df['views']
df['purch_rate'] = df['count_purch'] / df['count_trans']

df = df[df['template']!=8]
df = df[df['year'].dt.year>=2019]

In [101]:
df.tail(3)

Unnamed: 0,form,year,count_trans,vol_trans,count_reg,count_don,count_purch,views,bounces,template,conversion,conversion_reg,conversion_don,bounce_rate,purch_rate
2314,980140,2021-01-01,1,25.0,0,1,0,16,6,9,0.0625,0.0,0.0625,0.375,0.0
2315,973876,2021-01-01,1,105.0,0,1,0,3056,194,9,0.000327,0.0,0.000327,0.063482,0.0
2316,976188,2021-01-01,1,250.0,0,1,0,121,11,9,0.008264,0.0,0.008264,0.090909,0.0


In [102]:
df.groupby('template')['vol_trans'].agg(['mean', 'median']).reset_index()

Unnamed: 0,template,mean,median
0,6,6194.8325,4519.445
1,9,16470.930926,3256.0
2,10,20895.830292,3144.5


## exploration

### conversion, bounce & purchase rate

In [56]:
agg_cols = ['conversion', 'conversion_reg', 
            'conversion_don', 'bounce_rate', 
            'purch_rate']
df.groupby(['year', 'template'])[agg_cols].agg(['mean', 'median']).transpose()

Unnamed: 0_level_0,year,2019-01-01,2019-01-01,2020-01-01,2020-01-01,2021-01-01,2021-01-01,2021-01-01,2022-01-01,2022-01-01
Unnamed: 0_level_1,template,9,10,9,10,6,9,10,9,10
conversion,mean,32.507493,22.038108,12.057887,1.161495,41.753852,22.686034,9.763863,8.429721,76.762528
conversion,median,0.068136,0.069453,0.096508,0.3,0.984284,0.05615,0.159091,0.141531,0.618834
conversion_reg,mean,17.66871,0.083333,3.851787,0.132782,10.206335,6.772609,3.847466,3.712978,16.430647
conversion_reg,median,0.028,0.0,0.003292,0.0,0.146277,0.003842,0.018455,0.0,0.4
conversion_don,mean,19.489227,21.954775,9.773899,1.023282,34.042198,18.032208,7.412326,6.173432,66.856754
conversion_don,median,0.049479,0.069453,0.062966,0.159722,0.827369,0.043478,0.02947,0.078611,0.293532
bounce_rate,mean,0.140075,0.216253,0.300822,0.30995,0.200894,0.241047,0.20642,0.286824,0.194763
bounce_rate,median,0.070929,0.212862,0.263192,0.304348,0.135831,0.193496,0.17654,0.208333,0.037736
purch_rate,mean,0.026396,0.0,0.050061,0.09073,0.208333,0.058892,0.01802,0.102224,0.032791
purch_rate,median,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### view shares by template

In [104]:
df.groupby(['year', 'template'])['form'].count()

year        template
2019-01-01  9           295
            10            3
2020-01-01  9           562
            10           13
2021-01-01  6             4
            9           961
            10           47
2022-01-01  9           148
            10           74
Name: form, dtype: int64

In [107]:
df.groupby(['year', 'template'])['views'].agg(['mean', 'median'])

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,median
year,template,Unnamed: 2_level_1,Unnamed: 3_level_1
2019-01-01,9,3118.847458,404.0
2019-01-01,10,571.666667,156.0
2020-01-01,9,1770.745552,132.5
2020-01-01,10,935.307692,92.0
2021-01-01,6,130.0,45.5
2021-01-01,9,2418.895942,211.0
2021-01-01,10,1283.744681,88.0
2022-01-01,9,673.682432,47.5
2022-01-01,10,116.22973,25.0


### registration, donation, purchase counts per form by template

In [113]:
agg_cols = {
    'template': 'last',
    'count_reg': 'sum', 
    'count_don': 'sum', 
    'count_purch': 'sum'
}
temp_agg = df.groupby(['year', 'form']).agg(agg_cols).reset_index()

In [115]:
agg_cols = ['count_reg', 'count_don', 'count_purch']
temp_agg.groupby(['year', 'template'])[agg_cols].mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,count_reg,count_don,count_purch
year,template,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2019-01-01,9,132.169492,176.938983,5.742373
2019-01-01,10,0.333333,126.0,0.0
2020-01-01,9,40.578292,113.879004,7.685053
2020-01-01,10,20.461538,104.538462,0.923077
2021-01-01,6,27.0,87.0,1.25
2021-01-01,9,57.421436,179.566077,5.613944
2021-01-01,10,36.021277,72.340426,8.361702
2022-01-01,9,46.027027,86.567568,3.533784
2022-01-01,10,131.324324,315.797297,0.675676


### total amount raised, per registrant per form by template

In [121]:
trans_cols = ['form', 'year', 'vol_trans']
temp_agg = temp_agg.merge(trans[trans_cols], on=['form', 'year'])

In [123]:
temp_agg['vol_per_reg'] = temp_agg['vol_trans'] / temp_agg['count_reg']
temp_agg['don_per_reg'] = temp_agg['count_don'] / temp_agg['count_reg']
temp_agg['purch_per_reg'] = temp_agg['count_purch'] / temp_agg['count_reg']

In [124]:
agg_cols = ['vol_trans', 'vol_per_reg', 'don_per_reg', 'purch_per_reg']
temp_agg.groupby(['year', 'template'])[agg_cols].agg(['mean', 'median'])

Unnamed: 0_level_0,Unnamed: 1_level_0,vol_trans,vol_trans,vol_per_reg,vol_per_reg,don_per_reg,don_per_reg,purch_per_reg,purch_per_reg
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,median,mean,median,mean,median,mean,median
year,template,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
2019-01-01,9,17419.937085,5776.75,,169.904412,,1.941176,,0.0
2019-01-01,10,19220.12,24159.0,,inf,,inf,0.0,0.0
2020-01-01,9,15655.105658,2853.125,,2195.958333,,11.666667,,0.0
2020-01-01,10,47018.556923,4053.47,,inf,,inf,,0.0
2021-01-01,6,6194.8325,4519.445,288.569204,169.340031,2.903299,2.410494,0.625,0.0
2021-01-01,9,17475.188522,3761.5,,1318.5,,8.0,,0.0
2021-01-01,10,8876.939574,3596.75,,230.922963,,2.041667,0.142519,0.0
2022-01-01,9,11156.373041,817.095,,inf,,inf,,0.0
2022-01-01,10,24008.256622,2530.0,,90.76017,,0.69697,,0.0


### registration donations per form by template

In [125]:
q = '''select 
            form, 
            date_trunc('year', date) as year,
            count(id) as count_trans, 
            sum(amount) as vol_trans,
            sum(registrations_count) as count_reg,
            sum(donations_count) as count_don,
            sum(purchases_count) as count_purch
        from transactions 
        where status='A' and source='p2p' and
            registrations_count!=0 and 
            donations_count!=0 
        group by form, date_trunc('year', date)'''
trans_reg_dons = redshift_query_read(q, schema='production')

In [130]:
trans_reg_dons = trans_reg_dons.merge(temp_agg[['year', 'form', 'template']], on=['year', 'form'])

In [136]:
trans_reg_dons.groupby(['year', 'template'])['count_trans'].agg(['mean', 'median'])

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,median
year,template,Unnamed: 2_level_1,Unnamed: 3_level_1
2019-01-01,9,28.049451,11.0
2020-01-01,9,19.700893,6.5
2020-01-01,10,3.8,4.0
2021-01-01,6,11.0,11.0
2021-01-01,9,25.858191,9.0
2021-01-01,10,32.235294,7.0
2022-01-01,9,18.666667,10.0
2022-01-01,10,45.883721,11.0
