In [1]:
import sys
sys.path.insert(1, '../../scripts/')
from s3_support import *

import pandas as pd

In [10]:
# One-time transactions: rows with status 'A' and recurring=0 from year 2019 onward.
# Only the columns needed for the per-form daily roll-up are pulled.
onetime_sql = '''select
            id,
            date,
            form,
            gift_assist_count
        from transactions
        where
            status='A' and
            recurring=0 and
            year>=2019'''
trans_onetime = redshift_query_read(
    onetime_sql,
    schema='production',
)

In [11]:
# Recurring gifts: keep only the transaction(s) dated on the earliest date of
# each recurring series, for series whose earliest date is in 2019 or later.
# The cutoff is an explicit date literal; the previous `min_date>=2019`
# compared a date with a bare integer and only worked through an implicit cast.
# NOTE(review): the min(date) subquery is not restricted to status='A', so a
# series whose earliest row has another status contributes no row here —
# confirm that is intended.
q = '''select
            t.id,
            t.date,
            t.form,
            t.recurring,
            t.gift_assist_count
        from transactions as t
        inner join (select
                        recurring,
                        min(date) as min_date
                    from transactions
                    group by recurring) as r on t.recurring=r.recurring and t.date=r.min_date
        where
            t.status='A' and
            t.recurring!=0 and
            r.min_date>='2019-01-01'
        order by date asc;'''
trans_rec = redshift_query_read(q, schema='production')

In [12]:
# Collapse to a single row per recurring series. The query result is ordered by
# date, so 'first' keeps the earliest values; 'recurring' becomes the index.
trans_rec = trans_rec.groupby(by='recurring').agg('first')

In [23]:
# Per form and date: number of transactions and total gift-assist count,
# combining one-time gifts with the first gift of each recurring series.
aggs = dict(id='count', gift_assist_count='sum')
trans = (
    pd.concat([trans_onetime, trans_rec])
    .groupby(['form', 'date'])
    .agg(aggs)
    .reset_index()
    .rename(columns={'id': 'trans_count'})
)
# Keep the column layout: keys, then gift-assist total, then transaction count.
trans = trans[['form', 'date', 'gift_assist_count', 'trans_count']]

In [14]:
# Traffic: total views per form per date from 2019 onward, skipping rows with
# form=0. The cutoff is an explicit date literal; the previous `date>=2019`
# compared a date with a bare integer and only worked through an implicit cast.
q = '''select
            date,
            form,
            sum(views) as views
        from googleanalytics_traffic
        where
            date>='2019-01-01' and
            form!=0
        group by date, form'''
traff = redshift_query_read(q, schema='production')

In [26]:
# Combine transactions and traffic per (date, form). The outer join keeps
# form-days present on only one side; their missing counts are filled with 0.
df = pd.merge(trans, traff, how='outer', on=['date', 'form']).fillna(0)

In [28]:
# Sanity check: how many form-day rows exist, and how many of them have traffic.
n_entries = len(df)
n_with_views = int((df['views'] > 0).sum())
print("{:,} entries; {:,} entries w/ >0 views".format(n_entries, n_with_views))

5,113,155 entries; 4,583,711 entries w/ >0 views


In [32]:
# Keep only form-days with recorded traffic. The explicit .copy() makes the
# column assignments below write to an independent frame instead of a slice of
# the merged frame (which raises SettingWithCopyWarning).
df = df[df['views'] > 0].copy()
df['conversion'] = df['trans_count'] / df['views']

# get forms gift assist active by month
df['month'] = df['date'].dt.to_period('M')
forms_ga = df.groupby(['form', 'month'])['gift_assist_count'].sum().reset_index()
forms_ga['ga_active'] = forms_ga['gift_assist_count'] > 0
forms_ga = forms_ga.drop('gift_assist_count', axis=1)
# NOTE(review): gift_assist_count comes from transaction rows only, so a form is
# flagged active only in months where it has at least one transaction on a day
# with views. That may bias the active group's conversion upward — confirm
# whether a flag independent of transactions is available.

# Drop a flag left over from an earlier run of this cell so that re-running it
# does not produce ga_active_x / ga_active_y columns and break later cells.
df = df.drop(columns='ga_active', errors='ignore').merge(forms_ga, on=['form', 'month'])

In [36]:
# Mean and median form-day conversion for every month, split by the
# gift-assist flag.
df_grpd = (
    df.groupby(['month', 'ga_active'])['conversion']
      .agg(['mean', 'median'])
      .reset_index()
)

# Wide layout for display: one row per month, with mean/median columns for
# ga_active False and True side by side.
df_grpd.pivot(index='month', columns='ga_active', values=['mean', 'median']).reset_index()

Unnamed: 0_level_0,month,mean,mean,median,median
ga_active,Unnamed: 1_level_1,False,True,False,True
0,2019-01,0.05095,0.125213,0.0,0.0
1,2019-02,0.050572,0.191673,0.0,0.0
2,2019-03,0.048806,0.192192,0.0,0.0
3,2019-04,0.047956,0.265539,0.0,0.0
4,2019-05,0.058991,0.313783,0.0,0.0
5,2019-06,0.045521,0.285931,0.0,0.0
6,2019-07,0.038843,0.300636,0.0,0.0
7,2019-08,0.080112,0.337946,0.0,0.0
8,2019-09,0.059417,0.422331,0.0,0.0
9,2019-10,0.05718,0.394748,0.0,0.0
