In [231]:
import sys
sys.path.append("../../scripts/")
from s3_support import *

import pandas as pd
import numpy as np

# load data

In [232]:
# Lower bound (YYYY-MM-DD) substituted into the `date >= '{}'` filters of the
# traffic and transaction queries in this notebook.
START_DATE = '2019-10-01'

In [233]:
# form status
# Reads the `id` and `status` columns of the `form` table from the production schema.
# NOTE(review): `all_forms` is not referenced by any other cell visible here — confirm it is still needed.
q = "select id, status from form"
all_forms = redshift_query_read(q, schema='production')

#### form upgrades

In [234]:
# logged upgrades
# Every syslog entry whose message mentions a form template upgrade, reduced to
# the organisation, the form and the timestamp of the entry.
q = "select * from syslog_logs where message like '%Qgiv Form Template Upgraded%'"
form_upgrades = redshift_query_read(q, schema="production").loc[:, ['org', 'form', 'created']]

# Distinct ids of the forms that have at least one logged upgrade.
forms_upgrades_ids_logged = form_upgrades.loc[:, 'form'].unique().tolist()

In [235]:
# created new template forms
# The list of forms created on the new template is loaded from a CSV held in the bucket below.
bucket = "qgiv-stats-data"
new_form_template_list = "form_download new template.csv"
new_forms = get_dataframe_from_file(bucket, new_form_template_list)

# Distinct 'Form ID' values of the rows whose Status is 'active'.
new_forms_ids = new_forms.loc[new_forms['Status'] == 'active', 'Form ID'].unique().tolist()

In [236]:
# Row count of the new-template list, followed by how many of those rows are active.
print("{} new template forms".format(new_forms.shape[0]))
print("{} active".format(new_forms['Status'].eq('active').sum()))

6510 new template forms
4745 active


In [237]:
def is_new_form_entry(daily_row):
    """Decide whether a daily row belongs to a form running the new template.

    Reads the notebook-level ``form_upgrades`` frame and ``new_forms_ids`` list.

    Parameters
    ----------
    daily_row : mapping-like (e.g. a pandas row)
        Must provide ``'form'`` and ``'date'``.

    Returns
    -------
    bool-like
        * If the form has a logged upgrade: whether ``daily_row['date']`` is on
          or after the ``created`` value of the first matching log row.
        * Otherwise: whether the form id is in ``new_forms_ids``.

    Raises
    ------
    Exception
        Any failure during the lookup is re-raised as ``Exception`` carrying the
        form id, with the original error chained as its cause.
    """
    form_id = daily_row['form']
    try:
        # One pass over the upgrade log instead of building a unique-id list and
        # then filtering the frame a second time.
        upgrade_dates = form_upgrades.loc[form_upgrades['form'] == form_id, 'created']
        if not upgrade_dates.empty:
            # NOTE(review): this uses the first matching log row, which is only
            # the earliest upgrade if the log is date-ordered — confirm.
            return daily_row['date'] >= upgrade_dates.iloc[0]
        # Not logged as an upgrade: new only if it was created on the new template.
        return form_id in new_forms_ids
    except Exception as err:
        # `except Exception` (not a bare except) so KeyboardInterrupt/SystemExit
        # still propagate untouched.
        message = "error with form {}".format(form_id)
        print(message)
        raise Exception(message) from err

#### traffic

In [238]:
# Daily pageviews and bounces per form and device category for rows with
# qgiv_frontend=1, starting at START_DATE.
q = '''select
            date,
            form,
            devicecategory,
            sum(views) as pageviews,
            sum(bounces) as bounces
        from googleanalytics_traffic
        where 
            qgiv_frontend=1 and
            date >= '{}' 
        group by date, devicecategory, form'''.format(START_DATE)
traffic = redshift_query_read(q, schema='public')
# Form ids are cast to integers so they can be matched against transactions.
traffic = traffic.assign(form=traffic['form'].astype(int))

#### transactions

In [239]:
# one time
q = '''select 
            form,
            date,
            amount,
            source,
            useragent
        from transactions
        where
            status='A' and
            (source='don_form' or source='mobile') and
            recurring=0 and
            date >= '{}'
        order by date asc'''.format(START_DATE)
onetime = redshift_query_read(q, schema='public')
onetime['useragent'].fillna('', inplace=True)
onetime['is_recurring'] = False

In [240]:
# recurring
q = '''select 
            form,
            date,
            recurring,
            amount,
            source,
            useragent
        from transactions
        where
            status='A' and
            (source='don_form' or source='mobile') and
            recurring!=0 and
            date >= '{}'
        order by date asc
        '''.format(START_DATE)
rec = redshift_query_read(q, schema='public')
rec.drop_duplicates(subset=['recurring'], keep='first', inplace=True)
rec['useragent'].fillna('', inplace=True)
rec['is_recurring'] = True

In [241]:
# Stack one-time and recurring transactions into a single frame. pd.concat is
# used because DataFrame.append was deprecated and then removed in pandas 2.0.
# Row labels are kept as-is (no ignore_index), exactly as append did.
trans = pd.concat([onetime, rec])

In [242]:
# tagging mobile vs desktop
# A row is mobile/desktop when its user agent contains any of the listed
# markers; na=False makes non-string user agents count as "no match".
trans['is_mobile'] = trans['useragent'].str.contains('iPhone|iPad|Android', na=False)
trans['is_desktop'] = trans['useragent'].str.contains('Macintosh|Windows|CrOS', na=False)

# defaulting to source for ambiguous/missing useragent
has_device = trans['is_mobile'] | trans['is_desktop']
trans_assigned = trans[has_device].copy()
trans_unassigned = trans[~has_device].copy()
trans_unassigned['is_mobile'] = trans_unassigned['source'] == 'mobile'
trans_unassigned['is_desktop'] = trans_unassigned['source'] == 'don_form'
# pd.concat replaces DataFrame.append (removed in pandas 2.0); rows tagged by
# user agent come first, followed by the rows tagged by source.
trans = pd.concat([trans_assigned, trans_unassigned])

In [243]:
# Drop transactions for forms that never appear in the traffic data.
trans = trans.loc[trans['form'].isin(traffic['form'].unique())]

#### compiling & merging

In [244]:
# calculate conversions
# Transaction count and volume per form, day and recurring flag. Named
# aggregation yields the final column names directly; the previous set literal
# {'count', 'sum'} has no defined order and needed a rename/drop afterwards.
daily_trans = (
    trans.groupby(['form', 'date', 'is_recurring'])['amount']
    .agg(trans_count='count', trans_vol='sum')
    .reset_index()
)

# One row per (form, date) with one-time and recurring figures side by side.
# The flat names below rely on the pivot producing both a False and a True
# column for each value, in that order.
daily_trans_pvt = daily_trans.pivot(index=['form', 'date'], columns='is_recurring', values=['trans_count', 'trans_vol']).reset_index()
daily_trans_pvt.columns = ['form', 'date', 'trans_count_onetime', 'trans_count_recurring', 'trans_vol_onetime', 'trans_vol_recurring']
daily_trans_pvt = daily_trans_pvt.fillna(0)

# Outer join with traffic summed over device categories; days missing on one
# side get zeros, then days without any pageviews are discarded so the
# conversion ratios below never divide by zero.
dailies = daily_trans_pvt.merge(traffic.groupby(['date', 'form'])[['pageviews', 'bounces']].sum().reset_index(), on=['date', 'form'], how='outer')
dailies = dailies.fillna(0)
# .copy() so the column assignments below act on an independent frame rather
# than on a filtered slice.
dailies = dailies[dailies['pageviews'] > 0].copy()

dailies['conversion'] = (dailies['trans_count_onetime'] + dailies['trans_count_recurring']) / dailies['pageviews']
dailies['conversion_onetime'] = dailies['trans_count_onetime'] / dailies['pageviews']
dailies['conversion_recurring'] = dailies['trans_count_recurring'] / dailies['pageviews']

#### embeds

In [245]:
# embeds
# Rows of the `embed` table, with its `widget` column exposed as `form`.
q = "select widget as form, date from embed"
embeds = redshift_query_read(q, schema='production')
# Parse the date column into pandas datetimes.
embeds = embeds.assign(date=pd.to_datetime(embeds['date']))

In [246]:
def is_embed(r):
    """Return True when the row's form id occurs in the notebook-level ``embeds`` frame.

    ``r`` only needs to provide ``r['form']``; the embed date is not consulted.
    """
    embedded_form_ids = embeds['form'].unique().tolist()
    return r['form'] in embedded_form_ids

#### tagging new template & embeds

In [247]:
# Flag each daily row by passing its form id and date to is_new_form_entry.
dailies['is new template'] = dailies.loc[:, ['form', 'date']].apply(is_new_form_entry, axis='columns')

In [248]:
# A daily row is an embed row when its form id appears anywhere in `embeds`.
# Vectorised membership test: the row-wise apply rebuilt the list of unique
# embed ids for every single row, and does not return a boolean column for an
# empty frame.
dailies['is embed'] = dailies['form'].isin(embeds['form'])

# analysis

In [249]:
def report(df):
    """Summarise a frame of daily per-form rows into a dict of headline metrics.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns 'form', 'pageviews', 'bounces', 'conversion',
        'conversion_onetime', 'conversion_recurring', 'trans_count_onetime',
        'trans_count_recurring', 'trans_vol_onetime' and 'trans_vol_recurring'.
        The frame is not modified.

    Returns
    -------
    dict
        Distinct form count, total transactions, mean conversion rates, mean
        transaction amounts, the one-time to recurring count ratio, total
        pageviews and the bounce rate. Ratios are plain divisions with no guard
        against a zero denominator.
    """
    # recurring frequency?
    # Infinite values are treated as missing so they are skipped by mean/sum.
    # This works on a new frame: replacing in place would alter the caller's
    # data and raises SettingWithCopyWarning when `df` is a slice.
    cleaned = df.replace(np.inf, np.nan)

    onetime_count = cleaned['trans_count_onetime'].sum()
    recurring_count = cleaned['trans_count_recurring'].sum()
    total_pageviews = cleaned['pageviews'].sum()

    return {
        'form sample size': len(cleaned['form'].unique()),
        'transactions': onetime_count + recurring_count,
        'conversion': cleaned['conversion'].mean(),
        'conversion onetime': cleaned['conversion_onetime'].mean(),
        'conversion recurring': cleaned['conversion_recurring'].mean(),
        'mean transaction onetime': cleaned['trans_vol_onetime'].sum() / onetime_count,
        'mean transaction recurring': cleaned['trans_vol_recurring'].sum() / recurring_count,
        'onetime/recurring': onetime_count / recurring_count,
        'pageviews': total_pageviews,
        'bounce rate': cleaned['bounces'].sum() / total_pageviews
    }

In [250]:
# Unweighted mean of the per-form, per-day conversion rate across all rows of `dailies`.
dailies['conversion'].mean()

0.054197668970101996

### new vs old

In [251]:
# Metrics for every daily row and for the new-template / old-template subsets.
# The subsets are passed as explicit copies so that report() never operates on
# a slice of `dailies` (the source of the SettingWithCopyWarning).
data_all = {
    'all': report(dailies),
    'new template': report(dailies[dailies['is new template']].copy()),
    'old template': report(dailies[~dailies['is new template']].copy())
}

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  method=method,


In [252]:
# Heading: first date covered by the data.
print("{:%Y-%m-%d} +".format(dailies['date'].min()))

# One row per group, one column per metric.
df = pd.DataFrame(data_all).T

# Express pageviews and transactions as a share of the 'all' row.
df['pageviews'] = df['pageviews'] / df.loc['all', 'pageviews']
df['transactions'] = df['transactions'] / df.loc['all', 'transactions']

# Display metrics as rows and groups as columns, without the 'all' baseline.
df.T.drop(columns='all')

2019-10-01 +


Unnamed: 0,new template,old template
form sample size,849.0,9437.0
transactions,0.121882,0.878118
conversion,0.08823,0.050656
conversion onetime,0.08025,0.043979
conversion recurring,0.00798,0.006677
mean transaction onetime,234.122677,201.396222
mean transaction recurring,91.358499,3649.263842
onetime/recurring,18.143254,12.075036
pageviews,0.080748,0.919252
bounce rate,0.361376,0.310667


### embeds

In [253]:
# Mean daily conversion rate for embed rows versus non-embed rows.
dailies['conversion'].groupby(dailies['is embed']).mean()

is embed
False    0.027109
True     0.079850
Name: conversion, dtype: float64

In [254]:
# Metrics for every daily row and for the embed / non-embed subsets.
# The subsets are passed as explicit copies so that report() never operates on
# a slice of `dailies` (the source of the SettingWithCopyWarning).
data_all = {
    'all': report(dailies),
    'embed': report(dailies[dailies['is embed']].copy()),
    'not embed': report(dailies[~dailies['is embed']].copy())
}

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  method=method,


In [256]:
# Heading: first date covered by the data.
print("Embeds {:%Y-%m-%d} +".format(dailies['date'].min()))

# One row per group, one column per metric.
df = pd.DataFrame(data_all).T

# Express pageviews and transactions as a share of the 'all' row.
df['pageviews'] = df['pageviews'] / df.loc['all', 'pageviews']
df['transactions'] = df['transactions'] / df.loc['all', 'transactions']

# Display metrics as rows and groups as columns, without the 'all' baseline.
df.T.drop(columns='all')

Embeds 2019-10-01 +


Unnamed: 0,embed,not embed
form sample size,3493.0,6386.0
transactions,0.662168,0.337832
conversion,0.07985,0.027109
conversion onetime,0.070869,0.022612
conversion recurring,0.008981,0.004497
mean transaction onetime,214.763679,186.937538
mean transaction recurring,5426.419092,69.316245
onetime/recurring,13.745448,10.804007
pageviews,0.47384,0.52616
bounce rate,0.318566,0.311336


In [258]:
# Metrics for new-template rows only, split into embed and non-embed; 'all'
# still covers every daily row, so shares derived from it are relative to the
# whole data set. `~` binds tighter than `&`, so the second mask reads
# (not embed) AND (new template).
# The subsets are passed as explicit copies so that report() never operates on
# a slice of `dailies` (the source of the SettingWithCopyWarning).
data_all = {
    'all': report(dailies),
    'embed': report(dailies[dailies['is embed'] & dailies['is new template']].copy()),
    'not embed': report(dailies[~dailies['is embed'] & dailies['is new template']].copy())
}

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  method=method,


In [259]:
# Heading: first date covered by the data.
print("New template embeds {:%Y-%m-%d} +".format(dailies['date'].min()))

# One row per group, one column per metric.
df = pd.DataFrame(data_all).T

# Express pageviews and transactions as a share of the 'all' row.
df['pageviews'] = df['pageviews'] / df.loc['all', 'pageviews']
df['transactions'] = df['transactions'] / df.loc['all', 'transactions']

# Display metrics as rows and groups as columns, without the 'all' baseline.
df.T.drop(columns='all')

New template embeds 2019-10-01 +


Unnamed: 0,embed,not embed
form sample size,508.0,341.0
transactions,0.092744,0.029138
conversion,0.104022,0.050691
conversion onetime,0.094457,0.046477
conversion recurring,0.009564,0.004214
mean transaction onetime,222.169883,271.620126
mean transaction recurring,79.609473,141.10098
onetime/recurring,17.007331,22.952473
pageviews,0.058658,0.02209
bounce rate,0.384761,0.299276
