In [1]:
import sys, datetime
sys.path.append("../../scripts/")
from s3_support import *

import pandas as pd

# load data

In [2]:
template_upgrades = pd.read_csv("converted_forms_dates.csv")
daily_conversions = pd.read_csv("forms_conversion.csv")

In [3]:
q = "select * from googleanalytics_traffic where qgiv_frontend=1 or p2p_frontend=1"
traffic = redshift_query_read(q, schema="public")

In [4]:
frontend = traffic[traffic['qgiv_frontend']|traffic['p2p_frontend']].copy()
frontend['bounce_rate'] = frontend['bounces'] / frontend['views']
frontend.tail(3)

Unnamed: 0,date,org,form,views,sessions,sessionduration,bounces,path,devicecategory,controlpanel,qgiv_frontend,p2p_frontend,bounce_rate
27765049,2021-05-12,442709,943495,1,0,0.0,0,/for/sponada/embed/,desktop,False,True,False,0.0
27765050,2021-05-17,437294,944235,1,1,0.0,1,/for/placerfoodbank/embed,desktop,False,True,False,1.0
27765051,2021-05-10,437294,944235,1,1,0.0,1,/for/placerfoodbank/embed,mobile,False,True,False,1.0


In [5]:
def is_new_frontend_view(pageview):
    if pageview['qgiv_frontend']:
        if pageview['form'] in template_upgrades['form'].tolist():
            if pageview['date'] >= template_upgrades[template_upgrades['form']==pageview['form']]['created'].iloc[0]:
                return True
    return False

template_upgrades['created'] = pd.to_datetime(template_upgrades['created'])
frontend['date'] = pd.to_datetime(frontend['date'])
frontend['is_new_form'] = frontend[['form', 'date', 'qgiv_frontend']].apply(is_new_frontend_view, axis=1)

In [6]:
frontend['year'] = frontend['date'].dt.year

# analysis

In [7]:
# how much of our traffic is being exposed to the new forms?
start_date = template_upgrades['created'].min()

time_rel_traffic = frontend[(frontend['date']>=start_date)&frontend['qgiv_frontend']]

print("Qgiv frontend traffic grouped by new form/old form since first form conversion ({:%Y-%m-%d})".format(start_date))
print("-"*80)
print("Page views:")
print(time_rel_traffic['is_new_form'].value_counts())
print()

print("Normalized:")
print(time_rel_traffic['is_new_form'].value_counts(normalize=True))
print()

print("Normalized, past 30 days:")
thirty_days_ago = datetime.datetime.now() - pd.to_timedelta("30day")
print(time_rel_traffic[time_rel_traffic['date']>=thirty_days_ago]['is_new_form'].value_counts(normalize=True))

Qgiv frontend traffic grouped by new form/old form since first form conversion (2020-12-09)
--------------------------------------------------------------------------------
Page views:
False    1366661
True       75532
Name: is_new_form, dtype: int64

Normalized:
False    0.947627
True     0.052373
Name: is_new_form, dtype: float64

Normalized, past 30 days:
False    0.943405
True     0.056595
Name: is_new_form, dtype: float64


In [8]:
# bounce rates
print("-"*40)
print("Bounce rate")
print("-"*40)
print()

print("P2P")
print("\tmean: {:.1f}%".format(frontend[frontend['p2p_frontend']]['bounce_rate'].mean() * 100.))
print("\tmedian: {:.1f}%".format(frontend[frontend['p2p_frontend']]['bounce_rate'].median() * 100.))

print("Qgiv")
print("\tmean: {:.1f}%".format(frontend[frontend['qgiv_frontend']]['bounce_rate'].mean() * 100.))
print("\tmedian: {:.1f}%".format(frontend[frontend['qgiv_frontend']]['bounce_rate'].median() * 100.))
print()

# over time
print("By year:")
print()
print("P2P:")
print(frontend[frontend['p2p_frontend']].groupby('year')['bounce_rate'].mean())

print()

print("Qgiv:")
print(frontend[frontend['qgiv_frontend']].groupby('year')['bounce_rate'].mean())

print()
print("All vs new form bounce rates:")
print(frontend.groupby('is_new_form')['bounce_rate'].mean())

print()

print("2021 all vs new form bounce rates:")
print(frontend[frontend['year']==2021].groupby('is_new_form')['bounce_rate'].mean())

print()

print("Last 30 days all vs new form bounce rates:")
print(frontend[frontend['date']>=thirty_days_ago].groupby('is_new_form')['bounce_rate'].mean())

print()
print("-"*40)
print("Session duration")
print("-"*40)
print()

print("P2P")
print("\tmean: {:.2f}".format(frontend[frontend['p2p_frontend']]['sessionduration'].mean()))
print("\tmedian: {:.2f}".format(frontend[frontend['p2p_frontend']]['sessionduration'].median()))

print("Qgiv")
print("\tmean: {:.2f}".format(frontend[frontend['qgiv_frontend']]['sessionduration'].mean()))
print("\tmedian: {:.2f}".format(frontend[frontend['qgiv_frontend']]['sessionduration'].median()))
print()


print("All vs new form:")
print(frontend.groupby('is_new_form')['sessionduration'].mean())

print()

print("2021 all vs new form:")
print(frontend[frontend['year']==2021].groupby('is_new_form')['sessionduration'].mean())

print()

print("Past 30 days all vs new form:")
print(frontend[frontend['date']>=thirty_days_ago].groupby('is_new_form')['sessionduration'].mean())

----------------------------------------
Bounce rate
----------------------------------------

P2P
	mean: 18.8%
	median: 0.0%
Qgiv
	mean: 25.8%
	median: 0.0%

By year:

P2P:
year
2016    0.206819
2017    0.143903
2018    0.144266
2019    0.176589
2020    0.228937
2021    0.220209
Name: bounce_rate, dtype: float64

Qgiv:
year
2016    0.208117
2017    0.282490
2018    0.275285
2019    0.274390
2020    0.231160
2021    0.174570
Name: bounce_rate, dtype: float64

All vs new form bounce rates:
is_new_form
False    0.228897
True     0.099235
Name: bounce_rate, dtype: float64

2021 all vs new form bounce rates:
is_new_form
False    0.200817
True     0.099548
Name: bounce_rate, dtype: float64

Last 30 days all vs new form bounce rates:
is_new_form
False    0.213985
True     0.098616
Name: bounce_rate, dtype: float64

----------------------------------------
Session duration
----------------------------------------

P2P
	mean: 195.13
	median: 0.00
Qgiv
	mean: 201.26
	median: 0.00

All vs new fo

In [9]:
print()
print("-"*40)
print("Session duration (dropping 0 values)")
print("-"*40)
print()

frontend_nozero = frontend[frontend['sessionduration']!=0]

print("Sample size:")
print("\tall: {:,}".format(len(frontend)))
print("\tnon-zero: {:,} ({:.2f}%)".format(len(frontend_nozero), (len(frontend_nozero) / len(frontend)) * 100.))

print("P2P")
print("\tmean: {:.2f}".format(frontend_nozero[frontend_nozero['p2p_frontend']]['sessionduration'].mean()))
print("\tmedian: {:.2f}".format(frontend_nozero[frontend_nozero['p2p_frontend']]['sessionduration'].median()))

print("Qgiv")
print("\tmean: {:.2f}".format(frontend_nozero[frontend_nozero['qgiv_frontend']]['sessionduration'].mean()))
print("\tmedian: {:.2f}".format(frontend_nozero[frontend_nozero['qgiv_frontend']]['sessionduration'].median()))
print()


print("All vs new form:")
print(frontend_nozero.groupby('is_new_form')['sessionduration'].mean())

print()

print("2021 all vs new form:")
print(frontend_nozero[frontend_nozero['year']==2021].groupby('is_new_form')['sessionduration'].mean())

print()

print("Past 30 days all vs new form:")
print(frontend_nozero[frontend_nozero['date']>=thirty_days_ago].groupby('is_new_form')['sessionduration'].mean())


----------------------------------------
Session duration (dropping 0 values)
----------------------------------------

Sample size:
	all: 27,765,052
	non-zero: 6,569,735 (23.66%)
P2P
	mean: 1090.39
	median: 244.00
Qgiv
	mean: 724.88
	median: 197.00

All vs new form:
is_new_form
False    841.600421
True     353.342873
Name: sessionduration, dtype: float64

2021 all vs new form:
is_new_form
False    811.012828
True     352.340517
Name: sessionduration, dtype: float64

Past 30 days all vs new form:
is_new_form
False    699.266608
True     577.930632
Name: sessionduration, dtype: float64
