In [1]:
import sys
sys.path.insert(1, '../../scripts/')
from s3_support import *

import pandas as pd
import numpy as np

# load 2022+ transactions & traffic

In [2]:
# look at 2022 comparing conversion rates w/ feature
q = '''select
        form,
        date_trunc('month', date) as month,
        count(distinct(case when recurring=0 or recurring_origin=1 then id else null end)) as trans_count,
        sum(amount) as trans_vol,
        sum(gift_assist_count) as ga_count,
        sum(gift_assist_amt) as ga_vol,
        sum(isexpresscheckout::int) as expresscheckout
    from production.transactions
    where
        status='A' and
        year>=2022 and
        (recurring=0 or recurring_origin=1) and
        (source='don_form' or source='mobile')
    group by form, date_trunc('month', date)'''
trans = redshift_query_read(q, schema='production')

In [3]:
print("{:,} transactions date observations; {:,} forms".format(len(trans), len(trans['form'].unique())))

177,959 transactions date observations; 34,669 forms


In [4]:
q = '''select
            date_trunc('month', date) as month,
            form,
            sum(views) as views_ga
        from ga
        where
            date>='2022-01-01'
        group by date_trunc('month', date), form'''
traff_ga = redshift_query_read(q, schema='production')
q = '''select
            date_trunc('month', date) as month,
            form,
            sum(views) as views_traff
        from ga4_traffic
        where
            date>='2022-01-01'
        group by date_trunc('month', date), form'''
traff_ga4 = redshift_query_read(q, schema='production')
q = '''select
            date_trunc('month', week) as month,
            form,
            sum(views) as views_weekly
        from ga4_traffic_weekly
        where
            week>='2022-01-01'
        group by date_trunc('month', week), form'''
traff_ga4_weekly = redshift_query_read(q, schema='production')

In [5]:
traff = traff_ga.merge(traff_ga4, on=['form', 'month'], how='outer')
traff = traff.merge(traff_ga4_weekly, on=['form', 'month'], how='outer')

In [6]:
traff['views'] = traff[['views_ga', 'views_traff', 'views_weekly']].max(axis=1)

In [7]:
traff = traff[['month', 'form', 'views']]
print("{:,} traffic observations; {:,} forms".format(len(traff), len(traff['form'].unique())))
print("{} to {}".format(traff['month'].min(), traff['month'].max()))

323,409 traffic observations; 65,941 forms
2022-01-01 00:00:00 to 2024-11-01 00:00:00


In [8]:
trans.head(2)

Unnamed: 0,form,month,trans_count,trans_vol,ga_count,ga_vol,expresscheckout
0,138318,2022-05-01,72,6700.0,0,0.0,0
1,138318,2022-03-01,118,6078.0,0,0.0,0


In [9]:
traff.head(2)

Unnamed: 0,month,form,views
0,2023-09-01,0,5163122.0
1,2023-08-01,1007426,11599.0


In [10]:
# merge
df = trans.merge(traff, on=['month', 'form'], how='outer').fillna(0)

print("{:,} merged observations; {:,} forms".format(len(df), len(df['form'].unique())))
print("{:,} observations w/ gift assist".format(len(df[df['ga_count']>0])))
print("{} to {}".format(df['month'].min(), df['month'].max()))

376,846 merged observations; 68,567 forms
91,796 observations w/ gift assist
2022-01-01 00:00:00 to 2024-11-01 00:00:00


In [11]:
df.head(2)

Unnamed: 0,form,month,trans_count,trans_vol,ga_count,ga_vol,expresscheckout,views
0,138318,2022-05-01,72.0,6700.0,0.0,0.0,0.0,959.0
1,138318,2022-03-01,118.0,6078.0,0.0,0.0,0.0,1624.0


In [12]:
df['conversion'] = df['trans_count'] / df['views']
df['conversion'].fillna(0, inplace=True)
df['conversion'] = df['conversion'].replace(np.inf, np.nan)
df.dropna(inplace=True)

In [13]:
df['ga_enabled'] = df['ga_count']>0
df['ec_enabled'] = df['expresscheckout']>0

In [14]:
print("{:,} forms".format(len(df['form'].unique())))
print("{:,} rows".format(len(df)))

65,938 forms
323,216 rows


In [15]:
df[df['views']>0]['conversion'].agg(['mean', 'median'])

mean      0.172178
median    0.000000
Name: conversion, dtype: float64

In [16]:
ga = df[df['ga_enabled']]
ga.groupby('ec_enabled')['conversion'].agg(['mean', 'median', 'count']).reset_index()

Unnamed: 0,ec_enabled,mean,median,count
0,False,0.523727,0.090909,64710
1,True,0.637477,0.111888,2531


In [17]:
df['ga_conversion'] = df['ga_count'] / df['views']
df[df['ga_enabled']].groupby('ec_enabled')['ga_conversion'].agg(['mean', 'median', 'count']).reset_index()

Unnamed: 0,ec_enabled,mean,median,count
0,False,0.276776,0.045872,64710
1,True,0.377159,0.065574,2531


In [18]:
print("All")
df_total = df_grpd[df_grpd['conversion']<1].groupby(['ga_enabled'])['trans_count', 'views'].sum().reset_index()
df_total['conversion'] = df_total['trans_count'] / df_total['views']
df_total

All


NameError: name 'df_grpd' is not defined

In [None]:
print("mean")
monthly = df_grpd[df_grpd['conversion']<1].groupby(['month', 'ga_enabled'])['conversion'].mean().reset_index()
monthly.pivot(index='month', columns='ga_enabled', values='conversion').reset_index()

In [None]:
print('median')
monthly = df_grpd[df_grpd['conversion']<1].groupby(['month', 'ga_enabled'])['conversion'].median().reset_index()
monthly.pivot(index='month', columns='ga_enabled', values='conversion').reset_index()

# donor utilization rates

In [25]:
# look at 2022 comparing conversion rates w/ feature
q = '''select
            form,
            year,
            count(distinct(case when recurring=0 or recurring_origin=1 then id else null end)) as trans_count,
            sum(gift_assist_count) as ga_count
        from transactions
        where
            status='A' and
            year>=2022 and
            (recurring=0 or recurring_origin=1) and
            (source='don_form' or source='mobile')
        group by form, year'''
trans = redshift_query_read(q, schema='production')

In [26]:
trans['utilization'] = trans['ga_count'] / trans['trans_count']

In [27]:
trans[trans['ga_count']!=0].groupby('year')['utilization'].agg(['mean', 'median']).reset_index()

Unnamed: 0,year,mean,median
0,2022,0.5295,0.5
1,2023,0.538716,0.507833
2,2024,0.560463,0.533333


In [42]:
trans_count = trans[trans['year']==2024][['trans_count', 'ga_count']].sum()['trans_count']
ga_count = trans[trans['year']==2024][['trans_count', 'ga_count']].sum()['ga_count']

print("Percentage of all transactions in 2024 that used GA: {:.2f}%".format((ga_count / trans_count) * 100.))

trans_count = trans[(trans['year']==2024)&(trans['ga_count']>0)][['trans_count', 'ga_count']].sum()['trans_count']
ga_count = trans[(trans['year']==2024)&(trans['ga_count']>0)][['trans_count', 'ga_count']].sum()['ga_count']

print("Percentage of all transactions for forms w/ GA enabled that used GA: {:.2f}%".format((ga_count / trans_count) * 100.))

Percentage of all transactions in 2024 that used GA: 31.05%
Percentage of all transactions for forms w/ GA enabled that used GA: 46.98%


# gift assist checked by default

got dataset from Sandra

In [19]:
checked_by_default = pd.read_csv("giftassistbydefault.csv")

In [20]:
checked_by_default_forms = checked_by_default['id'].unique()

In [21]:
len(checked_by_default_forms)

5865

In [22]:
df.head()

Unnamed: 0,form,month,trans_count,trans_vol,ga_count,ga_vol,expresscheckout,views,conversion,ga_enabled,ec_enabled,ga_conversion
0,138318,2022-05-01,72.0,6700.0,0.0,0.0,0.0,959.0,0.075078,False,False,0.0
1,138318,2022-03-01,118.0,6078.0,0.0,0.0,0.0,1624.0,0.07266,False,False,0.0
2,138318,2022-06-01,15.0,1250.0,0.0,0.0,0.0,341.0,0.043988,False,False,0.0
4,921717,2022-01-01,16.0,753.91,13.0,21.91,0.0,2472.0,0.006472,True,False,0.005259
7,977894,2022-01-01,15.0,828.61,9.0,13.61,0.0,2116.0,0.007089,True,False,0.004253


In [23]:
df['month'].unique()[-3:]

array(['2024-09-01T00:00:00.000000000', '2024-10-01T00:00:00.000000000',
       '2024-11-01T00:00:00.000000000'], dtype='datetime64[ns]')

In [24]:
_df = df[df['month']>='2024-06-01']
_df['is_default'] = _df['form'].isin(checked_by_default_forms)

_df[_df['conversion']<1.].groupby('is_default')[['conversion', 'ga_conversion']].agg(['mean', 'median']).reset_index()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


Unnamed: 0_level_0,is_default,conversion,conversion,ga_conversion,ga_conversion
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,median,mean,median
0,False,0.068213,0.000586,0.018703,0.0
1,True,0.103079,0.03809,0.050003,0.0


In [29]:
print("Gift assist utilization by donor rates by whether or not it is checked by default or not")
trans['is_default'] = trans['form'].isin(checked_by_default_forms)
trans[trans['ga_count']!=0].groupby(['year', 'is_default'])['utilization'].agg(['mean', 'median']).reset_index()

Gift assist utilization by donor rates by whether or not it is checked by default or not


Unnamed: 0,year,is_default,mean,median
0,2022,False,0.531002,0.5
1,2022,True,0.486262,0.485673
2,2023,False,0.511016,0.5
3,2023,True,0.715967,0.755737
4,2024,False,0.51828,0.5
5,2024,True,0.725294,0.777778


In [44]:
print("Gift assist utilization by donor rates by whether or not it is checked by default or not; forms >= 100 transactions")
trans['is_default'] = trans['form'].isin(checked_by_default_forms)
trans[(trans['ga_count']!=0)&(trans['trans_count']>=100)].groupby(['year', 'is_default'])['utilization'].agg(['mean', 'median']).reset_index()

Gift assist utilization by donor rates by whether or not it is checked by default or not; forms >= 100 transactions


Unnamed: 0,year,is_default,mean,median
0,2022,False,0.449757,0.473799
1,2022,True,0.429648,0.456311
2,2023,False,0.426769,0.450529
3,2023,True,0.610299,0.678043
4,2024,False,0.387822,0.403509
5,2024,True,0.632129,0.71134


# org adoption rate

In [34]:
q = '''select
            org,
            sum(gift_assist_count) as ga_count
        from transactions
        where
            status='A' and
            year>=2024
        group by org'''
orgs = redshift_query_read(q, schema='production')

In [35]:
len_w_ga = len(orgs[orgs['ga_count']>0])
perc_w_ga = len_w_ga / len(orgs)

print("orgs: {:,}".format(len(orgs)))
print("orgs w/ GA: {:,}".format(len_w_ga))
print("perc orgs w/ GA: {:.2f}%".format(perc_w_ga * 100.))

orgs: 6,913
orgs w/ GA: 4,421
perc orgs w/ GA: 63.95%
