In [1]:
import sys
sys.path.insert(1, '../../scripts/')
from s3_support import *

import pandas as pd

# 0. data points

Resolutions
- all time
- per year
- YoY year growth
- per month/year
- YoY growth per specific month average

Data points
- transactions count & volume
- one time, recurring, recurring origin
- form count (active, total)
- org count (active, total)
- traffic
- traffic device ratio

# 1. transactions system wide stats

## 1. all time

In [2]:
q = '''select 
            count(distinct(org)) as orgs_processing_transactions,
            count(distinct(form)) as forms_processing_transactions,
            count(distinct(id)) as transactions_count,
            count(distinct(case when recurring=0 then id else null end)) as transactions_count_onetime,
            count(distinct(case when recurring!=0 then id else null end)) as transactions_count_recurring,
            count(distinct(case when recurring_origin=1 then id else null end)) as transactions_count_rec_origin,
            sum(amount) as volume_total,
            sum(case when recurring=0 then amount else null end) as volume_onetime,
            sum(case when recurring!=0 then amount else null end) as volume_recurring,
            sum(case when recurring=0 then amount else null end) as volume_recurring_origin,
            count(distinct(case when platform='Windows' or platform='Mac' then id else null end)) as transactions_count_desktop,
            count(distinct(case when platform='Android' or platform='iPhone' or platform='iPad' then id else null end)) as transactions_count_mobile,
            sum(case when platform='Windows' or platform='Mac' then amount else null end) as volume_desktop,
            sum(case when platform='Android' or platform='iPhone' or platform='iPad' then amount else null end) as volume_mobile,
            avg(case when recurring=0 then amount else null end) as mean_onetime_transaction,
            avg(case when recurring_origin=1 then amount else null end) as mean_recurring_transaction,
            avg(case when recurring=0 and (platform='Windows' or platform='Mac') then amount else null end) as mean_onetime_transaction_desktop,
            avg(case when recurring=0 and (platform='Android' or platform='iPhone') then amount else null end) as mean_onetime_transaction_mobile,
            avg(case when recurring_origin=1 and (platform='Windows' or platform='Mac') then amount else null end) as mean_recurring_transaction_desktop,
            avg(case when recurring_origin=1 and (platform='Android' or platform='iPhone') then amount else null end) as mean_recurring_transaction_mobile
        from transactions
        where status='A' '''
df_system_all = redshift_query_read(q, schema='production')

mdn_qs = [
    "median(case when recurring=0 then amount else null end) as median_onetime_transaction",
    "median(case when recurring_origin=1 then amount else null end) as median_recurring_transaction",
    "median(case when recurring=0 and (platform='Windows' or platform='Mac') then amount else null end) as median_onetime_transaction_desktop",
    "median(case when recurring=0 and (platform='Android' or platform='iPhone') then amount else null end) as median_onetime_transaction_mobile",
    "median(case when recurring_origin=1 and (platform='Windows' or platform='Mac') then amount else null end) as median_recurring_transaction_desktop",
    "median(case when recurring_origin=1 and (platform='Android' or platform='iPhone') then amount else null end) as median_recurring_transaction_mobile"
]
q = '''select
            {}
        from transactions
        where status='A' '''

df_system_all_medians = None
for mdn_q in mdn_qs:
    if df_system_all_medians is None:
        df_system_all_medians = redshift_query_read(q.format(mdn_q), schema='production')
    else:
        df_system_all_medians = df_system_all_medians.merge(redshift_query_read(q.format(mdn_q), schema='production'), left_index=True, right_index=True)
        
df_system_all = df_system_all.merge(df_system_all_medians, left_index=True, right_index=True)

In [3]:
df_system_all.transpose()

Unnamed: 0,0
orgs_processing_transactions,10140.0
forms_processing_transactions,63122.0
transactions_count,19586860.0
transactions_count_onetime,11873350.0
transactions_count_recurring,7713508.0
transactions_count_rec_origin,479620.0
volume_total,2480257000.0
volume_onetime,2012464000.0
volume_recurring,467793300.0
volume_recurring_origin,2012464000.0


## 2. per year

In [4]:
q = '''select
            year,
            count(distinct(org)) as orgs_processing_transactions,
            count(distinct(form)) as forms_processing_transactions,
            count(distinct(id)) as transactions_count,
            count(distinct(case when recurring=0 then id else null end)) as transactions_count_onetime,
            count(distinct(case when recurring!=0 then id else null end)) as transactions_count_recurring,
            count(distinct(case when recurring_origin=1 then id else null end)) as transactions_count_recurring_origin,
            sum(amount) as volume_total,
            sum(case when recurring=0 then amount else null end) as volume_onetime,
            sum(case when recurring!=0 then amount else null end) as volume_recurring,
            sum(case when recurring=0 then amount else null end) as volume_recurring_origin,
            count(distinct(case when platform='Windows' or platform='Mac' then id else null end)) as transactions_count_desktop,
            count(distinct(case when platform='Android' or platform='iPhone' or platform='iPad' then id else null end)) as transactions_count_mobile,
            sum(case when platform='Windows' or platform='Mac' then amount else null end) as volume_desktop,
            sum(case when platform='Android' or platform='iPhone' or platform='iPad' then amount else null end) as volume_mobile,
            avg(case when recurring=0 then amount else null end) as mean_onetime_transaction,
            avg(case when recurring_origin=1 then amount else null end) as mean_recurring_transaction,
            avg(case when recurring=0 and (platform='Windows' or platform='Mac') then amount else null end) as mean_onetime_transaction_desktop,
            avg(case when recurring=0 and (platform='Android' or platform='iPhone') then amount else null end) as mean_onetime_transaction_mobile,
            avg(case when recurring_origin=1 and (platform='Windows' or platform='Mac') then amount else null end) as mean_recurring_transaction_desktop,
            avg(case when recurring_origin=1 and (platform='Android' or platform='iPhone') then amount else null end) as mean_recurring_transaction_mobile
        from transactions
        where status='A' 
        group by year'''
df_system_year = redshift_query_read(q, schema='production')

mdn_qs = [
    "median(case when recurring=0 then amount else null end) as median_onetime_transaction",
    "median(case when recurring_origin=1 then amount else null end) as median_recurring_transaction",
    "median(case when recurring=0 and (platform='Windows' or platform='Mac') then amount else null end) as median_onetime_transaction_desktop",
    "median(case when recurring=0 and (platform='Android' or platform='iPhone') then amount else null end) as median_onetime_transaction_mobile",
    "median(case when recurring_origin=1 and (platform='Windows' or platform='Mac') then amount else null end) as median_recurring_transaction_desktop",
    "median(case when recurring_origin=1 and (platform='Android' or platform='iPhone') then amount else null end) as median_recurring_transaction_mobile"
]
q = '''select
            year,
            {}
        from transactions
        where status='A' 
        group by year'''

df_system_year_medians = None
for mdn_q in mdn_qs:
    if df_system_year_medians is None:
        df_system_year_medians = redshift_query_read(q.format(mdn_q), schema='production')
    else:
        df_system_year_medians = df_system_year_medians.merge(redshift_query_read(q.format(mdn_q), schema='production'), on='year')
        
df_system_year = df_system_year.merge(df_system_year_medians, on='year')

In [5]:
df_system_year.sort_values('year', ascending=True).tail()

Unnamed: 0,year,orgs_processing_transactions,forms_processing_transactions,transactions_count,transactions_count_onetime,transactions_count_recurring,transactions_count_recurring_origin,volume_total,volume_onetime,volume_recurring,...,mean_onetime_transaction_desktop,mean_onetime_transaction_mobile,mean_recurring_transaction_desktop,mean_recurring_transaction_mobile,median_onetime_transaction,median_recurring_transaction,median_onetime_transaction_desktop,median_onetime_transaction_mobile,median_recurring_transaction_desktop,median_recurring_transaction_mobile
10,2020,3458,13401,2153379,1419725,733654,54883,265468700.0,223650900.0,41817800.0,...,190.807698,108.556751,68.691161,55.327972,50.0,25.0,52.5,50.0,25.0,24.0
15,2021,3862,15925,2501174,1429311,1071863,63903,313878400.0,251185500.0,62692860.0,...,217.458212,122.937309,82.396232,71.281524,51.37,25.75,52.5,50.0,25.75,25.0
18,2022,4148,18312,2872289,1572180,1300109,61808,355705900.0,278372200.0,77333700.0,...,219.671317,124.615488,92.443149,70.247929,50.0,26.0,52.0,50.0,27.58,25.75
13,2023,4498,20852,3251720,1794558,1457162,67593,412668600.0,328336200.0,84332460.0,...,230.127076,130.066092,91.331369,65.654705,50.0,25.75,52.0,50.0,26.5,25.75
12,2024,5321,19437,2099118,1080974,1018144,49989,246972800.0,189804500.0,57168380.0,...,216.652533,131.667567,87.741096,62.624735,50.0,30.0,50.0,50.0,30.0,26.0


## 3. YoY year growth

In [6]:
_df_years = df_system_year.sort_values('year', ascending=True)['year']
_df_yoy = df_system_year.sort_values('year', ascending=True).drop('year', axis=1).pct_change()
_df_yoy['year'] = _df_years
df_yoy = _df_yoy[['year'] + [c for c in _df_yoy.columns if c!='year']]

In [7]:
df_yoy.tail()

Unnamed: 0,year,orgs_processing_transactions,forms_processing_transactions,transactions_count,transactions_count_onetime,transactions_count_recurring,transactions_count_recurring_origin,volume_total,volume_onetime,volume_recurring,...,mean_onetime_transaction_desktop,mean_onetime_transaction_mobile,mean_recurring_transaction_desktop,mean_recurring_transaction_mobile,median_onetime_transaction,median_recurring_transaction,median_onetime_transaction_desktop,median_onetime_transaction_mobile,median_recurring_transaction_desktop,median_recurring_transaction_mobile
10,2020,0.122363,0.211007,0.308942,0.304597,0.317432,0.40276,0.381639,0.41087,0.243813,...,0.110603,0.073889,-0.224646,-0.033273,0.0,-0.122807,0.05,0.232742,-0.028749,-0.04
15,2021,0.116831,0.188344,0.161511,0.006752,0.460993,0.16435,0.182356,0.123115,0.499191,...,0.139672,0.13247,0.199517,0.288345,0.0274,0.03,0.0,0.0,0.03,0.041667
18,2022,0.074055,0.14989,0.148376,0.099957,0.212943,-0.032784,0.13326,0.108234,0.233533,...,0.010177,0.013651,0.121934,-0.0145,-0.026669,0.009709,-0.009524,0.0,0.071068,0.03
13,2023,0.084378,0.138707,0.132101,0.141446,0.1208,0.093596,0.16014,0.179486,0.090501,...,0.047597,0.043739,-0.012027,-0.065386,0.0,-0.009615,0.0,0.0,-0.039159,0.0
12,2024,0.18297,-0.067859,-0.354459,-0.397638,-0.301283,-0.260441,-0.401523,-0.42192,-0.322107,...,-0.058553,0.012313,-0.03931,-0.04615,0.0,0.165049,-0.038462,0.0,0.132075,0.009709


## 4. per month/year

In [8]:
q = '''select
            date_trunc('month', date) as month,
            count(distinct(org)) as orgs_processing_transactions,
            count(distinct(form)) as forms_processing_transactions,
            count(distinct(id)) as transaction_count,
            count(distinct(case when recurring=0 then id else null end)) as transactions_count_onetime,
            count(distinct(case when recurring!=0 then id else null end)) as transactions_count_recurring,
            count(distinct(case when recurring_origin=1 then id else null end)) as transactions_count_rec_origin,
            sum(amount) as volume_totalcount_,
            sum(case when recurring=0 then amount else null end) as volume_onetime,
            sum(case when recurring!=0 then amount else null end) as volume_recurring,
            sum(case when recurring=0 then amount else null end) as volume_recurring_origin,
            count(distinct(case when platform='Windows' or platform='Mac' then id else null end)) as transactions_count_desktop,
            count(distinct(case when platform='Android' or platform='iPhone' or platform='iPad' then id else null end)) as transactions_count_mobile,
            sum(case when platform='Windows' or platform='Mac' then amount else null end) as volume_desktop,
            sum(case when platform='Android' or platform='iPhone' or platform='iPad' then amount else null end) as volume_mobile,
            avg(case when recurring=0 then amount else null end) as mean_onetime_transaction,
            avg(case when recurring_origin=1 then amount else null end) as mean_recurring_transaction,
            avg(case when recurring=0 and (platform='Windows' or platform='Mac') then amount else null end) as mean_onetime_transaction_desktop,
            avg(case when recurring=0 and (platform='Android' or platform='iPhone') then amount else null end) as mean_onetime_transaction_mobile,
            avg(case when recurring_origin=1 and (platform='Windows' or platform='Mac') then amount else null end) as mean_recurring_transaction_desktop,
            avg(case when recurring_origin=1 and (platform='Android' or platform='iPhone') then amount else null end) as mean_recurring_transaction_mobile
        from transactions
        where status='A' 
        group by date_trunc('month', date)'''
df_system_monthyear = redshift_query_read(q, schema='production')

mdn_qs = [
    "median(case when recurring=0 then amount else null end) as median_onetime_transaction",
    "median(case when recurring_origin=1 then amount else null end) as median_recurring_transaction",
    "median(case when recurring=0 and (platform='Windows' or platform='Mac') then amount else null end) as median_onetime_transaction_desktop",
    "median(case when recurring=0 and (platform='Android' or platform='iPhone') then amount else null end) as median_onetime_transaction_mobile",
    "median(case when recurring_origin=1 and (platform='Windows' or platform='Mac') then amount else null end) as median_recurring_transaction_desktop",
    "median(case when recurring_origin=1 and (platform='Android' or platform='iPhone') then amount else null end) as median_recurring_transaction_mobile"
]
q = '''select
            date_trunc('month', date) as month,
            {}
        from transactions
        where status='A' 
        group by date_trunc('month', date)'''

df_system_monthyear_medians = None
for mdn_q in mdn_qs:
    if df_system_monthyear_medians is None:
        df_system_monthyear_medians = redshift_query_read(q.format(mdn_q), schema='production')
    else:
        df_system_monthyear_medians = df_system_monthyear_medians.merge(redshift_query_read(q.format(mdn_q), schema='production'), on='month')
        
df_system_monthyear = df_system_monthyear.merge(df_system_monthyear_medians, on='month')

In [9]:
df_system_monthyear.sort_values('month', ascending=True).tail()

Unnamed: 0,month,orgs_processing_transactions,forms_processing_transactions,transaction_count,transactions_count_onetime,transactions_count_recurring,transactions_count_rec_origin,volume_totalcount_,volume_onetime,volume_recurring,...,mean_onetime_transaction_desktop,mean_onetime_transaction_mobile,mean_recurring_transaction_desktop,mean_recurring_transaction_mobile,median_onetime_transaction,median_recurring_transaction,median_onetime_transaction_desktop,median_onetime_transaction_mobile,median_recurring_transaction_desktop,median_recurring_transaction_mobile
185,2024-03-01,3528,10991,376806,197393,179413,11049,40868435.23,32215851.38,8652583.85,...,201.289557,107.935823,64.357838,41.292645,50.0,25.0,50.0,45.0,25.0,19.57
216,2024-04-01,3831,11635,397064,207400,189664,7925,44134696.09,34506362.04,9628334.05,...,216.940499,116.458641,65.671174,43.238623,50.0,25.0,50.0,47.68,26.0,20.0
121,2024-05-01,3834,11668,296443,160893,135550,5618,42285115.76,34441039.91,7844075.85,...,234.300818,198.438821,140.406744,67.564526,50.0,26.245,52.0,50.0,26.25,25.0
139,2024-06-01,3853,11420,273166,126778,146388,6117,33045754.63,25084174.55,7961580.08,...,249.923799,136.408106,113.043847,76.569059,51.0,26.0,52.5,50.0,36.0,25.0
186,2024-07-01,3801,10436,206597,95905,110692,5719,26570967.88,18552330.95,8018636.93,...,232.208892,143.493839,97.580383,94.613725,50.0,50.0,50.0,50.0,36.0,36.0


## 5. month/year YoY growth

In [10]:
_df_monthyear_yoy = df_system_monthyear.copy()
_df_monthyear_yoy['month'] = pd.to_datetime(_df_monthyear_yoy['month'])
_df_monthyear_yoy['dt_year'] = _df_monthyear_yoy['month'].dt.year
_df_monthyear_yoy['dt_month'] = _df_monthyear_yoy['month'].dt.month

In [11]:
df_monthyear_yoy = None
for month in _df_monthyear_yoy['dt_month'].unique():
    _df = _df_monthyear_yoy[_df_monthyear_yoy['dt_month']==month].sort_values('dt_year', ascending=True)
    _df_yoy = _df.drop(['month', 'dt_year', 'dt_month'], axis=1).pct_change()
    _df_yoy['month'] = _df['month']
    
    df_monthyear_yoy = pd.concat([df_monthyear_yoy, _df_yoy])

In [12]:
cols = ['month'] + [c for c in df_monthyear_yoy.columns if c!='month']
df_monthyear_yoy[cols].sort_values('month', ascending=True).tail()

Unnamed: 0,month,orgs_processing_transactions,forms_processing_transactions,transaction_count,transactions_count_onetime,transactions_count_recurring,transactions_count_rec_origin,volume_totalcount_,volume_onetime,volume_recurring,...,mean_onetime_transaction_desktop,mean_onetime_transaction_mobile,mean_recurring_transaction_desktop,mean_recurring_transaction_mobile,median_onetime_transaction,median_recurring_transaction,median_onetime_transaction_desktop,median_onetime_transaction_mobile,median_recurring_transaction_desktop,median_recurring_transaction_mobile
185,2024-03-01,0.175217,0.192212,0.328845,0.222755,0.469081,0.570576,0.253574,0.248532,0.272708,...,0.024887,-0.028666,-0.298,-0.046469,0.0,0.0,0.0,-0.1,0.0,-0.05
216,2024-04-01,0.278278,0.251614,0.150823,0.257938,0.052794,-0.088976,0.215309,0.266568,0.061367,...,0.100523,-0.038994,-0.171333,-0.172399,0.0,0.0,0.0,-0.0464,0.009709,0.0
121,2024-05-01,0.279706,0.247514,0.186664,0.162354,0.216874,0.196847,0.401816,0.439577,0.257041,...,0.089647,0.587066,0.382047,-0.046256,0.0,-0.00019,0.009709,0.0,-0.125,-0.047619
139,2024-06-01,0.301689,0.272707,0.223889,0.207318,0.238613,0.270141,0.272195,0.276539,0.2587,...,0.079098,0.019731,-0.099101,0.231358,0.02,0.04,0.010198,0.0,0.236264,0.0
186,2024-07-01,0.288475,0.181612,-0.018691,-0.024692,-0.013431,0.016892,0.038931,0.038735,0.039385,...,0.015156,0.104814,-0.042089,0.029778,0.0,0.0,-0.037906,0.0,0.16129,0.165803


# 2. traffic system wide stats

## 1. all time

In [13]:
tables = ['ga', 'ga4_traffic', 'ga4_traffic_daily', 'ga4_traffic_daily_device',
          'ga4_traffic_weekly', 'ga4_traffic_weekly_device']
q = '''select
            sum(views) as pageviews
        from {}'''

max_pageviews = 0

for table in tables:
    _df = redshift_query_read(q.format(table), schema='production')
    max_pageviews = max(max_pageviews, _df['pageviews'].iloc[0])

In [14]:
print("Pageviews total: {:,}".format(max_pageviews))

Pageviews total: 258,311,892


In [15]:
tables = ['ga', 'ga4_traffic_daily_device', 'ga4_traffic_weekly_device']
q = '''select
            devicecategory,
            sum(views) as pageviews
        from {}
        group by devicecategory'''

max_pageviews_desktop = 0
max_pageviews_mobile = 0
max_pageviews_tablet = 0

for table in tables:
    _df = redshift_query_read(q.format(table), schema='production')
    max_pageviews_desktop = max(max_pageviews_desktop, _df[_df['devicecategory']=='desktop']['pageviews'].iloc[0])
    max_pageviews_mobile = max(max_pageviews_mobile, _df[_df['devicecategory']=='mobile']['pageviews'].iloc[0])
    max_pageviews_tablet = max(max_pageviews_tablet, _df[_df['devicecategory']=='tablet']['pageviews'].iloc[0])

In [16]:
print("Pageviews desktop: {:,}".format(max_pageviews_desktop))
print("Pageviews mobile: {:,}".format(max_pageviews_mobile))
print("Pageviews tablet: {:,}".format(max_pageviews_tablet))

Pageviews desktop: 139,171,177
Pageviews mobile: 111,710,475
Pageviews tablet: 7,430,084


## 2. per year

In [17]:
tables = ['ga', 'ga4_traffic', 'ga4_traffic_daily', 'ga4_traffic_daily_device',
          'ga4_traffic_weekly', 'ga4_traffic_weekly_device']
q = '''select
            date_part('year', date) as year,
            sum(views) as pageviews
        from {}
        group by date_part('year', date)'''
q_week = '''select
            date_part('year', week) as year,
            sum(views) as pageviews
        from {}
        group by date_part('year', week)'''

year_data = None
for table in tables:
    if 'week' in table:
        _df = redshift_query_read(q_week.format(table), schema='production')
    else:
        _df = redshift_query_read(q.format(table), schema='production')
    
    _df['table'] = table
    year_data = pd.concat([year_data, _df])

In [18]:
year_data_pvt = year_data.pivot(index='year', columns='table', values='pageviews').fillna(0)
year_data_pvt = year_data_pvt.max(axis=1).reset_index()
year_data_pvt.columns = ['year', 'pageviews']
year_data_pvt

Unnamed: 0,year,pageviews
0,2016.0,1313789.0
1,2017.0,15172924.0
2,2018.0,20598534.0
3,2019.0,30315517.0
4,2020.0,29928321.0
5,2021.0,38901276.0
6,2022.0,42785263.0
7,2023.0,65022128.0
8,2024.0,33876440.0


In [19]:
tables = ['ga', 'ga4_traffic_daily_device', 'ga4_traffic_weekly_device']
q = '''select
            date_part('year', date) as year,
            devicecategory,
            sum(views) as pageviews
        from {}
        group by devicecategory, date_part('year', date)'''
q_week = '''select
            date_part('year', week) as year,
            devicecategory,
            sum(views) as pageviews
        from {}
        group by devicecategory, date_part('year', week)'''

year_data_device = None
for table in tables:
    if 'week' in table:
        _df = redshift_query_read(q_week.format(table), schema='production')
    else:
        _df = redshift_query_read(q.format(table), schema='production')
    
    _df['table'] = table
    year_data_device = pd.concat([year_data_device, _df])

In [20]:
year_data_device_pvt = year_data_device.pivot(index=['year', 'devicecategory'], columns='table', values='pageviews').fillna(0)
year_data_device_pvt = year_data_device_pvt.max(axis=1).reset_index()
year_data_device_pvt.columns = ['year', 'device', 'pageviews']
year_data_device_pvt = year_data_device_pvt.pivot(index='year', columns='device', values='pageviews')
year_data_device_pvt

device,desktop,mobile,smart tv,tablet
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2016.0,928837.0,309078.0,,75874.0
2017.0,9845524.0,4570584.0,,756816.0
2018.0,13036147.0,6382865.0,,1179522.0
2019.0,17835946.0,10734427.0,,1745144.0
2020.0,17045788.0,11925181.0,,957352.0
2021.0,20592495.0,17417666.0,,891115.0
2022.0,20027683.0,21931435.0,,826145.0
2023.0,31438163.0,32661993.0,48.0,921924.0
2024.0,9786691.0,6080905.0,108.0,76192.0


## 3. YoY year growth

In [21]:
year_data_pvt.sort_values('year', ascending=True, inplace=True)
year_data_pvt['YoY'] = year_data_pvt['pageviews'].pct_change()
year_data_pvt

Unnamed: 0,year,pageviews,YoY
0,2016.0,1313789.0,
1,2017.0,15172924.0,10.548981
2,2018.0,20598534.0,0.357585
3,2019.0,30315517.0,0.471732
4,2020.0,29928321.0,-0.012772
5,2021.0,38901276.0,0.299815
6,2022.0,42785263.0,0.099842
7,2023.0,65022128.0,0.519732
8,2024.0,33876440.0,-0.479001


In [22]:
year_data_device_pvt.pct_change()

device,desktop,mobile,smart tv,tablet
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2016.0,,,,
2017.0,9.59984,13.787801,,8.974642
2018.0,0.324068,0.39651,,0.558532
2019.0,0.368192,0.681757,,0.479535
2020.0,-0.044301,0.110929,,-0.451419
2021.0,0.208069,0.460579,,-0.069188
2022.0,-0.027428,0.259149,,-0.072909
2023.0,0.569735,0.489278,,0.115935
2024.0,-0.6887,-0.813823,1.25,-0.917355


## 4. per month/year

In [23]:
tables = ['ga', 'ga4_traffic', 'ga4_traffic_daily', 'ga4_traffic_daily_device',
          'ga4_traffic_weekly', 'ga4_traffic_weekly_device']
q = '''select
            date_trunc('month', date) as month,
            sum(views) as pageviews
        from {}
        group by date_trunc('month', date)'''
q_week = '''select
            date_trunc('month', week) as month,
            sum(views) as pageviews
        from {}
        group by date_trunc('month', week)'''

monthyear_data = None
for table in tables:
    if 'week' in table:
        _df = redshift_query_read(q_week.format(table), schema='production')
    else:
        _df = redshift_query_read(q.format(table), schema='production')
    
    _df['table'] = table
    monthyear_data = pd.concat([monthyear_data, _df])

In [24]:
monthyear_max = monthyear_data.groupby('month')['pageviews'].max().reset_index()
monthyear_max.sort_values('month', inplace=True)
monthyear_max.tail(2)

Unnamed: 0,month,pageviews
90,2024-06-01,4349240
91,2024-07-01,2976215


## 5. month/year YoY growth

In [25]:
monthyear_max['dt_month'] = monthyear_max['month'].dt.month
monthyear_max['dt_year'] = monthyear_max['month'].dt.year

In [26]:
monthyear_max_yoy = None
for month in monthyear_max['dt_month'].unique().tolist():
    _df = monthyear_max[monthyear_max['dt_month']==month]
    _df.sort_values('dt_year', ascending=True, inplace=True)
    _df['YoY'] = _df['pageviews'].pct_change()
    
    monthyear_max_yoy = pd.concat([monthyear_max_yoy, _df])
    
monthyear_max_yoy.drop(['dt_month', 'dt_year'], axis=1, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """


In [27]:
monthyear_max_yoy.sort_values("month", ascending=True).tail(10)

Unnamed: 0,month,pageviews,YoY
82,2023-10-01,5130119,0.063809
83,2023-11-01,4603890,0.146229
84,2023-12-01,3207444,0.180542
85,2024-01-01,4419452,-0.133126
86,2024-02-01,4956151,-0.345422
87,2024-03-01,5744447,-0.33014
88,2024-04-01,6219568,-0.238424
89,2024-05-01,5211367,-0.202237
90,2024-06-01,4349240,-0.085156
91,2024-07-01,2976215,-0.337006


# 3. merge transactions & traffic

## 1. all time

In [28]:
df_system_all['google_analytics_total_pageviews'] = max_pageviews
df_system_all['google_analytics_desktop_pageviews'] = max_pageviews_desktop
df_system_all['google_analytics_mobile_pageviews'] = max_pageviews_mobile
df_system_all['google_analytics_tablet_pageviews'] = max_pageviews_tablet

In [29]:
df_system_all.transpose()

Unnamed: 0,0
orgs_processing_transactions,10140.0
forms_processing_transactions,63122.0
transactions_count,19586860.0
transactions_count_onetime,11873350.0
transactions_count_recurring,7713508.0
transactions_count_rec_origin,479620.0
volume_total,2480257000.0
volume_onetime,2012464000.0
volume_recurring,467793300.0
volume_recurring_origin,2012464000.0


In [30]:
df_system_all.to_csv("stats.system-all.csv", index=False)

## 2. per year

In [31]:
df_system_year_mrgd = df_system_year.merge(year_data_pvt.drop('YoY', axis=1), on='year').sort_values('year', ascending=True)
df_system_year_mrgd

Unnamed: 0,year,orgs_processing_transactions,forms_processing_transactions,transactions_count,transactions_count_onetime,transactions_count_recurring,transactions_count_recurring_origin,volume_total,volume_onetime,volume_recurring,...,mean_onetime_transaction_mobile,mean_recurring_transaction_desktop,mean_recurring_transaction_mobile,median_onetime_transaction,median_recurring_transaction,median_onetime_transaction_desktop,median_onetime_transaction_mobile,median_recurring_transaction_desktop,median_recurring_transaction_mobile,pageviews
1,2016,1740,3604,734457,496701,237756,19733,104217800.0,86858150.0,17359660.0,...,118.362403,109.255094,80.502008,50.0,38.0,50.0,50.0,35.0,25.0,1313789.0
7,2017,2104,5523,980761,657986,322775,25234,131011400.0,108466800.0,22544690.0,...,116.080446,97.020491,58.191986,50.0,31.18,50.0,50.0,30.0,25.0,15172924.0
6,2018,2667,8454,1292736,856275,436461,34658,160743000.0,132751300.0,27991700.0,...,111.756333,76.690713,72.010822,50.0,25.69,50.0,50.0,25.0,20.0,20598534.0
0,2019,3081,11066,1645130,1088248,556882,39125,192140400.0,158519800.0,33620640.0,...,101.087534,88.593341,57.232267,50.0,28.5,50.0,40.56,25.74,25.0,30315517.0
2,2020,3458,13401,2153379,1419725,733654,54883,265468700.0,223650900.0,41817800.0,...,108.556751,68.691161,55.327972,50.0,25.0,52.5,50.0,25.0,24.0,29928321.0
5,2021,3862,15925,2501174,1429311,1071863,63903,313878400.0,251185500.0,62692860.0,...,122.937309,82.396232,71.281524,51.37,25.75,52.5,50.0,25.75,25.0,38901276.0
8,2022,4148,18312,2872289,1572180,1300109,61808,355705900.0,278372200.0,77333700.0,...,124.615488,92.443149,70.247929,50.0,26.0,52.0,50.0,27.58,25.75,42785263.0
4,2023,4498,20852,3251720,1794558,1457162,67593,412668600.0,328336200.0,84332460.0,...,130.066092,91.331369,65.654705,50.0,25.75,52.0,50.0,26.5,25.75,65022128.0
3,2024,5321,19437,2099118,1080974,1018144,49989,246972800.0,189804500.0,57168380.0,...,131.667567,87.741096,62.624735,50.0,30.0,50.0,50.0,30.0,26.0,33876440.0


In [32]:
dupe_cols = [c for c in df_system_year_mrgd.columns if '_x' in c]

for col in dupe_cols:
    canonical_name = col.replace("_x", "")
    df_system_year_mrgd[canonical_name] = df_system_year_mrgd[col]
    
    drop_cols = [col, canonical_name + "_y"]
    df_system_year_mrgd.drop(drop_cols, axis=1, inplace=True)

In [33]:
df_system_year_mrgd.to_csv("stats.system-year.csv", index=False)

## 3. per month

In [34]:
df_system_monthyear.merge(monthyear_max_yoy.drop('YoY', axis=1), on='month').tail(2)

Unnamed: 0,month,orgs_processing_transactions,forms_processing_transactions,transaction_count,transactions_count_onetime,transactions_count_recurring,transactions_count_rec_origin,volume_totalcount_,volume_onetime,volume_recurring,...,mean_onetime_transaction_mobile,mean_recurring_transaction_desktop,mean_recurring_transaction_mobile,median_onetime_transaction,median_recurring_transaction,median_onetime_transaction_desktop,median_onetime_transaction_mobile,median_recurring_transaction_desktop,median_recurring_transaction_mobile,pageviews
90,2023-07-01,2950,8832,210532,98333,112199,5624,25575290.85,17860501.58,7714789.27,...,129.880486,101.867894,91.877778,50.0,50.0,51.97,50.0,31.0,30.88,4489051
91,2024-04-01,3831,11635,397064,207400,189664,7925,44134696.09,34506362.04,9628334.05,...,116.458641,65.671174,43.238623,50.0,25.0,50.0,47.68,26.0,20.0,6219568


In [35]:
df_system_monthyear.merge(monthyear_max_yoy.drop('YoY', axis=1), on='month').to_csv("stats.system-month.csv")