This notebook will review broad trends in the Google Analytics data by year. It will also look at peak days such as New Years Eve and Giving Tuesday.

In [1]:
import sys
sys.path.insert(1, '../../scripts/')
from s3_support import *

import pandas as pd

# 0. load data

In [23]:
START_DATE = "2018-01-01"
GIVING_TUESDAY_DATES = [
    "2018-11-27",
    "2019-12-03",
    "2020-12-01",
    "2021-11-30",
    "2022-11-29",
    "2023-11-28"
]

In [24]:
q = """select
            date,
            sum(views) as pageviews
        from ga
        where date>'{}' 
        group by date""".format(START_DATE)
traff = redshift_query_read(q, schema='production')

In [25]:
print("{:,} days of traffic data".format(len(traff)))
print("{:,.0f} mean traffic per day".format(traff['pageviews'].mean()))
print("{:,.0f} median traffic per day".format(traff['pageviews'].median()))

2,437 days of traffic data
101,231 mean traffic per day
86,342 median traffic per day


In [26]:
print("2024:")
print("{:,.0f} mean traffic per day".format(traff[traff['date']>='2024-01-01']['pageviews'].mean()))
print("{:,.0f} median traffic per day".format(traff[traff['date']>='2024-01-01']['pageviews'].median()))

2024:
77,635 mean traffic per day
84,456 median traffic per day


In [27]:
traff.tail(2)

Unnamed: 0,date,pageviews
2435,2023-12-04,75060
2436,2024-04-24,101663


In [28]:
d, traff[traff['date'].astype(str)==d]

('2023-11-28',
           date  pageviews
 862 2023-11-28     178813)

In [31]:
print("Giving Tuesday's:")
print("-"*20)
for d in GIVING_TUESDAY_DATES:
    print("{}: {:,}".format(d, traff[traff['date']==d]['pageviews'].iloc[0]))

Giving Tuesday's:
--------------------
2018-11-27: 180,146
2019-12-03: 231,387
2020-12-01: 255,040
2021-11-30: 263,155
2022-11-29: 264,028
2023-11-28: 178,813


In [32]:
print("New Years Eve:")
print("-"*20)
for year in [2018, 2019, 2020, 2021, 2022, 2023]:
    nye_traff = traff[traff['date']=="{}-12-31".format(year)]['pageviews'].iloc[0]
    print("{}: {:,}".format(year, nye_traff))

New Years Eve:
--------------------
2018: 74,673
2019: 79,882
2020: 89,612
2021: 102,487
2022: 97,588
2023: 58,895


In [35]:
q = """select
            date,
            count(id) as transactions
        from transactions
        where 
            status='A' and
            date in ({})
        group by date""".format(", ".join(["'{}'".format(d) for d in GIVING_TUESDAY_DATES]))
trans = redshift_query_read(q, schema='production')

In [37]:
trans.sort_values('date')

Unnamed: 0,date,transactions
0,2018-11-27,14804
4,2019-12-03,20032
2,2020-12-01,40318
5,2021-11-30,40490
1,2022-11-29,32820
3,2023-11-28,34717
