In [None]:
from utilities import init_bigquery_client
from google.cloud import bigquery
import os
import pandas as pd
import numpy as np
import plotly.express as px

# Create the BigQuery client that the query cells below use via `bq.query(...)`.
# init_bigquery_client is imported from the local `utilities` module.
# NOTE(review): credential discovery presumably happens inside that helper — confirm there.
bq = init_bigquery_client()

Using BigQuery credentials: etl-testing-478716-c0b6c2c512e0.json


## Perform bulk queries

In [31]:
# Fully-qualified `project.dataset` that the bulk queries read from.
BQ_DATASET = "etl-testing-478716.firebase_etl_prod"


def load_table(table_name, dataset=BQ_DATASET, client=None):
    """Download every row of one BigQuery table into a pandas DataFrame.

    Parameters
    ----------
    table_name : str
        Table name inside ``dataset`` (e.g. ``"events"``).
    dataset : str
        ``project.dataset`` prefix; defaults to ``BQ_DATASET``.
    client : optional
        Object exposing ``query(sql).to_dataframe()``. Defaults to the
        notebook-level ``bq`` client.

    Returns
    -------
    pandas.DataFrame
        Result of ``SELECT *`` on the table.
    """
    # Resolve the default lazily so the helper can be defined before `bq` exists.
    client = bq if client is None else client
    # table_name/dataset are notebook constants, not user input, so plain
    # interpolation into the identifier is acceptable here.
    sql = f"SELECT * FROM `{dataset}.{table_name}`"
    return client.query(sql).to_dataframe()


# Read the full 'events' and 'userinvites' tables from BigQuery.
events_df = load_table("events")
userinvites_df = load_table("userinvites")


BigQuery Storage module not found, fetch data with the REST endpoint instead.



## Number of Events Created Per Day with Invites

In [34]:
# Collapse repeated rows for the same event document down to a single row.
# Rows are ordered newest-first by createdAt, so keep='first' retains the row
# with the latest createdAt for each document_id.
# NOTE(review): the stated intent is "most recent edit", but the ordering key is
# createdAt — confirm this column actually differs between edits of one document.
events_newest_first = events_df.sort_values('createdAt', ascending=False)
events_df = events_newest_first.drop_duplicates(subset=['document_id'], keep='first')

In [None]:
# Left-join invites onto events (event document_id == invite event_id).
# Columns present in both frames get the '_event' / '_invite' suffixes.
events_w_invites = pd.merge(
    events_df,
    userinvites_df,
    how='left',
    left_on='document_id',
    right_on='event_id',
    suffixes=('_event', '_invite'),
)

# Rows without an invite id came from events that matched no invite; drop them.
invited_rows = events_w_invites.dropna(subset=['document_id_invite'])

# Bucket by calendar day of event creation and count distinct events per bucket.
daily_bins = pd.Grouper(key='createdAt_event', freq='D')
unique_events_per_day = (
    invited_rows
    .groupby(daily_bins)['document_id_event']
    .nunique()
    .reset_index(name='unique_event_count')
)

In [42]:
# Bar chart: one bar per creation day, height = distinct events that have invites.
fig = px.bar(
    unique_events_per_day,
    x='createdAt_event',
    y='unique_event_count',
    title='Number of Events Created Per Day with Invites',
)
fig.show()

## Attendees Per Event

In [43]:
# Count accepted invites per event: keep only rows whose status is 'accepted',
# then tally rows for each event id.
accepted_mask = events_w_invites['status'].eq('accepted')
count_rsvp = (
    events_w_invites.loc[accepted_mask]
    .groupby('document_id_event')
    .size()
    .reset_index(name='accepted_invite_count')
)

In [46]:
# Attach event details (e.g. type, createdAt) to each per-event accepted-invite count.
# Only the two count columns are carried into the merge, so re-running this cell
# on an already-merged count_rsvp yields the same frame instead of producing
# duplicated, suffixed event columns (createdAt_x / createdAt_y, ...).
count_rsvp = (
    count_rsvp[['document_id_event', 'accepted_invite_count']]
    .merge(events_df, left_on='document_id_event', right_on='document_id', how='left')
)

In [63]:
# Derive the calendar date of event creation to use as the x-axis.
created_dates = count_rsvp['createdAt'].dt.date
count_rsvp['createdAt_date'] = created_dates

# Accepted invites by creation date, coloured by event type.
px.bar(
    count_rsvp,
    x='createdAt_date',
    y='accepted_invite_count',
    color='type',
    title='Number of Accepted Invites per Event by Type',
).show()

### Biggest event in the last two weeks vs. the prior two weeks

In [64]:
import pytz  # not referenced in this cell; kept in case other cells rely on it
import plotly.graph_objects as go

# Length of each comparison window (current period and the one before it).
PERIOD = pd.Timedelta(weeks=2)


def _biggest_event(rsvp_counts, start, end=None):
    """Return the one-row frame of the event with the most accepted invites.

    Parameters
    ----------
    rsvp_counts : pandas.DataFrame
        Must contain 'createdAt' and 'accepted_invite_count' columns.
    start : pandas.Timestamp
        Inclusive lower bound on 'createdAt'.
    end : pandas.Timestamp, optional
        Exclusive upper bound on 'createdAt'; no upper bound when None.

    Returns
    -------
    pandas.DataFrame
        At most one row; empty when no event falls inside the window.
    """
    created = rsvp_counts['createdAt']
    in_window = created >= start
    if end is not None:
        in_window = in_window & (created < end)
    return (
        rsvp_counts[in_window]
        .sort_values('accepted_invite_count', ascending=False)
        .head(1)
    )


def _accepted_count(top_event):
    """Return the accepted-invite count of a one-row frame, or 0 if it is empty."""
    if top_event.empty:
        return 0
    return top_event['accepted_invite_count'].values[0]


# Window boundaries are timezone-aware (UTC) so they can be compared with createdAt.
now_utc = pd.Timestamp.now(tz='UTC')
two_weeks_ago = now_utc - PERIOD
four_weeks_ago = two_weeks_ago - PERIOD

# Biggest event in the last two weeks, and in the two weeks before that.
biggest_recent = _biggest_event(count_rsvp, start=two_weeks_ago)
biggest_prior = _biggest_event(count_rsvp, start=four_weeks_ago, end=two_weeks_ago)

# Accepted-invite counts for the two periods (0 when a period has no events).
recent_value = _accepted_count(biggest_recent)
prior_value = _accepted_count(biggest_prior)

# Big-number card: current period's maximum with an absolute delta vs. the prior period.
# NOTE(review): `gauge` is configured although `mode` omits "gauge" — presumably it
# has no visible effect; confirm and drop it.
fig = go.Figure(go.Indicator(
    mode = "number+delta",
    value = recent_value,
    delta = {'reference': prior_value, 'relative': False},
    title = {'text': "Biggest Event: Accepted Invites (Last 2 Weeks)"},
    gauge = {'shape': "bullet"}
))

# NOTE(review): the template repeats title/mode/delta already set on the trace
# (with a different title text) — confirm whether it is still needed.
fig.update_layout(
    grid = {'rows': 1, 'columns': 1, 'pattern': "independent"},
    template = {'data' : {'indicator': [{
        'title': {'text': "Biggest Event: Accepted Invites"},
        'mode' : "number+delta",
        'delta' : {'reference': prior_value}}]
    }}
)

fig.show()