# Crisis Support Funnel

This notebook measures how many sessions move through a crisis-support path:

### Quick start (beginner-friendly)
1. Run the **Setup (run once)** cell.
2. In **Parameters**, choose `DAYS_BACK`.
3. Run the remaining cells from top to bottom.

### Links
- GitHub repo: [github.com/aidanm-lla/lla-data](https://github.com/aidanm-lla/lla-data)
- Open this notebook in Colab: [Crisis Support Funnel](https://colab.research.google.com/github/aidanm-lla/lla-data/blob/main/notebooks/crisis_funnel.ipynb)

### Other notebooks
- [Search Contribution Overview](https://colab.research.google.com/github/aidanm-lla/lla-data/blob/main/notebooks/seo/01_search_contribution_overview.ipynb)
- [Top Pages Search Performance](https://colab.research.google.com/github/aidanm-lla/lla-data/blob/main/notebooks/seo/02_top_pages_search_performance.ipynb)
- [Query Drivers by Page](https://colab.research.google.com/github/aidanm-lla/lla-data/blob/main/notebooks/seo/03_query_drivers_by_page.ipynb)
- [Opportunity Watchlist](https://colab.research.google.com/github/aidanm-lla/lla-data/blob/main/notebooks/seo/04_opportunity_watchlist.ipynb)
- [Top Pages (Last 7 Days)](https://colab.research.google.com/github/aidanm-lla/lla-data/blob/main/notebooks/top_pages_last_7_days.ipynb)
- [Traffic Source Quality](https://colab.research.google.com/github/aidanm-lla/lla-data/blob/main/notebooks/traffic_sources.ipynb)
- [Time Patterns for Crisis-Related Pages](https://colab.research.google.com/github/aidanm-lla/lla-data/blob/main/notebooks/time_patterns.ipynb)
- [Analysis Template](https://colab.research.google.com/github/aidanm-lla/lla-data/blob/main/notebooks/templates/analysis_template.ipynb)

1. Session has at least one page view (landing)
2. Session reaches a crisis-related page
3. Session clicks a call link (`tel:131114`)


In [1]:
#@title Setup (run once)
import sys
import os

if "google.colab" in sys.modules:
    from google.colab import auth
    auth.authenticate_user()
    if not os.path.exists("lla-data"):
        !git clone -q https://github.com/aidoanto/lla-data.git
    repo = os.path.abspath("lla-data")
    if repo not in sys.path:
        sys.path.insert(0, repo)
    !pip install -q db-dtypes google-cloud-bigquery kaleido plotly
else:
    for p in ("..", "../.."):
        ap = os.path.abspath(p)
        if ap not in sys.path:
            sys.path.insert(0, ap)

import pandas as pd
import plotly.graph_objects as go

import lifeline_theme
from lla_data import config
from lla_data.bq import get_client, run_query

lifeline_theme.inject_fonts()

client = get_client()

In [2]:
#@title Parameters
DAYS_BACK = 35 #@param {type:"integer"}

In [3]:
query = f"""
WITH event_base AS (
  SELECT
    PARSE_DATE('%Y%m%d', event_date) AS event_day,
    event_name,
    user_pseudo_id,
    CONCAT(
      user_pseudo_id,
      '.',
      COALESCE(CAST((
        SELECT ep.value.int_value
        FROM UNNEST(event_params) ep
        WHERE ep.key = 'ga_session_id'
      ) AS STRING), '0')
    ) AS session_key,
    COALESCE((
      SELECT ep.value.string_value
      FROM UNNEST(event_params) ep
      WHERE ep.key = 'source'
    ), '(direct)') AS source,
    COALESCE((
      SELECT ep.value.string_value
      FROM UNNEST(event_params) ep
      WHERE ep.key = 'medium'
    ), '(none)') AS medium,
    COALESCE((
      SELECT ep.value.string_value
      FROM UNNEST(event_params) ep
      WHERE ep.key = 'page_location'
    ), '(unknown)') AS page_location,
    COALESCE((
      SELECT ep.value.string_value
      FROM UNNEST(event_params) ep
      WHERE ep.key = 'link_url'
    ), '') AS link_url
  FROM `{config.PROJECT_ID}.{config.GA4_DATASET}.events_*`
  WHERE _TABLE_SUFFIX BETWEEN FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL {DAYS_BACK} DAY))
    AND FORMAT_DATE('%Y%m%d', CURRENT_DATE())
), cleaned AS (
  SELECT
    event_day,
    event_name,
    user_pseudo_id,
    session_key,
    source,
    medium,
    CASE
      WHEN page_location = '(unknown)' THEN '(unknown)'
      ELSE REGEXP_REPLACE(REGEXP_REPLACE(page_location, r'#.*$', ''), r'\\?.*$', '')
    END AS page_location_clean,
    link_url
  FROM event_base
), page_paths AS (
  SELECT
    event_day,
    event_name,
    user_pseudo_id,
    session_key,
    source,
    medium,
    CASE
      WHEN page_location_clean = '(unknown)' THEN '(unknown)'
      WHEN REGEXP_CONTAINS(page_location_clean, r'^https?://') THEN COALESCE(NULLIF(REGEXP_EXTRACT(page_location_clean, r'^https?://[^/]+(/.*)$'), ''), '/')
      WHEN STARTS_WITH(page_location_clean, '/') THEN page_location_clean
      ELSE CONCAT('/', page_location_clean)
    END AS page_path,
    link_url
  FROM cleaned
), session_flags AS (
  SELECT
    source,
    medium,
    session_key,
    MAX(CASE WHEN event_name = 'page_view' THEN 1 ELSE 0 END) AS has_landing,
    MAX(CASE
      WHEN event_name = 'page_view'
        AND REGEXP_CONTAINS(page_path, r'^/(get-help|crisis-support|suicide|131114|chat|text)') THEN 1
      ELSE 0
    END) AS has_crisis_page,
    MAX(CASE
      WHEN event_name = 'click'
        AND REGEXP_CONTAINS(LOWER(link_url), r'tel:(\+?61)?131114|13\s*11\s*14') THEN 1
      ELSE 0
    END) AS has_call_click
  FROM page_paths
  GROUP BY source, medium, session_key
)
SELECT
  source,
  medium,
  COUNT(*) AS sessions,
  SUM(has_landing) AS landing_sessions,
  SUM(has_crisis_page) AS crisis_sessions,
  SUM(has_call_click) AS call_click_sessions
FROM session_flags
GROUP BY source, medium
HAVING sessions >= 20
ORDER BY sessions DESC
"""

df = run_query(client, query)
df["source_medium"] = df["source"] + " / " + df["medium"]
df.head()



Unnamed: 0,source,medium,sessions,landing_sessions,crisis_sessions,call_click_sessions,source_medium
0,(direct),(none),278971,143707,74216,418,(direct) / (none)
1,google,organic,119045,119044,74884,0,google / organic
2,meta,social,9144,9144,1670,0,meta / social
3,bing,organic,5881,5881,2691,0,bing / organic
4,facebook,social,5170,5170,132,0,facebook / social


In [4]:
overall = pd.DataFrame(
    {
        "stage": ["Landing sessions", "Reached crisis page", "Clicked tel:131114"],
        "sessions": [
            int(df["landing_sessions"].sum()),
            int(df["crisis_sessions"].sum()),
            int(df["call_click_sessions"].sum()),
        ],
    }
)

fig = go.Figure(
    go.Funnel(
        y=overall["stage"],
        x=overall["sessions"],
        textinfo="value+percent initial",
    )
)
fig.update_layout(
    template="lifeline",
    title=f"Crisis Support Funnel (Last {DAYS_BACK} Days)",
)
lifeline_theme.add_lifeline_logo(fig)
fig.show()

In [5]:
df["crisis_rate"] = df["crisis_sessions"] / df["landing_sessions"].replace(0, pd.NA)
df["call_rate_from_crisis"] = df["call_click_sessions"] / df["crisis_sessions"].replace(0, pd.NA)

channel_view = df[[
    "source_medium",
    "landing_sessions",
    "crisis_sessions",
    "call_click_sessions",
    "crisis_rate",
    "call_rate_from_crisis",
]].sort_values("landing_sessions", ascending=False)

channel_view.head(15)

Unnamed: 0,source_medium,landing_sessions,crisis_sessions,call_click_sessions,crisis_rate,call_rate_from_crisis
0,(direct) / (none),143707,74216,418,0.51644,0.005632
1,google / organic,119044,74884,0,0.629045,0.0
2,meta / social,9144,1670,0,0.182633,0.0
3,bing / organic,5881,2691,0,0.457575,0.0
4,facebook / social,5170,132,0,0.025532,0.0
5,ig / social,3577,110,0,0.030752,0.0
6,m.facebook.com / referral,2435,769,0,0.315811,0.0
7,apps.mypurecloud.com.au / referral,2232,2121,0,0.950269,0.0
8,s0.2mdn.net / referral,2172,2172,0,1.0,0.0
9,youtube.com / referral,1638,1102,0,0.672772,0.0
