# Query Drivers by Page

Use this notebook to see which search queries drive traffic to a given page and how heavily that page's clicks are concentrated in a few terms.

In [None]:
#@title Setup (run once)
import sys
import os

if "google.colab" in sys.modules:
    from google.colab import auth
    auth.authenticate_user()
    if not os.path.exists("lla-data"):
        !git clone -q https://github.com/aidoanto/lla-data.git
    repo = os.path.abspath("lla-data")
    if repo not in sys.path:
        sys.path.insert(0, repo)
    !pip install -q db-dtypes google-cloud-bigquery kaleido plotly
else:
    for p in ("..", "../.."):
        ap = os.path.abspath(p)
        if ap not in sys.path:
            sys.path.insert(0, ap)

import pandas as pd
import plotly.express as px
from google.cloud import bigquery

import lifeline_theme
from lla_data import config
from lla_data.bq import build_date_params, default_query_window, get_client, run_query

# Theme setup from lifeline_theme; presumably makes the brand fonts available
# to rendered charts - confirm in lifeline_theme.
lifeline_theme.inject_fonts()

# Client from lla_data.bq (presumably a BigQuery client - confirm); created once
# here and passed to run_query by the query cell.
client = get_client()

In [None]:
#@title Parameters
# Number of days of history to query; defaults to the value set in lla_data.config.
# NOTE(review): the form field is typed "integer" but the default is an expression,
# not a literal - confirm the Colab form renders and edits it as intended.
DAYS_BACK = config.DEFAULT_DAYS_BACK #@param {type:"integer"}
# Page to analyse; compared for equality against page_path in the SQL query.
PAGE_PATH = "/" #@param {type:"string"}

# Query window derived from DAYS_BACK; passed to build_date_params in the query cell.
window = default_query_window(DAYS_BACK)

In [None]:
# Per day and per query for the selected page: clicks, impressions, CTR and the
# impression-weighted average position. Only the table identifier is
# interpolated into the SQL text; the dates and the page path are bound as
# query parameters.
query = f"""
SELECT
  report_date,
  page_path,
  query,
  SUM(clicks) AS clicks,
  SUM(impressions) AS impressions,
  SAFE_DIVIDE(SUM(clicks), NULLIF(SUM(impressions), 0)) AS ctr,
  SAFE_DIVIDE(SUM(avg_position * impressions), NULLIF(SUM(impressions), 0)) AS avg_position
FROM `{config.PROJECT_ID}.{config.SEARCHCONSOLE_DATASET}.curated_search_query_page_daily`
WHERE report_date BETWEEN DATE(@start_date) AND DATE(@end_date)
  AND page_path = @page_path
GROUP BY report_date, page_path, query
ORDER BY report_date DESC, clicks DESC
"""

# build_date_params presumably supplies @start_date / @end_date from the window
# (confirm in lla_data.bq); @page_path is added here. `bigquery` comes from the
# notebook's top imports cell.
params = build_date_params(window) + [
    bigquery.ScalarQueryParameter("page_path", "STRING", PAGE_PATH),
]

df_queries = run_query(client, query, params=params)
df_queries.head(20)

In [None]:
# Rank the page's queries by total clicks over the window and chart the leaders.
TOP_N_QUERIES = 20  # how many queries to keep for the chart and table

# Sum the daily rows into one row per query, largest click totals first.
query_totals = (
    df_queries.groupby("query", as_index=False)[["clicks", "impressions"]]
    .sum()
    .sort_values("clicks", ascending=False)
)

# The denominator is the page's clicks across ALL queries, taken before the
# top-N cut. Dividing by the top-N total instead would force the shares to sum
# to 100% and overstate how concentrated the page's traffic is.
# `or 1.0` keeps the division defined when the page has no clicks at all.
total_clicks = float(query_totals["clicks"].sum()) or 1.0

# .assign builds a new frame rather than writing a column into a head() slice.
top_queries = query_totals.head(TOP_N_QUERIES).assign(
    click_share=lambda totals: totals["clicks"] / total_clicks
)

fig = px.bar(
    # Ascending order so the largest bar ends up at the top of the horizontal chart.
    top_queries.sort_values("clicks", ascending=True),
    x="clicks",
    y="query",
    orientation="h",
    template="lifeline",
    title=f"Top Query Drivers for {PAGE_PATH}",
)
# The wide left margin presumably leaves room for long query labels.
fig.update_layout(height=700, margin={"l": 320})
lifeline_theme.add_lifeline_logo(fig)
fig.show()

top_queries.head(10)