# Dashboard Processing Job
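This notebook reads the latest publish/unpublish audit event for each Lakeview dashboard, fetches the display names of the dashboards that are still active (in parallel), merges the two datasets, and saves the result to a Unity Catalog table.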

In [None]:
# Job parameters: (widget name, default value, widget label)
_widget_specs = (
    ("catalog", "andrea_tardif", "Catalog"),
    ("schema", "bronze", "Schema"),
    ("table_name", "dashboards_merged", "Table Name"),
)

# Register every text widget first, then read the values back in the same order
for _name, _default, _label in _widget_specs:
    dbutils.widgets.text(_name, _default, _label)

catalog, schema, table_name = (
    dbutils.widgets.get(_name) for _name, _, _ in _widget_specs
)

for _label, _value in (
    ("Catalog", catalog),
    ("Schema", schema),
    ("Table Name", table_name),
):
    print(f"{_label}: {_value}")

# Three-part name (catalog.schema.table) of the table written at the end of the job
target_table = ".".join((catalog, schema, table_name))

In [None]:
# Import required libraries
from concurrent.futures import ThreadPoolExecutor, as_completed
from databricks.sdk import WorkspaceClient
from databricks.sdk.service.dashboards import LifecycleState
from pyspark.sql import Row
from pyspark.sql import functions as F

In [None]:
# Initialize the Databricks SDK workspace client.
# It is created at module level because fetch_name_if_active() reads it as the
# global `w` when it calls `w.lakeview.get(...)`.
w = WorkspaceClient()

def fetch_name_if_active(dashboard_id: str):
    """
    Look up a Lakeview dashboard and return its name only if it is ACTIVE.

    The lookup is best-effort: a dashboard that cannot be fetched is skipped,
    never raised to the caller. Missing dashboards and permission errors are
    expected and skipped quietly; any other failure is logged as a warning so
    that systemic problems (bad credentials, throttling, network errors) are
    not silently mistaken for "no active dashboards".

    Args:
        dashboard_id: ID of the dashboard, passed to ``w.lakeview.get``.

    Returns:
        ``{"dashboard_id": dashboard_id, "display_name": <name>}`` when the
        dashboard was fetched and its lifecycle state is ACTIVE, otherwise
        ``None``.
    """
    # Function-scope imports keep this block self-contained within the notebook.
    import logging

    from databricks.sdk.errors import NotFound, PermissionDenied

    try:
        d = w.lakeview.get(dashboard_id)
    except (NotFound, PermissionDenied):
        # Expected for deleted dashboards or ones this principal cannot read.
        return None
    except Exception as exc:
        # Still skip (callers rely on this never raising), but leave a trace.
        logging.getLogger(__name__).warning(
            "Skipping dashboard %s: lookup failed (%s: %s)",
            dashboard_id,
            type(exc).__name__,
            exc,
        )
        return None

    if d.lifecycle_state != LifecycleState.ACTIVE:
        return None
    return {"dashboard_id": dashboard_id, "display_name": d.display_name}

In [None]:
w = WorkspaceClient()
# Workspace base URL without a trailing slash, used to build dashboard links
host = w.config.host.rstrip("/")

# Latest publish/unpublish audit event per dashboard over the last 180 days.
# Events without a dashboard_id are excluded: they would otherwise collapse into
# a single NULL-keyed row with a NULL URL and send a None ID to the name lookup.
dashboards_actions = spark.sql("""
WITH actions AS (
  SELECT
    request_params.dashboard_id AS dashboard_id,
    action_name,
    user_identity.email AS actor_email,
    identity_metadata.run_by AS run_by,
    identity_metadata.run_as AS run_as,
    event_time,
    row_number() OVER (
      PARTITION BY request_params.dashboard_id
      ORDER BY event_time DESC
    ) AS rn
  FROM system.access.audit
  WHERE event_time >= current_timestamp() - INTERVAL 180 DAYS
    AND service_name = 'dashboards'
    AND action_name IN ('publishDashboard','unpublishDashboard')
    AND request_params.dashboard_id IS NOT NULL
)
SELECT
  dashboard_id,
  CASE
    WHEN action_name = 'publishDashboard' THEN 'published'
    ELSE 'unpublished'
  END AS publish_state,
  actor_email,
  run_by,
  run_as,
  event_time
FROM actions
WHERE rn = 1""")


# Add a link to each dashboard: <host>/sql/dashboardsv3/<dashboard_id>
dashboards_actions = dashboards_actions.withColumn(
    "dashboard_url",
    F.concat(F.lit(f"{host}/sql/dashboardsv3/"), F.col("dashboard_id"))
)

In [None]:
# Bring the distinct dashboard IDs found in dashboards_actions back to the driver
# so each one can be looked up through the SDK.
_distinct_id_rows = dashboards_actions.select("dashboard_id").distinct().collect()
dashboard_ids_limited = [row.dashboard_id for row in _distinct_id_rows]

print(f"Processing {len(dashboard_ids_limited)} unique dashboard IDs")

In [None]:
# Look up dashboard names concurrently, one task per dashboard ID
max_workers = 16
rows = []

print(f"Fetching dashboard details with {max_workers} parallel workers...")

with ThreadPoolExecutor(max_workers=max_workers) as pool:
    pending = [
        pool.submit(fetch_name_if_active, dashboard_id)
        for dashboard_id in dashboard_ids_limited
    ]
    # Consume results as each lookup finishes; None means "skip this dashboard"
    lookups = (task.result() for task in as_completed(pending))
    rows.extend(Row(**found) for found in lookups if found is not None)

print(f"Found {len(rows)} active dashboards")

In [None]:
# Create DataFrame from results.
# The schema is given explicitly because inference from the data fails when
# `rows` is empty (no active dashboards were found) or when every display_name
# is None. Field order matches the Row objects: dashboard_id, display_name.
names_schema = "dashboard_id STRING, display_name STRING"
df_names_spark = spark.createDataFrame(rows, schema=names_schema)

# Display sample of results
display(df_names_spark.limit(10))

In [None]:
# Attach the fetched display names to the audit-derived rows. The left join
# keeps every dashboards_actions row; display_name is null where no name was fetched.
dashboards_merged = dashboards_actions.join(
    df_names_spark,
    on="dashboard_id",
    how="left",
)

merged_row_count = dashboards_merged.count()
print(f"Merged dataset has {merged_row_count} rows")
display(dashboards_merged.limit(10))

In [None]:
# Save to Unity Catalog table, replacing any existing data and schema
print(f"Saving to: {target_table}")

(
    dashboards_merged.write
    .format("delta")
    .mode("overwrite")
    .option("overwriteSchema", "true")
    .saveAsTable(target_table)
)

# Count the rows in the saved table rather than calling dashboards_merged.count():
# that would re-run the whole audit query and join, and could report a number
# that differs from what was actually written.
saved_row_count = spark.table(target_table).count()
print(f"Successfully saved {saved_row_count} rows to {target_table}")

In [None]:
# Show the saved table's extended metadata, with column values left untruncated
table_details = spark.sql(f"DESCRIBE EXTENDED {target_table}")
table_details.show(truncate=False)