In [0]:
-- Model Serving Monitoring
-- Based on Databricks documentation for tracking model serving spend

--------------------------------------------------
-- Model serving costs for the past 30 days
--
-- Grain: one row per served entity (its most recent configuration row) of
-- every endpoint that has SERVERLESS_REAL_TIME_INFERENCE usage in the window.
--
-- NOTE: billing usage is matched to served entities by endpoint, not by
-- served entity, so every cost column holds the cost of the WHOLE endpoint.
-- An endpoint with several served entities repeats the same figures on each
-- of its rows; do not add cost columns up across rows of one endpoint.
--
-- Cost = usage_quantity * the list price whose validity window contains
-- usage_start_time.
--   *_list_cost       uses pricing.default
--   *_effective_cost  uses pricing.effective_list.default
-- The t7d / t14d / t30d columns restrict the same sums to usage_date on or
-- after current_date() minus 7 / 14 / 30 days.
--------------------------------------------------
CREATE MATERIALIZED VIEW IDENTIFIER(:catalog || '.' || :schema || '.model_serving_costs')
SCHEDULE EVERY 4 HOURS
AS
WITH latest_served_entities AS (
  -- system.serving.served_entities records a new row whenever an endpoint's
  -- configuration changes, so one served entity can appear many times.
  -- Joining usage to the raw table multiplies every usage row by the number
  -- of matching history rows and inflates the summed costs. Keep only the
  -- newest row per served entity.
  -- NOTE(review): assumes change_time / endpoint_config_version identify the
  -- newest configuration row - confirm against the system table reference.
  SELECT
    account_id,
    workspace_id,
    endpoint_name,
    endpoint_id,
    created_by,
    served_entity_name,
    entity_type,
    custom_model_config,
    external_model_config
  FROM
    system.serving.served_entities
  WHERE
    endpoint_name IS NOT NULL
  QUALIFY
    ROW_NUMBER() OVER (
      PARTITION BY account_id, workspace_id, endpoint_id, served_entity_name
      ORDER BY change_time DESC, endpoint_config_version DESC
    ) = 1
)
SELECT
  usage_data.workspace_id,
  served_entities.endpoint_name,
  served_entities.endpoint_id,
  served_entities.created_by,
  served_entities.served_entity_name,
  served_entities.entity_type,
  served_entities.custom_model_config,
  served_entities.external_model_config,
  MAX(usage_data.usage_date) AS last_usage_date,
  -- Tags of the most recent usage record. FIRST() picks an arbitrary row, so
  -- the reported tags could change from one refresh to the next.
  MAX_BY(usage_data.custom_tags, usage_data.usage_start_time) AS custom_tags,
  SUM(usage_data.usage_quantity * pricing_data.pricing.default) AS total_list_cost,
  SUM(usage_data.usage_quantity * pricing_data.pricing.effective_list.default) AS total_effective_cost,
  SUM(CASE WHEN usage_data.usage_date >= date_sub(current_date(), 7) THEN usage_data.usage_quantity * pricing_data.pricing.default ELSE 0 END) AS t7d_list_cost,
  SUM(CASE WHEN usage_data.usage_date >= date_sub(current_date(), 7) THEN usage_data.usage_quantity * pricing_data.pricing.effective_list.default ELSE 0 END) AS t7d_effective_cost,
  SUM(CASE WHEN usage_data.usage_date >= date_sub(current_date(), 14) THEN usage_data.usage_quantity * pricing_data.pricing.default ELSE 0 END) AS t14d_list_cost,
  SUM(CASE WHEN usage_data.usage_date >= date_sub(current_date(), 14) THEN usage_data.usage_quantity * pricing_data.pricing.effective_list.default ELSE 0 END) AS t14d_effective_cost,
  SUM(CASE WHEN usage_data.usage_date >= date_sub(current_date(), 30) THEN usage_data.usage_quantity * pricing_data.pricing.default ELSE 0 END) AS t30d_list_cost,
  SUM(CASE WHEN usage_data.usage_date >= date_sub(current_date(), 30) THEN usage_data.usage_quantity * pricing_data.pricing.effective_list.default ELSE 0 END) AS t30d_effective_cost
FROM
  system.billing.usage usage_data
    INNER JOIN latest_served_entities served_entities
      ON usage_data.usage_metadata.endpoint_name = served_entities.endpoint_name
      -- Endpoint names are only unique within a workspace; without this key,
      -- same-named endpoints in other workspaces of the account would match.
      AND usage_data.workspace_id = served_entities.workspace_id
      AND usage_data.account_id = served_entities.account_id
    INNER JOIN system.billing.list_prices pricing_data
      ON usage_data.sku_name = pricing_data.sku_name
      AND usage_data.cloud = pricing_data.cloud
      AND usage_data.usage_unit = pricing_data.usage_unit
      AND usage_data.account_id = pricing_data.account_id
      -- Half-open validity window: pick the price in effect when the usage
      -- record started; a NULL price_end_time marks the current price.
      AND usage_data.usage_start_time >= pricing_data.price_start_time
      AND (
        pricing_data.price_end_time IS NULL
        OR usage_data.usage_start_time < pricing_data.price_end_time
      )
WHERE
  usage_data.sku_name LIKE '%SERVERLESS_REAL_TIME_INFERENCE%'
  AND usage_data.usage_quantity IS NOT NULL
  AND pricing_data.pricing.effective_list.default IS NOT NULL
  AND usage_data.usage_date >= CURRENT_DATE() - INTERVAL 30 DAY
GROUP BY ALL
ORDER BY
  total_effective_cost DESC;

--------------------------------------------------
-- Batch inference costs for the past 30 days
--
-- Grain: one row per workspace, usage date, run-as identity, custom tag set
-- and endpoint name, for usage whose billing_origin_product is MODEL_SERVING
-- and whose model serving offering type is BATCH_INFERENCE.
--
-- Cost = usage_quantity * the list price whose validity window contains
-- usage_start_time (total_list_cost uses pricing.default,
-- total_effective_cost uses pricing.effective_list.default).
--------------------------------------------------
CREATE MATERIALIZED VIEW IDENTIFIER(:catalog || '.' || :schema || '.batch_inference_costs')
SCHEDULE EVERY 4 HOURS
AS
SELECT
  u.workspace_id,
  u.usage_date,
  u.identity_metadata.run_as AS run_as,
  u.custom_tags,
  u.usage_metadata.endpoint_name AS endpoint_name,
  SUM(u.usage_quantity * lp.pricing.default) AS total_list_cost,
  SUM(u.usage_quantity * lp.pricing.effective_list.default) AS total_effective_cost
FROM system.billing.usage AS u
INNER JOIN system.billing.list_prices AS lp
  -- Same SKU, cloud, unit and account ...
  ON lp.sku_name = u.sku_name
  AND lp.cloud = u.cloud
  AND lp.usage_unit = u.usage_unit
  AND lp.account_id = u.account_id
  -- ... and the price that was in effect when the usage record started
  -- (a NULL price_end_time marks the price that is still current).
  AND lp.price_start_time <= u.usage_start_time
  AND (
    lp.price_end_time > u.usage_start_time
    OR lp.price_end_time IS NULL
  )
WHERE
  u.usage_date >= CURRENT_DATE() - INTERVAL 30 DAY
  AND u.billing_origin_product = 'MODEL_SERVING'
  AND u.product_features.model_serving.offering_type = 'BATCH_INFERENCE'
GROUP BY
  u.workspace_id,
  u.usage_date,
  u.identity_metadata.run_as,
  u.custom_tags,
  u.usage_metadata.endpoint_name
ORDER BY
  total_effective_cost DESC;