Materialize the billing usage system table (`system.billing.usage`) into a fact table for best performance, and so that primary key optimizations can be applied.

See [query optimization using primary key constraints](https://docs.databricks.com/aws/en/sql/user/queries/query-optimization-constraints) for more details.

In [0]:
-- Point the session at the target catalog and schema.
-- :catalog and :schema are named parameter markers supplied by the caller;
-- identifier() turns each parameter value into an object name.
-- The unqualified object names used in the following cells depend on these two statements.
use catalog identifier(:catalog);
use schema identifier(:schema);

In [0]:
-- Usage fact table: one row per usage record, keyed to the calendar, compute and
-- workspace dimensions. `create or replace` rebuilds the table definition on every run.
-- NOTE(review): dim_calendar, dim_compute and dim_workspace are referenced by the
-- foreign keys below, so they presumably must exist before this cell runs - confirm run order.
create or replace table fct_usage(
  -- Dimension keys. vw_fct_usage_src populates calendar_key as a yyyymmdd integer,
  -- compute_key from usage_metadata.warehouse_id and workspace_key from workspace_id.
  calendar_key int comment 'FK for calendar dimension (dim_calendar).',
  compute_key string comment 'FK for compute dimension (dim_compute).',
  workspace_key string comment 'FK for workspace dimension (dim_workspace).',
  -- The remaining columns carry the same names as the system.billing.usage columns
  -- selected by vw_fct_usage_src.
  record_id string comment 'Unique ID for this usage record',
  sku_name string comment 'Name of the SKU',
  cloud string comment 'Cloud this usage is relevant for. Possible values are AWS, AZURE, and GCP.',
  usage_start_time timestamp comment 'The start time relevant to this usage record. Timezone information is recorded at the end of the value with +00:00 representing UTC timezone.',
  usage_end_time timestamp comment 'The end time relevant to this usage record. Timezone information is recorded at the end of the value with +00:00 representing UTC timezone.',
  usage_date date comment 'Date of the usage record, this field can be used for faster aggregation by date',
  custom_tags map<string, string> comment 'Tags applied by the users to this usage. Includes compute resource tags and jobs tags.',
  usage_unit string comment 'Unit this usage is measured in. Possible values include DBUs.',
  usage_quantity decimal(38, 18) comment 'Number of units consumed for this record.',
  usage_metadata struct<
    cluster_id: string,
    job_id: string,
    warehouse_id: string,
    instance_pool_id: string,
    node_type: string,
    job_run_id: string,
    notebook_id: string,
    dlt_pipeline_id: string,
    endpoint_name: string,
    endpoint_id: string,
    dlt_update_id: string,
    dlt_maintenance_id: string,
    run_name: string,
    job_name: string,
    notebook_path: string,
    central_clean_room_id: string,
    source_region: string,
    destination_region: string,
    app_id: string,
    app_name: string,
    metastore_id: string,
    private_endpoint_name: string,
    storage_api_type: string,
    budget_policy_id: string,
    ai_runtime_pool_id: string,
    ai_runtime_workload_id: string,
    uc_table_catalog: string,
    uc_table_schema: string,
    uc_table_name: string,
    database_instance_id: string,
    sharing_materialization_id: string,
    schema_id: string
  > comment 'System-provided metadata about the usage, including IDs for compute resources and jobs (if applicable). See [Analyze usage metadata](https://docs.databricks.com/en/admin/system-tables/billing.html#usage-metadata).',
  identity_metadata struct<run_as: string, created_by: string, owned_by: string> comment 'System-provided metadata about the identities involved in the usage. See [Analyze identity metadata](https://docs.databricks.com/en/admin/system-tables/billing.html#identity-metadata).',
  record_type string comment 'Whether the record is original, a retraction, or a restatement. The value is ORIGINAL unless the record is related to a correction. See [Analyze correction records](https://docs.databricks.com/en/admin/system-tables/billing.html#record-type).',
  ingestion_date date comment 'Date the record was ingested into the usage table.',
  billing_origin_product string comment 'The product that originated the usage. Some products can be billed as different SKUs. For possible values, see [View information about the product associated with the usage](https://docs.databricks.com/en/admin/system-tables/billing.html#features).',
  product_features struct<
    jobs_tier: string,
    sql_tier: string,
    dlt_tier: string,
    is_serverless: boolean,
    is_photon: boolean,
    serving_type: string,
    networking: struct<connectivity_type: string>,
    ai_runtime: struct<compute_type: string>,
    model_serving: struct<offering_type: string>,
    ai_gateway: struct<feature_type: string>,
    performance_target: string,
    serverless_gpu: struct<workload_type: string>
  > comment 'Details about the specific product features used.',
  usage_type string comment 'The type of usage attributed to the product or workload for billing purposes. Possible values are COMPUTE_TIME, STORAGE_SPACE, NETWORK_BYTES, API_CALLS, TOKEN, or GPU_TIME.',
  -- Named foreign keys, one per dimension key declared at the top of the column list.
  constraint fk_fct_usage_dim_calendar
    foreign key (calendar_key) references dim_calendar (calendar_key),
  constraint fk_fct_usage_dim_compute
    foreign key (compute_key) references dim_compute (compute_key),
  constraint fk_fct_usage_dim_workspace
    foreign key (workspace_key) references dim_workspace (workspace_key)
)
-- Cluster the data on the three dimension keys.
-- NOTE(review): presumably chosen because queries join/filter on these keys - confirm.
cluster by (calendar_key, compute_key, workspace_key);

In [0]:
-- Source rows for fct_usage: usage associated with DBSQL warehouses, i.e. billing
-- usage records that carry a warehouse id, limited to the last 6 months of usage_date.
create or replace temporary view vw_fct_usage_src as
select
  -- Calendar key as a yyyymmdd integer (e.g. 2024-03-15 -> 20240315).
  year(u.usage_date) * 10000 + month(u.usage_date) * 100 + day(u.usage_date) as calendar_key,
  -- The warehouse id doubles as the compute dimension key.
  u.usage_metadata.warehouse_id as compute_key,
  u.workspace_id as workspace_key,
  -- Remaining columns are passed through unchanged.
  u.record_id,
  u.sku_name,
  u.cloud,
  u.usage_start_time,
  u.usage_end_time,
  u.usage_date,
  u.custom_tags,
  u.usage_unit,
  u.usage_quantity,
  u.usage_metadata,
  u.identity_metadata,
  u.record_type,
  u.ingestion_date,
  u.billing_origin_product,
  u.product_features,
  u.usage_type
from system.billing.usage as u
where u.usage_metadata.warehouse_id is not null
  and u.usage_date >= current_date() - interval 6 months;

In [0]:
-- Full reload of fct_usage from vw_fct_usage_src.
-- The table is emptied first, so every source row is unmatched on record_id and is inserted.
-- An insert-only merge is used to enable schema evolution, so that the target can
-- gracefully accommodate schema changes in system.billing.usage that reach the source view.
truncate table fct_usage;

merge with schema evolution into fct_usage as dst
using vw_fct_usage_src as incoming
  on dst.record_id = incoming.record_id
when not matched then insert *;

In [0]:
-- Pass-through view over fct_usage. It is recreated after the load; `select *` is used
-- so that the view exposes whatever columns the table has at that point.
create or replace view vw_fct_usage as select * from fct_usage;

In [0]:
-- Run OPTIMIZE on the freshly reloaded fact table (see the Databricks OPTIMIZE documentation
-- for how it rewrites data files and applies the table's clustering).
optimize fct_usage;

In [0]:
-- Recompute table statistics for every column of fct_usage after the reload.
analyze table fct_usage compute statistics for all columns;

In [0]:
-- Run VACUUM on fct_usage to clean up files left behind by the reload.
-- No retention clause is given, so the table's default retention setting applies.
vacuum fct_usage;