Materialize the billing usage system table (`system.billing.usage`) for best performance, and so that primary key optimizations can be applied.

See [query optimization using primary key constraints](https://docs.databricks.com/aws/en/sql/user/queries/query-optimization-constraints) for more details.

In [0]:
-- Set the session's current catalog and schema from the :catalog and :schema
-- parameters, so the unqualified object names in the following cells
-- (fct_usage, vw_fct_usage, dim_*) resolve there.
use catalog identifier(:catalog);
use schema identifier(:schema);

In [0]:
-- Usage fact table.
-- The first three columns are keys referencing dim_calendar, dim_compute and
-- dim_workspace (defined outside this notebook); the remaining columns carry
-- the attributes of each usage record. The table is clustered by calendar_key.
create or replace table fct_usage (
  -- Dimension keys
  calendar_key int comment 'FK for calendar dimension (dim_calendar).',
  compute_key string comment 'FK for compute dimension (dim_compute).',
  workspace_key string comment 'FK for workspace dimension (dim_workspace).',

  -- Usage record attributes
  record_id string comment 'Unique ID for this usage record',
  sku_name string comment 'Name of the SKU',
  cloud string comment 'Cloud this usage is relevant for. Possible values are AWS, AZURE, and GCP.',
  usage_start_time timestamp comment 'The start time relevant to this usage record. Timezone information is recorded at the end of the value with +00:00 representing UTC timezone.',
  usage_end_time timestamp comment 'The end time relevant to this usage record. Timezone information is recorded at the end of the value with +00:00 representing UTC timezone.',
  usage_date date comment 'Date of the usage record, this field can be used for faster aggregation by date',
  custom_tags map<string, string> comment 'Tags applied by the users to this usage. Includes compute resource tags and jobs tags.',
  usage_unit string comment 'Unit this usage is measured in. Possible values include DBUs.',
  usage_quantity decimal(38,18) comment 'Number of units consumed for this record.',
  usage_metadata struct<
    cluster_id: string,
    job_id: string,
    warehouse_id: string,
    instance_pool_id: string,
    node_type: string,
    job_run_id: string,
    notebook_id: string,
    dlt_pipeline_id: string,
    endpoint_name: string,
    endpoint_id: string,
    dlt_update_id: string,
    dlt_maintenance_id: string,
    run_name: string,
    job_name: string,
    notebook_path: string,
    central_clean_room_id: string,
    source_region: string,
    destination_region: string,
    app_id: string,
    app_name: string,
    metastore_id: string,
    private_endpoint_name: string,
    storage_api_type: string,
    budget_policy_id: string,
    ai_runtime_pool_id: string,
    ai_runtime_workload_id: string,
    uc_table_catalog: string,
    uc_table_schema: string,
    uc_table_name: string,
    database_instance_id: string,
    sharing_materialization_id: string,
    schema_id: string
  > comment 'System-provided metadata about the usage, including IDs for compute resources and jobs (if applicable). See [Analyze usage metadata](https://docs.databricks.com/en/admin/system-tables/billing.html#usage-metadata).',
  identity_metadata struct<
    run_as: string,
    created_by: string,
    owned_by: string
  > comment 'System-provided metadata about the identities involved in the usage. See [Analyze identity metadata](https://docs.databricks.com/en/admin/system-tables/billing.html#identity-metadata).',
  record_type string comment 'Whether the record is original, a retraction, or a restatement. The value is ORIGINAL unless the record is related to a correction. See [Analyze correction records](https://docs.databricks.com/en/admin/system-tables/billing.html#record-type).',
  ingestion_date date comment 'Date the record was ingested into the usage table.',
  billing_origin_product string comment 'The product that originated the usage. Some products can be billed as different SKUs. For possible values, see [View information about the product associated with the usage](https://docs.databricks.com/en/admin/system-tables/billing.html#features).',
  product_features struct<
    jobs_tier: string,
    sql_tier: string,
    dlt_tier: string,
    is_serverless: boolean,
    is_photon: boolean,
    serving_type: string,
    networking: struct<connectivity_type: string>,
    ai_runtime: struct<compute_type: string>,
    model_serving: struct<offering_type: string>,
    ai_gateway: struct<feature_type: string>,
    performance_target: string,
    serverless_gpu: struct<workload_type: string>
  > comment 'Details about the specific product features used.',
  usage_type string comment 'The type of usage attributed to the product or workload for billing purposes. Possible values are COMPUTE_TIME, STORAGE_SPACE, NETWORK_BYTES, API_CALLS, TOKEN, or GPU_TIME.',

  -- Relationships to the dimension tables
  constraint fk_fct_usage_dim_calendar
    foreign key (calendar_key) references dim_calendar(calendar_key),
  constraint fk_fct_usage_dim_compute
    foreign key (compute_key) references dim_compute(compute_key),
  constraint fk_fct_usage_dim_workspace
    foreign key (workspace_key) references dim_workspace(workspace_key)
)
cluster by (calendar_key);

In [0]:
-- Replace the contents of fct_usage with rows from system.billing.usage.
-- Only rows from the last 6 months that carry a warehouse id are loaded,
-- because compute_key is taken from usage_metadata.warehouse_id.
-- The target column list is spelled out so that each select expression is
-- matched to its column explicitly rather than by position in the table DDL.
insert overwrite fct_usage (
  calendar_key,
  compute_key,
  workspace_key,
  record_id,
  sku_name,
  cloud,
  usage_start_time,
  usage_end_time,
  usage_date,
  custom_tags,
  usage_unit,
  usage_quantity,
  usage_metadata,
  identity_metadata,
  record_type,
  ingestion_date,
  billing_origin_product,
  product_features,
  usage_type
)
select
  -- Integer date key in yyyymmdd form, e.g. 2024-03-15 -> 20240315.
  year(usage_date) * 10000 + month(usage_date) * 100 + day(usage_date) as calendar_key,
  usage_metadata.warehouse_id as compute_key,
  workspace_id as workspace_key,
  record_id,
  sku_name,
  cloud,
  usage_start_time,
  usage_end_time,
  usage_date,
  custom_tags,
  usage_unit,
  usage_quantity,
  usage_metadata,
  identity_metadata,
  record_type,
  ingestion_date,
  billing_origin_product,
  product_features,
  usage_type
from system.billing.usage
where usage_date >= current_date() - interval 6 months
  and usage_metadata.warehouse_id is not null;

In [0]:
-- Pass-through view over fct_usage.
-- Columns are listed explicitly (instead of select *) so the view's contract
-- is visible here and does not depend on the table's column order.
create or replace view vw_fct_usage as
select
  calendar_key,
  compute_key,
  workspace_key,
  record_id,
  sku_name,
  cloud,
  usage_start_time,
  usage_end_time,
  usage_date,
  custom_tags,
  usage_unit,
  usage_quantity,
  usage_metadata,
  identity_metadata,
  record_type,
  ingestion_date,
  billing_origin_product,
  product_features,
  usage_type
from fct_usage;

In [0]:
-- Optimize the table's data layout after the full reload.
optimize fct_usage;

In [0]:
-- Collect statistics for every column of fct_usage.
analyze table fct_usage
  compute statistics for all columns;

In [0]:
-- Clean up data files that the table no longer references
-- (subject to the table's retention settings).
vacuum fct_usage;