In [None]:
# Add widgets for catalog and schema parameters
dbutils.widgets.text("catalog", "", "Catalog Name")
dbutils.widgets.text("schema", "", "Schema Name")

# Get parameter values (fallback to defaults if not provided)
catalog = dbutils.widgets.get("catalog") or spark.conf.get("bundle.var.catalog", "main")
schema = dbutils.widgets.get("schema") or spark.conf.get("bundle.var.schema", "incremental_dlt")

%load_ext autoreload
%autoreload 2

In [None]:
import sys

# Make modules under ../src importable from this notebook.
# The membership check keeps the cell idempotent: re-running it does not pile
# up duplicate entries in sys.path.
if '../src' not in sys.path:
    sys.path.append('../src')

In [None]:
# Build `combined_event_log`: one row per `planning_information` event from
# `demo_event_log`, enriched with the matching `flow_definition` event's
# output dataset, explain text and flow type.

# The query inspects the first four entries of the planner's
# `technique_information` array.
TECHNIQUE_SLOTS = range(4)


def _technique(slot):
    """Return the SQL path to entry `slot` of `technique_information`."""
    return f"details:planning_information.technique_information[{slot}]"


def _chosen_technique_field(field):
    """Return a SQL CASE expression yielding `field` of the chosen technique.

    The expression walks the slots in order and returns `field` from the first
    slot whose `is_chosen` flag is true, or NULL when no inspected slot is chosen.
    """
    branches = "\n".join(
        f"            WHEN {_technique(slot)}.is_chosen = true THEN {_technique(slot)}.{field}"
        for slot in TECHNIQUE_SLOTS
    )
    return f"CASE\n{branches}\n            ELSE NULL\n        END"


chosen_maintenance_type_sql = _chosen_technique_field("maintenance_type")
chosen_cost_sql = _chosen_technique_field("cost")

# First incrementalization issue of every slot, in slot order (COALESCE arguments).
first_issue_types_sql = ",\n".join(
    f"                    {_technique(slot)}.incrementalization_issues[0].issue_type"
    for slot in TECHNIQUE_SLOTS
)

spark.sql(f"""
CREATE OR REPLACE TABLE {catalog}.{schema}.combined_event_log AS
WITH refresh_method AS (
    SELECT
        origin.update_id,
        origin.flow_name,
        timestamp,
        origin.pipeline_name,
        REGEXP_EXTRACT(message, 'executed as ([A-Z_]+)', 1) AS refresh_type,
        -- maintenance type of the chosen technique
        {chosen_maintenance_type_sql} AS chosen_maintenance_type,
        -- cost of the chosen technique
        -- NOTE: computed here but not projected by the final SELECT below
        {chosen_cost_sql} AS cost,
        -- recompute reason
        CASE
            WHEN ({chosen_maintenance_type_sql}) = 'MAINTENANCE_TYPE_COMPLETE_RECOMPUTE' THEN
                COALESCE(
                    -- Prefer EXPECTATIONS_NOT_SUPPORTED when it is the first issue of slot 1 or slot 2
                    CASE WHEN {_technique(1)}.incrementalization_issues[0].issue_type = 'EXPECTATIONS_NOT_SUPPORTED'
                         THEN 'EXPECTATIONS_NOT_SUPPORTED' ELSE NULL END,
                    CASE WHEN {_technique(2)}.incrementalization_issues[0].issue_type = 'EXPECTATIONS_NOT_SUPPORTED'
                         THEN 'EXPECTATIONS_NOT_SUPPORTED' ELSE NULL END,
                    -- Otherwise fall back to the first issue of the first slot that reports one
{first_issue_types_sql},
                    'UNKNOWN_ISSUE'
                )
            ELSE 'incremental recompute'
        END AS recompute_reason,
        -- row count of the first source table; aliased explicitly because the
        -- final SELECT reads it as r.num_rows
        details:planning_information.source_table_information[0].num_rows AS num_rows
    FROM {catalog}.{schema}.demo_event_log
    WHERE event_type = 'planning_information'
),

flow_info AS (
    SELECT
        origin.update_id,
        origin.flow_name,
        details:flow_definition.output_dataset AS output_dataset,
        details:flow_definition.explain_text AS query,
        details:flow_definition.flow_type AS table_type
    FROM {catalog}.{schema}.demo_event_log
    WHERE event_type = 'flow_definition'
)

SELECT
    r.timestamp,
    r.pipeline_name,
    r.refresh_type,
    r.chosen_maintenance_type,
    r.recompute_reason,
    r.num_rows,
    f.output_dataset,
    f.query,
    f.table_type
FROM refresh_method r
-- Match on the flow as well as the update: joining on update_id alone pairs
-- every planning row with every flow definition of the same update.
LEFT JOIN flow_info f
    ON r.update_id = f.update_id
   AND r.flow_name = f.flow_name
""")

In [None]:
# Load every column of the combined event log table and render it in the notebook.
event_log = spark.sql(
    "SELECT * FROM {}.{}.combined_event_log".format(catalog, schema)
)

event_log.display()