In [None]:
# Configuration: create one text widget per job parameter (name, default),
# then read the current value of each widget into a notebook variable.
for widget_name, widget_default in (
    ('catalog', 'manufatura_lakehouse'),
    ('schema_bronze', 'bronze'),
    ('schema_silver', 'silver'),
):
    dbutils.widgets.text(widget_name, widget_default)

# Values are fetched in the same order the widgets were declared.
catalog, schema_bronze, schema_silver = (
    dbutils.widgets.get(widget_name)
    for widget_name in ('catalog', 'schema_bronze', 'schema_silver')
)

# Echo the resolved parameters so they show up in the cell output.
print(f'Catalog: {catalog}')
print(f'Schema Bronze: {schema_bronze}')
print(f'Schema Silver: {schema_silver}')

In [None]:
%sql
-- Use the configured catalog and schema.
-- Order matters: select the catalog first, create the silver schema if it is
-- missing, then make it the current schema for the statements that follow.
-- The ${...} placeholders carry the names of the notebook widgets
-- (catalog, schema_silver) declared in the configuration cell.
USE CATALOG `${catalog}`;
CREATE SCHEMA IF NOT EXISTS `${catalog}`.`${schema_silver}`;
USE SCHEMA `${schema_silver}`;

In [None]:
%sql
-- =========================================================
-- SILVER: maintenance_orders_clean (dedupe + idempotency via hash)
-- =========================================================
-- Target Delta table for the cleaned maintenance orders.
-- IF NOT EXISTS keeps the statement safe to re-run: an existing table is left as is.
CREATE TABLE IF NOT EXISTS ${catalog}.${schema_silver}.maintenance_orders_clean (
  maintenance_order_id STRING,     -- key used to match rows in the MERGE
  equipment_id         STRING,
  maintenance_type     STRING,     -- loaded trimmed and lower-cased
  scheduled_start      TIMESTAMP,
  scheduled_end        TIMESTAMP,
  actual_start         TIMESTAMP,
  actual_end           TIMESTAMP,
  technician_id        STRING,
  status               STRING,     -- loaded trimmed and upper-cased
  priority             STRING,     -- loaded trimmed and lower-cased
  description          STRING,
  last_update          TIMESTAMP,  -- parsed from the bronze last_update value
  row_hash             STRING      -- change-detection hash compared by the MERGE
) USING DELTA;

-- Staging view over the bronze maintenance orders.
--   * text attributes are trimmed and case-normalized (type/priority lower, status upper)
--   * every date-like column is parsed by trying a fixed list of formats in order;
--     the first format that parses wins, and a value matching none of them becomes NULL
--   * rows without a maintenance_order_id are discarded
CREATE OR REPLACE TEMP VIEW stage_maintenance_orders AS
SELECT
  b.maintenance_order_id,
  b.equipment_id,
  LOWER(TRIM(b.maintenance_type)) AS maintenance_type_norm,

  -- planned window
  COALESCE(
    try_to_timestamp(b.scheduled_start, 'yyyy-MM-dd HH:mm:ss'),
    try_to_timestamp(b.scheduled_start, 'yyyy/MM/dd HH:mm:ss'),
    try_to_timestamp(b.scheduled_start, 'dd/MM/yyyy HH:mm:ss'),
    try_to_timestamp(b.scheduled_start, 'yyyy-MM-dd'),
    try_to_timestamp(b.scheduled_start, 'yyyy/MM/dd')
  ) AS scheduled_start_ts,
  COALESCE(
    try_to_timestamp(b.scheduled_end, 'yyyy-MM-dd HH:mm:ss'),
    try_to_timestamp(b.scheduled_end, 'yyyy/MM/dd HH:mm:ss'),
    try_to_timestamp(b.scheduled_end, 'dd/MM/yyyy HH:mm:ss'),
    try_to_timestamp(b.scheduled_end, 'yyyy-MM-dd'),
    try_to_timestamp(b.scheduled_end, 'yyyy/MM/dd')
  ) AS scheduled_end_ts,

  -- actual execution window
  COALESCE(
    try_to_timestamp(b.actual_start, 'yyyy-MM-dd HH:mm:ss'),
    try_to_timestamp(b.actual_start, 'yyyy/MM/dd HH:mm:ss'),
    try_to_timestamp(b.actual_start, 'dd/MM/yyyy HH:mm:ss'),
    try_to_timestamp(b.actual_start, 'yyyy-MM-dd'),
    try_to_timestamp(b.actual_start, 'yyyy/MM/dd')
  ) AS actual_start_ts,
  COALESCE(
    try_to_timestamp(b.actual_end, 'yyyy-MM-dd HH:mm:ss'),
    try_to_timestamp(b.actual_end, 'yyyy/MM/dd HH:mm:ss'),
    try_to_timestamp(b.actual_end, 'dd/MM/yyyy HH:mm:ss'),
    try_to_timestamp(b.actual_end, 'yyyy-MM-dd'),
    try_to_timestamp(b.actual_end, 'yyyy/MM/dd')
  ) AS actual_end_ts,

  b.technician_id,
  UPPER(TRIM(b.status))   AS status_norm,
  LOWER(TRIM(b.priority)) AS priority_norm,
  b.description,

  -- record version timestamp
  COALESCE(
    try_to_timestamp(b.last_update, 'yyyy-MM-dd HH:mm:ss'),
    try_to_timestamp(b.last_update, 'yyyy/MM/dd HH:mm:ss'),
    try_to_timestamp(b.last_update, 'dd/MM/yyyy HH:mm:ss'),
    try_to_timestamp(b.last_update, 'yyyy-MM-dd'),
    try_to_timestamp(b.last_update, 'yyyy/MM/dd')
  ) AS last_update_ts
FROM ${catalog}.${schema_bronze}.maintenance_orders AS b
WHERE b.maintenance_order_id IS NOT NULL;

-- Processing window: keep only staged rows whose parsed last_update_ts falls
-- within the last 60 days. date_sub() yields a DATE, so the cutoff is a calendar
-- day boundary rather than "now minus 60 x 24h". Rows whose last_update could not
-- be parsed (NULL last_update_ts) fail the comparison and are dropped here.
CREATE OR REPLACE TEMP VIEW stage_maintenance_orders_win AS
SELECT
  s.maintenance_order_id,
  s.equipment_id,
  s.maintenance_type_norm,
  s.scheduled_start_ts,
  s.scheduled_end_ts,
  s.actual_start_ts,
  s.actual_end_ts,
  s.technician_id,
  s.status_norm,
  s.priority_norm,
  s.description,
  s.last_update_ts
FROM stage_maintenance_orders AS s
WHERE s.last_update_ts >= date_sub(current_timestamp(), 60);

-- Deduplication: keep exactly one row per maintenance_order_id, the one with the
-- most recent last_update_ts, and rename the staged *_norm / *_ts columns to the
-- names used by the silver table.
--
-- Tie-breaking: the previous secondary sort key was maintenance_order_id, which is
-- the partition key and therefore constant inside each partition, so rows sharing
-- the same last_update_ts were picked arbitrarily and the result could change
-- between runs. The remaining payload columns are now part of the ordering; any
-- rows still tied after that are identical in every projected column, so the pick
-- no longer affects the output. The specific preference among tied rows is
-- arbitrary; it only needs to be stable.
CREATE OR REPLACE TEMP VIEW stage_maintenance_orders_dedup AS
SELECT
  maintenance_order_id,
  equipment_id,
  maintenance_type_norm AS maintenance_type,
  scheduled_start_ts AS scheduled_start,
  scheduled_end_ts AS scheduled_end,
  actual_start_ts AS actual_start,
  actual_end_ts AS actual_end,
  technician_id,
  status_norm AS status,
  priority_norm AS priority,
  description,
  last_update_ts AS last_update
FROM (
  SELECT
    s.*,
    ROW_NUMBER() OVER (
      PARTITION BY s.maintenance_order_id
      ORDER BY s.last_update_ts        DESC NULLS LAST,
               -- deterministic tie-breakers (all remaining payload columns)
               s.actual_end_ts         DESC NULLS LAST,
               s.actual_start_ts       DESC NULLS LAST,
               s.scheduled_end_ts      DESC NULLS LAST,
               s.scheduled_start_ts    DESC NULLS LAST,
               s.status_norm           ASC  NULLS LAST,
               s.priority_norm         ASC  NULLS LAST,
               s.maintenance_type_norm ASC  NULLS LAST,
               s.equipment_id          ASC  NULLS LAST,
               s.technician_id         ASC  NULLS LAST,
               s.description           ASC  NULLS LAST
    ) AS rn
  FROM stage_maintenance_orders_win s
  -- Rows without a usable version timestamp cannot be ranked meaningfully.
  WHERE s.last_update_ts IS NOT NULL
) z
WHERE rn = 1;

-- Final staging view: the deduplicated rows plus row_hash, a SHA-256 digest of the
-- business attributes. The MERGE compares this hash with the stored one and only
-- rewrites a row when they differ.
--
-- description is now part of the hash input. It was previously left out, so a
-- change that touched only the description produced the same hash and the MERGE
-- skipped the row, even though its UPDATE branch assigns description.
-- Side effect: hashes stored by earlier runs will not match, so rows in the
-- current window are rewritten once on the next run and are stable afterwards.
--
-- NOTE(review): last_update is still excluded from the hash, presumably so that a
-- timestamp-only touch does not rewrite the row -- confirm this is intended.
-- NOTE(review): NULL and '' hash identically because of the coalesce(..., '')
-- wrappers (needed since concat_ws skips NULL arguments).
CREATE OR REPLACE TEMP VIEW stage_maintenance_orders_final AS
SELECT
  maintenance_order_id,
  equipment_id,
  maintenance_type,
  scheduled_start,
  scheduled_end,
  actual_start,
  actual_end,
  technician_id,
  status,
  priority,
  description,
  last_update,
  sha2(concat_ws('||',
    coalesce(equipment_id,''),
    coalesce(maintenance_type,''),
    -- timestamps are rendered with a fixed pattern so the hash input is stable
    coalesce(date_format(scheduled_start,'yyyy-MM-dd HH:mm:ss'),''),
    coalesce(date_format(scheduled_end,'yyyy-MM-dd HH:mm:ss'),''),
    coalesce(date_format(actual_start,'yyyy-MM-dd HH:mm:ss'),''),
    coalesce(date_format(actual_end,'yyyy-MM-dd HH:mm:ss'),''),
    coalesce(technician_id,''),
    coalesce(status,''),
    coalesce(priority,''),
    coalesce(description,'')
  ), 256) AS row_hash
FROM stage_maintenance_orders_dedup;

-- Upsert the staged rows into the silver table, matching on maintenance_order_id.
--   * matched rows are rewritten only when the stored hash is missing or differs
--     from the staged one, so re-running with unchanged input updates nothing
--   * unmatched staged rows are inserted
MERGE INTO ${catalog}.${schema_silver}.maintenance_orders_clean AS t
USING stage_maintenance_orders_final AS s
  ON t.maintenance_order_id = s.maintenance_order_id
WHEN MATCHED AND (t.row_hash IS NULL OR t.row_hash <> s.row_hash) THEN
  UPDATE SET
    t.equipment_id = s.equipment_id,
    t.maintenance_type = s.maintenance_type,
    t.scheduled_start = s.scheduled_start,
    t.scheduled_end = s.scheduled_end,
    t.actual_start = s.actual_start,
    t.actual_end = s.actual_end,
    t.technician_id = s.technician_id,
    t.status = s.status,
    t.priority = s.priority,
    t.description = s.description,
    t.last_update = s.last_update,
    t.row_hash = s.row_hash
WHEN NOT MATCHED THEN
  INSERT (
    maintenance_order_id,
    equipment_id,
    maintenance_type,
    scheduled_start,
    scheduled_end,
    actual_start,
    actual_end,
    technician_id,
    status,
    priority,
    description,
    last_update,
    row_hash
  )
  VALUES (
    s.maintenance_order_id,
    s.equipment_id,
    s.maintenance_type,
    s.scheduled_start,
    s.scheduled_end,
    s.actual_start,
    s.actual_end,
    s.technician_id,
    s.status,
    s.priority,
    s.description,
    s.last_update,
    s.row_hash
  );
