In [None]:
# Job parameters: declare one text widget per setting (name, default value),
# then read back the value currently held by each widget.
for _widget_name, _default_value in (
    ('catalog', 'manufatura_lakehouse'),
    ('schema_bronze', 'bronze'),
    ('schema_silver', 'silver'),
):
    dbutils.widgets.text(_widget_name, _default_value)

catalog, schema_bronze, schema_silver = (
    dbutils.widgets.get(_widget_name)
    for _widget_name in ('catalog', 'schema_bronze', 'schema_silver')
)

# Echo the resolved settings, one per line.
print(
    f'Catalog: {catalog}',
    f'Schema Bronze: {schema_bronze}',
    f'Schema Silver: {schema_silver}',
    sep='\n',
)

In [None]:
%sql
-- Make sure the silver schema exists (the name is fully qualified, so this
-- statement does not depend on the session's current catalog), then switch the
-- session to the configured catalog and silver schema.
CREATE SCHEMA IF NOT EXISTS `${catalog}`.`${schema_silver}`;
USE CATALOG `${catalog}`;
USE SCHEMA `${schema_silver}`;

In [None]:
%sql
-- ---------------------------------------------------------------------------
-- SILVER: production_orders_clean
-- Target Delta table for the cleaned production orders. It is created only
-- when missing, so re-running this cell leaves an existing table untouched.
-- ---------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS ${catalog}.${schema_silver}.production_orders_clean (
  production_order_id STRING,     -- key used to match rows in the MERGE of this cell
  equipment_id STRING,
  product_id STRING,
  planned_start TIMESTAMP,
  planned_end TIMESTAMP,
  actual_start TIMESTAMP,
  actual_end TIMESTAMP,
  planned_quantity INT,
  actual_quantity INT,
  status STRING,
  last_update TIMESTAMP,
  row_hash STRING                 -- hash of the row content, compared by the MERGE of this cell
)
USING DELTA;

-- Staging view over the bronze production_orders table.
--   * Each timestamp column is parsed by trying several text formats in order;
--     the first format that parses wins, and the result is NULL when none match.
--   * Quantities accept a decimal comma and are parsed tolerantly (see below).
--   * status is trimmed and upper-cased.
--   * Rows without a production_order_id are dropped.
-- NOTE(review): assumes the bronze columns are stored as text - confirm against the bronze schema.
CREATE OR REPLACE TEMP VIEW stage_production_orders AS
SELECT
  production_order_id,
  equipment_id,
  product_id,
  COALESCE(
    try_to_timestamp(planned_start, 'yyyy-MM-dd HH:mm:ss'),
    try_to_timestamp(planned_start, 'yyyy/MM/dd HH:mm:ss'),
    try_to_timestamp(planned_start, 'dd/MM/yyyy HH:mm:ss'),
    try_to_timestamp(planned_start, 'yyyy-MM-dd'),
    try_to_timestamp(planned_start, 'yyyy/MM/dd')
  ) AS planned_start_ts,
  COALESCE(
    try_to_timestamp(planned_end, 'yyyy-MM-dd HH:mm:ss'),
    try_to_timestamp(planned_end, 'yyyy/MM/dd HH:mm:ss'),
    try_to_timestamp(planned_end, 'dd/MM/yyyy HH:mm:ss'),
    try_to_timestamp(planned_end, 'yyyy-MM-dd'),
    try_to_timestamp(planned_end, 'yyyy/MM/dd')
  ) AS planned_end_ts,
  COALESCE(
    try_to_timestamp(actual_start, 'yyyy-MM-dd HH:mm:ss'),
    try_to_timestamp(actual_start, 'yyyy/MM/dd HH:mm:ss'),
    try_to_timestamp(actual_start, 'dd/MM/yyyy HH:mm:ss'),
    try_to_timestamp(actual_start, 'yyyy-MM-dd'),
    try_to_timestamp(actual_start, 'yyyy/MM/dd')
  ) AS actual_start_ts,
  COALESCE(
    try_to_timestamp(actual_end, 'yyyy-MM-dd HH:mm:ss'),
    try_to_timestamp(actual_end, 'yyyy/MM/dd HH:mm:ss'),
    try_to_timestamp(actual_end, 'dd/MM/yyyy HH:mm:ss'),
    try_to_timestamp(actual_end, 'yyyy-MM-dd'),
    try_to_timestamp(actual_end, 'yyyy/MM/dd')
  ) AS actual_end_ts,
  -- Decimal comma -> decimal point, then a tolerant numeric parse. A direct
  -- CAST(<text> AS INT) raises an error under ANSI mode for values such as '12.5'
  -- or any non-numeric text, which would abort the whole load. try_cast returns
  -- NULL instead, matching the try_to_timestamp handling of the other columns.
  -- Going through DECIMAL keeps fractional inputs parseable; the cast to INT
  -- then drops the fractional part.
  try_cast(
    try_cast(regexp_replace(planned_quantity, ',', '.') AS DECIMAL(38, 18)) AS INT
  ) AS planned_quantity_norm,
  try_cast(
    try_cast(regexp_replace(actual_quantity, ',', '.') AS DECIMAL(38, 18)) AS INT
  ) AS actual_quantity_norm,
  UPPER(TRIM(status)) AS status_norm,
  COALESCE(
    try_to_timestamp(last_update, 'yyyy-MM-dd HH:mm:ss'),
    try_to_timestamp(last_update, 'yyyy/MM/dd HH:mm:ss'),
    try_to_timestamp(last_update, 'dd/MM/yyyy HH:mm:ss'),
    try_to_timestamp(last_update, 'yyyy-MM-dd'),
    try_to_timestamp(last_update, 'yyyy/MM/dd')
  ) AS last_update_ts
FROM ${catalog}.${schema_bronze}.production_orders
WHERE production_order_id IS NOT NULL;

-- Processing window: keep only staged rows updated within the last 60 days.
-- date_sub() yields a DATE, so the cutoff is compared as a date (start of day)
-- rather than as the exact current time of day. Rows whose last_update_ts is
-- NULL never satisfy the predicate and are therefore excluded here.
CREATE OR REPLACE TEMP VIEW stage_production_orders_win AS
SELECT
  production_order_id,
  equipment_id,
  product_id,
  planned_start_ts,
  planned_end_ts,
  actual_start_ts,
  actual_end_ts,
  planned_quantity_norm,
  actual_quantity_norm,
  status_norm,
  last_update_ts
FROM stage_production_orders
WHERE last_update_ts >= date_sub(current_timestamp(), 60);

-- Deduplication: keep exactly one row per production_order_id, the one with the
-- newest last_update_ts, and rename the staged columns to the silver column names.
--
-- The tie-breakers must be columns that can differ inside a partition. Ordering
-- by production_order_id (the partition key) breaks no ties, so rows sharing the
-- same last_update_ts would be picked arbitrarily and the result could change
-- between runs. The remaining columns are therefore used, which makes the choice
-- deterministic: rows that tie on every column are identical in the output.
-- NOTE(review): preferring the later actual_end/actual_start and the larger
-- actual_quantity on a tie is a heuristic - confirm with the data owners.
CREATE OR REPLACE TEMP VIEW stage_production_orders_dedup AS
SELECT
  production_order_id,
  equipment_id,
  product_id,
  planned_start_ts AS planned_start,
  planned_end_ts AS planned_end,
  actual_start_ts AS actual_start,
  actual_end_ts AS actual_end,
  planned_quantity_norm AS planned_quantity,
  actual_quantity_norm AS actual_quantity,
  status_norm AS status,
  last_update_ts AS last_update
FROM (
  SELECT
    s.*,
    ROW_NUMBER() OVER (
      PARTITION BY s.production_order_id
      ORDER BY s.last_update_ts DESC NULLS LAST,
               s.actual_end_ts DESC NULLS LAST,
               s.actual_start_ts DESC NULLS LAST,
               s.actual_quantity_norm DESC NULLS LAST,
               s.status_norm ASC NULLS LAST,
               s.equipment_id ASC NULLS LAST,
               s.product_id ASC NULLS LAST,
               s.planned_start_ts DESC NULLS LAST,
               s.planned_end_ts DESC NULLS LAST,
               s.planned_quantity_norm DESC NULLS LAST
    ) AS rn
  FROM stage_production_orders_win s
  -- Rows whose last_update could not be parsed cannot be ordered and are skipped.
  WHERE s.last_update_ts IS NOT NULL
) z
WHERE rn = 1;

-- Final staging view: the deduplicated rows plus row_hash, a SHA-256 digest of
-- the row content that the MERGE in this cell compares to detect changes.
--
-- Hash input notes:
--   * concat_ws skips NULL arguments, so every field is coalesced to a
--     placeholder to keep each value in a fixed position.
--   * Quantities are cast to text first and NULL becomes '' (not 0). With a 0
--     placeholder, NULL and 0 hashed identically, so a change between
--     "no quantity" and "quantity 0" went undetected. Hashes of rows with
--     non-NULL quantities are unchanged by this; only rows holding a NULL
--     quantity get a new hash and are rewritten once by the next hash-based merge.
--   * production_order_id and last_update are not part of the hash: a row whose
--     only difference is last_update produces the same hash.
CREATE OR REPLACE TEMP VIEW stage_production_orders_final AS
SELECT
  production_order_id,
  equipment_id,
  product_id,
  planned_start,
  planned_end,
  actual_start,
  actual_end,
  planned_quantity,
  actual_quantity,
  status,
  last_update,
  sha2(concat_ws('||',
    coalesce(equipment_id,''),
    coalesce(product_id,''),
    coalesce(date_format(planned_start,'yyyy-MM-dd HH:mm:ss'),''),
    coalesce(date_format(planned_end,'yyyy-MM-dd HH:mm:ss'),''),
    coalesce(date_format(actual_start,'yyyy-MM-dd HH:mm:ss'),''),
    coalesce(date_format(actual_end,'yyyy-MM-dd HH:mm:ss'),''),
    coalesce(cast(planned_quantity as string),''),
    coalesce(cast(actual_quantity as string),''),
    coalesce(status,'')
  ), 256) AS row_hash
FROM stage_production_orders_dedup;

-- Upsert the staged rows into the silver table, matching on production_order_id.
--   * Matched rows are rewritten unless the stored hash is present and equal to
--     the incoming hash, so re-running with unchanged data updates nothing.
--   * Source rows with no match in the target are inserted.
MERGE INTO ${catalog}.${schema_silver}.production_orders_clean AS t
USING stage_production_orders_final AS s
  ON t.production_order_id = s.production_order_id
WHEN MATCHED AND NOT (t.row_hash IS NOT NULL AND t.row_hash = s.row_hash) THEN
  UPDATE SET
    t.row_hash         = s.row_hash,
    t.last_update      = s.last_update,
    t.status           = s.status,
    t.equipment_id     = s.equipment_id,
    t.product_id       = s.product_id,
    t.planned_start    = s.planned_start,
    t.planned_end      = s.planned_end,
    t.planned_quantity = s.planned_quantity,
    t.actual_start     = s.actual_start,
    t.actual_end       = s.actual_end,
    t.actual_quantity  = s.actual_quantity
WHEN NOT MATCHED THEN
  INSERT (
    production_order_id,
    equipment_id,
    product_id,
    planned_start,
    planned_end,
    actual_start,
    actual_end,
    planned_quantity,
    actual_quantity,
    status,
    last_update,
    row_hash
  )
  VALUES (
    s.production_order_id,
    s.equipment_id,
    s.product_id,
    s.planned_start,
    s.planned_end,
    s.actual_start,
    s.actual_end,
    s.planned_quantity,
    s.actual_quantity,
    s.status,
    s.last_update,
    s.row_hash
  );
