In [None]:
%sql
-- =========================================================
-- SILVER: iot_readings_clean (dedupe + idempotency via hash)
-- =========================================================
-- Target Delta table for cleaned IoT sensor readings. It is created only when
-- it does not exist yet, so re-running this cell never drops existing data.
CREATE TABLE IF NOT EXISTS silver.iot_readings_clean (
  -- Key the MERGE at the end of this script uses to match source rows to target rows.
  reading_id        STRING,
  equipment_id       STRING,
  sensor_id          STRING,
  -- Written by this script as the trimmed, lower-cased source sensor_type.
  sensor_type        STRING,
  -- Fixed-point value with 4 decimal places; unit of measure is in the `unit` column.
  reading_value      DECIMAL(18,4),
  -- Parsed from the bronze string column by the stage step of this script.
  reading_timestamp  TIMESTAMP,
  unit               STRING,
  -- SHA-256 hex digest computed by this script over equipment_id, sensor_id,
  -- sensor_type, reading_value, reading_timestamp and unit; the MERGE compares
  -- it to decide whether a matched row needs to be updated.
  row_hash           STRING
) USING DELTA;

-- 1) Stage: normalize types and parse reading_timestamp.
--    Reads bronze.iot_sensor_readings and keeps only rows that have both
--    equipment_id and sensor_id. Derived columns:
--      sensor_type_norm   - sensor_type trimmed and lower-cased
--      reading_value_norm - reading_value with every ',' replaced by '.',
--                           converted to DECIMAL(18,4); NULL when not numeric
--      parsed_timestamp   - result of the first format below that parses,
--                           NULL when none of them does
CREATE OR REPLACE TEMP VIEW stage_iot_readings AS
SELECT
  reading_id,
  equipment_id,
  sensor_id,
  LOWER(TRIM(sensor_type)) AS sensor_type_norm,
  -- try_cast returns NULL for a malformed or out-of-range value instead of
  -- raising when ANSI mode is enabled, so one bad reading cannot abort the
  -- whole load (same tolerant behavior as try_to_timestamp below).
  try_cast(regexp_replace(reading_value, ',', '.') AS DECIMAL(18,4)) AS reading_value_norm,
  -- Date-time formats are tried before date-only formats; the first non-NULL wins.
  COALESCE(
    try_to_timestamp(reading_timestamp, 'yyyy-MM-dd HH:mm:ss'),
    try_to_timestamp(reading_timestamp, 'yyyy/MM/dd HH:mm:ss'),
    try_to_timestamp(reading_timestamp, 'dd/MM/yyyy HH:mm:ss'),
    try_to_timestamp(reading_timestamp, 'dd-MM-yyyy HH:mm:ss'),
    try_to_timestamp(reading_timestamp, 'yyyy-MM-dd'),
    try_to_timestamp(reading_timestamp, 'yyyy/MM/dd'),
    try_to_timestamp(reading_timestamp, 'dd/MM/yyyy'),
    try_to_timestamp(reading_timestamp, 'dd-MM-yyyy')
  ) AS parsed_timestamp,
  unit
FROM bronze.iot_sensor_readings
WHERE equipment_id IS NOT NULL
  AND sensor_id IS NOT NULL;

-- 2) Incremental window (60-day watermark).
--    Passes on only the staged rows whose parsed_timestamp is on or after the
--    date 60 days before today. Rows with a NULL parsed_timestamp fail the
--    comparison and are therefore excluded here as well.
CREATE OR REPLACE TEMP VIEW stage_iot_readings_win AS
SELECT
  r.reading_id,
  r.equipment_id,
  r.sensor_id,
  r.sensor_type_norm,
  r.reading_value_norm,
  r.parsed_timestamp,
  r.unit
FROM stage_iot_readings AS r
WHERE r.parsed_timestamp >= date_sub(current_timestamp(), 60);

-- 3) Dedup: keep exactly one row per (equipment_id, sensor_id, reading_timestamp).
--    Also renames the normalized columns back to their final names.
CREATE OR REPLACE TEMP VIEW stage_iot_readings_dedup AS
WITH ranked AS (
  SELECT
    w.reading_id,
    w.equipment_id,
    w.sensor_id,
    w.sensor_type_norm,
    w.reading_value_norm,
    w.parsed_timestamp,
    w.unit,
    -- parsed_timestamp is part of the partition key, so it is constant inside
    -- each partition; the effective tie-breaker is the highest reading_id.
    ROW_NUMBER() OVER (
      PARTITION BY w.equipment_id, w.sensor_id, w.parsed_timestamp
      ORDER BY w.parsed_timestamp DESC NULLS LAST,
               w.reading_id DESC
    ) AS rn
  FROM stage_iot_readings_win AS w
  WHERE w.parsed_timestamp IS NOT NULL
)
SELECT
  ranked.reading_id,
  ranked.equipment_id,
  ranked.sensor_id,
  ranked.sensor_type_norm   AS sensor_type,
  ranked.reading_value_norm AS reading_value,
  ranked.parsed_timestamp   AS reading_timestamp,
  ranked.unit
FROM ranked
WHERE ranked.rn = 1;

-- 4) Compute the row hash.
--    row_hash is the SHA-256 hex digest of the business columns joined with
--    '||'. The MERGE compares it against the stored hash to decide whether a
--    matched row has changed. reading_id is intentionally not part of the hash.
CREATE OR REPLACE TEMP VIEW stage_iot_readings_final AS
SELECT
  reading_id,
  equipment_id,
  sensor_id,
  sensor_type,
  reading_value,
  reading_timestamp,
  unit,
  -- concat_ws silently skips NULL arguments, which would shift the remaining
  -- fields into other positions; every field is therefore coalesced so the
  -- digest always covers six positional parts.
  -- A NULL reading_value is hashed as '' (not as 0) so that a change between
  -- NULL and 0 produces a different hash and is picked up by the MERGE.
  -- Rows without NULL inputs keep exactly the hash they had before.
  sha2(concat_ws('||',
    coalesce(equipment_id, ''),
    coalesce(sensor_id, ''),
    coalesce(sensor_type, ''),
    coalesce(cast(reading_value as string), ''),
    coalesce(date_format(reading_timestamp,'yyyy-MM-dd HH:mm:ss'),''),
    coalesce(unit,'')
  ), 256) AS row_hash
FROM stage_iot_readings_dedup;

-- 5) Idempotent MERGE into silver.iot_readings_clean, keyed on reading_id.
--    Matched rows are rewritten only when the stored hash is missing or
--    differs from the incoming one; unmatched rows are inserted.
--
--    The staged data is unique per (equipment_id, sensor_id, reading_timestamp),
--    not per reading_id, so the source is narrowed here to the merge key:
--      * rows with a NULL reading_id are excluded, because NULL never satisfies
--        the ON condition and such rows would be inserted again on every run;
--      * one row is kept per reading_id (latest reading_timestamp, then
--        equipment_id / sensor_id as deterministic tie-breakers), because Delta
--        rejects a MERGE in which several source rows match the same target row.
MERGE INTO silver.iot_readings_clean AS t
USING (
  SELECT
    k.reading_id,
    k.equipment_id,
    k.sensor_id,
    k.sensor_type,
    k.reading_value,
    k.reading_timestamp,
    k.unit,
    k.row_hash
  FROM (
    SELECT
      f.reading_id,
      f.equipment_id,
      f.sensor_id,
      f.sensor_type,
      f.reading_value,
      f.reading_timestamp,
      f.unit,
      f.row_hash,
      ROW_NUMBER() OVER (
        PARTITION BY f.reading_id
        ORDER BY f.reading_timestamp DESC NULLS LAST,
                 f.equipment_id,
                 f.sensor_id
      ) AS key_rn
    FROM stage_iot_readings_final AS f
    WHERE f.reading_id IS NOT NULL
  ) AS k
  WHERE k.key_rn = 1
) AS s
ON  t.reading_id = s.reading_id
WHEN MATCHED AND (t.row_hash IS NULL OR t.row_hash <> s.row_hash) THEN UPDATE SET
  t.equipment_id      = s.equipment_id,
  t.sensor_id         = s.sensor_id,
  t.sensor_type       = s.sensor_type,
  t.reading_value     = s.reading_value,
  t.reading_timestamp = s.reading_timestamp,
  t.unit              = s.unit,
  t.row_hash          = s.row_hash
WHEN NOT MATCHED THEN INSERT (
  reading_id, equipment_id, sensor_id, sensor_type, reading_value, reading_timestamp, unit, row_hash
) VALUES (
  s.reading_id, s.equipment_id, s.sensor_id, s.sensor_type, s.reading_value, s.reading_timestamp, s.unit, s.row_hash
);
