In [0]:
%sql
-- Make hive_metastore the current catalog so that two-part names such as
-- work.department and rawz.Department resolve against it.
USE CATALOG hive_metastore;

In [0]:
%sql
-- Load cleaned, de-duplicated department rows from rawz.Department into
-- work.department.
--
-- Steps:
--   cleaned  : pads/trims the ID columns and normalizes the free-text columns.
--   filtered : adds a SHA-256 checksum over the cleaned business columns.
--   ranked   : numbers the rows inside each checksum group, newest first.
--   final    : keeps one row per checksum and assigns a fresh uuid() key.
--
-- NOTE(review): there is no column list on the INSERT, so the SELECT output is
-- matched to work.department by position -- confirm the table's column order
-- is (key, id, name, location, manager id, ingestTimestamp, loadKey, checksum).
-- NOTE(review): this statement only appends; nothing here prevents a re-run
-- from inserting the same rows again -- confirm the table is reset upstream.
-- NOTE(review): rows with a NULL Dept_Name/Base_Loc or a blank Dept_MangID now
-- hash differently than before (see the checksum comment below); confirm no
-- consumer compares against checksums produced by the previous formula.
INSERT INTO work.department
WITH cleaned AS (
  SELECT
    -- Left-pad the trimmed ID with zeros to 8 characters. LPAD also shortens
    -- values that are longer than 8 characters.
    LPAD(TRIM(Dept_ID), 8, '0') AS Dept_ID,

    -- Normalize Dept_Name: spaces become underscores, runs of underscores
    -- collapse to one, and leading/trailing underscores are removed.
    TRIM(BOTH '_' FROM REGEXP_REPLACE(
      REPLACE(Dept_Name, ' ', '_'),
      '_+', '_'
    )) AS Dept_Name,

    -- Normalize Base_Loc with the same rules as Dept_Name.
    TRIM(BOTH '_' FROM REGEXP_REPLACE(
      REPLACE(Base_Loc, ' ', '_'),
      '_+', '_'
    )) AS Base_Loc,

    -- A blank manager ID is treated as "no manager" (NULL) instead of being
    -- padded to '00000000'. LPAD returns NULL for NULL input, so no CASE is
    -- needed.
    LPAD(NULLIF(TRIM(Dept_MangID), ''), 8, '0') AS Dept_MangID,

    ingestTimestamp AS ingestTimestamp,
    loadKey AS loadKey
  FROM rawz.Department
  -- Skip rows without a usable ID; a blank ID would otherwise be padded to
  -- '00000000' and look like a real department.
  WHERE Dept_ID IS NOT NULL
    AND TRIM(Dept_ID) <> ''
),
filtered AS (
  SELECT
    Dept_ID,
    Dept_Name,
    Base_Loc,
    Dept_MangID,
    ingestTimestamp,
    loadKey,

    -- Checksum over the cleaned business columns. CONCAT_WS drops NULL
    -- arguments, which would shift the remaining fields and let rows with a
    -- NULL in different columns hash to the same value, so every nullable
    -- column is replaced by '' to keep its position in the hashed string.
    SHA2(
      CONCAT_WS('||',
        Dept_ID,
        COALESCE(Dept_Name, ''),
        COALESCE(Base_Loc, ''),
        COALESCE(Dept_MangID, '')
      ),
      256
    ) AS checksum
  FROM cleaned
),
ranked AS (
  SELECT
    Dept_ID,
    Dept_Name,
    Base_Loc,
    Dept_MangID,
    ingestTimestamp,
    loadKey,
    checksum,
    -- rn = 1 marks the most recently ingested row of each checksum group;
    -- loadKey breaks ties so the chosen row does not vary between runs.
    ROW_NUMBER() OVER (
      PARTITION BY checksum
      ORDER BY ingestTimestamp DESC, loadKey DESC
    ) AS rn
  FROM filtered
)
SELECT
  uuid() AS Dept_Key,
  Dept_ID,
  Dept_Name,
  Base_Loc,
  Dept_MangID,
  ingestTimestamp,
  loadKey,
  checksum
FROM ranked
WHERE rn = 1;

In [0]:
%sql
-- Ad-hoc inspection: show every row and column currently in work.department.
SELECT *
FROM work.department;

In [0]:
%sql
-- Number of rows currently in work.department.
SELECT COUNT(*)
FROM work.department;