In [4]:
%%sql
-- Gold customer dimension, identified by client_id.
-- Only the key and the audit/lineage columns are mandatory; every
-- descriptive attribute is nullable. Idempotent: a no-op if the table exists.
CREATE TABLE IF NOT EXISTS gold_dim_user (
  -- Key
  client_id BIGINT NOT NULL,

  -- Descriptive attributes (all nullable)
  current_age INT,
  retirement_age INT,
  birth_year INT,
  birth_month INT,
  gender STRING,
  address STRING,
  latitude DECIMAL(9,6),
  longitude DECIMAL(9,6),
  per_capita_income DECIMAL(18,2),
  yearly_income DECIMAL(18,2),
  total_debt DECIMAL(18,2),
  credit_score INT,
  num_credit_cards INT,

  -- Audit / lineage columns labelled "Silver" by the original author
  source_file STRING NOT NULL,
  ingestion_date DATE NOT NULL,
  ingestion_ts TIMESTAMP NOT NULL,
  record_hash STRING NOT NULL,

  -- Audit columns stamped by the Gold load
  gold_run_id STRING NOT NULL,
  gold_load_ts TIMESTAMP NOT NULL
)
USING DELTA;


StatementMeta(, b6785450-1c51-48dd-856e-68d50242a1bb, 5, Finished, Available, Finished)

<Spark SQL result set with 0 rows and 0 fields>

In [5]:
%%sql
-- Gold card dimension, identified by card_id.
-- client_id is nullable here even though it is NOT NULL in gold_dim_user.
-- Idempotent: a no-op if the table already exists.
--
-- NOTE(review): card_number and cvv are persisted as plain STRING columns.
-- Confirm they are masked/tokenized upstream; PCI DSS does not permit
-- storing the card verification code after authorization.
CREATE TABLE IF NOT EXISTS gold_dim_card (
  -- Key
  card_id BIGINT NOT NULL,

  -- Descriptive attributes (all nullable)
  client_id BIGINT,
  card_brand STRING,
  card_type STRING,
  card_number STRING,
  expires_raw STRING,
  expires_month DATE,
  cvv STRING,
  has_chip BOOLEAN,
  num_cards_issued INT,
  credit_limit DECIMAL(18,2),
  acct_open_date DATE,
  year_pin_last_changed INT,
  card_on_dark_web BOOLEAN,

  -- Audit / lineage columns labelled "Silver" by the original author
  source_file STRING NOT NULL,
  ingestion_date DATE NOT NULL,
  ingestion_ts TIMESTAMP NOT NULL,
  record_hash STRING NOT NULL,

  -- Audit columns stamped by the Gold load
  gold_run_id STRING NOT NULL,
  gold_load_ts TIMESTAMP NOT NULL
)
USING DELTA;


StatementMeta(, b6785450-1c51-48dd-856e-68d50242a1bb, 6, Finished, Available, Finished)

<Spark SQL result set with 0 rows and 0 fields>

In [6]:
%%sql
-- Gold merchant-category-code dimension, identified by mcc_code.
-- The code is kept as STRING (not a numeric type).
-- Idempotent: a no-op if the table already exists.
CREATE TABLE IF NOT EXISTS gold_dim_mcc (
  -- Key and label
  mcc_code STRING NOT NULL,
  mcc_description STRING,

  -- Audit / lineage columns labelled "Silver" by the original author
  source_file STRING NOT NULL,
  ingestion_date DATE NOT NULL,
  ingestion_ts TIMESTAMP NOT NULL,
  record_hash STRING NOT NULL,

  -- Audit columns stamped by the Gold load
  gold_run_id STRING NOT NULL,
  gold_load_ts TIMESTAMP NOT NULL
)
USING DELTA;


StatementMeta(, b6785450-1c51-48dd-856e-68d50242a1bb, 7, Finished, Available, Finished)

<Spark SQL result set with 0 rows and 0 fields>

In [7]:
%%sql
-- Gold calendar dimension. Unlike the other dimensions it carries no
-- source_file / ingestion_* / record_hash lineage columns, only Gold audit.
-- Idempotent: a no-op if the table already exists.
CREATE TABLE IF NOT EXISTS gold_dim_date (
  -- Keys
  date_id INT NOT NULL,          -- integer of the form YYYYMMDD
  date_value DATE NOT NULL,

  -- Calendar attributes (all nullable)
  day_of_month INT,
  day_of_week_iso INT,           -- 1=Mon ... 7=Sun
  day_name STRING,
  week_of_year INT,
  month_number INT,
  month_name STRING,
  quarter_number INT,
  year_number INT,
  is_weekend BOOLEAN,

  -- Audit columns stamped by the Gold load
  gold_run_id STRING NOT NULL,
  gold_load_ts TIMESTAMP NOT NULL
)
USING DELTA;


StatementMeta(, b6785450-1c51-48dd-856e-68d50242a1bb, 8, Finished, Available, Finished)

<Spark SQL result set with 0 rows and 0 fields>

In [3]:
%%sql
-- Gold transaction fact table, partitioned by txn_month.
--
-- (Re)creates the table with an empty snapshot on every run. This uses a
-- single CREATE OR REPLACE TABLE instead of DROP TABLE + CREATE TABLE:
--   * the swap is one atomic Delta commit, so a failure cannot leave the
--     catalog without the table and concurrent readers never see it missing;
--   * the previous data files and table history are retained (until VACUUM),
--     so an accidental re-run can be undone with time travel / RESTORE.
-- WARNING: the current snapshot is still emptied each time this cell runs.
CREATE OR REPLACE TABLE gold_fact_transactions (
  -- Transaction identity and time
  transaction_id   BIGINT        NOT NULL,
  txn_ts           TIMESTAMP     NOT NULL,
  txn_date         DATE          NOT NULL,
  txn_month        DATE          NOT NULL,  -- partition column

  -- References to other entities (not enforced by this DDL)
  client_id        BIGINT        NOT NULL,
  card_id          BIGINT        NOT NULL,
  merchant_id      BIGINT        NOT NULL,
  mcc_code         STRING        NOT NULL,

  -- Measures and descriptive attributes
  amount           DECIMAL(18,2) NOT NULL,
  use_chip         STRING,
  merchant_city    STRING,
  merchant_state   STRING,
  zip              STRING,
  error_code       INT           NOT NULL,
  is_success       BOOLEAN       NOT NULL,
  is_chip_used     BOOLEAN,

  -- Lineage carried with each row
  record_hash      STRING        NOT NULL,
  source_file      STRING        NOT NULL,
  ingestion_date   DATE          NOT NULL,
  ingestion_ts     TIMESTAMP     NOT NULL,

  -- Audit columns stamped by the Gold load
  gold_run_id      STRING        NOT NULL,
  gold_load_ts     TIMESTAMP     NOT NULL
)
USING DELTA
PARTITIONED BY (txn_month);



StatementMeta(, 6c4ad1bd-dd2d-4418-ac5b-53dbad0b26d8, 7, Finished, Available, Finished)

<Spark SQL result set with 0 rows and 0 fields>

<Spark SQL result set with 0 rows and 0 fields>

In [1]:
%%sql
-- Row-level anomaly events recorded by Gold runs.
--
-- (Re)creates the table with an empty snapshot on every run. A single
-- CREATE OR REPLACE TABLE is used instead of DROP TABLE + CREATE TABLE so the
-- swap is one atomic Delta commit (no window where the table is missing) and
-- the previous versions stay reachable via time travel until VACUUM.
-- WARNING: the current snapshot is still emptied each time this cell runs.
CREATE OR REPLACE TABLE gold_anomaly_event (
  gold_run_id     STRING    NOT NULL,
  event_ts        TIMESTAMP NOT NULL,
  entity          STRING    NOT NULL,
  anom_domain     STRING    NOT NULL,
  anom_type       STRING    NOT NULL,
  severity        STRING    NOT NULL,
  rule_id         STRING    NOT NULL,
  key_hash        STRING    NOT NULL,
  natural_keys    STRING    NOT NULL,
  source_table    STRING    NOT NULL,
  detail          STRING,              -- only nullable column
  gold_load_ts    TIMESTAMP NOT NULL
)
USING DELTA;


StatementMeta(, 6c4ad1bd-dd2d-4418-ac5b-53dbad0b26d8, 3, Finished, Available, Finished)

<Spark SQL result set with 0 rows and 0 fields>

<Spark SQL result set with 0 rows and 0 fields>

In [2]:
%%sql
-- Aggregated anomaly counts recorded by Gold runs (row_count per rule).
--
-- (Re)creates the table with an empty snapshot on every run. A single
-- CREATE OR REPLACE TABLE is used instead of DROP TABLE + CREATE TABLE so the
-- swap is one atomic Delta commit (no window where the table is missing) and
-- the previous versions stay reachable via time travel until VACUUM.
-- WARNING: the current snapshot is still emptied each time this cell runs.
CREATE OR REPLACE TABLE gold_anomaly_kpi (
  gold_run_id   STRING    NOT NULL,
  kpi_ts        TIMESTAMP NOT NULL,
  entity        STRING    NOT NULL,
  anom_domain   STRING    NOT NULL,
  anom_type     STRING    NOT NULL,
  severity      STRING    NOT NULL,
  rule_id       STRING    NOT NULL,
  row_count     BIGINT    NOT NULL,
  sample_keys   STRING,              -- only nullable column
  gold_load_ts  TIMESTAMP NOT NULL
)
USING DELTA;


StatementMeta(, 6c4ad1bd-dd2d-4418-ac5b-53dbad0b26d8, 5, Finished, Available, Finished)

<Spark SQL result set with 0 rows and 0 fields>

<Spark SQL result set with 0 rows and 0 fields>

In [1]:
%%sql
-- Run-level execution log for the Gold pipeline.
-- Every column is nullable; nothing is enforced by this DDL.
-- Idempotent: a no-op if the table already exists.
CREATE TABLE IF NOT EXISTS gold_log_runs (
  gold_run_id STRING,
  pipeline_name STRING,
  layer STRING,            -- 'gold'
  start_ts TIMESTAMP,
  end_ts TIMESTAMP,
  duration_ms BIGINT,
  status STRING,           -- RUNNING|SUCCESS|FAILED|PARTIAL
  triggered_by STRING,     -- manual|schedule|ci
  environment STRING,      -- dev|prod
  params_json STRING,
  error_message STRING
)
USING DELTA;


StatementMeta(, a0610250-bcf1-442f-8d07-21edd504701f, 2, Finished, Available, Finished)

<Spark SQL result set with 0 rows and 0 fields>

In [2]:
%%sql
-- Step-level execution log for the Gold pipeline (rows carry a gold_run_id
-- and a step_seq). Every column is nullable; nothing is enforced by this DDL.
-- Idempotent: a no-op if the table already exists.
CREATE TABLE IF NOT EXISTS gold_log_steps (
  -- Step identity
  gold_run_id STRING,
  step_seq INT,
  entity_code STRING,
  notebook_name STRING,

  -- Timing and outcome
  start_ts TIMESTAMP,
  end_ts TIMESTAMP,
  duration_ms BIGINT,
  status STRING,           -- RUNNING|SUCCESS|FAILED|SKIPPED
  critical BOOLEAN,

  -- Counters
  row_in BIGINT,
  row_out BIGINT,
  row_rejected BIGINT,
  partition_count INT,
  dedup_dropped BIGINT,
  anom_count BIGINT,

  -- Diagnostics
  error_message STRING,
  payload_json STRING
)
USING DELTA;


StatementMeta(, a0610250-bcf1-442f-8d07-21edd504701f, 3, Finished, Available, Finished)

<Spark SQL result set with 0 rows and 0 fields>

In [3]:
%%sql
-- Control table listing the Gold entities, one configuration row per
-- entity_code. Every column is nullable; nothing is enforced by this DDL.
-- Idempotent: a no-op if the table already exists.
CREATE TABLE IF NOT EXISTS gold_ctl_entity (
  entity_code STRING,                -- e.g. dim_user, fact_transactions
  entity_type STRING,                -- DIM|FACT
  target_table STRING,               -- e.g. gold_dim_user
  enabled BOOLEAN,
  load_order INT,
  notebook_name STRING,              -- e.g. nb_gold_dim_user
  notebook_path STRING,              -- optional (only if you use it)
  load_mode_default STRING,          -- full|incremental
  partition_cols ARRAY<STRING>,      -- e.g. ["txn_month"]; [] when not partitioned
  natural_key_cols ARRAY<STRING>,    -- e.g. ["transaction_id"]
  depends_on ARRAY<STRING>,          -- optional
  critical BOOLEAN,                  -- fail fast when true
  timeout_minutes INT,               -- e.g. 60
  retries INT,                       -- e.g. 1
  owner STRING,
  notes STRING,
  updated_ts TIMESTAMP
)
USING DELTA;


StatementMeta(, a0610250-bcf1-442f-8d07-21edd504701f, 4, Finished, Available, Finished)

<Spark SQL result set with 0 rows and 0 fields>

In [1]:
%%sql
-- ==========================================================
-- Seed: gold_ctl_entity (snake_case convention)
-- ==========================================================
-- Notes:
-- - entity_code in snake_case
-- - entity_type = DIM/FACT
-- - target_table aligned with your implemented Gold tables
-- - load_order ensures DIMs run before FACT
-- - critical=true for banking-grade fail-fast (adjust if needed)
-- - timeout_minutes/retries can be tuned per your capacity
--
-- Idempotency:
-- - Written as an insert-only MERGE keyed on entity_code, so re-running this
--   cell never duplicates control rows (a plain INSERT would add five more
--   rows on every execution).
-- - Rows that already exist are deliberately left untouched, so values tuned
--   directly in the table (enabled, timeout_minutes, retries, ...) survive a
--   re-run. To change an existing entity, UPDATE it explicitly.
-- - NULL and empty-array literals are cast explicitly so the inline table has
--   a well-defined schema (STRING / ARRAY<STRING>) regardless of row order.
-- - updated_ts is stamped at insert time.
-- ==========================================================

MERGE INTO gold_ctl_entity AS tgt
USING (
  SELECT *
  FROM VALUES
    -- -----------------------
    -- Dimensions
    -- -----------------------
    (
      'dim_date', 'DIM', 'gold_dim_date',
      true, 10,
      'nb_gold_dim_date', CAST(NULL AS STRING),
      'full',
      CAST(array() AS ARRAY<STRING>),
      array('date_id'),
      CAST(array() AS ARRAY<STRING>),
      true, 30, 0,
      'data_engineering',
      'Generated calendar dimension. No Silver dependency.'
    ),
    (
      'dim_mcc', 'DIM', 'gold_dim_mcc',
      true, 20,
      'nb_gold_dim_mcc', CAST(NULL AS STRING),
      'full',
      CAST(array() AS ARRAY<STRING>),
      array('mcc_code'),
      CAST(array() AS ARRAY<STRING>),
      true, 30, 0,
      'data_engineering',
      'Conformed reference dimension from silver_mcc.'
    ),
    (
      'dim_user', 'DIM', 'gold_dim_user',
      true, 30,
      'nb_gold_dim_user', CAST(NULL AS STRING),
      'full',
      CAST(array() AS ARRAY<STRING>),
      array('client_id'),
      CAST(array() AS ARRAY<STRING>),
      true, 30, 0,
      'data_engineering',
      'Conformed customer dimension from silver_users.'
    ),
    (
      'dim_card', 'DIM', 'gold_dim_card',
      true, 40,
      'nb_gold_dim_card', CAST(NULL AS STRING),
      'full',
      CAST(array() AS ARRAY<STRING>),
      array('card_id'),
      array('dim_user'),
      true, 45, 0,
      'data_engineering',
      'Card dimension from silver_cards; conformance enforced against gold_dim_user.'
    ),

    -- -----------------------
    -- Facts
    -- -----------------------
    (
      'fact_transactions', 'FACT', 'gold_fact_transactions',
      true, 100,
      'nb_gold_fact_transactions', CAST(NULL AS STRING),
      'full',
      array('txn_month'),
      array('transaction_id'),
      array('dim_date','dim_user','dim_card','dim_mcc'),
      true, 60, 1,
      'data_engineering',
      'Main fact: 1 row per transaction. Partitioned by txn_month. Strict conformance to all dims.'
    )
  AS seed (
    entity_code,
    entity_type,
    target_table,
    enabled,
    load_order,
    notebook_name,
    notebook_path,
    load_mode_default,
    partition_cols,
    natural_key_cols,
    depends_on,
    critical,
    timeout_minutes,
    retries,
    owner,
    notes
  )
) AS src
ON tgt.entity_code = src.entity_code
WHEN NOT MATCHED THEN INSERT (
  entity_code,
  entity_type,
  target_table,
  enabled,
  load_order,
  notebook_name,
  notebook_path,
  load_mode_default,
  partition_cols,
  natural_key_cols,
  depends_on,
  critical,
  timeout_minutes,
  retries,
  owner,
  notes,
  updated_ts
)
VALUES (
  src.entity_code,
  src.entity_type,
  src.target_table,
  src.enabled,
  src.load_order,
  src.notebook_name,
  src.notebook_path,
  src.load_mode_default,
  src.partition_cols,
  src.natural_key_cols,
  src.depends_on,
  src.critical,
  src.timeout_minutes,
  src.retries,
  src.owner,
  src.notes,
  current_timestamp()
);


StatementMeta(, 6f07f132-1099-4d69-8190-f206e1b0bcc5, 2, Finished, Available, Finished)

<Spark SQL result set with 0 rows and 0 fields>