In [26]:
#!/usr/bin/env python
# coding: utf-8

# ============================================================
# nb_gold_load_v1_0_final — Gold Dispatcher (Fabric)
#
# Orchestrates Gold child notebooks (dims then facts), driven by:
# - gold_ctl_entity (enabled, load_order, notebook_name, retries, timeout, critical, target_table)
#
# Data-driven execution contract:
# - Parent writes RUNNING step row to gold_log_steps with payload_json={"ctx": {...}}
# - Child notebooks DO NOT rely on runtime args; they read ctx from gold_log_steps
#
# Logging:
# - gold_log_runs: RUNNING then final SUCCESS/FAILED/PARTIAL
# - gold_log_steps: append-only RUNNING + SUCCESS/FAILED per entity
# ============================================================

from datetime import datetime
import json
import re
import uuid
import time

from pyspark.sql import functions as F
from pyspark.sql import Row

# Resolve the notebook-utilities handle once: _nbu is the `notebookutils`
# object when that name is defined in this session, otherwise None (callers
# then go through mssparkutils instead).
try:
    notebookutils
except NameError:
    _nbu = None
else:
    _nbu = notebookutils


# -----------------------------
# Config (tables)
# -----------------------------
# Control table: one row per Gold entity (enabled flag, order, notebook, ...)
CTL_TABLE = "gold_ctl_entity"
# Run-level log (one RUNNING row plus one final row per dispatcher run)
LOG_RUNS_TABLE = "gold_log_runs"
# Step-level log (append-only rows per entity)
LOG_STEPS_TABLE = "gold_log_steps"
# Anomaly events, queried when a child does not report its own anomaly count
ANOM_EVENT_TABLE = "gold_anomaly_event"


# -----------------------------
# Helpers (time, parsing, safety)
# -----------------------------
def _utc_now() -> datetime:
    """
    Return the current UTC time as a naive datetime (tzinfo is None).

    Built from datetime.now(timezone.utc) instead of datetime.utcnow(), which
    is deprecated since Python 3.12. The tzinfo is stripped so the returned
    value stays naive, as it was before.
    """
    # Local import: the file-level import only brings in `datetime`.
    from datetime import timezone

    return datetime.now(timezone.utc).replace(tzinfo=None)

def _calc_duration_ms(start_ts: datetime, end_ts: datetime) -> int:
    """Return the time elapsed from start_ts to end_ts in milliseconds, truncated to an int."""
    elapsed = end_ts - start_ts
    elapsed_seconds = elapsed.total_seconds()
    return int(elapsed_seconds * 1000)

def _json_dumps(obj) -> str:
    """Serialize obj to a JSON string, leaving non-ASCII characters unescaped."""
    encoded = json.dumps(obj, ensure_ascii=False)
    return encoded

def _safe_int(x, default=0) -> int:
    """Return int(x), or `default` when the conversion raises."""
    try:
        converted = int(x)
    except Exception:
        return default
    else:
        return converted

def _safe_bool(x, default=False) -> bool:
    """
    Interpret x as a boolean.

    Real bools are returned unchanged. Anything else is stringified, trimmed
    and lower-cased, then matched against the recognized true/false tokens.
    None and unrecognized tokens yield `default`.
    """
    if isinstance(x, bool):
        return x
    if x is None:
        return default

    token = str(x).strip().lower()
    recognized = {
        "1": True, "true": True, "t": True, "yes": True, "y": True,
        "0": False, "false": False, "f": False, "no": False, "n": False,
    }
    return recognized.get(token, default)

def _get_widget(name: str, default: str = "") -> str:
    """
    Resolve a notebook parameter by name.

    Lookup order:
      1. _nbu.widgets.get(name), when _nbu is available
      2. mssparkutils.widgets.get(name)
      3. a notebook-level variable called `name` (for example one defined in
         a parameters cell), consulted only when neither widgets API could
         be called

    A widgets API that answers is authoritative: a None or blank answer
    yields `default` without looking further. Step 3 exists because on
    runtimes where no handle exposes `widgets` the first two steps always
    raise, and every parameter would otherwise collapse to its default.

    Args:
        name: Parameter name.
        default: Value returned when the parameter is unset or blank.

    Returns:
        The resolved value, or `default`. Widget values are returned as
        given; a value taken from a notebook-level variable is converted
        with str().
    """
    def _is_set(value) -> bool:
        return value is not None and str(value).strip() != ""

    # notebookutils first (Fabric)
    try:
        if _nbu is not None:
            value = _nbu.widgets.get(name)
            return value if _is_set(value) else default
    except Exception:
        # No usable widgets API on this handle; try the next source.
        pass

    try:
        value = mssparkutils.widgets.get(name)
        return value if _is_set(value) else default
    except Exception:
        # No usable widgets API here either; fall through to variables.
        pass

    value = globals().get(name)
    return str(value) if _is_set(value) else default


# -----------------------------
# Helpers (logging with schema enforcement)
# -----------------------------
def _append_row_to_table(table_name: str, row_dict: dict):
    """
    Append one row to an existing table, using the table's own schema.

    The target schema is read from the table and passed to createDataFrame,
    so no schema inference takes place. Columns absent from `row_dict` are
    written as None; keys in `row_dict` that are not table columns are
    ignored.

    Args:
        table_name: Name of the table to append to (must already exist).
        row_dict: Mapping of column name to value for the new row.
    """
    target_schema = spark.table(table_name).schema

    # Values go in as a plain tuple in schema order. The explicit schema is
    # applied positionally, and Row(**kwargs) sorts its fields alphabetically
    # on PySpark < 3.0, which would misalign values and columns.
    values = tuple(row_dict.get(field.name) for field in target_schema.fields)

    row_df = spark.createDataFrame([values], schema=target_schema)
    row_df.write.mode("append").saveAsTable(table_name)

def _write_run_log(row: dict):
    """Append one run-level record to the table named by LOG_RUNS_TABLE."""
    _append_row_to_table(table_name=LOG_RUNS_TABLE, row_dict=row)

def _write_step_log(row: dict):
    """Append one step-level record to the table named by LOG_STEPS_TABLE."""
    _append_row_to_table(table_name=LOG_STEPS_TABLE, row_dict=row)


# -----------------------------
# Helpers (selector)
# -----------------------------
def _parse_entity_selector(selector: str):
    """
    Classify an entity selector string.

    Returns a dict with keys "mode", "values" and "pattern":
      - blank / None / "ALL" (any case)  -> mode "ALL"
      - contains a comma                 -> mode "LIST", values = non-empty trimmed items
      - contains "*" or "%"              -> mode "WILDCARD", pattern with "%" turned into "*"
      - anything else                    -> mode "SINGLE", values = [selector]

    The comma check runs before the wildcard check, so wildcard characters
    inside a comma-separated list are kept as literal list items.
    """
    def _spec(mode, values=(), pattern=None):
        return {"mode": mode, "values": list(values), "pattern": pattern}

    text = (selector or "").strip()

    if not text or text.upper() == "ALL":
        return _spec("ALL")

    if "," in text:
        trimmed = (piece.strip() for piece in text.split(","))
        return _spec("LIST", [piece for piece in trimmed if piece])

    if "*" in text or "%" in text:
        return _spec("WILDCARD", pattern=text.replace("%", "*"))

    return _spec("SINGLE", [text])

def _selector_filter(df, selector: str):
    """
    Restrict df to the rows whose entity_code matches the selector.

    The selector is classified by _parse_entity_selector:
      - ALL         -> df is returned unchanged
      - SINGLE/LIST -> entity_code must be one of the listed values
      - WILDCARD    -> entity_code must match the pattern in full, where "*"
                       stands for any run of characters and everything else
                       is literal

    Raises:
        ValueError: if the parsed mode is none of the above.
    """
    parsed = _parse_entity_selector(selector)
    mode = parsed["mode"]

    if mode == "WILDCARD":
        # Escape the whole pattern, then re-open the escaped "*" as ".*"
        escaped = re.escape(parsed["pattern"])
        regex = "^" + escaped.replace("\\*", ".*") + "$"
        return df.filter(F.col("entity_code").rlike(regex))

    if mode in ("SINGLE", "LIST"):
        return df.filter(F.col("entity_code").isin(parsed["values"]))

    if mode != "ALL":
        raise ValueError(f"Unsupported selector mode: {parsed['mode']} for selector={selector}")

    return df


# -----------------------------
# Helpers (anomaly count)
# -----------------------------
def _count_anomalies(gold_run_id: str, entity_value: str) -> int:
    """
    Count rows in the anomaly event table for one run and one entity.

    Rows are matched on gold_run_id == `gold_run_id` and
    entity == `entity_value`. This function performs no fallback of its own:
    the caller decides which identifier (target table or entity code) to pass.

    Best-effort: any failure while reading or counting is reported with a
    printed warning and 0 is returned, so an unavailable anomaly table never
    fails the step.

    Args:
        gold_run_id: Run identifier to match.
        entity_value: Value to match against the `entity` column.

    Returns:
        The number of matching rows, or 0 when the count could not be obtained.
    """
    try:
        matches = spark.table(ANOM_EVENT_TABLE).filter(
            (F.col("gold_run_id") == gold_run_id) &
            (F.col("entity") == entity_value)
        )
        return int(matches.count())
    except Exception as exc:
        # Stay best-effort, but make the failure visible instead of silently
        # reporting "0 anomalies".
        print(
            f"[WARN] anomaly count unavailable for gold_run_id={gold_run_id}, "
            f"entity={entity_value}: {exc}"
        )
        return 0

StatementMeta(, 03bb4dbd-ccf1-4a99-974b-a21bdb9739e9, 29, Finished, Available, Finished)

AttributeError: module 'notebookutils.mssparkutils' has no attribute 'widgets'

In [None]:
# ============================================================
# 1) Parameters (widgets)
# ============================================================
# Each parameter is resolved through _get_widget; the second argument is the
# value used when the parameter is unset or blank.
p_entity_code   = _get_widget("p_entity_code", "ALL")
p_load_mode     = _get_widget("p_load_mode", "FULL")
p_as_of_date    = _get_widget("p_as_of_date", "")
p_gold_run_id   = _get_widget("p_gold_run_id", "")
p_pipeline_name = _get_widget("p_pipeline_name", "manual")
p_triggered_by  = _get_widget("p_triggered_by", "manual")
p_environment   = _get_widget("p_environment", "dev")

# Reuse the supplied run id when it is non-blank, otherwise generate one
gold_run_id = p_gold_run_id.strip() or str(uuid.uuid4())
run_start = _utc_now()

# Fail fast: required tables exist
for t in (CTL_TABLE, LOG_RUNS_TABLE, LOG_STEPS_TABLE):
    try:
        spark.table(t).limit(1).count()
    except Exception as e:
        # Chain the original exception so its traceback is kept as the cause
        raise ValueError(f"Required table '{t}' not found or not accessible. Root error: {str(e)}") from e

StatementMeta(, 03bb4dbd-ccf1-4a99-974b-a21bdb9739e9, -1, Cancelled, , Cancelled)

In [None]:
# ============================================================
# 2) Load control table and resolve entity list
# ============================================================
ctl_df = spark.table(CTL_TABLE)

# Columns the dispatcher cannot run without
required_cols = {
    "entity_code", "enabled", "load_order", "notebook_name", "load_mode_default",
    "critical", "timeout_minutes", "retries"
}
# Sorted so the error message is deterministic (set iteration order is not)
missing = sorted(required_cols.difference(ctl_df.columns))
if missing:
    raise ValueError(f"'{CTL_TABLE}' missing required columns: {missing}. Current columns={ctl_df.columns}")

# Project to a fixed, explicitly typed set of columns and keep enabled rows only
ctl_df = (
    ctl_df
    .select(
        F.col("entity_code").cast("string").alias("entity_code"),
        F.col("enabled").cast("boolean").alias("enabled"),
        F.col("load_order").cast("int").alias("load_order"),
        F.col("notebook_name").cast("string").alias("notebook_name"),
        F.col("load_mode_default").cast("string").alias("load_mode_default"),
        F.col("critical").cast("boolean").alias("critical"),
        F.col("timeout_minutes").cast("int").alias("timeout_minutes"),
        F.col("retries").cast("int").alias("retries"),
        # optional columns: a typed NULL is substituted when the column is absent
        (F.col("target_table").cast("string") if "target_table" in ctl_df.columns else F.lit(None).cast("string")).alias("target_table"),
        (F.col("entity_type").cast("string") if "entity_type" in ctl_df.columns else F.lit(None).cast("string")).alias("entity_type"),
    )
    # Rows where enabled is NULL are dropped as well, same as comparing with True
    .filter(F.col("enabled"))
)

# Apply the entity selector (ALL / single / list / wildcard)
ctl_df = _selector_filter(ctl_df, p_entity_code)

# Execution order: load_order first, entity_code as the tie-breaker
entities = (
    ctl_df
    .orderBy(F.col("load_order").asc(), F.col("entity_code").asc())
    .collect()
)

if not entities:
    raise ValueError(f"No enabled entities matched selector '{p_entity_code}' in '{CTL_TABLE}'.")

In [None]:
# ============================================================
# 3) Start run log (RUNNING)
# ============================================================
# Snapshot of the run parameters and the resolved entity list; stored as JSON
# on the run-log rows.
params_obj = dict(
    selector=p_entity_code,
    load_mode=p_load_mode,
    as_of_date=p_as_of_date,
    pipeline_name=p_pipeline_name,
    triggered_by=p_triggered_by,
    environment=p_environment,
    entity_count=len(entities),
    entities=[entity_row["entity_code"] for entity_row in entities],
    execution_contract="data-driven (ctx in gold_log_steps.RUNNING.payload_json)",
)

# Opening row for this run: no end timestamp, duration or error yet
_write_run_log(dict(
    gold_run_id=gold_run_id,
    pipeline_name=p_pipeline_name,
    layer="gold",
    start_ts=run_start,
    end_ts=None,
    duration_ms=None,
    status="RUNNING",
    triggered_by=p_triggered_by,
    environment=p_environment,
    params_json=_json_dumps(params_obj),
    error_message=None,
))

In [24]:
# ============================================================
# 4) Execute entities
# ============================================================
# Accumulators filled by the entity loop: one dict per successful entity in
# `results`, one per failed entity in `failures`; `step_seq` numbers the steps.
results, failures = [], []
step_seq = 0

def _run_child_notebook(nb_name: str, timeout_sec: int) -> str:
    """
    Run a child notebook and return whatever the run call returns.

    Goes through _nbu when it is available, otherwise through mssparkutils.
    The child is always started with an empty argument map.
    """
    runner = _nbu if _nbu is not None else mssparkutils
    return runner.notebook.run(nb_name, timeout_sec, {})

# Runs every resolved entity in order. For each entity:
#   1. append a RUNNING step row whose payload_json carries the ctx
#   2. run the child notebook, retrying up to `retries` extra times
#   3. append a SUCCESS or FAILED step row and record the outcome
# A failure of a critical entity stops the loop.
for r in entities:
    step_seq += 1

    entity_code  = r["entity_code"]
    nb_name      = r["notebook_name"]
    target_table = r["target_table"] if "target_table" in r.asDict() else None

    critical    = _safe_bool(r["critical"], True)
    timeout_min = _safe_int(r["timeout_minutes"], 0)
    # Clamp to zero: with a negative value the attempt loop would never run,
    # and the entity would be logged FAILED with 0 attempts and no error.
    retries      = max(0, _safe_int(r["retries"], 0))
    max_attempts = retries + 1

    # NOTE(review): 0 is passed through unchanged when no positive timeout is
    # configured; confirm how the notebook runtime interprets a 0 timeout.
    timeout_sec = 0 if timeout_min <= 0 else timeout_min * 60

    # NOTE(review): p_load_mode is resolved with default "FULL" in section 1,
    # so the control-table default below only applies when p_load_mode is
    # blank; confirm that precedence is intended.
    ctl_default_mode = (r["load_mode_default"] or "").strip().upper()
    requested_mode = (p_load_mode or "").strip()
    effective_mode = requested_mode.upper() if requested_mode else (ctl_default_mode or "FULL")

    step_exec_id = str(uuid.uuid4())

    # Execution context the child reads back from the RUNNING step row
    ctx = {
        "gold_run_id": gold_run_id,
        "step_exec_id": step_exec_id,
        "entity_code": entity_code,
        "notebook_name": nb_name,
        "load_mode": effective_mode,
        "as_of_date": p_as_of_date,
        "pipeline_name": p_pipeline_name,
        "triggered_by": p_triggered_by,
        "environment": p_environment
    }

    step_start = _utc_now()

    # Fields shared by every step row written for this entity
    step_common = {
        "gold_run_id": gold_run_id,
        "step_seq": step_seq,
        "entity_code": entity_code,
        "notebook_name": nb_name,
        "start_ts": step_start,
        "critical": critical,
    }
    # Metric columns left empty on RUNNING and FAILED rows
    empty_metrics = {
        "row_in": None,
        "row_out": None,
        "row_rejected": None,
        "partition_count": None,
        "dedup_dropped": None,
        "anom_count": None,
    }

    # RUNNING step row (append)
    _write_step_log({
        **step_common,
        **empty_metrics,
        "end_ts": None,
        "duration_ms": None,
        "status": "RUNNING",
        "error_message": None,
        "payload_json": _json_dumps({"ctx": ctx})
    })

    last_error = None
    attempt = 0
    success = False
    child_payload = None
    child_raw = None

    while attempt < max_attempts and not success:
        attempt += 1
        try:
            child_raw = _run_child_notebook(nb_name, timeout_sec)
        except Exception as e:
            last_error = str(e)
            child_payload = None
            # minimal backoff, only when another attempt will follow
            if attempt < max_attempts:
                time.sleep(1)
            continue

        # The child's exit value is expected to be a JSON string; anything
        # else (or unparsable text) is kept under "raw".
        try:
            child_payload = json.loads(child_raw) if isinstance(child_raw, str) else {"raw": str(child_raw)}
        except Exception:
            child_payload = {"raw": child_raw}

        # NOTE(review): any child run that returns without raising counts as
        # SUCCESS here, whatever "status" its payload carries; confirm that
        # children raise on failure.
        success = True

    step_end = _utc_now()
    duration_ms = _calc_duration_ms(step_start, step_end)

    if success:
        # Extract metrics if provided by child (absent keys become None)
        metrics_src = child_payload if isinstance(child_payload, dict) else {}

        row_in          = metrics_src.get("row_in")
        row_out         = metrics_src.get("row_out")
        row_rejected    = metrics_src.get("row_rejected")
        dedup_dropped   = metrics_src.get("dedup_dropped")
        partition_count = metrics_src.get("partition_count")

        # anomaly count: prefer target_table (gold_*), fallback entity_code
        anom_count = metrics_src.get("anom_count")
        if anom_count is None:
            anom_count = _count_anomalies(gold_run_id, target_table or entity_code)

        _write_step_log({
            **step_common,
            "end_ts": step_end,
            "duration_ms": duration_ms,
            "status": "SUCCESS",
            "row_in": row_in,
            "row_out": row_out,
            "row_rejected": row_rejected,
            "partition_count": partition_count,
            "dedup_dropped": dedup_dropped,
            "anom_count": anom_count,
            "error_message": None,
            "payload_json": _json_dumps({
                "ctx": ctx,
                "child_payload": child_payload if isinstance(child_payload, dict) else {"raw": str(child_payload)}
            })
        })

        results.append({
            "entity_code": entity_code,
            "notebook_name": nb_name,
            "status": "SUCCESS",
            "attempts": attempt,
            "payload": child_payload
        })

    else:
        _write_step_log({
            **step_common,
            **empty_metrics,
            "end_ts": step_end,
            "duration_ms": duration_ms,
            "status": "FAILED",
            "error_message": last_error,
            "payload_json": _json_dumps({
                "ctx": ctx,
                "error": last_error,
                "attempts": attempt
            })
        })

        failures.append({
            "entity_code": entity_code,
            "notebook_name": nb_name,
            "status": "FAILED",
            "attempts": attempt,
            "error": last_error,
            "critical": critical
        })

        # A critical failure stops the run; remaining entities are not started
        if critical:
            break

StatementMeta(, 03bb4dbd-ccf1-4a99-974b-a21bdb9739e9, 26, Finished, Available, Finished)

In [25]:
# ============================================================
# 5) End run log + exit payload
# ============================================================
run_end = _utc_now()
run_duration_ms = _calc_duration_ms(run_start, run_end)

# Run status:
#   SUCCESS - no entity failed
#   FAILED  - at least one critical entity failed
#   PARTIAL - only non-critical entities failed
if not failures:
    any_critical_failed = False
    run_status = "SUCCESS"
    run_error = None
else:
    critical_failures = [f for f in failures if f.get("critical")]
    any_critical_failed = bool(critical_failures)
    if any_critical_failed:
        run_status = "FAILED"
        # Report the critical failure: an earlier non-critical failure is not
        # what made the run FAILED.
        run_error = critical_failures[0].get("error")
    else:
        run_status = "PARTIAL"
        run_error = failures[0].get("error")

# Final run row (append); the RUNNING row written at start is left in place
_write_run_log({
    "gold_run_id": gold_run_id,
    "pipeline_name": p_pipeline_name,
    "layer": "gold",
    "start_ts": run_start,
    "end_ts": run_end,
    "duration_ms": run_duration_ms,
    "status": run_status,
    "triggered_by": p_triggered_by,
    "environment": p_environment,
    "params_json": _json_dumps(params_obj),
    "error_message": run_error
})

# Payload handed back to the caller as the notebook exit value
final_payload = {
    "schema_version": "gold_dispatcher_runtime_v1",
    "gold_run_id": gold_run_id,
    "status": run_status,
    "start_ts_utc": run_start.strftime("%Y-%m-%dT%H:%M:%SZ"),
    "end_ts_utc": run_end.strftime("%Y-%m-%dT%H:%M:%SZ"),
    "duration_ms": run_duration_ms,
    "selector": p_entity_code,
    "load_mode": p_load_mode,
    "as_of_date": p_as_of_date,
    "pipeline_name": p_pipeline_name,
    "triggered_by": p_triggered_by,
    "environment": p_environment,
    "entity_count_total": len(entities),
    "entity_count_success": sum(1 for x in results if x.get("status") == "SUCCESS"),
    "entity_count_failed": sum(1 for x in failures if x.get("status") == "FAILED"),
    "results": results,
    "failures": failures
}

if _nbu is not None:
    _nbu.notebook.exit(_json_dumps(final_payload))
else:
    mssparkutils.notebook.exit(_json_dumps(final_payload))


StatementMeta(, 03bb4dbd-ccf1-4a99-974b-a21bdb9739e9, 27, Finished, Available, Finished)

ExitValue: {"schema_version": "gold_dispatcher_runtime_v1", "gold_run_id": "25709f75-a99f-45db-b2c0-f4a3b73c3c73", "status": "SUCCESS", "start_ts_utc": "2026-01-07T19:19:05Z", "end_ts_utc": "2026-01-07T19:24:42Z", "duration_ms": 337248, "selector": "ALL", "load_mode": "FULL", "as_of_date": "", "pipeline_name": "manual", "triggered_by": "manual", "environment": "dev", "entity_count_total": 5, "entity_count_success": 5, "entity_count_failed": 0, "results": [{"entity_code": "dim_date", "notebook_name": "nb_gold_dim_date", "status": "SUCCESS", "attempts": 1, "payload": {"status": "SUCCESS", "gold_run_id": "25709f75-a99f-45db-b2c0-f4a3b73c3c73", "step_exec_id": "2d95a558-9930-4209-a5e3-7adc63047c6c", "entity_code": "dim_date", "entity": "gold_dim_date", "target_table": "gold_dim_date", "row_in": 7670, "row_out": 7670, "row_rejected": 0, "dedup_dropped": 0, "partition_count": 0, "anom_count": 0}}, {"entity_code": "dim_mcc", "notebook_name": "nb_gold_dim_mcc", "status": "SUCCESS", "attempts":