In [0]:
# Databricks notebook source
# MAGIC %python
from __future__ import annotations

import copy
import json
from typing import Any, Dict, Optional

from pyspark.sql import SparkSession

In [0]:


# The defaults hard-coded in load_config() below can be replaced with dbutils.widgets values as needed for jobs.
# Example widgets (uncomment if you want interactive params in the UI):
# dbutils.widgets.text("env", "dev")
# dbutils.widgets.text("raw_path", "/mnt/raw/nasa/neo")
# dbutils.widgets.text("bronze_path", "/mnt/bronze/nasa/neo")
# dbutils.widgets.text("silver_path", "/mnt/silver/nasa/neo")
# dbutils.widgets.text("since_date", "2020-01-01")

def load_config(
    *,
    env: str = "dev",
    overrides: Optional[Dict[str, Any]] = None
) -> Dict[str, Any]:
    """
    Build a configuration dictionary for the pipeline.

    A fresh dictionary of defaults is created on every call and then
    ``overrides`` is merged on top of it recursively: when both the default
    and the override value for a key are dicts, they are merged key by key;
    otherwise the override value replaces the default outright (lists are
    replaced, not concatenated).

    Override values are deep-copied while merging, so the returned config
    never shares mutable objects with the caller's ``overrides`` dict.

    Parameters
    ----------
    env : str
        Environment name (e.g., 'dev', 'prod'). Stored under the ``"env"``
        key. Note that an ``"env"`` entry in ``overrides`` takes precedence
        over this argument.
    overrides : dict, optional
        Runtime key/value overrides that take precedence over defaults.
        ``None`` or an empty dict leaves the defaults untouched. Keys that
        do not exist in the defaults are added as-is.

    Returns
    -------
    dict
        Resolved configuration object with the top-level keys ``env``,
        ``paths``, ``source``, ``io`` and ``expectations`` (plus any extra
        keys supplied through ``overrides``).
    """
    # Defaults - adjust to your workspace/mounts
    cfg: Dict[str, Any] = {
        "env": env,
        "paths": {
            "raw": "/mnt/raw/nasa/neo",
            "bronze": "/mnt/bronze/nasa/neo",
            "silver": "/mnt/silver/nasa/neo",
            "gold": "/mnt/gold/nasa/neo",
        },
        # Example: NASA NEO API parameters, if you fetch from API
        "source": {
            "type": "file",  # 'file' or 'api'
            "format": "json",
            "api_url": "https://api.nasa.gov/neo/rest/v1/feed",
            # Only the secret scope/key *names* live here, not the API key itself.
            "api_key_secret_scope": "secrets-scope",
            "api_key_secret_key": "nasa_api_key",
            "since_date": "2020-01-01",
        },
        "io": {
            "read_mode": "PERMISSIVE",  # or FAILFAST/DROPMALFORMED
            "write_mode": "append",     # or 'overwrite' for idempotent runs
            "merge_schema": True,
        },
        "expectations": {
            "min_rows": 1,
            "required_columns": [],
        },
    }

    # Apply runtime overrides
    if overrides:
        def deep_update(target: Dict[str, Any], updates: Dict[str, Any]) -> None:
            """Recursively merge ``updates`` into ``target`` in place."""
            for key, value in updates.items():
                if isinstance(value, dict) and isinstance(target.get(key), dict):
                    # Both sides are dicts: merge instead of replacing wholesale.
                    deep_update(target[key], value)
                else:
                    # Copy so later mutation of the config cannot leak back
                    # into the caller's overrides (and vice versa).
                    target[key] = copy.deepcopy(value)
        deep_update(cfg, overrides)

    return cfg

In [0]:
def show_config(cfg: Dict[str, Any]) -> str:
    """
    Render a config dict as human-readable JSON (for logs/inspection).

    Parameters
    ----------
    cfg : dict
        Configuration to serialize. All values must be JSON-serializable.

    Returns
    -------
    str
        JSON text indented by two spaces, with keys sorted alphabetically
        at every nesting level.
    """
    dump_options = {"indent": 2, "sort_keys": True}
    rendered = json.dumps(cfg, **dump_options)
    return rendered