### Extract data from order - Complex JSON file (Having Arrays, Nested JSON and Data Issues)
Note: This structure is compatible with
1. incremental load
1. fault tolerance
1. schema evolution
1. Reusability
2. Error Handling
3. Logging
4. Schema validation
5. Modularization
6. Maintainability
7. Observability
8. Re-run safety

In [0]:
from pyspark.sql.functions import current_timestamp, col
from pyspark.sql.utils import AnalysisException
import traceback
import logging

In [0]:
# ADF Parameters (Widgets)
# Widget name -> default value, declared in one table so the creation loop
# and the reads below stay in sync.
_WIDGET_DEFAULTS = (
    ("SOURCE_PATH", "/Volumes/adf_adb_landing/landing/operational_data/customer"),
    ("TARGET_TABLE", "adf_adb_bronze.bronze.customer"),
    ("FILE_FORMAT", "json"),
    ("WRITE_MODE", "append"),
    ("JOB_NAME", "adb_nbk_ext_customer_json_incremental_01"),
    ("LOG_DIR", "/Volumes/adf_adb_logs/bronze_logs/operational_logs"),
    ("SCHEMA_EVOLUTION_MODE", "rescue"),
    ("QUERY_NAME", "customer_incremental_query"),
)

for _widget_name, _widget_default in _WIDGET_DEFAULTS:
    dbutils.widgets.text(_widget_name, _widget_default)


def _widget_value(name):
    """Return the current value of widget *name* with surrounding whitespace removed."""
    return dbutils.widgets.get(name).strip()


# Fetch parameters
SOURCE_PATH = _widget_value("SOURCE_PATH")
TARGET_TABLE = _widget_value("TARGET_TABLE")
FILE_FORMAT = _widget_value("FILE_FORMAT")
WRITE_MODE = _widget_value("WRITE_MODE")
JOB_NAME = _widget_value("JOB_NAME")
LOG_DIR = _widget_value("LOG_DIR")
SCHEMA_EVOLUTION_MODE = _widget_value("SCHEMA_EVOLUTION_MODE")
QUERY_NAME = _widget_value("QUERY_NAME")

##### Implementation for incremental load using Auto Loader - Structured Streaming compatible with batch style

In [0]:
%run ../01_includes/13_incremental_core_modules

In [0]:
# Init logger and listener
# The logger is created first so that a failure while attaching the
# streaming listener can itself be recorded.
logger = get_logger(JOB_NAME, LOG_DIR)
logger.info("Pipeline starting... (widget-driven)")
try:
    attach_streaming_listener(job_name=JOB_NAME, log_dir=LOG_DIR)
except Exception as e:
    # Mirror the pipeline stages: log the failure (with the stack trace),
    # close the logger, then re-raise so the run still fails.
    logger.error(f"Streaming listener attach failed: {e}\n{traceback.format_exc()}")
    close_logger(logger)
    raise

In [0]:
# -----------------------------------------
# Orchestration
# -----------------------------------------
# Runs the three pipeline stages in order: read -> enrich -> write.
# On failure each stage logs the error message followed by the full stack
# trace, closes the logger, and re-raises the original exception. The first
# line of every error message is unchanged, so existing log searches still match.

# Incremental read via Auto Loader
try:
    readed_data_df = read_incremental_data(
        path=SOURCE_PATH,
        file_format=FILE_FORMAT,
        schema_evolution_mode=SCHEMA_EVOLUTION_MODE,
        job_name=JOB_NAME,
        logger=logger
    )
except Exception as e:
    # str(e) alone drops the failing frame; append the traceback for diagnosis.
    logger.error(f"Read stage failed: {e}\n{traceback.format_exc()}")
    close_logger(logger)
    raise

# COMMAND ----------
# Enrichment (reuses your existing enricher)
try:
    enriched_df = enrich_data(readed_data_df)
    logger.info("Enrichment complete.")
except Exception as e:
    logger.error(f"Enrichment failed: {e}\n{traceback.format_exc()}")
    close_logger(logger)
    raise

# COMMAND ----------
# Streaming write to Delta (availableNow) + register table if needed
try:
    streaming_write_to_table(
        df=enriched_df,
        table_name=TARGET_TABLE,
        write_mode=WRITE_MODE,
        job_name=JOB_NAME,
        source_path=SOURCE_PATH,
        query_name=QUERY_NAME,
        logger=logger
    )
    logger.info("Pipeline completed successfully.")
except Exception as e:
    logger.error(f"Write stage failed: {e}\n{traceback.format_exc()}")
    raise
finally:
    # Last stage: the logger is closed on both the success and the failure path.
    close_logger(logger)



In [0]:
# Quick checks: render the current contents of the target table.
target_table_df = spark.table(TARGET_TABLE)
display(target_table_df)

In [0]:
# Load everything under LOG_DIR as plain text and render it.
log_lines_df = spark.read.text(LOG_DIR)
display(log_lines_df)