In [0]:
# -------------------------------------------
# Simple Streaming Listener
# -------------------------------------------
from pyspark.sql.streaming import StreamingQueryListener
import logging, os, re
from datetime import datetime

def attach_streaming_listener(job_name: str, log_dir: str = "/dbfs/tmp") -> "StreamingQueryListener":
    """
    Attach a lightweight listener that logs streaming progress to a job-specific file.

    The listener is registered on the notebook-global ``spark`` session; this
    function does not receive or create a session itself. There is no
    dependency on an external logger object: a dedicated, non-propagating
    logger named ``<safe_job>_listener`` is created on first use and writes to
    ``<log_dir>/<safe_job>_listener_<timestamp>.log``.

    Calling this function again for the same job (e.g. re-running the cell)
    replaces the previously attached listener instead of stacking a second
    one, so every event is logged once.

    Args:
        job_name: Label used as the prefix of every log message. Characters
            other than word characters and ``-`` are replaced with ``_`` to
            build the logger name and the log file name.
        log_dir: Directory for the log file; created if it does not exist.

    Returns:
        The listener instance that was registered, so the caller can later
        pass it to ``spark.streams.removeListener``.
    """
    safe_job = re.sub(r"[^\w\-]", "_", job_name)
    logger = logging.getLogger(f"{safe_job}_listener")
    logger.setLevel(logging.INFO)

    # logging.getLogger returns the same object for the same name, so the
    # handler check keeps repeated calls from adding a second file handler.
    if not logger.handlers:
        os.makedirs(log_dir, exist_ok=True)
        ts = datetime.now().strftime("%Y%m%d_%H%M%S")
        fh = logging.FileHandler(os.path.join(log_dir, f"{safe_job}_listener_{ts}.log"))
        fh.setFormatter(logging.Formatter("%(asctime)s - %(levelname)s - %(name)s - %(message)s"))
        fh.setLevel(logging.INFO)
        logger.addHandler(fh)
        logger.propagate = False
        logger.info("%s - Listener started.", job_name)

    def _field(obj, key):
        # Progress objects expose their fields as attributes; fall back to
        # subscripting for dict-like payloads.
        # NOTE(review): whether subscripting works depends on the PySpark
        # version -- confirm against the target runtime.
        try:
            return getattr(obj, key)
        except AttributeError:
            return obj[key]

    class _Listener(StreamingQueryListener):
        def onQueryStarted(self, event):
            logger.info("%s - Query started: %s", job_name, event.name)

        def onQueryProgress(self, event):
            p = event.progress
            sources = _field(p, "sources")
            # Only the first source is reported, matching the single
            # "source=" field in the log line.
            src = _field(sources[0], "description") if sources else "Unknown"
            logger.info(
                "%s - Batch %s at %s - inputRows=%s, inputRPS=%s, procRPS=%s, source=%s",
                job_name,
                _field(p, "batchId"),
                _field(p, "timestamp"),
                _field(p, "numInputRows"),
                _field(p, "inputRowsPerSecond"),
                _field(p, "processedRowsPerSecond"),
                src,
            )

        def onQueryTerminated(self, event):
            logger.info("%s - Query terminated.", job_name)
            # Record the failure reason, if any, so a crash is distinguishable
            # from a clean stop in the log file.
            error = getattr(event, "exception", None)
            if error:
                logger.error(
                    "%s - Query %s terminated with exception: %s",
                    job_name,
                    getattr(event, "id", None),
                    error,
                )

    # The per-job logger outlives re-definitions of this function (cell
    # re-runs), so it is used to remember the listener attached last time.
    previous = getattr(logger, "_attached_listener", None)
    if previous is not None:
        spark.streams.removeListener(previous)

    listener = _Listener()
    spark.streams.addListener(listener)
    logger._attached_listener = listener
    return listener
