In [0]:
from pyspark.sql.streaming import StreamingQueryListener

# 1. Define the Listener Class
class MyStreamMonitor(StreamingQueryListener):
    """Streaming query listener that prints start, progress and stop events."""

    def onQueryStarted(self, event):
        """Print a start banner labelled with the query name, or its id if the name is empty."""
        label = event.name or event.id
        print(f"--- STREAM STARTED: {label} ---")

    def onQueryProgress(self, event):
        """Print batch id, input row count and input rate for a progress event."""
        # Invoked each time a micro-batch completes.
        stats = event.progress
        print(
            f"Batch: {stats.batchId} | Rows Processed: {stats.numInputRows} | "
            f"Input Rate: {stats.inputRowsPerSecond} rows/sec"
        )

    def onQueryTerminated(self, event):
        """Print a stop banner, followed by the failure when one is attached to the event."""
        print(f"--- STREAM STOPPED: {event.id} ---")
        failure = event.exception
        if not failure:
            return
        print(f"Termination caused by Error: {failure}")



In [0]:
# 2. Hook a monitor instance into the Spark session's streaming query manager
monitor = MyStreamMonitor()
spark.streams.addListener(listener=monitor)

# From here on, any stream started in this session triggers the listener;
# 'onQueryProgress' fires automatically without further wiring.

In [0]:
# addListener() returns None, so there is nothing to display, and calling it
# here would register `monitor` a second time (every event handled twice).
# Report the currently active streaming queries instead; this does not
# modify the set of registered listeners.
print(f"Active streaming queries: {[q.name or q.id for q in spark.streams.active]}")

In [0]:
from pyspark.sql.streaming import StreamingQueryListener

class DatabricksStreamMonitor(StreamingQueryListener):
    """Listener that reports streaming query start, progress and termination via print()."""

    def onQueryStarted(self, event):
        """Announce that a query started.

        A query started without a name has no usable ``event.name``; a
        placeholder is shown instead of the literal text 'None'. Output for
        named queries is unchanged.
        """
        stream_name = event.name or "<unnamed>"
        print(f"Stream '{stream_name}' (ID: {event.id}) started.")

    def onQueryProgress(self, event):
        """Print the batch id and the number of input rows of a progress event."""
        progress = event.progress
        # NOTE: print() writes to stdout, not to Log4j. The output is expected
        # to surface in the driver's stdout log — TODO confirm on the target
        # runtime, or switch to a real logger if Log4j routing is required.
        print(f"Batch {progress.batchId} processed {progress.numInputRows} rows.")

    def onQueryTerminated(self, event):
        """Report whether the query ended with an error or stopped cleanly."""
        if event.exception:
            print(f"Stream terminated with error: {event.exception}")
        else:
            print("Stream stopped successfully.")

In [0]:
# 1. Instantiate the monitor
my_monitor = DatabricksStreamMonitor()

# 2. Register the listener (no duplicate check possible in PySpark).
#    Each run of this cell creates a new instance and registers it, so
#    re-running the cell adds another listener on top of the earlier one.
spark.streams.addListener(my_monitor)
# The check mark was previously stored as mojibake ("âœ…"); restored to the
# intended U+2705 character.
print("✅ Listener registered successfully.")
