#### Step 1: Import Required Libraries.
Import the necessary libraries for Application Insights.

In [0]:
import os
import json
import logging
from pyspark.sql.functions import col
from pyspark.sql.streaming import StreamingQueryListener
from pyspark.sql.session import SparkSession
from pyspark.sql.types import TimestampType
from pyspark.sql import DataFrame
import azure.identity
from azure.identity import DefaultAzureCredential, EnvironmentCredential, ManagedIdentityCredential, SharedTokenCacheCredential
from azure.identity import ClientSecretCredential
from azure.monitor.ingestion import LogsIngestionClient
from azure.core.exceptions import HttpResponseError
from opentelemetry._logs import (
    get_logger_provider,
    set_logger_provider,
)
from opentelemetry.sdk._logs import (
    LoggerProvider,
    LoggingHandler,
)
from opentelemetry.sdk._logs.export import BatchLogRecordProcessor
from azure.monitor.opentelemetry.exporter import AzureMonitorLogExporter

#### Step 3: Set Up Application Insights Logger.
Configure the logger to send metrics to Application Insights.

In [0]:
# Install a fresh SDK LoggerProvider as the global OpenTelemetry logger provider.
set_logger_provider(LoggerProvider())

# Build the Azure Monitor exporter from the connection string stored under key
# "appinsightsconnstr" in the "myscope" secret scope.
# NOTE(review): `dbutils` is not imported in this file; presumably the
# notebook-provided Databricks utility object -- confirm.
exporter = AzureMonitorLogExporter.from_connection_string(
    dbutils.secrets.get("myscope", key="appinsightsconnstr")
)

# Hand log records to the exporter through a batching processor registered on
# whichever provider is currently the global one.
get_logger_provider().add_log_record_processor(BatchLogRecordProcessor(exporter))

# Namespaced logger used by the rest of the notebook: INFO and above are passed
# to the OpenTelemetry LoggingHandler attached below.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
handler = LoggingHandler()
logger.addHandler(handler)

#### Step 4: Modify the StreamingQueryListener to Send Metrics.
Update the ValueTrackingListener class to send metrics to Application Insights.

In [0]:
# COMMAND ----------
from pyspark.sql.streaming import StreamingQueryListener
from pyspark.sql.functions import *

class ValueTrackingListener(StreamingQueryListener):
    """Log streaming query lifecycle events and the observed ``avg_value`` metric.

    Every callback writes through the module-level ``logger``; whatever
    handlers are attached to that logger decide where the records end up.
    """

    def onQueryStarted(self, event):
        """Log the name and id of a query that has just started.

        Args:
            event: Query-started event; its ``name`` and ``id`` are logged.
        """
        logger.info(f"'{event.name}' [{event.id}] got started!")

    def onQueryProgress(self, event):
        """Log the ``avg_value`` observed for the latest progress update.

        Nothing is logged when the progress report carries no observation
        named ``"metric"``.

        Args:
            event: Query-progress event. Only ``event.progress`` is read; the
                query name and id are taken from the progress object, because
                the progress event itself does not expose them.
        """
        progress = event.progress
        row = progress.observedMetrics.get("metric")
        if row is None:
            return

        avg_value = row.avg_value
        logger.info(f"Recorded metric avg_value: {avg_value}")

        # Send custom metric to Application Insights.
        # Structured fields have to travel via `extra=` so they become
        # attributes of the log record; passing a dict as the message would
        # only stringify it. The id is a UUID, so convert it to a plain string.
        dimensions = {
            "avg_value": avg_value,
            "query_name": progress.name,
            "query_id": str(progress.id),
        }
        # Leave out fields that have no value (e.g. an unnamed query) rather
        # than attaching None as an attribute.
        logger.info(
            "Streaming query metric",
            extra={key: value for key, value in dimensions.items() if value is not None},
        )

    def onQueryTerminated(self, event):
        """Log the id of a query that has terminated.

        Args:
            event: Query-terminated event; its ``id`` is logged.
        """
        logger.info(f"{event.id} got terminated!")

# Add listener
# The instance is kept in `listener` so the same object can later be passed to
# spark.streams.removeListener().
listener = ValueTrackingListener()
spark.streams.addListener(listener)

#### Step 5: Run the Streaming Query.
Ensure your streaming query is set up to use the listener.

In [0]:
# COMMAND ----------
# Streaming source: the built-in "rate" format with rowsPerSecond set to 100.
streaming_df = spark.readStream.format("rate").option("rowsPerSecond", 100).load()

# Aggregates published under the observation name "metric"; the listener looks
# them up by that same name in the progress report.
row_count = count(lit(1)).alias("cnt")  # number of processed rows
value_average = avg(col("value")).alias("avg_value")  # average of row values
observed_streaming_df = streaming_df.observe("metric", row_count, value_average)

# COMMAND ----------
# Write the observed stream to the console sink under the query name
# "Rate query" and start it; the returned handle is kept in `query`.
console_writer = observed_streaming_df.writeStream.format("console").queryName("Rate query")
query = console_writer.start()

# COMMAND ----------
import time

# Let the query run for 120 seconds, then tear everything down. The cleanup
# sits in `finally` so that an interrupted wait (e.g. a cancelled cell) does
# not leave the stream running or the listener registered.
try:
    time.sleep(120)
finally:
    try:
        query.stop()
    finally:
        # Unregister the listener even if stopping the query raised.
        spark.streams.removeListener(listener)