In [0]:
%pip install azure-eventhub
# Restart the Python process so the package installed above becomes importable.
# NOTE(review): no visible cell imports `azure-eventhub`; the stream below is read
# through the Spark "eventhubs" format — confirm this install is actually needed.
dbutils.library.restartPython()

In [0]:
%pip install --upgrade azure-eventhub
# NOTE(review): the previous cell already installs this package and restarts
# Python; this cell repeats both steps with --upgrade. Consider keeping a single
# install cell with a pinned version so reruns are reproducible.
dbutils.library.restartPython()

In [0]:
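# Import required Spark functions.
# NOTE(review): `cast` is not a documented pyspark.sql.functions helper and is not
# used below (casting is done with Column.cast) — consider dropping it from the import.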
from pyspark.sql.functions import col, cast

# --- 1. Configuration (Your provided logic) ---

# Event Hubs endpoint and the entity (hub) to read from.
eventhub_namespace = "evhns-natraining.servicebus.windows.net"
eventhub_name = "evh-natraining-biju"

# Secret scope and key under which the shared access key is stored, plus the
# name of the shared access policy that key belongs to.
keyvault_scope = "dbx-ss-kv-natraining-2"
secret_name = "evh-natraining-read-write"
shared_access_key_name = "SharedAccessKeyToSendAndListen"

# Fetch the key at run time so it never appears in the notebook source.
secret_value = dbutils.secrets.get(scope=keyvault_scope, key=secret_name)

# Assemble the connection string from its four ';'-separated parts.
connection_string = "".join(
    [
        f"Endpoint=sb://{eventhub_namespace}/;",
        f"SharedAccessKeyName={shared_access_key_name};",
        f"SharedAccessKey={secret_value};",
        f"EntityPath={eventhub_name}",
    ]
)

# --- 2. Event Hubs Setup (Native Connector) ---

# The native connector (azure-eventhubs-spark 2.3.15 and later) requires the
# connection string to be encrypted with the connector's own JVM helper;
# passing the plain-text string makes the stream fail when it starts.
eh_conf = {
    "eventhubs.connectionString": (
        spark.sparkContext._jvm.org.apache.spark.eventhubs.EventHubsUtils.encrypt(
            connection_string
        )
    )
}

# Optional: define where to start reading. From PySpark the position is passed
# as a JSON string, not a bare offset (requires `import json`):
# eh_conf["eventhubs.startingPosition"] = json.dumps(
#     {"offset": "@latest", "seqNo": -1, "enqueuedTime": None, "isInclusive": True}
# )

# --- 3. Read Stream ---

# Uses the native connector, not Kafka. The connector library
# (com.microsoft.azure:azure-eventhubs-spark_2.12:2.3.18) has to be attached to
# the cluster; it cannot be supplied as a reader option.
df_stream = (
    spark.readStream
    .format("eventhubs")
    .options(**eh_conf)
    .load()
)

# --- 4. Transform Data ---

# The native connector delivers the event payload in the binary 'body' column.
# Cast it to string so it is readable, and keep only the columns of interest.
df_readable = df_stream.select(
    col("body").cast("string").alias("body"),
    "enqueuedTime",
    "offset",
    "partition",
)

# --- 5. Display or Write to Sink ---

# Use display() to see the live stream in the notebook.
# NOTE(review): this looks like an interactive preview only — use the writeStream
# variant below if the events need to be persisted.
display(df_readable)

# OR: Write to a Delta Table (append mode, with a checkpoint location so the
# query can keep track of its progress between runs).
# df_readable.writeStream \
#   .format("delta") \
#   .outputMode("append") \
#   .option("checkpointLocation", "/mnt/telemetry/checkpoints/evh_raw") \
#   .start("/mnt/telemetry/tables/evh_raw_data")