In [0]:
%pip install azure-eventhub
# Installs the azure-eventhub Python package into this notebook's environment.
# NOTE(review): the version is not pinned, so a re-run may pick up a different release.
# NOTE(review): the cells below read with .format("eventhubs") and call
# spark._jvm.org.apache.spark.eventhubs.*, which are JVM-side classes; presumably the
# Spark Event Hubs connector must be attached to the cluster separately — confirm.

# Restart Python so the newly installed package is available in this notebook
dbutils.library.restartPython()

In [0]:
%pip install --upgrade pip
# Upgrades pip itself in the notebook environment.
# NOTE(review): this runs after the package install and Python restart in the previous
# cell; if a newer pip is actually needed, it presumably should run first — confirm.

In [0]:
from pyspark.sql.functions import col

# Purpose: open a Structured Streaming read against an Event Hub through the
# "eventhubs" Spark source and display the decoded message bodies.

# Inputs (update as needed)
eh_namespace = "evhns-natraining.servicebus.windows.net"
eh_name = "evh-natraining-biju"
keyvault_scope = "dbx-ss-kv-natraining-2"
secret_name = "evh-natraining-read-write"  # SAS key with Send/Listen
# NOTE(review): the policy name must be the policy that the stored key belongs to.
# Other cells in this notebook pair the same secret with
# "SharedAccessKeyToSendAndListen" — confirm which policy is correct.
shared_access_key_name = "RootManageSharedAccessKey"

# Fetch SAS key from secret scope. Strip surrounding whitespace so a trailing
# newline stored with the secret cannot corrupt the connection string.
sas_key = dbutils.secrets.get(scope=keyvault_scope, key=secret_name).strip()

# Build connection string
connection_string = (
    f"Endpoint=sb://{eh_namespace}/;"
    f"SharedAccessKeyName={shared_access_key_name};"
    f"SharedAccessKey={sas_key};"
    f"EntityPath={eh_name}"
)

# Event Hubs config.
# The connector expects the connection string in encrypted form, so it is passed
# through EventHubsUtils.encrypt before being handed to the source.
eh_conf = {
    "eventhubs.connectionString": spark._jvm.org.apache.spark.eventhubs.EventHubsUtils.encrypt(
        connection_string
    ),
}

# Read stream
raw_df = (
    spark.readStream
         .format("eventhubs")
         .options(**eh_conf)
         .load()
)

# Body is binary; convert to string for inspection
df = raw_df.withColumn("body", col("body").cast("string"))

display(df.select("enqueuedTime", "offset", "sequenceNumber", "body"))

In [0]:
# Purpose: build the read configuration dictionary (`event_hubs_conf`) for the
# "eventhubs" Spark source, with an encrypted connection string, the default
# consumer group, and a starting position at the beginning of the stream.

# Name of the Event Hub to read, taken from the "eventhub_name" notebook widget.
# NOTE(review): presumably the widget is created elsewhere or supplied as a job
# parameter — confirm, since nothing in this cell creates it.
EVENTHUB_NAME = dbutils.widgets.get("eventhub_name")

# Fetch the SAS key in this cell so it runs on a fresh kernel; previously
# `secret_value` was only assigned by a cell further down the notebook.
secret_value = dbutils.secrets.get(
    scope="dbx-ss-kv-natraining-2",
    key="evh-natraining-read-write",
)

# Strip whitespace and remove any embedded CR/LF (e.g. from the stored secret)
# so the connection string stays a single line.
EVENTHUB_CONN_STR = (
    "Endpoint=sb://evhns-natraining.servicebus.windows.net/;"
    "SharedAccessKeyName=SharedAccessKeyToSendAndListen;"
    f"SharedAccessKey={secret_value};"
    f"EntityPath={EVENTHUB_NAME}"
).strip().replace("\n", "").replace("\r", "")

# The connector expects the connection string in encrypted form.
encrypted_conn_str = spark._jvm.org.apache.spark.eventhubs.EventHubsUtils.encrypt(EVENTHUB_CONN_STR)

event_hubs_conf = {
    "eventhubs.connectionString": encrypted_conn_str,
    "eventhubs.consumerGroup": "$Default",
    # JSON-encoded starting position; offset "-1" is used for "from the beginning".
    "eventhubs.startingPosition": """{
      "offset":"-1",
      "seqNo":-1,
      "enqueuedTime":"1970-01-01T00:00:00.000Z",
      "isInclusive":false
    }"""
}

print("Event Hub read configuration ready:", EVENTHUB_NAME)


In [0]:
from pyspark.sql.functions import col, from_json, schema_of_json

# Purpose: open a Structured Streaming read against an Event Hub through the
# "eventhubs" Spark source using the Send/Listen policy, and display the
# decoded message bodies.

# Inputs (update as needed)
eh_namespace = "evhns-natraining.servicebus.windows.net"
eh_name = "evh-natraining-biju"
keyvault_scope = "dbx-ss-kv-natraining-2"
secret_name = "evh-natraining-read-write"  # SAS key with Send/Listen
shared_access_key_name = "SharedAccessKeyToSendAndListen"

# Fetch SAS key from secret scope. Strip surrounding whitespace so a trailing
# newline stored with the secret cannot corrupt the connection string.
sas_key = dbutils.secrets.get(scope=keyvault_scope, key=secret_name).strip()

# Build connection string
connection_string = (
    f"Endpoint=sb://{eh_namespace}/;"
    f"SharedAccessKeyName={shared_access_key_name};"
    f"SharedAccessKey={sas_key};"
    f"EntityPath={eh_name}"
)

# Event Hubs config.
# The connector expects the connection string in encrypted form, so it is passed
# through EventHubsUtils.encrypt before being handed to the source.
eh_conf = {
    "eventhubs.connectionString": spark._jvm.org.apache.spark.eventhubs.EventHubsUtils.encrypt(
        connection_string
    ),
    # Optional consumer group
    # "eventhubs.consumerGroup": "$Default",
    # Start positions (optional)
    # "eventhubs.startingPosition": "{\"offset\":\"-1\"}",  # from beginning
    # "eventhubs.startingPosition": "{\"offset\":\"@latest\"}",  # latest
}

# Read stream
raw_df = (
    spark.readStream
         .format("eventhubs")
         .options(**eh_conf)
         .load()
)

# Body is binary; convert to string for inspection
df = raw_df.withColumn("body", col("body").cast("string"))

# If you know the JSON schema of body, you can parse it:
# sample_json = '{"foo":"bar","value":1}'
# body_schema = schema_of_json(sample_json)
# df = df.withColumn("body_parsed", from_json(col("body"), body_schema))

display(df.select("enqueuedTime", "offset", "sequenceNumber", "body"))

In [0]:
# Event Hubs namespace host: "evhns-natraining.servicebus.windows.net"
# HTTPS endpoint: https://evhns-natraining.servicebus.windows.net:443/

In [0]:
  %sh nc -vz evhns-natraining.servicebus.windows.net 9093
  # Checks whether TCP port 9093 on the Event Hubs namespace is reachable (nc -z
  # only probes the port; -v prints the result).
  # NOTE(review): the %sh magic above is preceded by leading spaces; presumably
  # the magic must start the cell to be recognized — confirm it still runs.

In [0]:
%sh
# Connectivity check for the Event Hubs namespace:
#   1. resolve the namespace host name,
#   2. probe TCP port 9093 (the port used by the Kafka read in this notebook).
# A single %sh header is used because the magic applies to the whole cell; a
# second "%sh ..." line would be passed to the shell as a literal command.
nslookup evhns-natraining.servicebus.windows.net
nc -vz evhns-natraining.servicebus.windows.net 9093

In [0]:
%sh
# Probe TCP port 9093 on the Event Hubs namespace and print nc's exit status.
nc -vz evhns-natraining.servicebus.windows.net 9093
nc_status=$?
echo "nc exit code: ${nc_status}"

In [0]:
from pyspark.sql.functions import col

# Purpose: read the Event Hub through the "kafka" Spark source on port 9093,
# authenticating with the SAS connection string, and display offset, timestamp
# and the message value decoded as a string.

# Connection settings
eh_namespace = "evhns-natraining.servicebus.windows.net"
eh_name = "evh-natraining-biju"
shared_access_key_name = "SharedAccessKeyToSendAndListen"

# Secret lookup
keyvault_scope = "dbx-ss-kv-natraining-2"
secret_name = "evh-natraining-read-write"
secret_value = dbutils.secrets.get(scope=keyvault_scope, key=secret_name)

# Connection string: semicolon-separated key=value pairs.
connection_string = ";".join(
    [
        f"Endpoint=sb://{eh_namespace}/",
        f"SharedAccessKeyName={shared_access_key_name}",
        f"SharedAccessKey={secret_value}",
        f"EntityPath={eh_name}",
    ]
)

# SASL/PLAIN login: the user name is the literal "$ConnectionString" and the
# password is the full connection string.
jaas_config = (
    "kafkashaded.org.apache.kafka.common.security.plain.PlainLoginModule "
    + f'required username="$ConnectionString" password="{connection_string}";'
)

KAFKA_OPTIONS = {
    # Endpoint and topic (the Event Hub name is used as the topic to subscribe to)
    "kafka.bootstrap.servers": eh_namespace + ":9093",
    "subscribe": eh_name,
    # Authentication
    "kafka.sasl.mechanism": "PLAIN",
    "kafka.security.protocol": "SASL_SSL",
    "kafka.sasl.jaas.config": jaas_config,
    # Timeouts
    "kafka.request.timeout.ms": "120000",
    "kafka.session.timeout.ms": "30000",
    # Streaming behaviour
    "failOnDataLoss": "false",
    "startingOffsets": "latest",
    "maxOffsetsPerTrigger": "10000",
}

kafka_reader = spark.readStream.format("kafka").options(**KAFKA_OPTIONS)
df_stream = kafka_reader.load()

display(
    df_stream.select(
        col("offset"),
        col("timestamp"),
        col("value").cast("string"),
    )
)