1. Create an Event Hub instance in the previously created Azure Event Hub namespace.
2. Create a new Shared Access Policy in the Event Hub instance. Copy the connection string generated with the new policy. Note that this connection string has an "EntityPath" component, unlike the RootManageSharedAccessKey connection string for the Event Hub namespace.
3. To enable Azure Event Hub ingestion and transformations in Databricks, install the Azure Event Hubs Connector for Apache Spark from the Maven repository. For this post, I have installed version 2.3.18 of the connector, using the following Maven coordinate: "com.microsoft.azure:azure-eventhubs-spark_2.12:2.3.18". This library is the most current package at the time of this writing.

In [2]:
# Access is governed by file- or folder-level ACL assignments on the Data Lake
# filesystem (container); RBAC assignments on the top-level Azure Data Lake
# resource are not required.
adls_oauth_settings = [
    ("fs.azure.account.auth.type.adlstore.dfs.core.windows.net", "OAuth"),
    (
        "fs.azure.account.oauth.provider.type.adlstore.dfs.core.windows.net",
        "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider",
    ),
    (
        "fs.azure.account.oauth2.client.id.adlstore.dfs.core.windows.net",
        dbutils.secrets.get("myscope", key="clientid"),
    ),
    (
        "fs.azure.account.oauth2.client.secret.adlstore.dfs.core.windows.net",
        dbutils.secrets.get("myscope", key="clientsecret"),
    ),
    (
        "fs.azure.account.oauth2.client.endpoint.adlstore.dfs.core.windows.net",
        # The token endpoint URL embeds the tenant id read from the secret scope.
        "https://login.microsoftonline.com/{}/oauth2/token".format(
            dbutils.secrets.get("myscope", key="tenantid")
        ),
    ),
]

# Apply each setting to the Spark session, in the order listed above.
for conf_key, conf_value in adls_oauth_settings:
    spark.conf.set(conf_key, conf_value)

Read the stream from Azure Event Hub as a streaming DataFrame using `spark.readStream`.
Set your namespace, entity, policy name, and key for Azure Event Hub in the following command.

In [4]:
# Fetch the Event Hub connection string from the "myscope" secret scope
# rather than embedding it in the notebook.
connectionString = dbutils.secrets.get(scope="myscope", key="eventhubconnstr")

In [5]:
# Start the Event Hubs configuration dictionary with the connection string entry.
ehConf = {'eventhubs.connectionString': connectionString}

In [6]:
# Register the consumer group to read with in the ehConf dictionary.
ehConf.update({'eventhubs.consumerGroup': "$Default"})

In [7]:
# Replace the connection string entry in ehConf with the value returned by the
# connector's JVM-side EventHubsUtils.encrypt helper.
eh_utils = sc._jvm.org.apache.spark.eventhubs.EventHubsUtils
ehConf['eventhubs.connectionString'] = eh_utils.encrypt(connectionString)

In [8]:
# Open a streaming DataFrame on the "eventhubs" source, passing every ehConf
# entry as a reader option.
df = (
    spark.readStream
    .format("eventhubs")
    .options(**ehConf)
    .load()
)

In [9]:
# Render the streaming DataFrame read from the "eventhubs" source.
display(df)

body,partition,offset,sequenceNumber,enqueuedTime,publisher,partitionKey,properties,systemProperties
eyJpZCI6ICI5NzA3M2RiNy03NzBkLTQ3YmYtODg0Mi00OTg5NjgyODZkNjMiLCAidGltZXN0YW1wIjogIjIwMjEtMDUtMTUgMjA6MjA6MzAuMzUyNDk2IiwgInV2IjogMC4wMDIzNDA3MjM5Nzg2ODE0NTgsICJ0ZW1wZXJhdHU= (truncated),0,8590019992,228484,2021-05-15T20:20:30.301+0000,,,Map(),Map()
eyJpZCI6ICI2ODQxZWIzYi0wMTYyLTQwNmMtYjExNy01YjZlMTUzM2IwYTMiLCAidGltZXN0YW1wIjogIjIwMjEtMDUtMTUgMjA6MjA6MzAuMzk5NTY0IiwgInV2IjogMC4yMTMyMTQzOTAxNjMyMjE3NSwgInRlbXBlcmF0dXI= (truncated),0,8590020192,228485,2021-05-15T20:20:30.348+0000,,,Map(),Map()
eyJpZCI6ICI4ZDg3ZjU0YS0xM2UyLTRmNzEtYjc3Yy03MmIyMTJjYTY1ZGUiLCAidGltZXN0YW1wIjogIjIwMjEtMDUtMTUgMjA6MjA6MzAuNDYxNDE1IiwgInV2IjogMC4yNDM4NTM0OTM4Mzg5MDc0MiwgInRlbXBlcmF0dXI= (truncated),0,8590020392,228486,2021-05-15T20:20:30.410+0000,,,Map(),Map()
eyJpZCI6ICJlYjRhYTM5YS1mYmEwLTQzNTctYjQzNy1hZTE2YzY4NzBlNDQiLCAidGltZXN0YW1wIjogIjIwMjEtMDUtMTUgMjA6MjA6MzAuNTEzNDIyIiwgInV2IjogMC4wNTc3ODQxNjI3ODU2OTYzMTQsICJ0ZW1wZXJhdHU= (truncated),0,8590020592,228487,2021-05-15T20:20:30.458+0000,,,Map(),Map()
eyJpZCI6ICJjMzY3ODE4MS1kZGYzLTRjZTEtOGNmOC1lNTdjZjgwNTdhMjAiLCAidGltZXN0YW1wIjogIjIwMjEtMDUtMTUgMjA6MjA6MzAuNTY0NDI0IiwgInV2IjogMC42MjUzMjQ5NDMwMzUzOTA1LCAidGVtcGVyYXR1cmU= (truncated),0,8590020792,228488,2021-05-15T20:20:30.520+0000,,,Map(),Map()
eyJpZCI6ICIyNzlmZGQ3ZS05ZDMwLTQ5ZmEtYWJkOC0yOTUwNDc4YmZlOTYiLCAidGltZXN0YW1wIjogIjIwMjEtMDUtMTUgMjA6MjA6MzAuNjE0NDE4IiwgInV2IjogMC4xOTM3NDc4NjM0ODY1MzI4MywgInRlbXBlcmF0dXI= (truncated),0,8590020992,228489,2021-05-15T20:20:30.566+0000,,,Map(),Map()
eyJpZCI6ICJhYmFhMTg4NC1jMWJiLTQ3MmMtYTQwZS05NjQzZDdkNWUyOTYiLCAidGltZXN0YW1wIjogIjIwMjEtMDUtMTUgMjA6MjA6MzAuNjY2NDEzIiwgInV2IjogMC42NjMzNTAxNDA2MTQ4OTU3LCAidGVtcGVyYXR1cmU= (truncated),0,8590021192,228490,2021-05-15T20:20:30.613+0000,,,Map(),Map()
eyJpZCI6ICIyZTlmYmU4OS0wZDk5LTRmNjQtOTIwMC1lZjZlMWQwMmY4MTciLCAidGltZXN0YW1wIjogIjIwMjEtMDUtMTUgMjA6MjA6MzAuNzI2NDE2IiwgInV2IjogMC43Nzc4ODY1Njg3NjEyNDYxLCAidGVtcGVyYXR1cmU= (truncated),0,8590021392,228491,2021-05-15T20:20:30.691+0000,,,Map(),Map()
eyJpZCI6ICI1NjYyNmU1Ny04M2VmLTRlZGItODg2OS0zYTFkOWM2MjZhMzEiLCAidGltZXN0YW1wIjogIjIwMjEtMDUtMTUgMjA6MjA6MzAuNzgxNDQ1IiwgInV2IjogMC41NDQ4NzE3MTYzMzg4NTQxLCAidGVtcGVyYXR1cmU= (truncated),0,8590021592,228492,2021-05-15T20:20:30.738+0000,,,Map(),Map()
eyJpZCI6ICI4YTY0ZTRiNS02NmFlLTQ0M2ItODZkOS03YThkN2QxNTg2MzgiLCAidGltZXN0YW1wIjogIjIwMjEtMDUtMTUgMjA6MjA6MzAuODQwNzU0IiwgInV2IjogMC4yMDMwNTU3NDU2ODA3NDU5LCAidGVtcGVyYXR1cmU= (truncated),0,8590021792,228493,2021-05-15T20:20:30.801+0000,,,Map(),Map()


In [10]:
# Write streams into defined sink
from pyspark.sql.types import *
import  pyspark.sql.functions as F

# Every payload field is declared as a nullable string.
events_schema = StructType([
    StructField(field_name, StringType(), True)
    for field_name in ("id", "timestamp", "uv", "temperature", "humidity")
])

# Cast the event body to a string, parse it as JSON against the schema, and
# expose the parsed struct as a single column named "Payload".
body_as_text = F.col("body").cast("string")
decoded_df = df.select(F.from_json(body_as_text, events_schema).alias("Payload"))

In [11]:
# Render the parsed events; each row holds one struct column named "Payload".
display(decoded_df)

Payload
"List(53e4c5d2-4277-421d-a159-6c55c2482ecc, 2021-05-15 20:20:31.138680, 0.9575191454324856, 90, 86)"
"List(3e53e49e-76d6-44ee-85b5-7da7f63c99a6, 2021-05-15 20:20:31.206922, 0.0222157819551001, 81, 90)"
"List(985b5e24-b32e-4518-abf7-64ac260e240f, 2021-05-15 20:20:31.266437, 0.8666879574720238, 80, 86)"
"List(643bbaba-e0e5-437e-93ed-26b1094fc4bc, 2021-05-15 20:20:31.325437, 0.13244011891963103, 78, 93)"
"List(76af796c-7254-495c-ab5f-26ecfa21949d, 2021-05-15 20:20:31.381949, 0.6230477803161625, 100, 89)"
"List(5d6738af-8885-4491-885c-9565cf99c1c8, 2021-05-15 20:20:31.431265, 0.08203225897694477, 85, 70)"
"List(8e402781-f93f-4299-aede-e7c4e2ef4ff4, 2021-05-15 20:20:31.492009, 0.40354565251750296, 71, 72)"
"List(cc59ad45-330d-40fe-b6af-4b814141ca27, 2021-05-15 20:20:31.551005, 0.24297423198665824, 74, 78)"
"List(7aeecd1c-9a1a-427e-8b75-12e6abdf697e, 2021-05-15 20:20:31.599560, 0.6893834751658093, 100, 84)"
"List(a3c47fc6-9ead-437f-b4b8-ecd5a33ff63a, 2021-05-15 20:20:31.647554, 0.29090833418464546, 72, 84)"


In [12]:
# Pull each field out of the Payload struct into its own column, in the same
# order as before: id, timestamp, uv, temperature, humidity.
payload = decoded_df.Payload
df_events = decoded_df.select(
    *[payload[field_name] for field_name in ("id", "timestamp", "uv", "temperature", "humidity")]
)

In [13]:
# Render the columns extracted from the Payload struct.
display(df_events)

Payload.id,Payload.timestamp,Payload.uv,Payload.temperature,Payload.humidity
3e286fff-90c6-413f-a7fa-1bf924f2674d,2021-05-15 20:20:31.881332,0.5545905077442329,79,77
e3c94c55-eca4-48e8-a74c-5804e7b1a958,2021-05-15 20:20:31.951849,0.5154317497076899,72,98
1c278bfc-45f5-4a69-b079-3d44ca5d3eaf,2021-05-15 20:20:31.997681,0.0957340121186405,86,79
6211a1f0-6528-4d45-8dac-18f23e53b87b,2021-05-15 20:20:32.045668,0.2766648094950011,75,87
7ee301a4-4663-4153-b4c7-7f725ad5a168,2021-05-15 20:20:32.097281,0.7862158597988386,87,93
825b8730-ab03-4b05-a577-89ff7d3af4a0,2021-05-15 20:20:32.155152,0.4644077276836937,70,100
cfd69dba-7aa7-4b49-acf2-66fc6bd8e8b3,2021-05-15 20:20:32.226657,0.7313660865905811,78,77
fb97772f-97a5-4a15-b098-9ae22c5bde0e,2021-05-15 20:20:32.300653,0.8795026324692895,73,93
5957b6d9-30ac-416b-9edd-a9efc56d8127,2021-05-15 20:20:32.375653,0.8046720174619458,93,81
37e13aa7-7e25-4272-941d-8d8d3cd4bef8,2021-05-15 20:20:32.431655,0.4865965779156929,71,95


In [14]:
# Locations used by the streaming write: one for checkpoint state, one for output.
checkpoint_path = "abfss://checkpointcontainer@adlstore.dfs.core.windows.net/checkpointapievents"
sink_path = "abfss://api-eventhub@adlstore.dfs.core.windows.net/writedata"

# Start a streaming query that appends the events as JSON under sink_path.
df_out = (
    df_events.writeStream
    .format("json")
    .outputMode("append")
    .option("checkpointLocation", checkpoint_path)
    .start(sink_path)
)

In [15]:
# Batch-read the JSON files written by the stream so they can be explored visually.
df_read = spark.read.json("abfss://api-eventhub@adlstore.dfs.core.windows.net/writedata")

In [16]:
# Render the rows read back from the JSON sink path.
display(df_read)

Payload.humidity,Payload.id,Payload.temperature,Payload.timestamp,Payload.uv
90,a90d73e1-3b50-4413-82d3-4c30661f7621,94,2021-05-07 21:51:42.768262,0.3191678807838837
75,99a54bc2-b248-4035-8617-c06ffc04dc17,89,2021-05-07 21:51:42.882639,0.6830612110579811
87,2e043a86-d4c5-4c5f-9ef4-735354b9bad8,92,2021-05-07 21:51:42.962539,0.3561484487295167
81,7ee678ec-dd09-490d-94a8-0b721b3ed393,76,2021-05-07 21:51:43.039270,0.2959618288743493
74,874540f3-d77d-4f98-85b1-208d66d5c739,99,2021-05-07 21:51:43.094064,0.89888952040047
79,e1cca749-7985-4645-b1bd-4d3b318a8ca7,87,2021-05-07 21:51:43.145359,0.3231120184524562
86,ac72099d-9902-4510-b566-273babeba7b6,74,2021-05-07 21:51:43.199247,0.953476943570104
71,760b9492-f1c9-4ed3-ae85-9dd14fed3097,99,2021-05-07 21:51:43.272267,0.7303457403554453
90,a96d928b-4fb5-4fdc-b948-f91e9b29dd35,78,2021-05-07 21:51:43.324941,0.6201649151942229
97,9aa276ed-7b8b-4693-b5b3-bb2aa5186dea,80,2021-05-07 21:51:43.394833,0.4176147453967164


In [17]:
# Print the schema of the DataFrame read back from the sink.
df_read.printSchema()