# Read Data as Stream from Azure Event Hub

In [0]:
import os
import pyspark.sql.functions as F
from pyspark.sql.types import *

connection_string = os.getenv('AZURE_EH_CON_STR')
conf = {}
conf["eventhubs.connectionString"] = f'{spark._jvm.org.apache.spark.eventhubs.EventHubsUtils.encrypt(connection_string)}'

incomingStream = (
  spark
    .readStream
    .format("eventhubs")
    .options(**conf)
    .load()
)

incomingStream.writeStream.outputMode("append").format("memory").queryName("read_hub").start()

In [0]:
message_schema = StructType([
  StructField('timestamp', StringType()),
  StructField('humidity', StringType()),
  StructField('temperature', StringType()),
  StructField('device_id', StringType()),
  StructField('debice_ip', StringType()),
  StructField('device_location', StringType())  
])

messages = incomingStream \
  .withColumn("Offset", F.col("offset").cast(LongType())) \
  .withColumn("Time", F.col("enqueuedTime").cast(TimestampType())) \
  .withColumn("Timestamp", F.col("enqueuedTime").cast(LongType())) \
  .withColumn("Body", F.col("body").cast(StringType())) \
  .select("Offset", "Time", "Timestamp", "Body", F.from_json(F.col("body").cast("string"), message_schema).alias("payload"))

messages.writeStream.outputMode("append").format("delta").option("path", '/mnt/stdhamacheradl001/iot/simulator').option("checkpointLocation", '/mnt/stdhamacheradl001/iot/simulator/_checkpoint').start()

In [0]:
display(messages)

Offset,Time,Timestamp,Body,payload
13818920,2020-11-26T22:59:54.458+0000,1606431594,"[{""timestamp"": ""2020-11-26 15:59:51.905794""}, {""humidity"": ""0.42""}, {""temperature"": ""20.67""}, {""device_id"": ""e3e70682-c209-4cac-a29f-6fbed82c07cd""}, {""device_ip"": ""172.31.140.184""}, {""device_location"": ""[\""46.1351\"", \""-60.1831\"", \""Sydney\"", \""CA\"", \""America/Glace_Bay\""]""}]","List(null, null, null, null, null, null)"
13819256,2020-11-26T22:59:54.458+0000,1606431594,"[{""timestamp"": ""2020-11-26 15:59:51.906778""}, {""humidity"": ""0.61""}, {""temperature"": ""19.46""}, {""device_id"": ""7a024204-f7c1-4d87-8da5-e709d4713d60""}, {""device_ip"": ""172.22.253.123""}, {""device_location"": ""[\""49.88307\"", \""-119.48568\"", \""Kelowna\"", \""CA\"", \""America/Vancouver\""]""}]","List(null, null, null, null, null, null)"
13819592,2020-11-26T22:59:54.458+0000,1606431594,"[{""timestamp"": ""2020-11-26 15:59:51.906778""}, {""humidity"": ""0.59""}, {""temperature"": ""20.98""}, {""device_id"": ""cca5a5a1-9e4d-4e3c-9846-d424c17c6279""}, {""device_ip"": ""10.75.62.134""}, {""device_location"": ""[\""46.23899\"", \""-63.13414\"", \""Charlottetown\"", \""CA\"", \""America/Halifax\""]""}]","List(null, null, null, null, null, null)"
13819928,2020-11-26T22:59:54.458+0000,1606431594,"[{""timestamp"": ""2020-11-26 15:59:51.906778""}, {""humidity"": ""0.66""}, {""temperature"": ""25.46""}, {""device_id"": ""af19922a-d9b8-4714-a61a-441c12e0c8b2""}, {""device_ip"": ""10.181.36.35""}, {""device_location"": ""[\""49.88307\"", \""-119.48568\"", \""Kelowna\"", \""CA\"", \""America/Vancouver\""]""}]","List(null, null, null, null, null, null)"
13820264,2020-11-26T22:59:54.458+0000,1606431594,"[{""timestamp"": ""2020-11-26 15:59:51.906778""}, {""humidity"": ""0.59""}, {""temperature"": ""21.13""}, {""device_id"": ""f77383c1-3458-4748-a9bb-17bca3f2c9bf""}, {""device_ip"": ""172.24.85.243""}, {""device_location"": ""[\""53.51684\"", \""-113.3187\"", \""Sherwood Park\"", \""CA\"", \""America/Edmonton\""]""}]","List(null, null, null, null, null, null)"
13820600,2020-11-26T22:59:54.458+0000,1606431594,"[{""timestamp"": ""2020-11-26 15:59:51.906778""}, {""humidity"": ""0.60""}, {""temperature"": ""23.34""}, {""device_id"": ""17e0aa3c-0398-4ca8-aa7e-9d498c778ea6""}, {""device_ip"": ""192.168.0.149""}, {""device_location"": ""[\""50.26729\"", \""-119.27337\"", \""Vernon\"", \""CA\"", \""America/Vancouver\""]""}]","List(null, null, null, null, null, null)"
13820936,2020-11-26T22:59:54.458+0000,1606431594,"[{""timestamp"": ""2020-11-26 15:59:51.906778""}, {""humidity"": ""0.64""}, {""temperature"": ""24.91""}, {""device_id"": ""534097ca-baf3-497a-be70-f16a55485822""}, {""device_ip"": ""192.168.113.131""}, {""device_location"": ""[\""53.51684\"", \""-113.3187\"", \""Sherwood Park\"", \""CA\"", \""America/Edmonton\""]""}]","List(null, null, null, null, null, null)"
13821280,2020-11-26T22:59:54.458+0000,1606431594,"[{""timestamp"": ""2020-11-26 15:59:51.906778""}, {""humidity"": ""0.51""}, {""temperature"": ""24.65""}, {""device_id"": ""72ae2244-8b01-43c1-8d9d-2b7d247a8333""}, {""device_ip"": ""10.250.131.204""}, {""device_location"": ""[\""49.88307\"", \""-119.48568\"", \""Kelowna\"", \""CA\"", \""America/Vancouver\""]""}]","List(null, null, null, null, null, null)"
13821616,2020-11-26T22:59:54.458+0000,1606431594,"[{""timestamp"": ""2020-11-26 15:59:51.906778""}, {""humidity"": ""0.45""}, {""temperature"": ""26.24""}, {""device_id"": ""8c25166a-1ff3-4849-b4e1-357d4a84eb03""}, {""device_ip"": ""10.104.10.18""}, {""device_location"": ""[\""43.86682\"", \""-79.2663\"", \""Markham\"", \""CA\"", \""America/Toronto\""]""}]","List(null, null, null, null, null, null)"
13821944,2020-11-26T22:59:54.458+0000,1606431594,"[{""timestamp"": ""2020-11-26 15:59:51.906778""}, {""humidity"": ""0.52""}, {""temperature"": ""19.90""}, {""device_id"": ""5129fb7c-6288-41a5-8c45-782198a6416d""}, {""device_ip"": ""172.22.15.126""}, {""device_location"": ""[\""45.43341\"", \""-73.86586\"", \""Beaconsfield\"", \""CA\"", \""America/Toronto\""]""}]","List(null, null, null, null, null, null)"


In [0]:
for s in spark.streams.active:
    s.stop()