In [0]:
from pyspark.sql.functions import *
from pyspark.sql.types import *

In [0]:
%sql
-- Create the catalog and database that the streaming tables in this notebook
-- are written to. Both statements use IF NOT EXISTS, so re-running the cell
-- is a no-op once the objects exist.
create catalog if not exists demo_catalog;
create database if not exists demo_catalog.demo_database;

In [0]:
# Event Hubs connection settings.
# Fill in the connection string for your Event Hub namespace (it carries the
# access key) and the name of the hub to read from.
# NOTE(review): prefer loading the connection string from a secret store
# instead of pasting it into the notebook.
connectionString = ""
eventHubName = "event-hub"

# Options handed to the "eventhubs" stream reader. The connection string is
# passed through the connector's EventHubsUtils.encrypt helper before use.
ehConf = dict([
    ('eventhubs.connectionString',
     sc._jvm.org.apache.spark.eventhubs.EventHubsUtils.encrypt(connectionString)),
    ('eventhubs.eventHubName', eventHubName),
])

In [0]:
# Example event payload (JSON); its fields mirror the schema defined in the next cell.
# {
#     "temperature": 20,
#     "humidity": 60,
#     "windSpeed": 10,
#     "windDirection": "NW",
#     "precipitation": 0,
#     "conditions": "Partly Cloudy"
# }


In [0]:
# Schema used to parse the JSON weather payload: four integer measurements
# and two string descriptors, all nullable (the StructField default).
json_schema = (
    StructType()
    .add("temperature", IntegerType())
    .add("humidity", IntegerType())
    .add("windSpeed", IntegerType())
    .add("windDirection", StringType())
    .add("precipitation", IntegerType())
    .add("conditions", StringType())
)

In [0]:
# Raw ingestion: stream events from Event Hubs into a Delta table.
# The reader is configured entirely through ehConf. The method chain is
# wrapped in parentheses so there is no trailing backslash that could splice
# the following statement onto the chain if the blank line after it is removed.
df = (
    spark.readStream
    .format("eventhubs")
    .options(**ehConf)
    .load()
)

# Live preview of the incoming stream in the notebook.
# NOTE(review): this starts its own streaming read of the same Event Hub
# alongside the writeStream below; the connector documentation warns about
# concurrent readers sharing a consumer group -- confirm this is acceptable.
df.display()

# Append every event, unmodified, to the raw table. Streaming progress is
# tracked under the given checkpoint location.
(
    df.writeStream
    .option("checkpointLocation", "/mnt/checkpoint/raw")
    .outputMode("append")
    .format("delta")
    .toTable("demo_catalog.demo_database.weather_raw")
)

In [0]:
# Curated layer: read the raw table as a stream, decode the event body and
# flatten it into one column per weather field.
curated_df = (
    spark.readStream
    .format("delta")
    .table("demo_catalog.demo_database.weather_raw")
    # body is cast to a string and parsed as JSON against json_schema
    .withColumn("body", from_json(col("body").cast("string"), json_schema))
    .select(
        "body.temperature",
        "body.humidity",
        "body.windSpeed",
        "body.windDirection",
        "body.precipitation",
        "body.conditions",
        # the enqueued time of the event is exposed as `timestamp`
        col("enqueuedTime").alias('timestamp'),
    )
)

# Live preview of the parsed stream in the notebook.
curated_df.display()

# Append the flattened rows to the curated table, with a checkpoint location
# separate from the one used by the raw stream.
(
    curated_df.writeStream
    .option("checkpointLocation",  "/mnt/checkpoint/curated")
    .outputMode("append")
    .format("delta")
    .toTable("demo_catalog.demo_database.weather_curated")
)