Importing the libraries.

In [0]:
from pyspark.sql.functions import *
from pyspark.sql.types import *

from pyspark.sql.window import Window
from delta.tables import DeltaTable
from pyspark.sql.types import StructType, StructField, StringType, DoubleType, LongType, TimestampType
from pyspark.dbutils import DBUtils

The code block below creates the catalog and schemas for our solution. 

The approach uses a multi-hop (medallion) data storage architecture, consisting of bronze, silver, and gold schemas within the catalog named by `catalog_name` (`streaming1` in the configuration below).

In [0]:
# Databricks notebook source


# COMMAND ----------

# Catalog, schema and table that together name the bronze target table.
catalog_name = "streaming1"
db_name = "bronze"
table_name = "weather"

# Event Hub name, plus the secret scope and secret key that are later passed
# to dbutils.secrets.get to fetch the connection string.
eventHubName1 = "streamingeventhubs"
key_vault = "testScope"
connector = "testsecrettyler"

# Notebook widget with a "True"/"False" dropdown (default "False"); the selected
# string is converted to a Python bool.
dbutils.widgets.dropdown("trigger_available_now", "False", ["True", "False"])
trigger_available_now = dbutils.widgets.get("trigger_available_now") == "True"

# Last segment of the notebook path, truncated at the first dot; used to give
# this notebook its own checkpoint directory inside the 'checkpoints' volume.
notebook_name = (
    DBUtils(spark)
    .notebook.entry_point.getDbutils()
    .notebook()
    .getContext()
    .notebookPath()
    .get()
    .split("/")[-1]
    .split(".")[0]
)
checkpoint_path = f"/Volumes/{catalog_name}/{db_name}/checkpoints/{notebook_name}/"




In [0]:
# Create the catalog backing this solution. IF NOT EXISTS makes the cell safe to
# re-run, matching the schema/volume statements that follow. Creation stays
# best-effort (the notebook continues on failure), but the real error is printed
# instead of being swallowed by a bare `except:`.
try:
    spark.sql(
        f"create catalog if not exists {catalog_name} "
        "managed location 'abfss://streamingdata-demo@dataengineerdemoweather.dfs.core.windows.net/'"
    )
except Exception as exc:
    print(f"could not create catalog {catalog_name}: {exc}")

check if catalog already exists


In [0]:

# Create the bronze schema and the volume that holds streaming checkpoints.
# Both statements use IF NOT EXISTS, so a failure here is NOT caused by the
# object already existing; report the real error instead of guessing.
# Each step stays best-effort so the notebook keeps running as before.
try:
    spark.sql(f"create schema if not exists {catalog_name}.{db_name}")
except Exception as exc:
    print(f"could not create schema {catalog_name}.{db_name}: {exc}")

try:
    spark.sql(f"CREATE VOLUME IF NOT EXISTS {catalog_name}.{db_name}.checkpoints")
except Exception as exc:
    print(f"could not create volume {catalog_name}.{db_name}.checkpoints: {exc}")




#### Bronze Layer

Set up the Azure Event Hubs connection string.

In [0]:
# Event Hubs connector configuration.
# The connection string is read from the secret scope `key_vault` under the key
# `connector`; the hub name comes from `eventHubName1`.
eventHubName = eventHubName1
connectionString = dbutils.secrets.get(key_vault, connector)

# The connection string is passed through EventHubsUtils.encrypt before being
# placed in the options dictionary.
ehConf = {
    "eventhubs.connectionString": sc._jvm.org.apache.spark.eventhubs.EventHubsUtils.encrypt(
        connectionString
    ),
    "eventhubs.eventHubName": eventHubName,
}

Reading and writing the stream to the bronze layer.

In [0]:
# Reading stream: build the streaming DataFrame 'df' from the "eventhubs" source,
# configured with the options held in ehConf.
df = (
    spark.readStream
    .format("eventhubs")
    .options(**ehConf)
    .load()
)

# Displaying stream: render the incoming records for visual inspection and debugging.
df.display()

# Writing stream: append the records to the Delta table
# {catalog_name}.{db_name}.{table_name}, checkpointing at checkpoint_path.
# NOTE(review): the trigger_available_now widget value is not applied to this
# writer - confirm whether that is intended.
(
    df.writeStream
    .option("checkpointLocation", checkpoint_path)
    .outputMode("append")
    .format("delta")
    .toTable(f"{catalog_name}.{db_name}.{table_name}")
)

body,partition,offset,sequenceNumber,enqueuedTime,publisher,partitionKey,properties,systemProperties
eyJ0ZW1wZXJhdHVyZSI6ICIyMlx1MjU5MUMiLCAidGltZSI6ICJNb25kYXkgODo0Mlx1MjAyZnAubS4iLCAic2t5Y29uZGl0aW9uIjogIkNsZWFyIn0=,0,12884915656,73,2024-09-10T00:43:14.138Z,,,Map(),Map(x-opt-sequence-number-epoch -> -1)
eyJ0ZW1wZXJhdHVyZSI6ICIyMlx1MjU5MUMiLCAidGltZSI6ICJNb25kYXkgODo0M1x1MjAyZnAubS4iLCAic2t5Y29uZGl0aW9uIjogIkNsZWFyIn0=,0,12884915808,74,2024-09-10T00:44:23.937Z,,,Map(),Map(x-opt-sequence-number-epoch -> -1)
eyJ0ZW1wZXJhdHVyZSI6ICIyMlx1MDBiMEMiLCAidGltZSI6ICJNb25kYXkgODo0NFx1MjAyZnAubS4iLCAic2t5Y29uZGl0aW9uIjogIkNsZWFyIn0=,0,12884915960,75,2024-09-10T00:45:34.033Z,,,Map(),Map(x-opt-sequence-number-epoch -> -1)
eyJ0ZW1wZXJhdHVyZSI6ICIyMlx1MjU5MUMiLCAidGltZSI6ICJNb25kYXkgODo0NVx1MjAyZnAubS4iLCAic2t5Y29uZGl0aW9uIjogIkNsZWFyIn0=,0,12884916112,76,2024-09-10T00:46:44.175Z,,,Map(),Map(x-opt-sequence-number-epoch -> -1)
eyJ0ZW1wZXJhdHVyZSI6ICIyMlx1MjU5MUMiLCAidGltZSI6ICJNb25kYXkgODo0Nlx1MjAyZnAubS4iLCAic2t5Y29uZGl0aW9uIjogIkNsZWFyIn0=,0,12884916264,77,2024-09-10T00:47:55.287Z,,,Map(),Map(x-opt-sequence-number-epoch -> -1)
eyJ0ZW1wZXJhdHVyZSI6ICIyMlx1MjU5MUMiLCAidGltZSI6ICJNb25kYXkgODo0OFx1MjAyZnAubS4iLCAic2t5Y29uZGl0aW9uIjogIkNsZWFyIn0=,0,12884916416,78,2024-09-10T00:49:05.805Z,,,Map(),Map(x-opt-sequence-number-epoch -> -1)
eyJ0ZW1wZXJhdHVyZSI6ICIyMlx1MDBiMEMiLCAidGltZSI6ICJNb25kYXkgODo0OVx1MjAyZnAubS4iLCAic2t5Y29uZGl0aW9uIjogIkNsZWFyIn0=,0,12884916568,79,2024-09-10T00:50:15.934Z,,,Map(),Map(x-opt-sequence-number-epoch -> -1)
eyJ0ZW1wZXJhdHVyZSI6ICIyMlx1MDBiMEMiLCAidGltZSI6ICJNb25kYXkgODo1MFx1MjAyZnAubS4iLCAic2t5Y29uZGl0aW9uIjogIkNsZWFyIn0=,0,12884916720,80,2024-09-10T00:51:26.39Z,,,Map(),Map(x-opt-sequence-number-epoch -> -1)
