In [0]:
# Databricks notebook source
# MAGIC %md
# MAGIC # Bronze Layer - Streaming with Auto Loader Pattern
# MAGIC Reads events from Event Hub using Structured Streaming and splits into Orders & Products tables

# COMMAND ----------

from pyspark.sql.functions import *
from pyspark.sql.types import *

# COMMAND ----------

# MAGIC %md
# MAGIC ## Configuration

# COMMAND ----------

# ---------------------------------------------------------------------------
# Notebook-wide settings: Event Hub source, Unity Catalog targets, checkpoints
# ---------------------------------------------------------------------------

# Event Hub source
eh_namespace = "evhns-natraining.servicebus.windows.net"
eh_name = "evh-natraining-biju"
shared_access_key_name = "SharedAccessKeyToSendAndListen"

# Secret scope and key under which the Event Hub shared access key is stored
keyvault_scope = "dbx-ss-kv-natraining-2"
secret_name = "evh-natraining-read-write"

# Unity Catalog: one catalog, one schema per medallion layer
catalog = "na-dbxtraining"
schema_bronze = "biju_bronze"
schema_silver = "biju_silver"
schema_gold = "biju_gold"

# Fully qualified table names. The catalog name contains a hyphen and is
# wrapped in backticks; schema and table names are used as-is.
_quoted_catalog = "`" + catalog + "`"
_bronze_prefix = f"{_quoted_catalog}.{schema_bronze}"
_silver_prefix = f"{_quoted_catalog}.{schema_silver}"
_gold_prefix = f"{_quoted_catalog}.{schema_gold}"

bronze_orders_table = _bronze_prefix + ".orders"
bronze_products_table = _bronze_prefix + ".products"
silver_table = _silver_prefix + ".order_details"
gold_brand_category_table = _gold_prefix + ".sales_by_brand_category"
gold_location_table = _gold_prefix + ".location_performance"
gold_product_table = _gold_prefix + ".product_performance"
gold_customer_table = _gold_prefix + ".customer_insights"
gold_daily_summary_table = _gold_prefix + ".daily_summary"

# Checkpoint directories, one per target table. They all live under a single
# folder in the volume, named after the catalog with hyphens replaced by
# underscores.
checkpoint_base = (
    "/Volumes/na-dbxtraining/biju_raw/biju_vol/checkpoints/"
    + catalog.replace("-", "_")
)
orders_checkpoint = checkpoint_base + "/bronze_orders"
products_checkpoint = checkpoint_base + "/bronze_products"
silver_checkpoint = checkpoint_base + "/silver_order_details"
gold_brand_checkpoint = checkpoint_base + "/gold_brand_category"
gold_location_checkpoint = checkpoint_base + "/gold_location"
gold_product_checkpoint = checkpoint_base + "/gold_product"
gold_customer_checkpoint = checkpoint_base + "/gold_customer"
gold_daily_checkpoint = checkpoint_base + "/gold_daily_summary"

# Echo the settings this notebook is about to use.
_rule = "=" * 70
print(
    _rule,
    "STREAMING CONFIGURATION",
    _rule,
    f"Event Hub: {eh_name}",
    f"Orders Table: {bronze_orders_table}",
    f"Products Table: {bronze_products_table}",
    f"Checkpoint Base: {checkpoint_base}",
    _rule,
    sep="\n",
)

# COMMAND ----------

# MAGIC %md
# MAGIC ## Get Secret from Key Vault

# COMMAND ----------

# Fetch the Event Hub shared access key from the configured secret scope.
# A failure is reported and then re-raised, so the notebook stops here rather
# than continuing without credentials.
try:
    secret_value = dbutils.secrets.get(key=secret_name, scope=keyvault_scope)
    print("✓ Successfully retrieved secret from Key Vault")
except Exception as fetch_error:
    print("✗ Error retrieving secret: " + str(fetch_error))
    raise

# COMMAND ----------

# MAGIC %md
# MAGIC ## Build Connection String and Event Hubs Options

# COMMAND ----------

# Namespace-level connection string (no EntityPath). Nothing else in this cell
# reads it directly; the name is kept so that any other cell referring to it
# keeps working.
connection_string = (
    f"Endpoint=sb://{eh_namespace}/;"
    f"SharedAccessKeyName={shared_access_key_name};"
    f"SharedAccessKey={secret_value}"
)

# Entity-level connection string handed to the Event Hubs connector.
# It is derived from the configuration variables instead of repeating the
# namespace and key name as literals, so a change to the configuration cannot
# leave this string pointing at the old namespace. Surrounding whitespace and
# any line breaks (e.g. a trailing newline in the stored secret) are removed
# so the result is a single line.
EVENTHUB_CONN_STR = (
    f"{connection_string};EntityPath={eh_name}"
).strip().replace("\n", "").replace("\r", "")

# The connector is given the connection string in encrypted form, produced by
# its own JVM-side helper.
encrypted_conn_str = spark._jvm.org.apache.spark.eventhubs.EventHubsUtils.encrypt(EVENTHUB_CONN_STR)

# Reader options for the Event Hubs source.
# startingPosition is passed as a JSON document; offset "-1" is the
# connector's marker for the beginning of the stream.
# NOTE(review): seqNo and enqueuedTime are presumably ignored while an offset
# is set — confirm against the connector documentation.
event_hubs_conf = {
    "eventhubs.connectionString": encrypted_conn_str,
    "eventhubs.consumerGroup": "$Default",
    "eventhubs.startingPosition": """{
      "offset":"-1",
      "seqNo":-1,
      "enqueuedTime":"1970-01-01T00:00:00.000Z",
      "isInclusive":false
    }"""
}

print("Event Hub read configuration ready:", eh_name)