In [0]:
# Databricks notebook source
# MAGIC %md
# MAGIC # Bronze Layer - Streaming with Auto Loader Pattern
# MAGIC Reads events from Event Hub using Structured Streaming and splits them into the Orders and Products tables.

# COMMAND ----------

from pyspark.sql.functions import *
from pyspark.sql.types import *

# COMMAND ----------

# MAGIC %md
# MAGIC ## Configuration

# COMMAND ----------

# Event Hub Configuration
# Namespace host and hub name of the Event Hub to read from.
eh_namespace = "evhns-natraining.servicebus.windows.net"
eh_name = "evh-natraining-bijunew"
# Secret scope and key name used to look up the shared access key.
keyvault_scope = "dbx-ss-kv-natraining-2"
secret_name = "evh-natraining-read-write"
shared_access_key_name = "SharedAccessKeyToSendAndListen"

# Unity Catalog Configuration
catalog = "na-dbxtraining"
schema_bronze = "biju_bronze"
schema_silver = "biju_silver"
schema_gold = "biju_gold"

# Table Names
# The catalog name contains a hyphen, so it is wrapped in backticks to form a
# valid three-part identifier.
bronze_orders_table = f"`{catalog}`.{schema_bronze}.ordersnew"
bronze_products_table = f"`{catalog}`.{schema_bronze}.productsnew"
silver_table = f"`{catalog}`.{schema_silver}.order_detailsnew"
gold_brand_category_table = f"`{catalog}`.{schema_gold}.sales_by_brand_categorynew"
gold_location_table = f"`{catalog}`.{schema_gold}.location_performancenew"
gold_product_table = f"`{catalog}`.{schema_gold}.product_performancenew"
gold_customer_table = f"`{catalog}`.{schema_gold}.customer_insightsnew"
gold_daily_summary_table = f"`{catalog}`.{schema_gold}.daily_summarynew"

# Checkpoint locations
# One base directory per catalog (hyphens replaced by underscores), defined
# once and shared by every layer's checkpoint path.
# NOTE(review): checkpoints live under /tmp -- confirm this location is durable
# enough for the recovery guarantees the streams need.
checkpoint_base = f"/tmp/checkpoints/{catalog.replace('-', '_')}"

# Bronze
orders_checkpoint = f"{checkpoint_base}/bronze_orders"
products_checkpoint = f"{checkpoint_base}/bronze_products"

# Silver
silver_checkpoint = f"{checkpoint_base}/silver_order_details"

# Gold
gold_brand_checkpoint = f"{checkpoint_base}/gold_brand_category"
gold_location_checkpoint = f"{checkpoint_base}/gold_location"
gold_product_checkpoint = f"{checkpoint_base}/gold_product"
gold_customer_checkpoint = f"{checkpoint_base}/gold_customer"
gold_daily_checkpoint = f"{checkpoint_base}/gold_daily_summary"

# Echo the effective configuration so it is visible in the cell output.
print("="*70)
print("STREAMING CONFIGURATION")
print("="*70)
print(f"Event Hub: {eh_name}")
print(f"Orders Table: {bronze_orders_table}")
print(f"Products Table: {bronze_products_table}")
print(f"Checkpoint Base: {checkpoint_base}")
print("="*70)

# COMMAND ----------

# MAGIC %md
# MAGIC ## Get Secret from Key Vault

# COMMAND ----------

# Look up the shared access key in the configured secret scope. On failure the
# error is printed and the original exception is re-raised unchanged.
try:
    secret_value = dbutils.secrets.get(scope=keyvault_scope, key=secret_name)
except Exception as err:
    print(f"✗ Error retrieving secret: {err}")
    raise
else:
    print("✓ Successfully retrieved secret from Key Vault")

# COMMAND ----------

# MAGIC %md
# MAGIC ## Build Connection String and Kafka Options

# COMMAND ----------

# Connection string: three "Key=Value" fields separated by ";".
connection_string = ";".join(
    [
        f"Endpoint=sb://{eh_namespace}/",
        f"SharedAccessKeyName={shared_access_key_name}",
        f"SharedAccessKey={secret_value}",
    ]
)

# Options for the Kafka streaming source.
KAFKA_OPTIONS = {
    # Broker endpoint (namespace host on port 9093) and the topic to subscribe to.
    "kafka.bootstrap.servers": f"{eh_namespace}:9093",
    "subscribe": eh_name,
    # SASL/PLAIN over TLS; the JAAS entry passes the literal user name
    # "$ConnectionString" with the connection string as the password.
    "kafka.sasl.mechanism": "PLAIN",
    "kafka.security.protocol": "SASL_SSL",
    "kafka.sasl.jaas.config": (
        "kafkashaded.org.apache.kafka.common.security.plain.PlainLoginModule required "
        f'username="$ConnectionString" password="{connection_string}";'
    ),
    # Client timeouts, in milliseconds.
    "kafka.request.timeout.ms": "60000",
    "kafka.session.timeout.ms": "30000",
    # Stream behaviour: do not fail on data loss, start from the earliest
    # offsets, and read at most 10,000 offsets per trigger.
    "failOnDataLoss": "false",
    "startingOffsets": "earliest",
    "maxOffsetsPerTrigger": "10000",
}

print("✓ Kafka options configured for streaming")