In [0]:
# Libraries
from typing import NamedTuple

from pyspark.sql import DataFrame
from pyspark.sql.streaming import StreamingQuery

In [0]:
# Pipeline settings
# Checkpoint location handed to the streaming writer.
checkpoint_path: str = "/Volumes/bronze/checkpoints/realtime"
# Name of the Delta table the stream is written to.
bronze_table: str = "bronze.training_io.rt_marketing"

In [0]:
# --- Structured Type for Kafka Credentials ---
class KafkaCredentials(NamedTuple):
    """Connection settings and secrets used to read from the Kafka topic.

    The first five fields are secrets and have no defaults; the remaining
    fields describe the cluster and the certificate stores and fall back to
    the defaults declared below.

    ``repr()`` and ``str()`` mask the secret fields so that displaying or
    logging an instance does not print credentials. Attribute access,
    indexing, unpacking and ``_asdict()`` still return the real values.
    """

    # SASL login.
    username: str
    password: str
    # Passwords protecting the SSL key material.
    keystore_password: str
    private_key_password: str
    truststore_password: str
    # Bootstrap server (host:port) and the topic to subscribe to.
    broker: str = "detrainingbg.adastragrp.com:9092"
    topic: str = "marketing.uk.netflix"
    # Locations of the JKS stores handed to the Kafka client.
    truststore_location: str = "abfss://source-data@sttrwesteu001.dfs.core.windows.net/cert/kafka.truststore.jks"
    keystore_location: str = "abfss://source-data@sttrwesteu001.dfs.core.windows.net/cert/kafka.keystore.jks"

    def __repr__(self) -> str:
        """Return a ``field=value`` listing with every secret field masked."""
        secret_fields = {
            "username",
            "password",
            "keystore_password",
            "private_key_password",
            "truststore_password",
        }
        parts = (
            f"{name}='***'" if name in secret_fields else f"{name}={value!r}"
            for name, value in zip(self._fields, self)
        )
        return f"{type(self).__name__}({', '.join(parts)})"

# Function to retrieve secrets
def get_secrets(scope: str, username: str, password: str, key_store: str, private_key: str, trust_store: str) -> KafkaCredentials:
    """
    Look up the Kafka login and SSL passwords in a Databricks secret scope.

    Every argument other than ``scope`` is the *name of a secret key* inside
    that scope, not the secret value itself.

    Args:
        scope (str): Name of the Databricks secret scope to read from.
        username (str): Secret key holding the Kafka username.
        password (str): Secret key holding the Kafka password.
        key_store (str): Secret key holding the SSL keystore password.
        private_key (str): Secret key holding the SSL private key password.
        trust_store (str): Secret key holding the SSL truststore password.

    Returns:
        KafkaCredentials: The five retrieved secrets; broker, topic and store
        locations keep their default values.
    """
    def fetch(key):
        # All five lookups go against the same scope.
        return dbutils.secrets.get(scope, key)

    kafka_user = fetch(username)
    kafka_password = fetch(password)
    keystore_secret = fetch(key_store)
    private_key_secret = fetch(private_key)
    truststore_secret = fetch(trust_store)

    return KafkaCredentials(
        username=kafka_user,
        password=kafka_password,
        keystore_password=keystore_secret,
        private_key_password=private_key_secret,
        truststore_password=truststore_secret,
    )

# Retrieve Kafka credentials from secret scope
def retrieve_my_kafka_credentials() -> KafkaCredentials:
    """
    Fetch the Kafka credentials using this notebook's fixed scope and key names.

    Returns:
        KafkaCredentials: Credentials read from the "dbc-training-scope"
        secret scope.
    """
    # get_secrets parameter name -> name of the secret key to look up.
    secret_key_names = {
        "username": "KafkaUsername",
        "password": "KafkaPassword",
        "key_store": "KafkaSSLKeystorePassword",
        "private_key": "KafkaSSLPrivateKeyPassword",
        "trust_store": "KafkaSSLTruststorePassword",
    }
    return get_secrets(scope="dbc-training-scope", **secret_key_names)

# Function to read Kafka stream
def read_kafka_stream(credentials: KafkaCredentials, starting_offsets: str = "earliest") -> DataFrame:
    """
    Configure and read data from Kafka as a streaming DataFrame.

    Args:
        credentials: KafkaCredentials object with the broker, topic, SASL
            username/password and the SSL store locations and passwords.
        starting_offsets: Passed through unchanged as the Kafka source's
            ``startingOffsets`` option. Defaults to "earliest".

    Returns:
        DataFrame: A streaming DataFrame from Kafka.
    """
    # The login values are quoted so that usernames/passwords containing
    # spaces, ';', '=' or a leading digit still yield a parsable JAAS entry.
    jaas_config = (
        "kafkashaded.org.apache.kafka.common.security.plain.PlainLoginModule required "
        f"username={_quote_jaas_value(credentials.username)} "
        f"password={_quote_jaas_value(credentials.password)};"
    )

    return (
        spark.readStream
            .format("kafka")
            # Connection Settings
            .option("kafka.bootstrap.servers", credentials.broker)
            .option("subscribe", credentials.topic)
            .option("startingOffsets", starting_offsets)
            # Security Settings - Protocol and Mechanism
            .option("kafka.security.protocol", "SASL_SSL")
            .option("kafka.sasl.mechanism", "PLAIN")
            # NOTE(review): an empty algorithm turns off server hostname
            # verification in the Kafka client - confirm this is intentional.
            .option("kafka.ssl.endpoint.identification.algorithm", "")
            # SASL JAAS Credentials
            .option("kafka.sasl.jaas.config", jaas_config)
            # SSL Settings
            .option("kafka.ssl.truststore.location", credentials.truststore_location)
            .option("kafka.ssl.truststore.password", credentials.truststore_password)
            .option("kafka.ssl.keystore.location", credentials.keystore_location)
            .option("kafka.ssl.keystore.password", credentials.keystore_password)
            .option("kafka.ssl.key.password", credentials.private_key_password)
            .load()
    )


def _quote_jaas_value(value: str) -> str:
    """Return ``value`` as a double-quoted JAAS option value.

    A value that is already wrapped in double quotes is returned unchanged,
    so secrets that were stored pre-quoted keep working as before.
    """
    if len(value) >= 2 and value.startswith('"') and value.endswith('"'):
        return value
    # Escape backslashes first so the escapes added for quotes are not doubled.
    escaped = value.replace("\\", "\\\\").replace('"', '\\"')
    return f'"{escaped}"'

# Function to cast Kafka stream data
def cast_kafka_stream_data(kafka_df: DataFrame) -> DataFrame:
    """
    Project the raw Kafka records onto the columns kept downstream.

    The ``value`` column is cast to a string and exposed as ``json_event``;
    the payload is not parsed or validated here.

    Args:
        kafka_df: The raw Kafka DataFrame.

    Returns:
        DataFrame: Columns topic, json_event, partition, offset and timestamp.
    """
    projected_columns = [
        "topic",
        "CAST(value AS STRING) as json_event",
        "partition",
        "offset",
        "timestamp",
    ]
    return kafka_df.selectExpr(*projected_columns)

# Function to write the stream to a Delta table
def write_to_delta_stream(df: DataFrame, table_name: str, checkpoint_path: str) -> StreamingQuery:
    """
    Start a streaming write of the DataFrame into a Delta table.

    The stream is written in append mode with a one-minute processing-time
    trigger.

    Args:
        df: The streaming DataFrame to write.
        table_name: Target Delta table name.
        checkpoint_path: Path for checkpointing the stream progress.

    Returns:
        StreamingQuery: Handle of the started query, so callers can await,
        monitor or stop it. Callers that ignore the result behave as before.
    """
    return (
        df.writeStream
        .format("delta")
        .option("checkpointLocation", checkpoint_path)
        .outputMode("append")
        .trigger(processingTime="1 minute")
        .toTable(table_name)
    )

In [0]:
# Main execution flow
def main():
    """Run the pipeline: secrets -> Kafka stream -> string cast -> Delta table."""
    # Secrets come from the notebook's fixed secret scope.
    kafka_credentials = retrieve_my_kafka_credentials()

    # Open the Kafka source and keep only the projected, string-cast columns.
    string_events = cast_kafka_stream_data(read_kafka_stream(kafka_credentials))

    # Start the streaming write into the bronze table.
    write_to_delta_stream(string_events, bronze_table, checkpoint_path)

# Start the pipeline only when this file is executed directly, not when it is imported.
if __name__ == "__main__":
    main()