## Bronze Test

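This notebook verifies that `01_bronze_ingest.py` works properly: the first cell builds a batch read against the Kafka topic and prints its schema, and the second cell streams the topic into the Bronze Delta table.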

In [None]:
# Batch (non-streaming) read of the topic, used as a quick sanity check of the
# Kafka source configuration before the streaming cell is run.

# SASL/PLAIN login module configuration passed through to the Kafka client.
batch_jaas_config = (
    "org.apache.kafka.common.security.plain.PlainLoginModule required "
    'username="username" '
    'password="password";'
)

# Every Kafka source option in one place; keys prefixed with "kafka." are
# forwarded to the underlying Kafka consumer.
batch_kafka_options = {
    "kafka.bootstrap.servers": "boot-strap-server.kafka.svc.cluster.local:9094",
    "kafka.security.protocol": "SASL_SSL",
    "kafka.sasl.mechanism": "PLAIN",
    "kafka.sasl.jaas.config": batch_jaas_config,
    "subscribe": "school_climate_stream",
    "startingOffsets": "earliest",
}

test_df = spark.read.format("kafka").options(**batch_kafka_options).load()

# NOTE(review): no Spark action (count/show/collect) runs in this cell, so the
# message below may be printed without the broker having been contacted --
# confirm whether an explicit action is wanted here.
print("Connected to Kafka, schema:")
test_df.printSchema()

In [None]:
from rt_databricks.utils.gcs_paths import bronze_path, checkpoint_path

# Streaming ingest: read the Kafka topic as a stream and append it to the
# Bronze Delta location.

# Output and checkpoint locations come from the project's path helpers.
bronze_uri = bronze_path("school_climate_raw")
chk_uri = checkpoint_path("kafka_school_climate_bronze")

# SASL/PLAIN login module configuration passed through to the Kafka client.
stream_jaas_config = (
    "org.apache.kafka.common.security.plain.PlainLoginModule required "
    'username="username" '
    'password="password";'
)

# Every Kafka source option in one place; keys prefixed with "kafka." are
# forwarded to the underlying Kafka consumer.
stream_kafka_options = {
    "kafka.bootstrap.servers": "boot-strap-server.kafka.svc.cluster.local:9094",
    "kafka.security.protocol": "SASL_SSL",
    "kafka.sasl.mechanism": "PLAIN",
    "kafka.sasl.jaas.config": stream_jaas_config,
    "subscribe": "school_climate_stream",
    "startingOffsets": "earliest",
}

kafka_df = spark.readStream.format("kafka").options(**stream_kafka_options).load()

# Cast the key and value columns to strings and carry the Kafka record
# metadata columns through unchanged.
bronze_columns = [
    "CAST(key AS STRING) AS key",
    "CAST(value AS STRING) AS value",
    "topic",
    "partition",
    "offset",
    "timestamp",
    "timestampType",
]
bronze_df = kafka_df.selectExpr(*bronze_columns)

# Append-mode Delta sink writing to bronze_uri, checkpointing to chk_uri.
bronze_writer = (
    bronze_df.writeStream.format("delta")
    .options(checkpointLocation=chk_uri, path=bronze_uri)
    .outputMode("append")
)
query = bronze_writer.start()

print("Direct streaming Bronze started")