## 03_bronze_xml_state_pyspark
## Ingest State XML into Bronze using PySpark read/write.

In [0]:
from pyspark.sql import functions as F
import uuid

# Catalog and schema that hold the Bronze target table.
CAT = "zillow"
BRONZE = "zillow_bronze"

# Source XML file on the volume, split across two literals for line length.
XML_FILE = (
    "/Volumes/zillow/zillow_medallion/raw-converted_format/xml/"
    "State_time_series.xml"
)

# Fully qualified three-part name: <catalog>.<schema>.<table>.
tgt = ".".join((CAT, BRONZE, "state_ts_xml_bronze"))

# Fresh random identifier generated each time this cell runs; the ingest
# step below stamps it on the rows it loads.
run_id = str(uuid.uuid4())

# COMMAND ----------
# Load the file with Spark's plain-text reader (by default one row per input
# line) and rename its single "value" column to "xml".
df = spark.read.text(XML_FILE).withColumnRenamed("value", "xml")

# Audit columns, appended in this order after the payload column.
audit_columns = {
    "load_dt": F.current_timestamp(),
    "source_file": F.lit(XML_FILE),
    "ingest_mode": F.lit("pyspark_xml"),
    "ingest_run_id": F.lit(run_id),
}
for column_name, column_expr in audit_columns.items():
    df = df.withColumn(column_name, column_expr)

# Append to the Bronze Delta table, so existing rows are kept and every
# execution of this cell adds another copy of the file's rows.
bronze_writer = df.write.format("delta").mode("append")
bronze_writer.saveAsTable(tgt)

# Attach a human-readable description to the table.
comment_sql = f"COMMENT ON TABLE {tgt} IS 'State time series ingested as XML strings using PySpark read/write into Bronze.'"
spark.sql(comment_sql)

# Sanity checks: total row count, then a small sample of rows.
sanity_queries = (
    f"SELECT COUNT(*) AS rows FROM {tgt}",
    f"SELECT * FROM {tgt} LIMIT 5",
)
for sanity_sql in sanity_queries:
    display(spark.sql(sanity_sql))
