## Bronze Table
The bronze table contains only the cost records that were created for Databricks resources, i.e. rows whose `user_vendor` resource tag is `Databricks` (Vendor = Databricks).

In [0]:
from pyspark.sql import functions as F

In [0]:
# Declare one text widget (default: empty string) per job parameter that the
# Databricks workflow passes in.
for _widget_name in (
    "source_location_data",
    "billing_period",
    "target_catalog_name",
    "target_schema_name",
    "bronze_table_name",
    "tracker_table_name",
):
    dbutils.widgets.text(_widget_name, "")

# Source path: drop any trailing slashes, then append a single "/".
_raw_source_location = dbutils.widgets.get("source_location_data")
source_location_data = _raw_source_location.rstrip("/") + "/"

# Billing period: surrounding whitespace is stripped.
billing_period = dbutils.widgets.get("billing_period").strip()

# Target catalog / schema / table names are used exactly as supplied.
target_catalog_name = dbutils.widgets.get("target_catalog_name")
target_schema_name = dbutils.widgets.get("target_schema_name")
bronze_table_name = dbutils.widgets.get("bronze_table_name")
tracker_table_name = dbutils.widgets.get("tracker_table_name")

In [0]:
# Build the fully qualified three-level names (catalog.schema.table).
# Both tables live in the same catalog and schema.
_namespace = f"{target_catalog_name}.{target_schema_name}"
target_tbl = f"{_namespace}.{bronze_table_name}"
tracker_tbl = f"{_namespace}.{tracker_table_name}"

In [0]:
# Make sure the bronze table exists. It is declared with only the
# billing_period column and automatic clustering (CLUSTER BY AUTO).
create_bronze_sql = f"""CREATE TABLE IF NOT EXISTS {target_tbl}(
    billing_period     STRING)
    CLUSTER BY AUTO
    """
spark.sql(create_bronze_sql)

In [0]:
# Load one billing period into the bronze table and record the outcome in the
# tracker table: status 'BRONZE' on success, 'FAILED' (plus the error text) on
# any exception. The original exception is always re-raised so the job fails.
try:
    # Read all Parquet files for the specific billing period
    # e.g. s3://.../data/BILLING_PERIOD=2025-09/
    src = f"{source_location_data}BILLING_PERIOD={billing_period}/"
    df = (
        spark.read.parquet(src)
        # Stamp every row with the billing period; replaceWhere below
        # filters on this column.
        .withColumn("billing_period", F.lit(billing_period))
        # Keep only rows whose `user_vendor` resource tag is Databricks.
        .filter(F.col("resource_tags")["user_vendor"] == "Databricks")
    )

    # Overwrite only this billing period's rows in the target Delta table.
    # mergeSchema allows the write to add columns that the table lacks.
    (df.write.mode("overwrite")
       .option("mergeSchema", "true")
       .option("replaceWhere", f"billing_period = '{billing_period}'")
       .saveAsTable(target_tbl))

    # Update the tracker table to mark this billing period as loaded to bronze.
    # The billing period is bound as a named parameter rather than spliced
    # into the SQL text.
    spark.sql(
        f"""
      UPDATE {tracker_tbl}
      SET status='BRONZE',
          last_error=NULL,
          last_update=current_timestamp()
      WHERE billing_period = :billing_period
    """,
        args={"billing_period": billing_period},
    )
except Exception as e:
    # Keep at most 2000 characters of the error message. It is passed as a
    # bound parameter, so quotes or backslashes in the message cannot break
    # the UPDATE statement and no manual escaping is needed.
    err = str(e)[:2000]
    try:
        spark.sql(
            f"""
      UPDATE {tracker_tbl}
      SET status='FAILED',
          last_error=:last_error,
          last_update=current_timestamp()
      WHERE billing_period = :billing_period
    """,
            args={"last_error": err, "billing_period": billing_period},
        )
    except Exception as tracker_exc:
        # Recording the failure is best-effort: report the problem but do not
        # let it replace the original error raised below.
        print(f"WARNING: could not record failure in {tracker_tbl}: {tracker_exc}")
    # Re-raise the original load error so the workflow run is marked as failed.
    raise