In [0]:
# 01_bronze_ingest
# Goal:
# 1) Load raw data into a Bronze Delta table
# 2) Preserve schema as-is (no casting / no transformations)

from pyspark.sql import functions as F

# Names of the raw source table and of the Bronze target table.
SOURCE_TABLE = "workspace.default.servicenow_incidents_10_k"
BRONZE_TABLE = "bronze_servicenow_incidents"

# Read the raw table unchanged: no casting and no transformations.
df_raw = spark.table(SOURCE_TABLE)

# Ingestion metadata: a load timestamp and a source identifier.
# These extra columns help with lineage and debugging; they don't
# modify any of the source columns.
df_with_ts = df_raw.withColumn("_ingest_ts", F.current_timestamp())
df_bronze = df_with_ts.withColumn("_source_file", F.lit("upload_table"))

# Persist the result as a Delta table, overwriting any previous load.
bronze_writer = df_bronze.write.format("delta").mode("overwrite")
bronze_writer.saveAsTable(BRONZE_TABLE)

print("Bronze created:", BRONZE_TABLE)

Bronze created: bronze_servicenow_incidents


In [0]:
# Validate the load by printing the source schema and the Bronze schema
# so they can be compared. Bronze is expected to contain the source columns
# plus the ingestion metadata columns (_ingest_ts, _source_file).
# NOTE(review): no row-count comparison is performed in this cell --
# confirm it is done elsewhere, or add one.
def _show_schema(label, frame):
    """Print a heading followed by the schema tree of the given DataFrame."""
    print(label)
    frame.printSchema()


_show_schema("Source schema:", df_raw)

# Re-read the table by name so the schema shown is the persisted one.
_show_schema("Bronze schema:", spark.table(BRONZE_TABLE))

Source schema:
root
 |-- number: string (nullable = true)
 |-- sys_id: string (nullable = true)
 |-- opened_at: string (nullable = true)
 |-- closed_at: string (nullable = true)
 |-- state: string (nullable = true)
 |-- priority: long (nullable = true)
 |-- impact: long (nullable = true)
 |-- urgency: long (nullable = true)
 |-- severity: string (nullable = true)
 |-- category: string (nullable = true)
 |-- subcategory: string (nullable = true)
 |-- short_description: string (nullable = true)
 |-- description: string (nullable = true)
 |-- assignment_group: string (nullable = true)
 |-- assigned_to: string (nullable = true)
 |-- assigned_to_sys_id: string (nullable = true)
 |-- caller: string (nullable = true)
 |-- caller_sys_id: string (nullable = true)
 |-- channel: string (nullable = true)
 |-- location: string (nullable = true)
 |-- cmdb_ci: string (nullable = true)
 |-- sla_breached: boolean (nullable = true)
 |-- reopen_count: long (nullable = true)
 |-- u_source: string (nullabl