In [8]:
from pyspark.sql import functions as F

CSV_TABLE  = "invoices_csv"
GOLD_TABLE = "invoices_gold"

# 1. Ensure invoices_csv exists
if not spark.catalog.tableExists(CSV_TABLE):
    raise ValueError(f"❌ Required table `{CSV_TABLE}` not found. Run the CSV notebook first.")

# 2. Load CSV
df_csv = spark.table(CSV_TABLE)

# 3. Add record_quality + ingestion_timestamp
df_gold = (
    df_csv
    .withColumn(
        "record_quality",
        F.when(df_csv.total.isNotNull(), "high")
         .when(df_csv.invoice_number.isNotNull(), "medium")
         .otherwise("low")
    )
    .withColumn("ingestion_timestamp", F.current_timestamp())
)

# 4. Drop old Gold table if exists
spark.sql(f"DROP TABLE IF EXISTS {GOLD_TABLE}")

# 5. Save as invoices_gold
(df_gold.write
      .mode("overwrite")
      .option("overwriteSchema", "true")
      .saveAsTable(GOLD_TABLE))

print(f"✅ GOLD table created from `{CSV_TABLE}` → `{GOLD_TABLE}`")


StatementMeta(, e47bd06d-0868-4b34-a140-16ef3e6a8ebf, 10, Finished, Available, Finished)

✅ GOLD table created from `invoices_csv` → `invoices_gold`


In [12]:
spark.sql("""
SELECT supplier_name, SUM(total) AS total_spend
FROM invoices_gold
GROUP BY supplier_name
ORDER BY total_spend DESC
""").show()


StatementMeta(, e47bd06d-0868-4b34-a140-16ef3e6a8ebf, 14, Finished, Available, Finished)

+--------------------+-----------+
|       supplier_name|total_spend|
+--------------------+-----------+
|      Woodgrove Bank|     4120.0|
|    Contoso Supplies|    3550.75|
|Consolidated Mess...|     3500.0|
|  Lucerne Publishing|     3330.9|
|       Fabrikam Tech|     3290.0|
|A. Datum Corporation|     2780.3|
|     Adventure Works|     2399.5|
|Wide World Importers|    2175.25|
|    Southridge Video|     1990.0|
|   Northwind Traders|    1815.45|
|       Proseware Ltd|    1475.75|
|       Trey Research|     1425.0|
|       Fourth Coffee|     1250.4|
|       Tailspin Toys|      980.0|
|Blue Yonder Airlines|      710.9|
|        Wingtip Toys|      625.6|
|    Alpine Ski House|      590.0|
|         Coho Winery|     320.15|
+--------------------+-----------+

