In [0]:
%python
# Databricks Notebook: gold_orders_summary
# Language: Python

# COMMAND ----------
# DBTITLE 1,Configuration
# Three-part (catalog.schema.table) identifiers: the silver table is read as
# the input, the gold table is where the aggregated summary is saved.
silver_table_name, gold_table_name = (
    "ordercatalog.silver_schema.silver_orders_processed",
    "ordercatalog.gold_schema.gold_orders_daily_summary",
)


# COMMAND ----------
# DBTITLE 1,Read from Silver Layer
# Load the silver table as a DataFrame; this is the sole input to the aggregation.
df_silver = spark.table(silver_table_name)

# COMMAND ----------
# DBTITLE 1,Perform Aggregations
# Namespaced import: pulling `sum` and `round` in by bare name would rebind
# Python's builtins of the same name for the rest of the notebook session.
from pyspark.sql import functions as F

# Daily summary: one output row per (order_date, order_status) pair.
# order_date is order_timestamp truncated to the start of its day, so it is
# still a timestamp value (midnight), not a DATE.
df_gold = df_silver.groupBy(
    F.date_trunc("day", F.col("order_timestamp")).alias("order_date"),
    F.col("order_status"),
).agg(
    # count(column) only counts rows where order_id is not NULL.
    # NOTE(review): if the silver table holds one row per line item, this is a
    # row count rather than a distinct-order count - confirm against the schema.
    F.count(F.col("order_id")).alias("total_orders"),
    F.round(F.sum(F.col("quantity")), 2).alias("total_quantity"),
    F.round(F.sum(F.col("quantity") * F.col("price")), 2).alias("total_revenue"),
    # Plain mean of the price column over the group's rows (not weighted by quantity).
    F.round(F.avg(F.col("price")), 2).alias("average_price_per_item"),
).orderBy("order_date", "order_status")

# COMMAND ----------
# DBTITLE 1,Write to Gold Delta Table
# Gold layer tables are often overwritten daily/periodically or appended for time-series data.
# For daily summaries, overwrite is common for idempotency.
# Full refresh: mode("overwrite") replaces the table contents on every run and
# overwriteSchema lets the table schema be replaced along with the data.
# The stored data is partitioned by order_date.
(
    df_gold.write
    .format("delta")
    .mode("overwrite")
    .option("overwriteSchema", "true")
    .partitionBy("order_date")
    .saveAsTable(gold_table_name)
)

# The message previously printed a stray "." directly before the table name.
print(f"Successfully aggregated and loaded data into gold table: {gold_table_name}")

# COMMAND ----------
# DBTITLE 1,Verify Gold Table
# spark.sql(f"SELECT * FROM {gold_table_name} ORDER BY order_date DESC, order_status LIMIT 10").display()
# spark.sql(f"SELECT COUNT(*) FROM {gold_table_name}").display()

In [0]:
-- Preview the gold daily summary table that the Python cell above writes to.
SELECT *
FROM ordercatalog.gold_schema.gold_orders_daily_summary