In [0]:
# 03_gold_analytics
# Goal:
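# 1) Create an aggregated view of the Silver incidents.
# 2) Count tickets by status (the `state` column) and priority.
# 3) Save the result as a Delta table (the CSV alternative is not used in this notebook).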

from pyspark.sql import functions as F

# Table names: the Gold table this notebook produces and the Silver table it reads.
GOLD_TABLE = "gold_ticket_counts_by_state_priority"
SILVER_TABLE = "silver_servicenow_incidents"

# Load the Silver table and report its size as a quick sanity check.
# count() is a Spark action, so this line runs a job over the Silver table.
df_silver = spark.read.table(SILVER_TABLE)
silver_row_count = df_silver.count()
print("Silver rows:", silver_row_count)


Silver rows: 10000


In [0]:

# Aggregate: one row per (state, priority) pair with the number of tickets in it.
ticket_counts = (
    df_silver
    .groupBy("state", "priority")
    .agg(F.count("*").alias("ticket_count"))
)

# Largest groups first. Sorting on ticket_count alone leaves groups with equal
# counts in an arbitrary order that can change between runs, so state and
# priority are added as tie-breakers to make the row order deterministic.
df_gold = ticket_counts.orderBy(
    F.col("ticket_count").desc(),
    F.col("state").asc(),
    F.col("priority").asc(),
)

display(df_gold)


state,priority,ticket_count
In Progress,3,1591
Resolved,3,1141
In Progress,4,1067
Closed,3,897
Resolved,4,739
Closed,4,577
New,3,472
In Progress,5,364
In Progress,2,336
New,4,294


In [0]:
# Persist the aggregated counts as a Delta table registered under GOLD_TABLE.
# mode("overwrite") replaces the table's existing data each time this cell runs.
gold_writer = df_gold.write.format("delta").mode("overwrite")
gold_writer.saveAsTable(GOLD_TABLE)

print(f"Gold Delta table created: {GOLD_TABLE}")


Gold Delta table created: gold_ticket_counts_by_state_priority
