# In [0]:
"""
Purpose: Aggregate Silver data, calculate business KPIs, and add AI-driven Inventory Risk Score.
Layer: Gold (Business Ready)
"""

from pyspark.sql import SparkSession
from pyspark.sql.functions import col, sum, avg, when

# Reuse the active Spark session if one exists; otherwise start one named
# "GoldModeling".
spark = (
    SparkSession.builder
    .appName("GoldModeling")
    .getOrCreate()
)

# Load the Silver-layer inventory data from its Delta location.
silver_path = "/mnt/silver/supply_chain_inventory"
df_silver = spark.read.load(silver_path, format="delta")

# Gold KPIs: one output row per (Warehouse, Category) pair.
#   Total_Stock       - sum of stock_qty
#   Avg_Reorder_Level - mean of reorder_level
#   Avg_Lead_Time     - mean of lead_time_days
# NOTE: `sum` here is pyspark.sql.functions.sum (imported at the top of the
# file), not the Python builtin.
df_gold = df_silver.groupBy(col("Warehouse"), col("Category")).agg(
    sum(col("stock_qty")).alias("Total_Stock"),
    avg(col("reorder_level")).alias("Avg_Reorder_Level"),
    avg(col("lead_time_days")).alias("Avg_Lead_Time"),
)

# Inventory Risk Score: a rule-based label derived from Avg_Lead_Time.
#   Avg_Lead_Time > 10         -> "High"
#   5 <= Avg_Lead_Time <= 10   -> "Medium" (between() is inclusive on both ends)
#   Avg_Lead_Time < 5          -> "Low"
# Every band is an explicit condition and there is deliberately no
# otherwise(): when no branch matches, when() yields NULL. A group whose
# Avg_Lead_Time is NULL (no lead-time data in Silver) therefore gets a NULL
# score instead of being silently reported as "Low" risk.
df_gold = df_gold.withColumn(
    "Inventory_Risk_Score",
    when(col("Avg_Lead_Time") > 10, "High")
    .when(col("Avg_Lead_Time").between(5, 10), "Medium")
    .when(col("Avg_Lead_Time") < 5, "Low"),
)

# Sanity check: print the first 10 Gold rows without truncating wide values.
# NOTE: show() is a Spark action and df_gold is not cached, so the aggregation
# is evaluated here and again by the write below.
df_gold.show(n=10, truncate=False)

# Persist the Gold table in Delta format, replacing whatever data is already
# stored at the target path.
gold_path = "/mnt/gold/supply_chain_metrics"
df_gold.write.save(gold_path, format="delta", mode="overwrite")

# Register the Gold Delta location as a SQL table so it can be queried by name.
# The database is created first (no-op when it already exists): CREATE TABLE
# fails if supply_chain_db is missing, e.g. on a fresh workspace.
spark.sql("CREATE DATABASE IF NOT EXISTS supply_chain_db")

# IF NOT EXISTS keeps re-runs idempotent; the table only points at gold_path,
# so the overwrite above is what refreshes the data it serves.
spark.sql(f"""
    CREATE TABLE IF NOT EXISTS supply_chain_db.gold_inventory_kpi
    USING DELTA
    LOCATION '{gold_path}'
""")

# Completion marker for the job output. The trailing character is the
# check-mark emoji (U+2705); it had been mis-decoded into mojibake.
print("Gold Layer Aggregation & AI KPIs Complete ✅")
