# In [0]:
"""
Purpose: Clean, transform, and standardize Bronze data for Silver layer.
Layer: Silver (Cleaned/Enriched)
"""

from pyspark.sql import SparkSession
from pyspark.sql.functions import col, trim, lower

# Obtain the Spark session for this job (an already-running one is reused).
spark = (
    SparkSession.builder
    .appName("SilverTransformation")
    .getOrCreate()
)

# Load the Bronze-layer Delta table from its storage location.
bronze_path = "/mnt/bronze/supply_chain_inventory"
df_bronze = spark.read.load(bronze_path, format="delta")

# Data cleaning and standardization of the Bronze DataFrame.
#
# Steps, in order:
#   1. Drop rows whose "Product ID" or "Warehouse" is null.
#   2. Trim whitespace from "Product Name"; trim and lower-case "Category".
#   3. Rename the three numeric-looking columns to snake_case names.
#   4. Remove exact duplicate rows.
#
# De-duplication is deliberately the LAST step: running it before the text
# normalisation would keep rows that differ only by surrounding whitespace
# or letter case, which become identical once step 2 has been applied.
df_silver = (
    df_bronze
    .na.drop(subset=["Product ID", "Warehouse"])
    .withColumn("Product Name", trim(col("Product Name")))
    .withColumn("Category", lower(trim(col("Category"))))
    .withColumnRenamed("Stock Quantity", "stock_qty")
    .withColumnRenamed("Reorder Level", "reorder_level")
    .withColumnRenamed("Lead Time (Days)", "lead_time_days")
    .dropDuplicates()
)

# Preview the first five cleaned rows without truncating long values.
df_silver.show(5, truncate=False)

# Write the cleaned data to the Silver Delta location, replacing any
# previous contents at that path.
silver_path = "/mnt/silver/supply_chain_inventory"
df_silver.write.format("delta").mode("overwrite").save(silver_path)

# Register the Silver location as a SQL table.
# The target database is created first if it is missing; without this the
# CREATE TABLE statement below fails when the database does not exist yet.
spark.sql("CREATE DATABASE IF NOT EXISTS supply_chain_db")
spark.sql(f"""
    CREATE TABLE IF NOT EXISTS supply_chain_db.silver_inventory
    USING DELTA
    LOCATION '{silver_path}'
""")

# "\u2705" is the check-mark emoji; the original literal held its UTF-8
# bytes mis-decoded as cp1252. The escape keeps this file ASCII-safe.
print("Silver Layer Transformation Complete \u2705")
