In [0]:
from pyspark.sql.functions import *
from pyspark.sql.types import *

# Step 1: Read from Bronze Table
bronze_df = spark.table("retail_demo.bronze_sales")

# Step 2: Base cleaning and deduplication
# Invalid rows are removed BEFORE de-duplicating: dropDuplicates keeps an
# arbitrary row per (InvoiceNo, StockCode) key, so de-duplicating first could
# retain a row that the null/range checks then discard, dropping a key that
# also had a perfectly valid row.
silver_df = (
    bronze_df
    # Drop rows with a null in any of the columns the later steps rely on.
    .dropna(subset=["InvoiceNo", "StockCode", "Description", "Quantity", "UnitPrice"])
    # Keep only strictly positive quantities and non-negative unit prices.
    .filter("Quantity > 0 AND UnitPrice >= 0")
    # One row per (InvoiceNo, StockCode) pair; which duplicate survives is
    # not guaranteed by Spark.
    .dropDuplicates(["InvoiceNo", "StockCode"])
)

# Step 3: Column renaming for consistency
# Maps each bronze column name to its silver name. withColumnRenamed is a
# no-op for a column that is not present in the frame.
# NOTE(review): "Quantity" is not part of this mapping, so it keeps its
# original capitalisation in the silver table -- confirm this is intended.
column_renames = {
    "InvoiceNo": "invoice_id",
    "StockCode": "stockcode",
    "Description": "description",
    "InvoiceDate": "invoice_date",
    "UnitPrice": "unit_price",
    "CustomerID": "customer_id",
    "Country": "country",
}
for source_name, target_name in column_renames.items():
    silver_df = silver_df.withColumnRenamed(source_name, target_name)

# Step 4: Derived columns (transformations)
silver_df = (
    silver_df
    # Normalised description: trimmed, lower-cased, then every character that
    # is not a-z, 0-9 or a space is removed.
    .withColumn("description_clean", regexp_replace(lower(trim(col("description"))), "[^a-z0-9 ]", ""))
    # Parse the raw date string into a timestamp.
    # NOTE(review): assumes invoice_date strings match "M/d/yyyy H:mm"
    # (e.g. 12/1/2010 8:26) -- confirm against the bronze data.
    .withColumn("invoice_ts", to_timestamp("invoice_date", "M/d/yyyy H:mm"))
    .withColumn("hour", hour("invoice_ts"))
    # "E" yields the abbreviated day name (e.g. Mon).
    .withColumn("weekday", date_format("invoice_ts", "E"))
    .withColumn("week_number", weekofyear("invoice_ts"))
    # Spark's dayofweek returns 1 for Sunday through 7 for Saturday, so the
    # weekend is {1, 7}. The previous ">= 6" test flagged Friday/Saturday and
    # missed Sunday. A null invoice_ts still yields a null flag.
    .withColumn("is_weekend", dayofweek("invoice_ts").isin(1, 7).cast("int"))
    # `round` here is pyspark.sql.functions.round (via the star import), not
    # the Python builtin; rounded to 2 decimals for readability.
    .withColumn("total_sales", round(col("Quantity") * col("unit_price"), 2))
)

# Step 5: Save to Silver Table
# The table is fully replaced on every run (mode "overwrite").
# NOTE(review): with overwrite mode, Delta's "overwriteSchema" is the option
# that replaces the table schema; "mergeSchema" only allows additive schema
# changes -- confirm which behaviour is wanted here.
silver_table_name = "retail_demo.silver_sales"
silver_writer = (
    silver_df.write
    .format("delta")
    .option("mergeSchema", "true")
    .mode("overwrite")
)
silver_writer.saveAsTable(silver_table_name)

# Step 6: Display sample
# Read back from the saved table rather than from silver_df, so the output
# reflects what was actually written.
display(spark.table(silver_table_name))

In [0]:
# silver_df.select(hour("invoice_ts").alias("hour")).distinct().orderBy("hour").display()

In [0]:
# silver_df.select("invoice_date", "week_number").distinct().orderBy("invoice_date").display()

In [0]:
%sql
-- SELECT * FROM retail_demo.silver_sales LIMIT 10