In [0]:
import uuid
from pyspark.sql.functions import col, unix_timestamp, current_timestamp
from pyspark.sql.types import TimestampType

# --- Unity Catalog Volume root ---
# Replace with your actual Volume details
UC_ROOT = "/Volumes/ecommerce_audit/audit_schema/audit_volume/APAF_Capstone_Project"

# --- Define ALL Unity Catalog table names used by the Silver layer ---
bronze_table_name = "ecommerce_audit.audit_schema.bronze_price_requests"
silver_product_table_name = "ecommerce_audit.audit_schema.product_catalog_static"
silver_enriched_table_name = "ecommerce_audit.audit_schema.silver_enriched_events"

# --- Define Checkpoint Path (unique to the Silver stream, stable across runs) ---
# The checkpoint records which Bronze data the stream has already processed.
# A per-run random suffix (e.g. uuid4) would give every run an empty
# checkpoint, so the whole Bronze table would be re-read and re-appended to
# Silver as duplicates. The path is therefore keyed on the sink table name.
# To deliberately reprocess from scratch, delete this directory (and the
# Silver table) before re-running.
dbfs_checkpoint_silver = f"{UC_ROOT}/_checkpoints/silver/silver_enriched_events"

print("All required paths and variables defined for Silver Layer processing.")



from pyspark.sql.types import *

# --- Step 5: Static product catalog (Silver dimension table) ---
# Builds a small in-memory catalog of 20 mock products and overwrites the
# Unity Catalog Delta table with it on every run.

# Fully-qualified Unity Catalog name of the target table.
silver_product_table_name = "ecommerce_audit.audit_schema.product_catalog_static"

# Column layout of the catalog: product_id is declared non-nullable, the
# other two columns are nullable.
product_schema = StructType(
    [
        StructField("product_id", StringType(), False),
        StructField("category", StringType(), True),
        StructField("base_cost", IntegerType(), True),
    ]
)

# Mock rows, one per product: (product_id, category, base_cost).
products_data = [
    ("PROD_0001", "Electronics", 1500),
    ("PROD_0002", "Electronics", 80),
    ("PROD_0003", "Apparel", 25),
    ("PROD_0004", "Apparel", 300),
    ("PROD_0005", "HomeGoods", 120),
    ("PROD_0006", "HomeGoods", 45),
    ("PROD_0007", "Beauty", 65),
    ("PROD_0008", "Beauty", 20),
    ("PROD_0009", "Tools", 750),
    ("PROD_0010", "Tools", 30),
    ("PROD_0011", "Books", 15),
    ("PROD_0012", "Books", 50),
    ("PROD_0013", "Toys", 40),
    ("PROD_0014", "Toys", 10),
    ("PROD_0015", "Sports", 90),
    ("PROD_0016", "Sports", 180),
    ("PROD_0017", "Auto", 400),
    ("PROD_0018", "Auto", 200),
    ("PROD_0019", "Kitchen", 150),
    ("PROD_0020", "Kitchen", 250),
]

product_df = spark.createDataFrame(products_data, product_schema)

# Replace the table contents wholesale so the catalog always matches the
# rows listed above.
(
    product_df.write
    .format("delta")
    .mode("overwrite")
    .saveAsTable(silver_product_table_name)
)

print(f"✅ Step 5: Product Catalog created: {silver_product_table_name}")

#### Step 6: Silver Stream Transformation (Stream-Static Join)
#
# Reads the Bronze price-request table as a stream, parses the event time,
# joins every event to the static product catalog, derives the price markup,
# and appends the enriched rows to the Silver Delta table.

from pyspark.sql.functions import col, expr

# --- Paths ---
bronze_table_name = "ecommerce_audit.audit_schema.bronze_price_requests"
silver_product_table_name = "ecommerce_audit.audit_schema.product_catalog_static"
silver_enriched_table_name = "ecommerce_audit.audit_schema.silver_enriched_events"

# The checkpoint location must be unique to this stream but STABLE across
# runs: it is where Structured Streaming records which Bronze data has
# already been processed. A fresh random path per run (uuid4) would restart
# from an empty checkpoint every time, re-reading all of Bronze and appending
# duplicate rows to Silver. To deliberately reprocess from scratch, delete
# this directory (and the Silver table) before re-running.
dbfs_checkpoint_silver = f"{UC_ROOT}/_checkpoints/silver/silver_enriched_events"

# --- 1. Define Streaming Read (from Bronze Delta) ---
bronze_stream_df = spark.readStream.table(bronze_table_name)

# --- 2. Load Static Product Catalog ---
product_df_static = spark.read.table(silver_product_table_name)

# --- 3. Transformation and Join (PySpark) ---
silver_df = (
    bronze_stream_df
    # try_to_timestamp yields NULL instead of failing when a value does not
    # match the pattern, so malformed event times do not abort the stream.
    # NOTE(review): 'Z' is quoted and therefore matched as a literal, not as
    # a zone offset -- the result is interpreted in the session time zone.
    # Confirm the session time zone is UTC if these strings are UTC instants.
    .withColumn(
        "event_time_ts",
        expr("try_to_timestamp(event_time, \"yyyy-MM-dd'T'HH:mm:ss.SSSSSS'Z'\")")
    )
    # Inner join: events whose product_id is absent from the catalog are
    # dropped from the Silver output.
    .join(product_df_static, ["product_id"], "inner")
    # Markup is the requested price minus the catalog base cost.
    .withColumn("price_markup", col("request_price") - col("base_cost"))
    .select(
        col("request_id"),
        # Publish the parsed timestamp under the original column name.
        col("event_time_ts").alias("event_time"),
        col("user_id"),
        col("product_id"),
        col("category"),
        col("geo_cluster"),
        col("request_price"),
        col("price_markup")
    )
)

# --- 4. Write to Silver Delta Table ---
silver_query = (
    silver_df.writeStream
    .format("delta")
    .option("checkpointLocation", dbfs_checkpoint_silver)
    # availableNow processes everything currently available and then stops;
    # it supersedes the deprecated trigger(once=True).
    .trigger(availableNow=True)
    .queryName("Silver_Enrichment_Stream")
    .toTable(silver_enriched_table_name)
)

# Block until the incremental batch run has finished before reporting success.
silver_query.awaitTermination()

print(f"✅ Step 6: Silver Stream finished. Enriched data in: {silver_enriched_table_name}")

All required paths and variables defined for Silver Layer processing.
✅ Step 5: Product Catalog created: ecommerce_audit.audit_schema.product_catalog_static
✅ Step 6: Silver Stream finished. Enriched data in: ecommerce_audit.audit_schema.silver_enriched_events
