In [0]:
from pyspark.sql import functions as F
from pyspark.sql.window import Window
BRONZE_TBL = "bronze_product"
SILVER_TBL = "silver_product"

# Step 1: load the raw bronze table.
b = spark.read.table(BRONZE_TBL)

# Debug aid (disabled): display(spark.table("bronze_product"))

# Step 2: cast the two date columns with to_date, then derive `event_key`:
# the SHA-256 hex digest of the business-key columns (each cast to string)
# joined with "||". The key expression is attached last, so it hashes the
# already-converted PRODUCT_EVENT_DATE.
# NOTE(review): Spark's concat_ws skips NULL inputs, so two rows whose
# non-NULL key values form the same sequence but sit in different columns
# would get the same event_key -- confirm the key columns are never NULL.
_key_parts = [
    F.col(column_name).cast("string")
    for column_name in (
        "CLIENT_ID",
        "PRODUCT_CODE",
        "ACCOUNT_ID",
        "PRODUCT_EVENT_DATE",
        "PRODUCT_EVENT_TYPE",
    )
]
_event_key = F.sha2(F.concat_ws("||", *_key_parts), 256)

s = b.withColumn("PRODUCT_EVENT_DATE", F.to_date(F.col("PRODUCT_EVENT_DATE")))
s = s.withColumn("ROW_INSERT_DATE", F.to_date(F.col("ROW_INSERT_DATE")))
s = s.withColumn("event_key", _event_key)

# First run only: when the silver table is missing, seed it with a full,
# de-duplicated snapshot of `s` (one row per event_key, preferring the most
# recent ROW_INSERT_DATE; NULL dates rank last). The table is written in
# Delta format with overwrite mode.
if not spark.catalog.tableExists(SILVER_TBL):
    latest_first = F.col("ROW_INSERT_DATE").desc_nulls_last()
    per_event = Window.partitionBy(F.col("event_key")).orderBy(latest_first)

    # Rank rows inside each event_key and keep only the top-ranked one.
    ranked = s.withColumn("rn", F.row_number().over(per_event))
    seed_df = ranked.filter("rn=1").drop("rn")

    seed_writer = seed_df.write.format("delta").mode("overwrite")
    seed_writer.saveAsTable(SILVER_TBL)
# INCREMENTAL loading: only reprocess rows of `s` whose ROW_INSERT_DATE is
# later than (latest silver ROW_INSERT_DATE - lookback_days), so rows that
# arrive late but fall inside the lookback window are picked up again.
lookback_days = 2
max_dt = spark.sql(
    f"SELECT max(ROW_INSERT_DATE) as max_dt FROM {SILVER_TBL}"
).collect()[0]["max_dt"]

if max_dt is not None:
    # NOTE(review): rows with a NULL ROW_INSERT_DATE never satisfy this
    # comparison, so they are excluded from incremental runs -- confirm that
    # is intended.
    s_inc = s.filter(
        F.col("ROW_INSERT_DATE") > F.date_sub(F.lit(max_dt), lookback_days)
    )
else:
    # max() returned NULL (silver holds no non-NULL ROW_INSERT_DATE, e.g. it
    # is empty): process every row.
    s_inc = s

# Idempotent de-duplication: keep one row per event_key, preferring the most
# recent ROW_INSERT_DATE (NULL dates rank last). Uses the `Window` class
# imported at the top of the file rather than re-importing it dynamically.
# NOTE(review): there is no tie-breaker beyond ROW_INSERT_DATE, so when
# several rows share the latest date the surviving row is arbitrary.
w = F.row_number().over(
    Window.partitionBy(F.col("event_key")).orderBy(
        F.col("ROW_INSERT_DATE").desc_nulls_last()
    )
)
s_upsert = (
    s_inc
    .withColumn("rn", w)
    .filter("rn=1")
    .drop("rn")
)

# MERGE (serverless with temp view): register the de-duplicated batch as a
# temp view so the SQL MERGE statement below can reference it by name.
s_upsert.createOrReplaceTempView("product_event_upsert")

# Upsert into the silver table, matching on event_key:
#   * matched rows are overwritten only when the incoming ROW_INSERT_DATE is
#     strictly newer (NULL dates are treated as 1900-01-01 on both sides);
#   * unmatched rows are inserted as-is.
spark.sql(f"""
MERGE INTO {SILVER_TBL} t
USING product_event_upsert s
ON t.event_key=s.event_key
WHEN MATCHED AND coalesce(s.ROW_INSERT_DATE,date'1900-01-01') >coalesce(t.ROW_INSERT_DATE,date'1900-01-01') THEN UPDATE SET *
WHEN NOT MATCHED THEN INSERT *
          """)

# Show the merged result, newest ROW_INSERT_DATE first. Read through
# SILVER_TBL so the displayed table is always the one that was merged into.
display(spark.table(SILVER_TBL).orderBy(F.desc("ROW_INSERT_DATE")))