In [0]:
from pyspark.sql import functions as F

# Source rows: only the records of the raw sheet-rows table whose
# `sheet_name` is "Facility_Monthly".
raw = spark.table("tp_finance.raw.sharepoint_sheet_rows").filter(
    F.col("sheet_name") == "Facility_Monthly"
)

# Column expression that parses the `row_json` column into a
# map<string,string>; it is only evaluated once selected from a DataFrame.
m = F.from_json(F.col("row_json"), "map<string,string>")

# Build the typed staging snapshot from the parsed row JSON.
#
#   1. Select the lineage columns plus the parsed JSON map (`m`).
#   2. Pull each business field out of the map: `month` is converted with
#      to_date and the four balance/movement amounts are cast to
#      decimal(18,2); the lineage columns are carried through unchanged.
#   3. Derive `record_hash` = SHA-256 over the "||"-joined identifying
#      fields (customer, contract, facility, month, closing balance, file hash).
#   4. Keep a single row per `record_hash` so the downstream insert-only
#      MERGE cannot insert the same hash more than once in one run.
snap = (
  raw.select(
      "source_system","source_path","file_sha256","load_date","source_modified_ts","ingestion_run_id","load_ts",
      m.alias("m")
  )
  .select(
      F.col("m")["customer_id"].alias("customer_id"),
      F.col("m")["contract_id"].alias("contract_id"),
      F.col("m")["facility_id"].alias("facility_id"),
      F.to_date(F.col("m")["month"]).alias("month"),
      F.col("m")["currency"].alias("currency"),
      F.col("m")["opening_balance"].cast("decimal(18,2)").alias("opening_balance"),
      F.col("m")["drawn_this_month"].cast("decimal(18,2)").alias("drawn_this_month"),
      F.col("m")["repaid_this_month"].cast("decimal(18,2)").alias("repaid_this_month"),
      F.col("m")["closing_balance"].cast("decimal(18,2)").alias("closing_balance"),
      "source_system","source_path","file_sha256","load_date","source_modified_ts","ingestion_run_id","load_ts"
  )
  .withColumn(
      "record_hash",
      # concat_ws skips NULL inputs entirely, which would shift the remaining
      # fields left and let different rows collide (e.g. a NULL customer_id
      # with contract "A" vs. customer "A" with a NULL contract_id). Every
      # input is therefore coalesced to "" so each field keeps its position.
      # Rows with no NULL inputs hash exactly as they would without the
      # coalesce, so only rows that had a NULL key/month get a different hash.
      F.sha2(F.concat_ws("||",
        F.coalesce(F.col("customer_id"), F.lit("")),
        F.coalesce(F.col("contract_id"), F.lit("")),
        F.coalesce(F.col("facility_id"), F.lit("")),
        F.coalesce(F.col("month").cast("string"), F.lit("")),
        F.coalesce(F.col("closing_balance").cast("string"), F.lit("")),
        F.coalesce(F.col("file_sha256"), F.lit(""))
      ), 256)
  )
  # An insert-only MERGE inserts every unmatched source row, so duplicates
  # inside the staging set would all be written. Which of the duplicate rows
  # survives is arbitrary; they agree on every field that feeds the hash.
  .dropDuplicates(["record_hash"])
)

# Expose the staging DataFrame to Spark SQL under the name `stg_snap`.
snap.createOrReplaceTempView("stg_snap")

# Insert-only merge: staging rows whose record_hash is not yet present in the
# bronze snapshot table are inserted; rows whose hash already exists are left
# untouched (there is no WHEN MATCHED clause).
merge_sql = """
MERGE INTO tp_finance.bronze.sp_facility_monthly_snapshot t
USING stg_snap s
ON t.record_hash = s.record_hash
WHEN NOT MATCHED THEN INSERT *
"""

spark.sql(merge_sql)