In [0]:
from pyspark.sql import functions as F

# Build the gold product dimension: CRM product rows enriched with the
# matching ERP row, plus a generated surrogate key.

# Read the silver tables. The aliases let the join condition and the final
# projection qualify each column by its source table.
crm = spark.table('workspace.silver.crm_prd_info').alias('crm')
erp = spark.table('workspace.silver.erp_px_cat_g1v2').alias('erp')

# Left join on the standardized product key so every CRM row is kept even
# when no ERP row matches.
joined = crm.join(
    erp,
    F.col('crm.std_prd_key') == F.col('erp.std_ID'),
    how='left'
)

# Keep every CRM column except product_key, then add the ERP columns whose
# names are not already used by CRM. Names are compared case-insensitively
# because Spark resolves column names case-insensitively by default, so two
# names differing only in case would collide in the output schema.
crm_cols = [c for c in crm.columns if c != 'product_key']
crm_names_lower = {c.lower() for c in crm.columns}
erp_cols = [c for c in erp.columns if c.lower() not in crm_names_lower]

# Add a unique surrogate key.
# NOTE: monotonically_increasing_id() produces unique 64-bit values that are
# neither consecutive nor guaranteed to be the same when the plan is
# evaluated again, so the keys are regenerated on every overwrite.
result = joined.withColumn('gold_product_sk', F.monotonically_increasing_id())
result = result.select(
    F.col('gold_product_sk'),
    *[F.col(f'crm.{c}') for c in crm_cols],
    *[F.col(f'erp.{c}') for c in erp_cols]
)

# Write to the gold product dimension table.
target_table = "workspace.gold.dim_products"
result.write.format("delta").mode("overwrite").saveAsTable(target_table)

# Show what was actually persisted. Displaying `result` would evaluate the
# lazy plan a second time (re-running the join and regenerating the surrogate
# keys), so the rows shown might not match the rows stored in the table.
display(spark.table(target_table))