In [0]:
from pyspark.sql import Window
from pyspark.sql import functions as F

# Build the gold customer dimension from the silver CRM and ERP tables.
#
# CRM is the driving table: both joins below are left joins, so every CRM row
# is kept even when there is no matching ERP or location row.

# Read the silver tables. The aliases allow columns to be referenced as
# 'crm.<col>', 'erp.<col>' and 'loc.<col>' after the joins.
crm = spark.table('workspace.silver.crm_cust_info').alias('crm')
erp = spark.table('workspace.silver.erp_cust_az12').alias('erp')
loc = spark.table('workspace.silver.erp_loc_a101').alias('loc')

# Left join CRM with ERP and Location.
# NOTE(review): ERP is matched on crm.customer_id while Location is matched on
# crm.customer_key, although both right-hand columns are named std_CID.
# Confirm that these are the intended join keys.
joined = crm.join(
    erp,
    F.col('crm.customer_id') == F.col('erp.std_CID'),
    how='left'
).join(
    loc,
    F.col('crm.customer_key') == F.col('loc.std_CID'),
    how='left'
)

# Surrogate key ordering. monotonically_increasing_id() is non-deterministic
# (its values depend on partition IDs and are not consecutive), so the key is
# generated with row_number() over a fixed ordering instead. customer_key acts
# as a tie-breaker for equal customer_id values.
# A window without partitionBy moves all rows to a single partition, which is
# an accepted cost for a dimension-sized table.
sk_order = Window.orderBy(
    F.col('crm.customer_id'),
    F.col('crm.customer_key'),
)

# Add the surrogate key and project the gold columns.
result = joined.withColumn(
    # row_number() returns an integer; cast to long so the column type stays
    # the same as the one produced by monotonically_increasing_id().
    'gold_customer_sk', F.row_number().over(sk_order).cast('long')
).select(
    F.col('gold_customer_sk'),
    F.col('crm.customer_id').alias('gold_customer_id'),
    F.col('crm.customer_key').alias('gold_customer_key'),
    F.col('crm.first_name'),
    F.col('crm.last_name'),
    # CRM gender takes precedence; ERP gender is used only when CRM is null.
    F.coalesce(F.col('crm.gender'), F.col('erp.gender')).alias('gender'),
    F.col('crm.marital_status'),
    F.col('crm.created_date').alias('create_date'),
    F.col('erp.birth_date').alias('birth_date'),
    F.col('loc.country')
)

# Write to gold dimension table (full rebuild on every run).
gold_table = "workspace.gold.dim_customers"
result.write.format("delta").mode("overwrite").saveAsTable(gold_table)

# Show the persisted table rather than `result`: displaying `result` would
# re-execute the joins and the key generation instead of showing what was
# actually written.
display(spark.table(gold_table))