In [0]:
%sql
-- (Re)create the managed delta table that this notebook uses as its MERGE target.
-- CREATE OR REPLACE swaps in this definition if the table already exists.
CREATE OR REPLACE TABLE dev_bronze.test_schma_tmp.dim_employee (
  emp_id      INT,     -- key the MERGE statements in this notebook match on
  name        STRING,
  city        STRING,
  country     STRING,
  contact_no  INT      -- NOTE(review): INT drops leading zeros and caps at ~2.1e9; confirm it suits contact numbers
)

In [0]:
# Data
from pyspark.sql.types import *
from pyspark.sql.functions import *

# Column layout of the sample data: five nullable fields whose names and
# types mirror the dim_employee table definition.
schema = (
    StructType()
    .add("emp_id", IntegerType(), True)
    .add("name", StringType(), True)
    .add("city", StringType(), True)
    .add("country", StringType(), True)
    .add("contact_no", IntegerType(), True)
)

# A single sample employee row, with values in schema order.
data = [(1000, "Michael", "Columbus", "USA", 68084)]

# Build the source DataFrame from the row and schema, then render it.
df = spark.createDataFrame(data, schema)
display(df)

### Method 1: Using Spark SQL

In [0]:
# Register the DataFrame as the temp view `source_data` so the %sql MERGE
# cell can reference it by name.
df.createOrReplaceTempView("source_data")

# Sanity check: read the view back through Spark SQL and print its rows.
source_preview_df = spark.sql("Select * from source_data")
source_preview_df.show()

In [0]:
%sql
-- Upsert the rows of the source_data temp view into dim_employee, matching on emp_id.
MERGE INTO dev_bronze.test_schma_tmp.dim_employee AS target
USING source_data AS source
  ON source.emp_id = target.emp_id
-- emp_id already present in the target: overwrite the non-key columns.
WHEN MATCHED THEN
  UPDATE SET
    target.name       = source.name,
    target.city       = source.city,
    target.country    = source.country,
    target.contact_no = source.contact_no
-- emp_id absent from the target: insert the source row as-is.
-- If the columns mismatch between source and target, list them explicitly instead:
--   INSERT (emp_id, name, city, country, contact_no)
--   VALUES (emp_id, name, city, country, contact_no)
WHEN NOT MATCHED THEN
  INSERT *

### Method 2: Using PySpark Code

In [0]:
from delta.tables import DeltaTable

# Obtain a DeltaTable handle for the target table by its catalog.schema.table name.
target_table = DeltaTable.forName(
    spark,
    "dev_bronze.test_schma_tmp.dim_employee",
)

# Render the table's current contents.
target_snapshot_df = target_table.toDF()
display(target_snapshot_df)

In [0]:
# Upsert `df` into the target table, pairing rows on emp_id:
#   - rows that match are updated with all columns from the source,
#   - source rows without a match are inserted with all their columns.
upsert_all_columns = (
    target_table.alias("target")
    .merge(source=df.alias("source"), condition="target.emp_id == source.emp_id")
    .whenMatchedUpdateAll()
    .whenNotMatchedInsertAll()
)
upsert_all_columns.execute()

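OR, equivalently, spell out the column mappings explicitly instead of using `whenMatchedUpdateAll()` / `whenNotMatchedInsertAll()`: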

In [0]:
# Same upsert on emp_id, but with the target <- source column mappings
# written out rather than relying on the *All() shortcuts.

# Columns overwritten when a target row with the same emp_id exists.
non_key_assignments = {
    "target.name": "source.name",
    "target.city": "source.city",
    "target.country": "source.country",
    "target.contact_no": "source.contact_no",
}

# A new row additionally needs the key column; it is listed first so the
# mapping keeps the column order emp_id, name, city, country, contact_no.
insert_assignments = {"target.emp_id": "source.emp_id", **non_key_assignments}

(
    target_table.alias("target")
    .merge(source=df.alias("source"), condition="target.emp_id == source.emp_id")
    .whenMatchedUpdate(set=non_key_assignments)
    .whenNotMatchedInsert(values=insert_assignments)
    .execute()
)

In [0]:
# Clean-up: the predicate "True" holds for every row, so this delete
# empties the target table (the table itself is kept).
target_table.delete(condition="True")

In [0]:
# Print the target table's current rows to the cell output.
current_rows_df = target_table.toDF()
current_rows_df.show()