In [0]:
%python
from pyspark.sql import SparkSession
from pyspark.sql.functions import rand, col

In [0]:
%python
# In PySpark
spark = SparkSession.builder.getOrCreate()

# Create a source DataFrame
sourceDF = spark.createDataFrame([(1, 10), (2, 20), (3, 30)], ["key", "value"])

# Create a target DataFrame
targetDF = spark.createDataFrame([(1, 100), (2, 200), (4, 400)], ["key", "value"])

# Write the target DataFrame to a Delta table for demonstration
targetTable = "target_table"
sourceTable = "source_table"
sourceDF.write.saveAsTable(sourceTable)
targetDF.write.saveAsTable(targetTable)

source = spark.table("source_table")
target = spark.table("target_table")


In [0]:
%sql
-- Inspect every row of source_table, the table the Python setup cell writes
-- with saveAsTable.
SELECT * FROM source_table

In [0]:
%python
# Example merge operation using df.mergeInto
mergedDF = (
    spark.table("source_table")
    .mergeInto(
        "target_table",
        col("target_table.key") == col("source_table.key")
    )
    .whenNotMatched()
    .insertAll()
    .merge()
)

In [0]:
%python
# Render mergedDF, the name assigned by the preceding mergeInto cell.
display(mergedDF)

In [0]:
-- (Re)create the two tables used by the SQL MERGE examples.
-- CREATE OR REPLACE keeps this cell re-runnable: a plain CREATE TABLE fails
-- once the tables exist, and replacing them also resets both the rows and the
-- schema (a later cell adds new_column to source) to this starting state.
CREATE OR REPLACE TABLE source (
  key INT,
  value DOUBLE
);

CREATE OR REPLACE TABLE target (
  key INT,
  value DOUBLE
);

-- Source rows: key 1 also exists in target; keys 2 and 3 do not.
INSERT INTO source (key, value) VALUES
(1, 10.0),
(2, 20.0),
(3, 30.0);

-- Target rows: key 4 has no counterpart in source.
INSERT INTO target (key, value) VALUES
(1, 15.0),
(4, 40.0);

In [0]:
-- Upsert source into target with randomized values:
--   * keys present in both tables: target value becomes source value + rand()
--   * keys present only in source: inserted with rand() as the value
MERGE INTO target AS t
USING source AS s
  ON t.key = s.key
WHEN MATCHED THEN
  UPDATE SET t.value = s.value + rand()
WHEN NOT MATCHED THEN
  INSERT (key, value) VALUES (s.key, rand())

In [0]:
-- Widen the source schema with a string column that target does not have.
ALTER TABLE source ADD COLUMNS (new_column STRING);

-- Give every existing source row the same value in the new column.
UPDATE source
SET new_column = 'some value';

In [0]:
-- Full sync of target from source WITHOUT schema evolution: update matched
-- keys, insert keys missing from target, delete target rows with no source row.
-- NOTE(review): new_column is added to source only (ALTER TABLE source above);
-- target is created with just key and value, so this statement presumably
-- fails to resolve new_column on the target side. The next cell repeats the
-- merge WITH SCHEMA EVOLUTION -- TODO confirm the failure here is the intended
-- demonstration.
MERGE INTO target USING source
  ON source.key = target.key
  WHEN MATCHED THEN UPDATE SET value = source.value, new_column = source.new_column
  WHEN NOT MATCHED THEN INSERT (key, value, new_column) VALUES (source.key, source.value, source.new_column)
  WHEN NOT MATCHED BY SOURCE THEN DELETE

In [0]:
-- Full sync of target from source with schema evolution enabled, so the merge
-- may reference new_column even though target was created without it:
--   * matched keys: copy value and new_column from source
--   * keys only in source: insert the full source row
--   * keys only in target: delete the row
MERGE WITH SCHEMA EVOLUTION INTO target AS t
USING source AS s
  ON s.key = t.key
WHEN MATCHED THEN
  UPDATE SET value = s.value, new_column = s.new_column
WHEN NOT MATCHED THEN
  INSERT (key, value, new_column) VALUES (s.key, s.value, s.new_column)
WHEN NOT MATCHED BY SOURCE THEN
  DELETE