In [0]:
from delta.tables import DeltaTable
from pyspark.sql.types import StructType, StructField, StringType, DoubleType, TimestampType
import time
import os

# Column layout of a transaction record. Every column is declared nullable.
schema_transacoes = (
    StructType()
    .add("transacao_id", StringType(), True)
    .add("timestamp", TimestampType(), True)
    .add("cliente_id", StringType(), True)
    .add("valor", DoubleType(), True)
    .add("status", StringType(), True)
)

# Delta table locations: `bronze` is loaded as the streaming source and
# `silver` is the table the micro-batches are merged into.
bronze = "/Volumes/workspace/bravium/bronze/transacao/"
silver = "/Volumes/workspace/bravium/silver/transacao/"

In [0]:
# Read the bronze Delta table incrementally as a stream.
# NOTE(review): "ignoreDeletes" is the Delta source option for tolerating
# delete-only commits in the source table -- confirm that matches how bronze
# is maintained.
df_bronze_stream = (
    spark.readStream
    .format("delta")
    .option("ignoreDeletes", "true")
    .load(bronze)
)

# The watermark must be declared BEFORE dropDuplicates: only then can the
# deduplication operator use it to expire old keys from its state store.
# Declared afterwards, the watermark is not seen by the dedup operator, so its
# state is never purged.
# Including the event-time column in the dedup key is what allows that purge.
# As a consequence, rows arriving more than 10 minutes behind the watermark
# are treated as late and dropped.
df_stream = (
    df_bronze_stream
    .withWatermark("timestamp", "10 minutes")
    .dropDuplicates(["transacao_id", "timestamp"])
)

In [0]:
def upsert_to_delta(batch_df, batch_id):
    """Upsert one streaming micro-batch into the silver Delta table.

    Rows are matched on ``transacao_id``: a matching target row is overwritten
    with all columns of the source row, a non-matching source row is inserted.

    The batch is first reduced to a single row per ``transacao_id`` (the one
    with the greatest ``timestamp``). Delta aborts a MERGE when several source
    rows match the same target row, which would otherwise happen whenever two
    versions of the same transaction land in one micro-batch.

    Args:
        batch_df: DataFrame holding the rows of the current micro-batch.
        batch_id: Micro-batch identifier passed by ``foreachBatch``; unused.

    Raises:
        Whatever ``DeltaTable.forPath`` raises when ``silver`` does not point
        at an existing Delta table -- the target must be created beforehand.
    """
    # Imported locally so the callback does not depend on extra notebook-level
    # imports being present.
    from pyspark.sql import Window, functions as F

    rank_col = "__merge_rank"
    # Descending order places null timestamps last, so a row with a real
    # timestamp wins over one without. Ties on timestamp are broken arbitrarily.
    newest_first = (
        Window.partitionBy("transacao_id")
        .orderBy(F.col("timestamp").desc())
    )
    latest_per_id = (
        batch_df
        .withColumn(rank_col, F.row_number().over(newest_first))
        .where(F.col(rank_col) == 1)
        .drop(rank_col)
    )

    # Use the session attached to the micro-batch rather than the notebook
    # global, so the callback works regardless of where it is executed.
    delta_table = DeltaTable.forPath(batch_df.sparkSession, silver)

    (
        delta_table.alias("t")
        .merge(
            latest_per_id.alias("s"),
            "t.transacao_id = s.transacao_id"
        )
        .whenMatchedUpdateAll()
        .whenNotMatchedInsertAll()
        .execute()
    )

# Configure the sink: each micro-batch is handed to `upsert_to_delta`, and
# progress is tracked in the checkpoint directory.
silver_writer = (
    df_stream.writeStream
    .foreachBatch(upsert_to_delta)
    .outputMode("update")
    .option("checkpointLocation", "/Volumes/workspace/bravium/silver/checkpoints/transacao")
    .trigger(availableNow=True)
)

# `availableNow` processes the data available at start and then stops, so
# blocking on the query here makes the cell finish when that backlog is done.
silver_query = silver_writer.start()
silver_query.awaitTermination()