In [0]:
import os
import time

from delta.tables import DeltaTable
from pyspark.sql import Window, functions as F
from pyspark.sql.types import StructType, StructField, StringType, DoubleType, TimestampType

# Declared layout of a "cliente" record: three string attributes plus the
# creation timestamp. Every column is nullable.
schema_cliente = (
    StructType()
    .add("cliente_id", StringType(), True)
    .add("nome", StringType(), True)
    .add("estado", StringType(), True)
    .add("data_criacao", TimestampType(), True)
)

# Volume locations for the "cliente" entity: `bronze` is the path the streaming
# read loads from; `silver` is the silver-layer path for the same entity.
bronze, silver = (
    "/Volumes/workspace/bravium/bronze/cliente/",
    "/Volumes/workspace/bravium/silver/cliente/",
)

In [0]:
# Stream the bronze "cliente" Delta table and drop repeated records.
#
# The watermark is declared BEFORE dropDuplicates: Structured Streaming can only
# use a watermark to expire deduplication state when it is already defined on
# the DataFrame being deduplicated and the event-time column is part of the
# dedup key. Declaring it afterwards leaves the dedup state unbounded.
#
# The key columns follow schema_cliente declared in this notebook; the earlier
# "transacao_id" / "timestamp" names are not part of that schema.
# NOTE(review): confirm the bronze table really exposes cliente_id / data_criacao.
df_stream = (
    spark.readStream
    .format("delta")
    # Keep the stream running when upstream transactions delete data.
    .option("ignoreDeletes", "true")
    .load(bronze)
    .withWatermark("data_criacao", "10 minutes")
    .dropDuplicates(["cliente_id", "data_criacao"])
)

In [0]:
def upsert_to_delta(batch_df, batch_id):
    """Upsert one micro-batch of "cliente" rows into the silver Delta table.

    Intended to be used as a ``foreachBatch`` sink. The merge targets the Delta
    table stored at the ``silver`` path (not a catalog table literally named
    ``silver``) and runs on the micro-batch's own Spark session, so it does not
    depend on a temp view being visible from the notebook-global session.

    Before merging, the batch is reduced to the newest row per ``cliente_id``
    (by ``data_criacao``): a Delta MERGE fails when several source rows match
    the same target row, and would insert duplicates for brand-new keys.

    If no Delta table exists yet at ``silver`` (first run), the batch is written
    there to create it instead of merging.

    NOTE(review): the key/timestamp column names follow ``schema_cliente``
    declared in this notebook — confirm they match the bronze table.

    Args:
        batch_df: DataFrame holding the rows of the current micro-batch.
        batch_id: Identifier of the micro-batch, supplied by ``foreachBatch``;
            not used here.
    """
    session = batch_df.sparkSession

    # Rank the rows of each cliente newest-first; rows without a timestamp go
    # last. Ties on data_criacao are broken arbitrarily.
    newest_first = Window.partitionBy("cliente_id").orderBy(
        F.col("data_criacao").desc_nulls_last()
    )
    latest_rows = (
        batch_df
        .withColumn("_row_rank", F.row_number().over(newest_first))
        .filter(F.col("_row_rank") == 1)
        .drop("_row_rank")
    )

    # First run: nothing to merge into yet, so create the table from this batch.
    if not DeltaTable.isDeltaTable(session, silver):
        latest_rows.write.format("delta").mode("append").save(silver)
        return

    (
        DeltaTable.forPath(session, silver).alias("t")
        .merge(latest_rows.alias("s"), "t.cliente_id = s.cliente_id")
        .whenMatchedUpdateAll()
        .whenNotMatchedInsertAll()
        .execute()
    )

# Start the streaming write and block this cell until it finishes.
# Each micro-batch is handed to upsert_to_delta; progress is tracked in the
# checkpoint directory so a re-run resumes where the previous one stopped.
(
    df_stream.writeStream
    .outputMode("update")
    # availableNow: process the data available at start, then stop.
    .trigger(availableNow=True)
    .option("checkpointLocation", "/Volumes/workspace/bravium/silver/checkpoints/cliente")
    .foreachBatch(upsert_to_delta)
    .start()
    .awaitTermination()
)