## 0. Define the case classes

In [18]:
/** A plain key/value record.
  *
  * @param key   identifier of the record
  * @param value value stored under `key`
  */
case class Data(
  key: String,
  value: String
)

/** One entry of a change feed: either an update that carries a new value, or a delete.
  *
  * Invariant checked at construction: a delete carries no value (`newValue == null`)
  * and an update always carries one (`newValue != null`). A violation throws
  * `java.lang.AssertionError`.
  *
  * @param key      key the change applies to
  * @param newValue new value for `key`; must be `null` exactly when `deleted` is true
  * @param deleted  true when this change removes `key`
  * @param time     ordering of the change; larger means more recent
  */
case class ChangeData(
  key: String,
  newValue: String,
  deleted: Boolean,
  time: Long
) {
  // "has no value" must coincide with "is a delete".
  assert((newValue == null) == deleted)
}

## 1. Add sequence of changes

In [19]:
// Sample change feed, one row per change, with `time` giving the order of changes.
// The DataFrame itself is bound to `changeDataSource`; previously the val captured the
// `Unit` result of createOrReplaceTempView and could not be used as a data source.
val changeDataSource = Seq(
  ChangeData("a", "10", deleted = false, time = 0),
  ChangeData("a", null, deleted = true, time = 1),   // a was updated and then deleted
  ChangeData("b", null, deleted = true, time = 2),   // b was just deleted once
  ChangeData("c", null, deleted = true, time = 3),   // c was deleted and then updated twice
  ChangeData("c", "20", deleted = false, time = 4),
  ChangeData("c", "200", deleted = false, time = 5)
).toDF()

// Expose the change feed to SQL under the temp view name "changes".
changeDataSource.createOrReplaceTempView("changes")

## 2. Read changes into DF

In [20]:
// Load every column and row of the "changes" temp view into a DataFrame.
val changesDF = spark.sql("select * from changes")

## 3. Expression for only latest changes

In [21]:
// Keep only the most recent change per key.
// The non-key columns are packed into a struct with `time` as its FIRST field, so that
// max() over the struct picks the row with the greatest `time` (later fields only break ties).
// The struct is then expanded again, giving the columns: key, time, newValue, deleted.
val latestChangeForEachKey = changesDF
  .selectExpr("key", "struct(time, newValue, deleted) as otherCols")
  .groupBy("key")
  .agg(max("otherCols").as("latest"))
  .selectExpr("key", "latest.*")

// Print the result for inspection.
latestChangeForEachKey.show()

## 4. Temp view of latest changes

In [22]:
// Register the per-key latest changes as the temp view "lastchanges" so they can be queried with SQL.
// NOTE(review): step 5 saves a table under the same name "lastchanges"; an unqualified
// reference to `lastchanges` in SQL will presumably resolve to this temp view rather than
// the saved table — confirm this is intended.
latestChangeForEachKey.createOrReplaceTempView("lastchanges")

## 5. Save as a Delta table

In [23]:
// Persist the per-key latest changes as the Delta table "lastchanges",
// replacing any existing contents of that table.
// NOTE(review): a temp view with the same name is registered in step 4; unqualified SQL
// references to `lastchanges` presumably hit the temp view first — confirm or qualify the name.
latestChangeForEachKey.write
  .format("delta")
  .mode("overwrite")
  .saveAsTable("lastchanges")