# Delta Lake Time Travel

In [1]:
import shutil

import pyspark
from delta import *

# Spark configuration for a local session with Delta Lake enabled.
conf = (
    pyspark.conf.SparkConf()
    .setAppName("MY_APP")  # replace with your desired name
    .setMaster("local[*]")  # replace * with a core count; * means use all cores
    # Delta Lake's SQL extension and catalog implementation
    .set("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
    .set("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog")
    .set("spark.sql.shuffle.partitions", "4")  # the default of 200 partitions is too many for local runs
)

# The application name is set once, in `conf`. A separate builder.appName(...)
# call would be overwritten by config(conf=conf), so it is not used here.
builder = pyspark.sql.SparkSession.builder.config(conf=conf)
spark = configure_spark_with_delta_pip(builder).getOrCreate()

In [3]:
# Build the demo table from scratch so that re-running this cell always yields
# exactly three versions (0, 1, 2). Without the cleanup, the first write below
# fails on a second run because the default save mode refuses to write to an
# existing path.
# NOTE(review): assumes the relative path resolves against the notebook's
# working directory for both Python and Spark (true for a local master) — confirm.
table_path = "tmp/some_nums"
shutil.rmtree(table_path, ignore_errors=True)

# version 0: create delta table with 3 rows
df = spark.range(0, 3)
df.repartition(1).write.format("delta").save(table_path)

# version 1: append 3 more rows
df = spark.range(8, 11)
df.repartition(1).write.mode("append").format("delta").save(table_path)

# version 2: overwrite the table contents with new data
df = spark.createDataFrame([(55,), (66,), (77,)]).toDF("id")
df.repartition(1).write.mode("overwrite").format("delta").save(table_path)

In [5]:
# With no time-travel option, the load returns the table's current contents.
spark.read.load("tmp/some_nums", format="delta").show()

+---+
| id|
+---+
| 55|
| 66|
| 77|
+---+



In [6]:
# read earlier version by timestamp
# The timestamp is looked up from the table's own commit history rather than
# hard-coded, so this cell keeps working after the table is re-created.
from delta.tables import DeltaTable

version_1_timestamp = (
    DeltaTable.forPath(spark, "tmp/some_nums")
    .history()
    .where("version = 1")
    # cast inside Spark so the string is rendered in the session time zone
    .selectExpr("CAST(timestamp AS STRING) AS ts")
    .first()["ts"]
)
spark.read.format("delta").option("timestampAsOf", version_1_timestamp).load("tmp/some_nums").show()

+---+
| id|
+---+
|  8|
|  9|
| 10|
|  0|
|  1|
|  2|
+---+



In [7]:
# Time travel by version number: version 0 is the table's first commit.
spark.read.load("tmp/some_nums", format="delta", versionAsOf="0").show()

+---+
| id|
+---+
|  0|
|  1|
|  2|
+---+



In [4]:
# List the table's commits (version number, commit time, operation type).
from delta.tables import DeltaTable

delta_table = DeltaTable.forPath(spark, "tmp/some_nums")
commit_log = delta_table.history()
commit_log.select("version", "timestamp", "operation").show(truncate=False)

+-------+-----------------------+---------+
|version|timestamp              |operation|
+-------+-----------------------+---------+
|2      |2024-06-11 14:57:52.721|WRITE    |
|1      |2024-06-11 14:57:40.59 |WRITE    |
|0      |2024-06-11 14:57:38.809|WRITE    |
+-------+-----------------------+---------+

