# Time Travel in Apache Iceberg


In [1]:
from pyspark.sql import SparkSession
import os

In [2]:
# Build the session step by step: name the application, point it at the
# cluster master at spark://spark:7077, then obtain the session (getOrCreate
# returns an already-active session when there is one).
# NOTE(review): the `ice` catalog queried below is not configured in this
# notebook; presumably it comes from the cluster's default Spark
# configuration -- confirm.
session_builder = SparkSession.builder.appName("Time Travel in Iceberg")
session_builder = session_builder.master("spark://spark:7077")
spark = session_builder.getOrCreate()

Let's view the table's snapshots. The data was written in two batches at different times, so we should see two snapshots.

In [3]:
# Read the table's `snapshots` metadata table, ordered oldest first, and
# print the full (untruncated) snapshot ids, commit times and operations.
snapshots_oldest_first = spark.sql("""
  SELECT snapshot_id, committed_at, operation
  FROM ice.demo.customers.snapshots
  ORDER BY committed_at
""")
snapshots_oldest_first.show(truncate=False)

+-------------------+-----------------------+---------+
|snapshot_id        |committed_at           |operation|
+-------------------+-----------------------+---------+
|5355358217514325757|2025-10-27 16:46:30.234|append   |
|6233251210790417521|2025-10-27 16:46:37.232|append   |
+-------------------+-----------------------+---------+



Now let's view the table's snapshot lineage: when each snapshot became current and which snapshot is its parent.

In [4]:
# Query the table's `history` metadata table: one row per snapshot with the
# time it became current, its parent snapshot id and whether it is an
# ancestor of the current snapshot. Rows are ordered by made_current_at.
history_query = """
  SELECT made_current_at, snapshot_id, parent_id, is_current_ancestor
  FROM ice.demo.customers.history
  ORDER BY made_current_at
"""
spark.sql(history_query).show(truncate=False)

+-----------------------+-------------------+-------------------+-------------------+
|made_current_at        |snapshot_id        |parent_id          |is_current_ancestor|
+-----------------------+-------------------+-------------------+-------------------+
|2025-10-27 16:46:30.234|5355358217514325757|NULL               |true               |
|2025-10-27 16:46:37.232|6233251210790417521|5355358217514325757|true               |
+-----------------------+-------------------+-------------------+-------------------+



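Now, let's go back in time by querying the table as of an earlier timestamp. The timestamp used below falls between the two commits, so the query should return the table as it was after the first snapshot.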

In [5]:
# Point in time to read the table at. Replace this with a value from your own
# table: it should be at or after the first snapshot's committed_at and
# before the second snapshot's committed_at, so that the query resolves to
# the first snapshot.
ts = "2025-10-27 16:46:30.236"

# Pin the session time zone to UTC before running the query.
# NOTE(review): the timestamp string above is presumably interpreted in the
# session time zone, which is why it is fixed here -- confirm.
spark.conf.set("spark.sql.session.timeZone", "UTC")

# Time-travel read: select all rows of the table as of `ts`.
time_travel_query = f"""
  SELECT *
  FROM ice.demo.customers
  TIMESTAMP AS OF '{ts}'
"""
df_ts = spark.sql(time_travel_query)
df_ts.show()

+---+-----------+-----------------+
| id|       name|            email|
+---+-----------+-----------------+
|  3|Carol Adams|carol@example.com|
|  1|Alice Smith|alice@example.com|
|  2|Bob Johnson|  bob@example.com|
+---+-----------+-----------------+



In [6]:
# List at most the five most recent snapshots, newest first, to inspect the
# snapshot list again after the time-travel read above.
latest_snapshots_query = """SELECT snapshot_id, committed_at, operation
FROM ice.demo.customers.snapshots
ORDER BY committed_at DESC
LIMIT 5;"""
spark.sql(latest_snapshots_query).show(truncate=False)

+-------------------+-----------------------+---------+
|snapshot_id        |committed_at           |operation|
+-------------------+-----------------------+---------+
|6233251210790417521|2025-10-27 16:46:37.232|append   |
|5355358217514325757|2025-10-27 16:46:30.234|append   |
+-------------------+-----------------------+---------+



In [7]:
# Stop the Spark session that was created at the top of the notebook.
spark.stop()