# Time Travelling

Another advantage of Iceberg's metadata structure is that it gives us Time Travel for free. Since all we're doing is storing snapshots and moving pointers, time travelling is essentially just asking to see the data at a previous pointer. 

In [85]:
import sqlalchemy as sa
import polars as pl
from pyiceberg.catalog.rest import RestCatalog
import pyarrow.csv as pc

In [86]:
engine = sa.create_engine("trino://trino:@trino:8080/lakekeeper")
catalog = RestCatalog("lakekeeper", uri="http://lakekeeper:8181/catalog", warehouse="lakehouse")
house_prices_t = catalog.load_table("house_prices.raw")

def query(sql) -> pl.DataFrame:
    with engine.connect() as conn:
        return pl.read_database(sql, conn)

In [87]:
query('SELECT * FROM house_prices."raw$history" order by made_current_at')

made_current_at,snapshot_id,parent_id,is_current_ancestor
"datetime[μs, UTC]",i64,i64,bool
2025-05-10 20:30:04.592 UTC,6975771391456708666,,True
2025-05-10 20:32:54.982 UTC,304113966180957876,6.975771391456709e+18,True
2025-05-10 20:36:40.223 UTC,2383084872890757647,3.041139661809579e+17,True
2025-05-10 20:38:16.982 UTC,2675359615872477112,2.3830848728907576e+18,True


In [89]:
query('SELECT count(transaction_id) as num_rows FROM house_prices.raw')

num_rows
i64
2387888


In [90]:
query('SELECT count(transaction_id) as num_rows from house_prices.raw for version as of 6975771391456708666')

num_rows
i64
704344


In [91]:
house_prices_t.scan(snapshot_id=6975771391456708666, selected_fields=['transaction_id']).to_arrow().num_rows



704344

In [92]:
pl.scan_iceberg(house_prices_t, snapshot_id=6975771391456708666).select(pl.count("transaction_id")).collect()

transaction_id
u32
704344


In [7]:
# query("SELECT count(transaction_id) as num_rows from house_prices.raw for timestamp as of date '2025-04-25'")

ProgrammingError: (trino.exceptions.TrinoUserError) TrinoUserError(type=USER_ERROR, name=INVALID_ARGUMENTS, message="No version history table "house_prices".raw at or before 2025-04-25T00:00:00Z", query_id=20250510_185206_00003_bq5kn)
[SQL: SELECT count(transaction_id) as num_rows from house_prices.raw for timestamp as of date '2025-04-25']
(Background on this error at: https://sqlalche.me/e/20/f405)

Remembering these snapshot ids or pinpointing the exact time we're interested in is tricky for our human brains, so Iceberg supports tagging so that we can provide human-readable references to a given snapshot.

In [93]:
house_prices_t.manage_snapshots().create_tag(6975771391456708666, "initial commit").commit()

In [94]:
house_prices_t.inspect.refs()

pyarrow.Table
name: string not null
type: dictionary<values=string, indices=int32, ordered=0> not null
snapshot_id: int64 not null
max_reference_age_in_ms: int64
min_snapshots_to_keep: int32
max_snapshot_age_in_ms: int64
----
name: [["initial commit","main"]]
type: [  -- dictionary:
["TAG","BRANCH"]  -- indices:
[0,1]]
snapshot_id: [[6975771391456708666,2675359615872477112]]
max_reference_age_in_ms: [[null,null]]
min_snapshots_to_keep: [[null,null]]
max_snapshot_age_in_ms: [[null,null]]

In [95]:
query("SELECT count(transaction_id) as num_rows from house_prices.raw for version as of 'initial commit'")

num_rows
i64
704344


In [96]:
pl.scan_iceberg(house_prices_t, snapshot_id=house_prices_t.snapshot_by_name('initial commit').snapshot_id).select(pl.count('transaction_id')).collect()

transaction_id
u32
704344


In [99]:
with engine.connect() as conn:
    conn.execute(sa.text("ALTER TABLE house_prices.raw EXECUTE rollback_to_snapshot(6975771391456708666)")).fetchone()

None


```{warning}
The current schema of the table remains unchanged even if we rollback. Current schema is set to include the `_loaded_at` column we added earlier
```

In [100]:
pl.scan_iceberg(house_prices_t).limit(10).collect()

transaction_id,price,date_of_transfer,postcode,property_type,new_property,duration,paon,saon,street,locality,town,district,county,ppd_category_type,record_status,_loaded_at
str,i32,date,str,str,str,str,str,str,str,str,str,str,str,str,str,"datetime[μs, UTC]"
"""{06C9F487-D94B-9388-E063-4804A…",330000,2023-03-20,"""CF14 7BX""","""T""","""N""","""F""","""32""","""""","""HEOL PANT Y CELYN""","""""","""CARDIFF""","""CARDIFF""","""CARDIFF""","""A""","""A""",2025-05-10 20:35:34.635633 UTC
"""{06C9F487-D94C-9388-E063-4804A…",269950,2023-07-25,"""LL28 4SH""","""D""","""N""","""F""","""7""","""""","""MARSTON DRIVE""","""RHOS ON SEA""","""COLWYN BAY""","""CONWY""","""CONWY""","""A""","""A""",2025-05-10 20:35:34.635633 UTC
"""{06C9F487-D94D-9388-E063-4804A…",280000,2023-08-10,"""LL31 9BN""","""D""","""N""","""F""","""PLAS COLWYN""","""""","""LLYS HELYG""","""DEGANWY""","""CONWY""","""CONWY""","""CONWY""","""A""","""A""",2025-05-10 20:35:34.635633 UTC
"""{06C9F487-D94E-9388-E063-4804A…",699999,2023-08-24,"""SA62 6BA""","""D""","""N""","""F""","""MIDDLE LOCHVANE""","""""","""""","""PEN Y CWM""","""HAVERFORDWEST""","""PEMBROKESHIRE""","""PEMBROKESHIRE""","""A""","""A""",2025-05-10 20:35:34.635633 UTC
"""{06C9F487-D94F-9388-E063-4804A…",160000,2023-08-21,"""SY16 1QY""","""T""","""N""","""F""","""167""","""""","""LON DOLAFON""","""""","""NEWTOWN""","""POWYS""","""POWYS""","""A""","""A""",2025-05-10 20:35:34.635633 UTC
"""{06C9F487-D950-9388-E063-4804A…",203500,2023-09-15,"""LL29 8RB""","""S""","""N""","""F""","""23""","""""","""GLYN AVENUE""","""""","""COLWYN BAY""","""CONWY""","""CONWY""","""A""","""A""",2025-05-10 20:35:34.635633 UTC
"""{06C9F487-D951-9388-E063-4804A…",60000,2023-09-15,"""SA61 2TH""","""F""","""N""","""L""","""ST. JAMES COURT""","""FLAT 7""","""CURLEW CLOSE""","""""","""HAVERFORDWEST""","""PEMBROKESHIRE""","""PEMBROKESHIRE""","""A""","""A""",2025-05-10 20:35:34.635633 UTC
"""{06C9F487-D952-9388-E063-4804A…",215000,2023-05-05,"""SA2 0DR""","""T""","""N""","""F""","""144""","""""","""RHYDDINGS TERRACE""","""BRYNMILL""","""SWANSEA""","""SWANSEA""","""SWANSEA""","""A""","""A""",2025-05-10 20:35:34.635633 UTC
"""{06C9F487-D953-9388-E063-4804A…",130000,2023-04-28,"""SA6 6TW""","""T""","""N""","""F""","""19""","""""","""CWRT LLWYNOG""","""CWMRHYDYCEIRW""","""SWANSEA""","""SWANSEA""","""SWANSEA""","""A""","""A""",2025-05-10 20:35:34.635633 UTC
"""{06C9F487-D954-9388-E063-4804A…",153000,2023-07-13,"""SA69 9NS""","""F""","""N""","""L""","""5""","""""","""MONKSTONE COURT""","""""","""SAUNDERSFOOT""","""PEMBROKESHIRE""","""PEMBROKESHIRE""","""A""","""A""",2025-05-10 20:35:34.635633 UTC


In [101]:
house_prices_t.refresh().current_snapshot().snapshot_id

6975771391456708666

In [102]:
query('SELECT * FROM house_prices."raw$history" order by made_current_at')

made_current_at,snapshot_id,parent_id,is_current_ancestor
"datetime[μs, UTC]",i64,i64,bool
2025-05-10 20:30:04.592 UTC,6975771391456708666,,True
2025-05-10 20:32:54.982 UTC,304113966180957876,6.975771391456709e+18,False
2025-05-10 20:36:40.223 UTC,2383084872890757647,3.041139661809579e+17,False
2025-05-10 20:38:16.982 UTC,2675359615872477112,2.3830848728907576e+18,False


## Cleaning up

In [103]:
with engine.connect() as conn:
    # Remove snapshots and corresponding metadata
    conn.execute(sa.text("ALTER TABLE house_prices.raw EXECUTE expire_snapshots(retention_threshold => '0d')")).fetchone()
    # Remove orphaned files not referenced by metadata
    conn.execute(sa.text("ALTER table house_prices.raw execute remove_orphan_files(retention_threshold => '0d')")).fetchone()
    # Co-locate manifests based on partitioning
    conn.execute(sa.text("ALTER TABLE house_prices.raw EXECUTE optimize_manifests")).fetchone()
    # Compact small files into larger
    conn.execute(sa.text("ALTER table house_prices.raw execute optimize")).fetchone()