Quick tour of the `deltalake` Python package (delta-rs). Project documentation: https://delta-io.github.io/delta-rs/

In [1]:
import shutil

import pandas as pd
import polars as pl
from deltalake import DeltaTable, write_deltalake

In [2]:
# Start from a clean slate: this write uses the default mode, which refuses to
# write when the table already exists, so a "my_table" directory left over from
# a previous run would make this cell raise on Restart & Run All.
shutil.rmtree("my_table", ignore_errors=True)

# Initial commit: a single int64 column "x" with three rows.
df = pd.DataFrame({"x": [1, 2, 3]})
write_deltalake("my_table", df)

In [4]:
# Open the table that was just written and materialise it as a PyArrow table.
initial_snapshot = DeltaTable("my_table")
initial_snapshot.to_pyarrow_table()

pyarrow.Table
x: int64
----
x: [[1,2,3]]

In [22]:
# Keep a reusable handle to the table for the later cells that call methods on `dt`.
# NOTE(review): this cell's execution count (22) is far out of sequence with its
# neighbours — confirm the notebook still works on a top-to-bottom run.
dt = DeltaTable("my_table")

In [5]:
# Second batch of rows, committed on top of the data already in the table.
appended_rows = pd.DataFrame({"x": [4, 5, 6]})
write_deltalake("my_table", appended_rows, mode="append")

# Reopen the table and show its full contents as a pandas DataFrame.
table_after_append = DeltaTable("my_table")
table_after_append.to_pandas()

Unnamed: 0,x
0,4
1,5
2,6
3,1
4,2
5,3


In [None]:
# Replace the table contents; the overwrite is recorded as a new commit.
df = pd.DataFrame({"x": [4, 5, 6]})
write_deltalake("my_table", df, mode="overwrite")

# Read through a freshly opened handle. The existing `dt` object was loaded
# before this write and is not refreshed by write_deltalake("my_table", ...),
# so dt.to_pandas() would show a pre-overwrite snapshot on a top-to-bottom run.
DeltaTable("my_table").to_pandas()

Unnamed: 0,x
0,4
1,5
2,6


In [11]:
# Time travel: pin the handle to version 0 to see the rows from the initial write.
table_v0 = DeltaTable("my_table", version=0)
table_v0.to_pandas()

Unnamed: 0,x
0,1
1,2
2,3


In [10]:
# Time travel: version 1 is the state right after the append, with both batches present.
table_v1 = DeltaTable("my_table", version=1)
table_v1.to_pandas()

Unnamed: 0,x
0,4
1,5
2,6
3,1
4,2
5,3


In [None]:
# Show the table's schema as seen through the `dt` handle.
dt.schema()

Schema([Field(x, PrimitiveType("long"), nullable=True)])

In [None]:
# Refresh the handle first: in notebook order `dt` was opened before the
# append/overwrite commits, and the constraint would otherwise be applied
# against an outdated snapshot of the table.
dt.update_incremental()

# Add a table constraint named "gt_0" with the expression "x > 2".
# NOTE(review): the name reads as "greater than 0" while the expression checks
# "x > 2" — confirm which one is intended before renaming either.
dt.alter.add_constraint({"gt_0": "x > 2"})

In [17]:
# Read the change data feed for commits 1 through 2 (the append and the
# overwrite) and convert the resulting Arrow data to a Polars DataFrame.
# `pl` comes from the notebook's import cell at the top.
cdf_reader = DeltaTable("my_table").load_cdf(starting_version=1, ending_version=2)
df = pl.from_arrow(cdf_reader.read_all())
df

x,_change_type,_commit_version,_commit_timestamp
i64,str,i64,datetime[ms]
4,"""insert""",1,2025-02-24 02:30:34.604
5,"""insert""",1,2025-02-24 02:30:34.604
6,"""insert""",1,2025-02-24 02:30:34.604
4,"""insert""",2,2025-02-24 02:30:51.406
5,"""insert""",2,2025-02-24 02:30:51.406
…,…,…,…
5,"""delete""",2,2025-02-24 02:30:51.406
6,"""delete""",2,2025-02-24 02:30:51.406
1,"""delete""",2,2025-02-24 02:30:51.406
2,"""delete""",2,2025-02-24 02:30:51.406


In [18]:
# Count change-feed rows per commit version, then list the largest count first.
rows_per_commit = df.group_by("_commit_version").len()
rows_per_commit.sort("len", descending=True)

_commit_version,len
i64,u32
2,9
1,3


In [23]:
# List the table's commit history through the `dt` handle; the output below
# shows the most recent commit first.
dt.history()

[{'timestamp': 1740364406323,
  'operation': 'ADD CONSTRAINT',
  'operationParameters': {'expr': 'x > 2', 'name': 'gt_0'},
  'clientVersion': 'delta-rs.0.24.0',
  'version': 3},
 {'timestamp': 1740364251406,
  'operation': 'WRITE',
  'operationParameters': {'mode': 'Overwrite'},
  'clientVersion': 'delta-rs.0.24.0',
  'operationMetrics': {'execution_time_ms': 3,
   'num_added_files': 1,
   'num_added_rows': 3,
   'num_partitions': 0,
   'num_removed_files': 2},
  'version': 2},
 {'timestamp': 1740364234604,
  'operation': 'WRITE',
  'operationParameters': {'mode': 'Append'},
  'clientVersion': 'delta-rs.0.24.0',
  'operationMetrics': {'execution_time_ms': 12,
   'num_added_files': 1,
   'num_added_rows': 3,
   'num_partitions': 0,
   'num_removed_files': 0},
  'version': 1},
 {'timestamp': 1740363072317,
  'operation': 'WRITE',
  'operationParameters': {'mode': 'ErrorIfExists'},
  'operationMetrics': {'execution_time_ms': 165,
   'num_added_files': 1,
   'num_added_rows': 3,
   'num_pa

In [24]:
# Run vacuum for real (dry_run=False) and display whatever the call returns.
vacuum_result = dt.vacuum(dry_run=False)
vacuum_result

[]

In [27]:
# Delete every row matching the predicate and show the metrics the call reports.
delete_metrics = dt.delete("x > 5")
delete_metrics

{'num_added_files': 1,
 'num_removed_files': 1,
 'num_deleted_rows': 1,
 'num_copied_rows': 2,
 'execution_time_ms': 38,
 'scan_time_ms': 33,
 'rewrite_time_ms': 5}

In [28]:
# Display the table contents through `dt` after the delete.
dt.to_pandas()

Unnamed: 0,x
0,4
1,5


In [29]:
# Z-order the table's data on column "x" and show the optimize metrics.
zorder_metrics = dt.optimize.z_order(["x"])
zorder_metrics

{'numFilesAdded': 1,
 'numFilesRemoved': 1,
 'filesAdded': '{"avg":520.0,"max":520,"min":520,"totalFiles":1,"totalSize":520}',
 'filesRemoved': '{"avg":508.0,"max":508,"min":508,"totalFiles":1,"totalSize":508}',
 'partitionsOptimized': 0,
 'numBatches': 1,
 'totalConsideredFiles': 1,
 'totalFilesSkipped': 0,
 'preserveInsertionOrder': True}