In [22]:
# Path of the Delta table that every cell in this notebook creates, reads, or modifies.
TABLE_NAME = "data/02-table"

In [25]:
# Start from a clean slate so the notebook can be re-run top to bottom:
# remove any table left over from a previous run. Pure Python instead of a
# shell escape keeps this portable and avoids unquoted shell interpolation.
import shutil

# ignore_errors=True mirrors `rm -rf`: a missing directory is not an error.
shutil.rmtree(TABLE_NAME, ignore_errors=True)

# Create Table with `polars`

## First write - values from 0 to 9

In [24]:
import polars as pl
# First write: create the Delta table with a single integer column "x"
# holding 0..9. The appends below add more "x" values and the later queries
# sort/filter on "x", so the initial schema must contain exactly this column.
df = pl.DataFrame({"x": range(10)})
df.write_delta(TABLE_NAME)

## Make 9 more writes: values from 10 to 99, 10 values per write

In [4]:
# Nine appends of ten consecutive integers each:
#   first=10 -> 10..19, first=20 -> 20..29, ..., first=90 -> 90..99
for first in range(10, 100, 10):
    batch = pl.DataFrame({"x": range(first, first + 10)})
    batch.write_delta(TABLE_NAME, mode="append")

In [9]:
# Lazily scan the whole table, then materialise it ordered by "x".
df = pl.scan_delta(TABLE_NAME)
df.sort(by="x").collect()

x
i64
0
1
2
3
4
…
95
96
97
98


## Read subset of the table

In [10]:
# Keep only rows with 50 <= x < 60; the predicate is applied on the lazy scan.
x = pl.col("x")
in_fifties = (x >= 50) & (x < 60)
pl.scan_delta(TABLE_NAME).filter(in_fifties).collect()

x
i64
50
51
52
53
54
55
56
57
58
59


# Delete Rows using `deltalake`

In [11]:
from deltalake import DeltaTable

# Open the table with deltalake and delete every row matching the SQL predicate;
# the call's return value (deletion metrics) is displayed as the cell output.
table = DeltaTable(TABLE_NAME)
table.delete(predicate="x >= 50 AND x < 70")

{'num_added_files': 0,
 'num_removed_files': 2,
 'num_deleted_rows': 20,
 'num_copied_rows': 0,
 'execution_time_ms': 34,
 'scan_time_ms': 29,
 'rewrite_time_ms': 5}

# Time Travel

In [None]:
# Time travel by version number: read the table as of commit version 2
# rather than its latest state.
df = pl.scan_delta(
    TABLE_NAME,
    version=2,
)
df.collect()

x
i64
20
21
22
23
24
…
5
6
7
8


In [17]:
from datetime import datetime, timedelta, timezone

# Time travel by timestamp: read the table as it was one minute ago.
# A timezone-aware UTC datetime is used so the cutoff is unambiguous and does
# not depend on how a naive local time would be interpreted downstream.
one_minute_ago = datetime.now(timezone.utc) - timedelta(minutes=1)
df = pl.scan_delta(TABLE_NAME, version=one_minute_ago)
df.collect()

x
i64
90
91
92
93
94
…
5
6
7
8


# Compacting

In [None]:
from deltalake import DeltaTable


# Re-open the table and compact its many small data files into fewer, larger
# ones; the returned metrics dict is displayed as the cell output.
table = DeltaTable(TABLE_NAME)
table.optimize.compact()

{'numFilesAdded': 1,
 'numFilesRemoved': 8,
 'filesAdded': '{"avg":713.0,"max":713,"min":713,"totalFiles":1,"totalSize":713}',
 'filesRemoved': '{"avg":551.75,"max":552,"min":550,"totalFiles":8,"totalSize":4414}',
 'partitionsOptimized': 1,
 'numBatches': 8,
 'totalConsideredFiles': 8,
 'totalFilesSkipped': 0,
 'preserveInsertionOrder': True}

# Vacuum

In [19]:
# Ask deltalake which files vacuum would remove and display that list.
# NOTE(review): the empty list is presumably because vacuum() defaults to a
# dry run with a multi-day retention window — confirm against the deltalake docs.
vacuum_result = table.vacuum()
vacuum_result

[]