From d41ccc1e1f2161fb5a4d51a82d984bfacd285587 Mon Sep 17 00:00:00 2001 From: charlesdong1991 Date: Sat, 9 May 2026 22:32:28 +0200 Subject: [PATCH] Support FixedSizeBinary in python --- bindings/python/src/utils.rs | 1 + bindings/python/test/test_kv_table.py | 3 +++ website/docs/user-guide/python/data-types.md | 1 + 3 files changed, 5 insertions(+) diff --git a/bindings/python/src/utils.rs b/bindings/python/src/utils.rs index 5efcf5e7..e0771397 100644 --- a/bindings/python/src/utils.rs +++ b/bindings/python/src/utils.rs @@ -57,6 +57,7 @@ impl Utils { ArrowDataType::Float64 => DataTypes::double(), ArrowDataType::Utf8 | ArrowDataType::LargeUtf8 => DataTypes::string(), ArrowDataType::Binary | ArrowDataType::LargeBinary => DataTypes::bytes(), + ArrowDataType::FixedSizeBinary(n) => DataTypes::binary(*n as usize), ArrowDataType::Date32 => DataTypes::date(), ArrowDataType::Date64 => DataTypes::date(), ArrowDataType::Time32(unit) => match unit { diff --git a/bindings/python/test/test_kv_table.py b/bindings/python/test/test_kv_table.py index 36aa3e46..39407375 100644 --- a/bindings/python/test/test_kv_table.py +++ b/bindings/python/test/test_kv_table.py @@ -358,6 +358,7 @@ async def test_all_supported_datatypes(connection, admin): pa.field("col_timestamp_ntz", pa.timestamp("us")), pa.field("col_timestamp_ltz", pa.timestamp("us", tz="UTC")), pa.field("col_bytes", pa.binary()), + pa.field("col_binary", pa.binary(16)), ] ), primary_keys=["pk_int"], @@ -385,6 +386,7 @@ async def test_all_supported_datatypes(connection, admin): "col_timestamp_ntz": datetime(2026, 1, 23, 10, 13, 47, 123000), "col_timestamp_ltz": datetime(2026, 1, 23, 10, 13, 47, 123000), "col_bytes": b"binary data", + "col_binary": b"binary_data_0123", } handle = upsert_writer.upsert(row_data) @@ -411,6 +413,7 @@ async def test_all_supported_datatypes(connection, admin): 2026, 1, 23, 10, 13, 47, 123000, tzinfo=timezone.utc ) assert result["col_bytes"] == b"binary data" + assert result["col_binary"] == b"binary_data_0123" # Test with null values for all nullable columns null_row = {"pk_int": 2} diff --git a/website/docs/user-guide/python/data-types.md b/website/docs/user-guide/python/data-types.md index 99677038..8e4371e2 100644 --- a/website/docs/user-guide/python/data-types.md +++ b/website/docs/user-guide/python/data-types.md @@ -12,6 +12,7 @@ The Python client uses PyArrow types for schema definitions: | `pa.float32()` / `float64()` | Float / Double | `float` | | `pa.string()` | String | `str` | | `pa.binary()` | Bytes | `bytes` | +| `pa.binary(n)` | Binary(n) | `bytes` | | `pa.date32()` | Date | `datetime.date` | | `pa.time32("ms")` | Time | `datetime.time` | | `pa.timestamp("us")` | Timestamp (NTZ) | `datetime.datetime` |