From 0e0082ceb54456289b29168cfdc89db71b2ad12f Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Fri, 3 Jul 2026 10:48:34 -0700 Subject: [PATCH] feat(transaction): support transform-based sort orders ReplaceSortOrderAction only supported sorting by a column's raw value: `asc`/`desc` hardcoded Transform::Identity and only accepted a column name. Java's SortOrderBuilder.asc/desc accept a Term, which can be a transform expression (bucket[N], year, truncate[W], ...). Add asc_with_transform / desc_with_transform. asc/desc are unchanged (same signature and behavior) and now delegate to the new methods with Transform::Identity, so no existing caller is affected. Transform-compatibility with the source column's type is checked at commit time via the existing SortOrder::builder().build(schema) -> check_compatibility path, matching the timing of Java's SortOrder.Builder.build() -- the spec-level type was already general (SortField.transform, check_compatibility already validate arbitrary transforms); only the transaction-layer API was narrow. Scope: this only extends the metadata-declaration API. Whether the write path sorts data to match a table's declared sort order -- for any transform, including the pre-existing identity case -- is a separate, pre-existing gap untouched here. Closes #2764 --- crates/iceberg/src/transaction/sort_order.rs | 126 +++++++++++++++++-- 1 file changed, 119 insertions(+), 7 deletions(-) diff --git a/crates/iceberg/src/transaction/sort_order.rs b/crates/iceberg/src/transaction/sort_order.rs index dfa1328c09..2ae5da38f6 100644 --- a/crates/iceberg/src/transaction/sort_order.rs +++ b/crates/iceberg/src/transaction/sort_order.rs @@ -31,6 +31,7 @@ use crate::{Error, ErrorKind, TableRequirement, TableUpdate}; #[derive(Debug, PartialEq, Eq, Clone)] struct PendingSortField { name: String, + transform: Transform, direction: SortDirection, null_order: NullOrder, } @@ -46,7 +47,7 @@ impl PendingSortField { Ok(SortField::builder() .source_id(field_id) - .transform(Transform::Identity) + .transform(self.transform) .direction(self.direction) .null_order(self.null_order) .build()) @@ -65,24 +66,58 @@ impl ReplaceSortOrderAction { } } - /// Adds a field for sorting in ascending order. + /// Adds a field for sorting in ascending order, sorting by the column's raw value + /// (an identity transform). To sort by a transform of the column instead (e.g. + /// `bucket[N]`, `year`, `truncate[W]`), use [`Self::asc_with_transform`]. pub fn asc(self, name: &str, null_order: NullOrder) -> Self { - self.add_sort_field(name, SortDirection::Ascending, null_order) + self.asc_with_transform(name, Transform::Identity, null_order) } - /// Adds a field for sorting in descending order. + /// Adds a field for sorting in descending order, sorting by the column's raw value + /// (an identity transform). To sort by a transform of the column instead (e.g. + /// `bucket[N]`, `year`, `truncate[W]`), use [`Self::desc_with_transform`]. pub fn desc(self, name: &str, null_order: NullOrder) -> Self { - self.add_sort_field(name, SortDirection::Descending, null_order) + self.desc_with_transform(name, Transform::Identity, null_order) + } + + /// Adds a field for sorting in ascending order by a transform of the column's value + /// (e.g. `Transform::Bucket(16)`, `Transform::Year`, `Transform::Truncate(4)`). + /// + /// Whether the transform is valid for the column's type is checked at commit time, + /// once the table schema is available (mirroring Java's `SortOrder.Builder.build()`). + pub fn asc_with_transform( + self, + name: &str, + transform: Transform, + null_order: NullOrder, + ) -> Self { + self.add_sort_field(name, transform, SortDirection::Ascending, null_order) + } + + /// Adds a field for sorting in descending order by a transform of the column's value + /// (e.g. `Transform::Bucket(16)`, `Transform::Year`, `Transform::Truncate(4)`). + /// + /// Whether the transform is valid for the column's type is checked at commit time, + /// once the table schema is available (mirroring Java's `SortOrder.Builder.build()`). + pub fn desc_with_transform( + self, + name: &str, + transform: Transform, + null_order: NullOrder, + ) -> Self { + self.add_sort_field(name, transform, SortDirection::Descending, null_order) } fn add_sort_field( mut self, name: &str, + transform: Transform, sort_direction: SortDirection, null_order: NullOrder, ) -> Self { self.pending_sort_fields.push(PendingSortField { name: name.to_string(), + transform, direction: sort_direction, null_order, }); @@ -135,10 +170,11 @@ impl TransactionAction for ReplaceSortOrderAction { mod tests { use as_any::Downcast; - use crate::spec::{NullOrder, SortDirection}; + use crate::ErrorKind; + use crate::spec::{NullOrder, SortDirection, Transform}; use crate::transaction::sort_order::{PendingSortField, ReplaceSortOrderAction}; use crate::transaction::tests::make_v2_table; - use crate::transaction::{ApplyTransactionAction, Transaction}; + use crate::transaction::{ApplyTransactionAction, Transaction, TransactionAction}; #[test] fn test_replace_sort_order() { @@ -159,14 +195,90 @@ mod tests { assert_eq!(replace_sort_order.pending_sort_fields, vec![ PendingSortField { name: String::from("x"), + transform: Transform::Identity, direction: SortDirection::Ascending, null_order: NullOrder::First, }, PendingSortField { name: String::from("y"), + transform: Transform::Identity, direction: SortDirection::Descending, null_order: NullOrder::Last, } ]); } + + #[test] + fn test_replace_sort_order_with_transform() { + let table = make_v2_table(); + let tx = Transaction::new(&table); + let replace_sort_order = tx.replace_sort_order(); + + let tx = replace_sort_order + .asc_with_transform("x", Transform::Bucket(16), NullOrder::First) + .desc_with_transform("y", Transform::Truncate(4), NullOrder::Last) + .apply(tx) + .unwrap(); + + let replace_sort_order = (*tx.actions[0]) + .downcast_ref::() + .unwrap(); + + assert_eq!(replace_sort_order.pending_sort_fields, vec![ + PendingSortField { + name: String::from("x"), + transform: Transform::Bucket(16), + direction: SortDirection::Ascending, + null_order: NullOrder::First, + }, + PendingSortField { + name: String::from("y"), + transform: Transform::Truncate(4), + direction: SortDirection::Descending, + null_order: NullOrder::Last, + } + ]); + } + + #[tokio::test] + async fn test_replace_sort_order_with_transform_commits() { + use std::sync::Arc; + + use crate::TableUpdate; + + let table = make_v2_table(); + let action = Arc::new(ReplaceSortOrderAction::new().asc_with_transform( + "x", + Transform::Bucket(16), + NullOrder::First, + )); + + let mut action_commit = TransactionAction::commit(action, &table).await.unwrap(); + let updates = action_commit.take_updates(); + + let sort_order = match &updates[0] { + TableUpdate::AddSortOrder { sort_order } => sort_order, + other => panic!("expected AddSortOrder, got {other:?}"), + }; + assert_eq!(sort_order.fields[0].transform, Transform::Bucket(16)); + } + + #[tokio::test] + async fn test_replace_sort_order_rejects_incompatible_transform() { + use std::sync::Arc; + + let table = make_v2_table(); + // `x` is a `long` column; `year` only accepts date/timestamp types. + let action = Arc::new(ReplaceSortOrderAction::new().asc_with_transform( + "x", + Transform::Year, + NullOrder::First, + )); + + let err = match TransactionAction::commit(action, &table).await { + Err(e) => e, + Ok(_) => panic!("year transform on a long column should be rejected"), + }; + assert_eq!(err.kind(), ErrorKind::Unexpected); + } }