Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 11 additions & 3 deletions src/iceberg/sort_field.cc
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,18 @@ SortDirection SortField::direction() const { return direction_; }

NullOrder SortField::null_order() const { return null_order_; }

// Reports whether data ordered by this field is also ordered the way `other`
// requires. Equal fields trivially satisfy each other; otherwise the source id,
// direction, and null order must match and the decision is delegated to the
// transform's SatisfiesOrderOf().
bool SortField::Satisfies(const SortField& other) const {
  // Fast path: an identical field always satisfies.
  if (*this == other) {
    return true;
  }

  // The transform is only consulted when everything else lines up.
  const bool compatible = source_id_ == other.source_id() &&
                          direction_ == other.direction() &&
                          null_order_ == other.null_order();
  if (!compatible) {
    return false;
  }

  return transform_->SatisfiesOrderOf(*other.transform());
}

// Renders the field as "<transform>(<source_id>) <direction> <null_order>",
// e.g. "identity(1) asc nulls-first".
std::string SortField::ToString() const {
  return std::format("{}({}) {} {}", *transform_, source_id_, direction_, null_order_);
}

bool SortField::Equals(const SortField& other) const {
Expand Down
6 changes: 6 additions & 0 deletions src/iceberg/sort_field.h
Original file line number Diff line number Diff line change
Expand Up @@ -107,9 +107,15 @@ class ICEBERG_EXPORT SortField : public util::Formattable {
/// \brief Get the null order.
NullOrder null_order() const;

/// \brief Checks whether this field's order satisfies another field's order.
///
/// Returns true when the two fields compare equal. Otherwise the source id,
/// direction, and null order must all match, and this field's transform must
/// satisfy the order of the other field's transform.
bool Satisfies(const SortField& other) const;

std::string ToString() const override;

/// \brief Compare two sort fields for equality.
friend bool operator==(const SortField& lhs, const SortField& rhs) {
  // An object always equals itself; only distinct objects need Equals().
  return &lhs == &rhs || lhs.Equals(rhs);
}

Expand Down
33 changes: 31 additions & 2 deletions src/iceberg/sort_order.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include "iceberg/sort_order.h"

#include <format>
#include <ranges>

#include "iceberg/util/formatter.h" // IWYU pragma: keep

Expand All @@ -38,10 +39,38 @@ int32_t SortOrder::order_id() const { return order_id_; }

std::span<const SortField> SortOrder::fields() const { return fields_; }

// Reports whether data sorted by this order is also sorted as `other` requires.
// Only the leading fields of this order (as many as `other` has) are compared.
bool SortOrder::Satisfies(const SortOrder& other) const {
  // An unsorted order imposes no requirement, so every order satisfies it.
  if (other.is_unsorted()) {
    return true;
  }

  // With fewer sort fields than required there is nothing to match against.
  if (other.fields().size() > fields_.size()) {
    return false;
  }

  // Walk both field lists in lockstep; this order is at least as long as `other`,
  // so `mine` stays in range for every required field.
  auto mine = fields_.begin();
  for (const auto& required : other.fields_) {
    if (!mine->Satisfies(required)) {
      return false;
    }
    ++mine;
  }

  return true;
}

// Compares only the sort fields; order_id_ is deliberately left out of the
// comparison.
bool SortOrder::SameOrder(const SortOrder& other) const {
  return fields_ == other.fields_;
}

std::string SortOrder::ToString() const {
std::string repr = std::format("sort_order[order_id<{}>,\n", order_id_);
std::string repr = "[";
for (const auto& field : fields_) {
std::format_to(std::back_inserter(repr), " {}\n", field);
std::format_to(std::back_inserter(repr), "\n {}", field);
}
if (!fields_.empty()) {
repr.push_back('\n');
}
repr += "]";
return repr;
Expand Down
14 changes: 14 additions & 0 deletions src/iceberg/sort_order.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,20 @@ class ICEBERG_EXPORT SortOrder : public util::Formattable {
/// \brief Get the list of sort fields.
std::span<const SortField> fields() const;

/// \brief Returns true if the sort order is sorted.
///
/// A SortOrder is sorted if it has at least one sort field.
bool is_sorted() const { return !fields_.empty(); }

/// \brief Returns true if the sort order is unsorted.
///
/// A SortOrder is unsorted if it has no sort fields.
bool is_unsorted() const { return fields_.empty(); }

/// \brief Checks whether this order satisfies another order.
///
/// Any order satisfies an unsorted order. An order with fewer sort fields than
/// `other` never satisfies it. Otherwise every sort field of `other` must be
/// satisfied by the sort field at the same position in this order; the order
/// ids are not compared.
bool Satisfies(const SortOrder& other) const;

/// \brief Checks whether this order is equivalent to another order while ignoring the
/// order id.
///
/// Only the sort fields of the two orders are compared.
bool SameOrder(const SortOrder& other) const;

std::string ToString() const override;

friend bool operator==(const SortOrder& lhs, const SortOrder& rhs) {
Expand Down
29 changes: 21 additions & 8 deletions src/iceberg/test/sort_field_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -36,14 +36,8 @@ TEST(SortFieldTest, Basics) {
EXPECT_EQ(*transform, *field.transform());
EXPECT_EQ(SortDirection::kAscending, field.direction());
EXPECT_EQ(NullOrder::kFirst, field.null_order());
EXPECT_EQ(
"sort_field(source_id=1, transform=identity, direction=asc, "
"null_order=nulls-first)",
field.ToString());
EXPECT_EQ(
"sort_field(source_id=1, transform=identity, direction=asc, "
"null_order=nulls-first)",
std::format("{}", field));
EXPECT_EQ(field.ToString(), "identity(1) asc nulls-first");
EXPECT_EQ(std::format("{}", field), "identity(1) asc nulls-first");
}
}

Expand All @@ -67,4 +61,23 @@ TEST(SortFieldTest, Equality) {
ASSERT_NE(field1, field5);
ASSERT_NE(field5, field1);
}

// Verifies SortField::Satisfies against a baseline field and variants that each
// differ from the baseline in exactly one property.
TEST(SortFieldTest, Satisfies) {
  const auto bucket = Transform::Bucket(8);
  const auto identity = Transform::Identity();

  SortField baseline(1, bucket, SortDirection::kAscending, NullOrder::kFirst);
  SortField duplicate(1, bucket, SortDirection::kAscending, NullOrder::kFirst);
  SortField with_identity(1, identity, SortDirection::kAscending, NullOrder::kFirst);
  SortField descending(1, bucket, SortDirection::kDescending, NullOrder::kFirst);
  SortField nulls_last(1, bucket, SortDirection::kAscending, NullOrder::kLast);
  SortField other_source(2, bucket, SortDirection::kAscending, NullOrder::kFirst);

  // Identical fields satisfy each other.
  EXPECT_TRUE(baseline.Satisfies(duplicate));

  // Changing any single property breaks satisfaction.
  EXPECT_FALSE(baseline.Satisfies(with_identity));  // transform differs
  EXPECT_FALSE(baseline.Satisfies(descending));     // direction differs
  EXPECT_FALSE(baseline.Satisfies(nulls_last));     // null order differs
  EXPECT_FALSE(baseline.Satisfies(other_source));   // source_id differs
}

} // namespace iceberg
78 changes: 71 additions & 7 deletions src/iceberg/test/sort_order_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -48,13 +48,12 @@ TEST(SortOrderTest, Basics) {
ASSERT_EQ(st_field1, fields[0]);
ASSERT_EQ(st_field2, fields[1]);
auto sort_order_str =
"sort_order[order_id<100>,\n"
" sort_field(source_id=5, transform=identity, direction=asc, "
"null_order=nulls-first)\n"
" sort_field(source_id=7, transform=identity, direction=desc, "
"null_order=nulls-first)\n]";
EXPECT_EQ(sort_order_str, sort_order.ToString());
EXPECT_EQ(sort_order_str, std::format("{}", sort_order));
"[\n"
" identity(5) asc nulls-first\n"
" identity(7) desc nulls-first\n"
"]";
EXPECT_EQ(sort_order.ToString(), sort_order_str);
EXPECT_EQ(std::format("{}", sort_order), sort_order_str);
}
}

Expand Down Expand Up @@ -84,4 +83,69 @@ TEST(SortOrderTest, Equality) {
ASSERT_NE(sort_order1, sort_order5);
ASSERT_NE(sort_order5, sort_order1);
}

// The canonical unsorted order must report itself as unsorted and not sorted.
TEST(SortOrderTest, IsUnsorted) {
  auto unsorted_order = SortOrder::Unsorted();

  EXPECT_TRUE(unsorted_order->is_unsorted());
  EXPECT_FALSE(unsorted_order->is_sorted());
}

// An order with at least one sort field must report itself as sorted.
TEST(SortOrderTest, IsSorted) {
  auto identity_transform = Transform::Identity();
  SortField st_field1(5, identity_transform, SortDirection::kAscending,
                      NullOrder::kFirst);
  SortOrder sorted_order(100, {st_field1});

  EXPECT_TRUE(sorted_order.is_sorted());
  EXPECT_FALSE(sorted_order.is_unsorted());
}

// Exercises SortOrder::Satisfies for unsorted orders, identical orders, orders of
// different lengths, and orders whose fields differ.
TEST(SortOrderTest, Satisfies) {
  auto identity_transform = Transform::Identity();
  auto bucket_transform = Transform::Bucket(8);

  SortField st_field1(5, identity_transform, SortDirection::kAscending,
                      NullOrder::kFirst);
  SortField st_field2(7, identity_transform, SortDirection::kDescending,
                      NullOrder::kFirst);
  SortField st_field3(7, bucket_transform, SortDirection::kAscending, NullOrder::kFirst);

  SortOrder sort_order1(100, {st_field1, st_field2});
  SortOrder sort_order2(101, {st_field1});
  SortOrder sort_order3(102, {st_field1, st_field3});
  SortOrder sort_order4(104, {st_field2});
  auto unsorted = SortOrder::Unsorted();

  // Any order satisfies an unsorted order, including unsorted itself
  EXPECT_TRUE(unsorted->Satisfies(*unsorted));
  EXPECT_TRUE(sort_order1.Satisfies(*unsorted));
  EXPECT_TRUE(sort_order2.Satisfies(*unsorted));
  EXPECT_TRUE(sort_order3.Satisfies(*unsorted));

  // Unsorted does not satisfy any sorted order
  EXPECT_FALSE(unsorted->Satisfies(sort_order1));
  EXPECT_FALSE(unsorted->Satisfies(sort_order2));
  EXPECT_FALSE(unsorted->Satisfies(sort_order3));

  // A sort order satisfies itself
  EXPECT_TRUE(sort_order1.Satisfies(sort_order1));
  EXPECT_TRUE(sort_order2.Satisfies(sort_order2));
  EXPECT_TRUE(sort_order3.Satisfies(sort_order3));

  // A sort order with more fields satisfies one with fewer fields
  EXPECT_TRUE(sort_order1.Satisfies(sort_order2));
  EXPECT_TRUE(sort_order3.Satisfies(sort_order2));

  // A sort order does not satisfy one with more fields
  EXPECT_FALSE(sort_order2.Satisfies(sort_order1));
  EXPECT_FALSE(sort_order2.Satisfies(sort_order3));

  // A sort order does not satisfy one with different fields
  EXPECT_FALSE(sort_order4.Satisfies(sort_order2));
  EXPECT_FALSE(sort_order2.Satisfies(sort_order4));

  // Same length and same first field, but the second fields differ in
  // transform and direction, so neither order satisfies the other
  EXPECT_FALSE(sort_order1.Satisfies(sort_order3));
  EXPECT_FALSE(sort_order3.Satisfies(sort_order1));
}

} // namespace iceberg
121 changes: 121 additions & 0 deletions src/iceberg/test/transform_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -720,4 +720,125 @@ INSTANTIATE_TEST_SUITE_P(
.source = Literal::Null(iceberg::string()),
.expected = Literal::Null(iceberg::string())}));

// Table-driven check of Transform::PreservesOrder() for each transform that can
// be parsed from its string form.
TEST(TransformPreservesOrderTest, PreservesOrder) {
  struct Expectation {
    std::string name;
    bool preserves_order;
  };

  const std::vector<Expectation> expectations = {
      {"identity", true},    {"year", true},          {"month", true},
      {"day", true},         {"hour", true},          {"void", false},
      {"bucket[16]", false}, {"truncate[32]", true},
  };

  for (const auto& expectation : expectations) {
    auto parsed = TransformFromString(expectation.name);
    ASSERT_TRUE(parsed.has_value()) << "Failed to parse: " << expectation.name;

    EXPECT_EQ(parsed.value()->PreservesOrder(), expectation.preserves_order)
        << "Unexpected result for transform: " << expectation.name;
  }
}

// Table-driven check of Transform::SatisfiesOrderOf(): each row names the
// transform under test, the transform whose order it should (or should not)
// satisfy, and the expected answer.
TEST(TransformSatisfiesOrderOfTest, SatisfiesOrderOf) {
  struct Expectation {
    std::string subject;
    std::string target;
    bool satisfies;
  };

  const std::vector<Expectation> expectations = {
      // Identity satisfies all order-preserving transforms
      {"identity", "identity", true},
      {"identity", "year", true},
      {"identity", "month", true},
      {"identity", "day", true},
      {"identity", "hour", true},
      {"identity", "truncate[32]", true},
      {"identity", "bucket[16]", false},

      // Truncate satisfies Truncate with smaller width
      {"truncate[32]", "truncate[16]", true},
      {"truncate[16]", "truncate[16]", true},
      {"truncate[16]", "truncate[32]", false},
      {"truncate[16]", "bucket[32]", false},

      // Hour satisfies hour, day, month, and year
      {"hour", "hour", true},
      {"hour", "day", true},
      {"hour", "month", true},
      {"hour", "year", true},
      {"hour", "identity", false},
      {"hour", "bucket[16]", false},

      // Day satisfies day, month, and year
      {"day", "day", true},
      {"day", "month", true},
      {"day", "year", true},
      {"day", "hour", false},
      {"day", "identity", false},

      // Month satisfies month and year
      {"month", "month", true},
      {"month", "year", true},
      {"month", "day", false},
      {"month", "hour", false},

      // Year satisfies only year
      {"year", "year", true},
      {"year", "month", false},
      {"year", "day", false},
      {"year", "hour", false},

      // Void satisfies no order-preserving transforms
      {"void", "identity", false},
      {"void", "year", false},
      {"void", "month", false},
      {"void", "day", false},
      {"void", "hour", false},

      // Bucket satisfies only itself
      {"bucket[16]", "bucket[16]", true},
      {"bucket[16]", "bucket[32]", false},
      {"bucket[16]", "identity", false},
  };

  for (const auto& expectation : expectations) {
    auto subject = TransformFromString(expectation.subject);
    auto target = TransformFromString(expectation.target);

    ASSERT_TRUE(subject.has_value()) << "Failed to parse: " << expectation.subject;
    ASSERT_TRUE(target.has_value()) << "Failed to parse: " << expectation.target;

    EXPECT_EQ(subject.value()->SatisfiesOrderOf(*target.value()), expectation.satisfies)
        << "Unexpected result for transform: " << expectation.subject
        << " and other transform: " << expectation.target;
  }
}

} // namespace iceberg
Loading
Loading