diff --git a/src/iceberg/test/partition_spec_test.cc b/src/iceberg/test/partition_spec_test.cc index 538d89a08..d50b53782 100644 --- a/src/iceberg/test/partition_spec_test.cc +++ b/src/iceberg/test/partition_spec_test.cc @@ -24,7 +24,9 @@ #include #include +#include +#include "iceberg/json_internal.h" #include "iceberg/partition_field.h" #include "iceberg/schema.h" #include "iceberg/schema_field.h" @@ -106,4 +108,54 @@ TEST(PartitionSpecTest, PartitionSchemaTest) { EXPECT_EQ(pt_field2.name(), partition_schema.value()->fields()[1].name()); EXPECT_EQ(pt_field2.field_id(), partition_schema.value()->fields()[1].field_id()); } + +TEST(PartitionSpecTest, PartitionTypeTest) { + nlohmann::json json = R"( + { + "spec-id": 1, + "fields": [ { + "source-id": 4, + "field-id": 1000, + "name": "ts_day", + "transform": "day" + }, { + "source-id": 1, + "field-id": 1001, + "name": "id_bucket", + "transform": "bucket[16]" + }, { + "source-id": 2, + "field-id": 1002, + "name": "id_truncate", + "transform": "truncate[4]" + } ] + })"_json; + + SchemaField field1(1, "id", int32(), false); + SchemaField field2(2, "name", string(), false); + SchemaField field3(3, "ts", timestamp(), false); + SchemaField field4(4, "ts_day", timestamp(), false); + SchemaField field5(5, "id_bucket", int32(), false); + SchemaField field6(6, "id_truncate", int32(), false); + auto const schema = std::make_shared( + std::vector{field1, field2, field3, field4, field5, field6}, + Schema::kInitialSchemaId); + + auto parsed_spec_result = PartitionSpecFromJson(schema, json); + ASSERT_TRUE(parsed_spec_result.has_value()) << parsed_spec_result.error().message; + + auto partition_schema = parsed_spec_result.value()->PartitionType(); + + SchemaField pt_field1(1000, "ts_day", date(), true); + SchemaField pt_field2(1001, "id_bucket", int32(), true); + SchemaField pt_field3(1002, "id_truncate", string(), true); + + ASSERT_TRUE(partition_schema.has_value()); + ASSERT_EQ(3, partition_schema.value()->fields().size()); + + EXPECT_EQ(pt_field1, partition_schema.value()->fields()[0]); + EXPECT_EQ(pt_field2, partition_schema.value()->fields()[1]); + EXPECT_EQ(pt_field3, partition_schema.value()->fields()[2]); +} + } // namespace iceberg diff --git a/src/iceberg/test/transform_test.cc b/src/iceberg/test/transform_test.cc index 90f7abb5b..79d25640b 100644 --- a/src/iceberg/test/transform_test.cc +++ b/src/iceberg/test/transform_test.cc @@ -134,7 +134,7 @@ TEST(TransformResultTypeTest, PositiveCases) { .expected_result_type = iceberg::int32()}, {.str = "day", .source_type = iceberg::timestamp(), - .expected_result_type = iceberg::int32()}, + .expected_result_type = iceberg::date()}, {.str = "hour", .source_type = iceberg::timestamp(), .expected_result_type = iceberg::int32()}, diff --git a/src/iceberg/transform.h b/src/iceberg/transform.h index e5a082355..d06e31f22 100644 --- a/src/iceberg/transform.h +++ b/src/iceberg/transform.h @@ -202,6 +202,11 @@ class ICEBERG_EXPORT TransformFunction { /// \brief Get the source type of transform function const std::shared_ptr& source_type() const; /// \brief Get the result type of transform function + /// + /// Note: This method defines both the physical and display representation of the + /// partition field. The physical representation must conform to the Iceberg spec. The + /// display representation can deviate from the spec, such as by transforming the value + /// into a more human-readable format. virtual std::shared_ptr ResultType() const = 0; friend bool operator==(const TransformFunction& lhs, const TransformFunction& rhs) { diff --git a/src/iceberg/transform_function.cc b/src/iceberg/transform_function.cc index e2f5ecec9..9213d2ce3 100644 --- a/src/iceberg/transform_function.cc +++ b/src/iceberg/transform_function.cc @@ -193,7 +193,7 @@ Result DayTransform::Transform(const Literal& literal) { return TemporalUtils::ExtractDay(literal); } -std::shared_ptr DayTransform::ResultType() const { return int32(); } +std::shared_ptr DayTransform::ResultType() const { return date(); } Result> DayTransform::Make( std::shared_ptr const& source_type) { diff --git a/src/iceberg/transform_function.h b/src/iceberg/transform_function.h index fc0dd7231..33712ca60 100644 --- a/src/iceberg/transform_function.h +++ b/src/iceberg/transform_function.h @@ -141,7 +141,11 @@ class ICEBERG_EXPORT DayTransform : public TransformFunction { /// \brief Extract a date or timestamp day, as days from 1970-01-01. Result Transform(const Literal& literal) override; - /// \brief Returns INT32 as the output type. + /// \brief Return the result type of a day transform. + /// + /// Note: The physical representation conforms to the Iceberg spec as DateType is + /// internally converted to int. The DateType returned here provides a more + /// human-readable way to display the partition field. std::shared_ptr ResultType() const override; /// \brief Create a DayTransform.