diff --git a/example/demo_example.cc b/example/demo_example.cc index aa63826b0..6f2cb1380 100644 --- a/example/demo_example.cc +++ b/example/demo_example.cc @@ -19,11 +19,13 @@ #include -#include "iceberg/avro/avro_reader.h" +#include "iceberg/avro/avro_register.h" #include "iceberg/file_reader.h" +#include "iceberg/parquet/parquet_register.h" int main() { - iceberg::avro::AvroReader::Register(); + iceberg::avro::RegisterAll(); + iceberg::parquet::RegisterAll(); auto open_result = iceberg::ReaderFactoryRegistry::Open( iceberg::FileFormatType::kAvro, {.path = "non-existing-file.avro"}); if (!open_result.has_value()) { diff --git a/src/iceberg/CMakeLists.txt b/src/iceberg/CMakeLists.txt index 587b1596a..316b02cee 100644 --- a/src/iceberg/CMakeLists.txt +++ b/src/iceberg/CMakeLists.txt @@ -109,11 +109,12 @@ if(ICEBERG_BUILD_BUNDLE) avro/avro_data_util.cc avro/avro_reader.cc avro/avro_writer.cc - avro/avro_schema_util.cc avro/avro_register.cc + avro/avro_schema_util.cc avro/avro_stream_internal.cc parquet/parquet_data_util.cc parquet/parquet_reader.cc + parquet/parquet_register.cc parquet/parquet_schema_util.cc) # Libraries to link with exported libiceberg_bundle.{so,a}. diff --git a/src/iceberg/avro/avro_reader.cc b/src/iceberg/avro/avro_reader.cc index c15eb66b3..048cd4997 100644 --- a/src/iceberg/avro/avro_reader.cc +++ b/src/iceberg/avro/avro_reader.cc @@ -34,6 +34,7 @@ #include "iceberg/arrow/arrow_error_transform_internal.h" #include "iceberg/arrow/arrow_fs_file_io_internal.h" #include "iceberg/avro/avro_data_util_internal.h" +#include "iceberg/avro/avro_register.h" #include "iceberg/avro/avro_schema_util_internal.h" #include "iceberg/avro/avro_stream_internal.h" #include "iceberg/name_mapping.h" @@ -247,7 +248,7 @@ Status AvroReader::Open(const ReaderOptions& options) { Status AvroReader::Close() { return impl_->Close(); } -void AvroReader::Register() { +void RegisterReader() { static ReaderFactoryRegistry avro_reader_register( FileFormatType::kAvro, []() -> Result> { return std::make_unique(); }); diff --git a/src/iceberg/avro/avro_reader.h b/src/iceberg/avro/avro_reader.h index a5fc91489..07737bb7b 100644 --- a/src/iceberg/avro/avro_reader.h +++ b/src/iceberg/avro/avro_reader.h @@ -39,9 +39,6 @@ class ICEBERG_BUNDLE_EXPORT AvroReader : public Reader { Result Schema() final; - /// \brief Register this Avro reader implementation. - static void Register(); - private: class Impl; std::unique_ptr impl_; diff --git a/src/iceberg/avro/avro_register.cc b/src/iceberg/avro/avro_register.cc index a969948cd..07efac381 100644 --- a/src/iceberg/avro/avro_register.cc +++ b/src/iceberg/avro/avro_register.cc @@ -24,13 +24,16 @@ namespace iceberg::avro { void RegisterLogicalTypes() { - static std::once_flag flag{}; - std::call_once(flag, []() { - // Register the map logical type with the avro custom logical type registry. - // See https://github.com/apache/avro/pull/3326 for details. - ::avro::CustomLogicalTypeRegistry::instance().registerType( - "map", [](const std::string&) { return std::make_shared(); }); - }); + // Register the map logical type with the avro custom logical type registry. + // See https://github.com/apache/avro/pull/3326 for details. + ::avro::CustomLogicalTypeRegistry::instance().registerType( + "map", [](const std::string&) { return std::make_shared(); }); +} + +void RegisterAll() { + RegisterLogicalTypes(); + RegisterReader(); + RegisterWriter(); } } // namespace iceberg::avro diff --git a/src/iceberg/avro/avro_register.h b/src/iceberg/avro/avro_register.h index ce6510757..9404eb9d6 100644 --- a/src/iceberg/avro/avro_register.h +++ b/src/iceberg/avro/avro_register.h @@ -19,10 +19,23 @@ #pragma once +/// \file iceberg/avro/avro_register.h +/// \brief Provide functions to register Avro implementations. + #include "iceberg/iceberg_bundle_export.h" namespace iceberg::avro { +/// \brief Register all the logical types. ICEBERG_BUNDLE_EXPORT void RegisterLogicalTypes(); +/// \brief Register Avro reader implementation. +ICEBERG_BUNDLE_EXPORT void RegisterReader(); + +/// \brief Register Avro writer implementation. +ICEBERG_BUNDLE_EXPORT void RegisterWriter(); + +/// \brief Register all the logical types, Avro reader, and Avro writer. +ICEBERG_BUNDLE_EXPORT void RegisterAll(); + } // namespace iceberg::avro diff --git a/src/iceberg/avro/avro_schema_util.cc b/src/iceberg/avro/avro_schema_util.cc index 5a0385565..8e8e8fe3f 100644 --- a/src/iceberg/avro/avro_schema_util.cc +++ b/src/iceberg/avro/avro_schema_util.cc @@ -45,15 +45,7 @@ namespace iceberg::avro { namespace { -constexpr std::string_view kIcebergFieldNameProp = "iceberg-field-name"; -constexpr std::string_view kFieldIdProp = "field-id"; -constexpr std::string_view kKeyIdProp = "key-id"; -constexpr std::string_view kValueIdProp = "value-id"; -constexpr std::string_view kElementIdProp = "element-id"; -constexpr std::string_view kAdjustToUtcProp = "adjust-to-utc"; - ::avro::LogicalType GetMapLogicalType() { - RegisterLogicalTypes(); return ::avro::LogicalType(std::make_shared()); } diff --git a/src/iceberg/avro/avro_writer.cc b/src/iceberg/avro/avro_writer.cc index 837415e16..f7caa69f1 100644 --- a/src/iceberg/avro/avro_writer.cc +++ b/src/iceberg/avro/avro_writer.cc @@ -30,6 +30,7 @@ #include "iceberg/arrow/arrow_error_transform_internal.h" #include "iceberg/arrow/arrow_fs_file_io_internal.h" +#include "iceberg/avro/avro_register.h" #include "iceberg/avro/avro_schema_util_internal.h" #include "iceberg/avro/avro_stream_internal.h" #include "iceberg/schema.h" @@ -133,7 +134,7 @@ std::optional AvroWriter::length() { std::vector AvroWriter::split_offsets() { return {}; } -void AvroWriter::Register() { +void RegisterWriter() { static WriterFactoryRegistry avro_writer_register( FileFormatType::kAvro, []() -> Result> { return std::make_unique(); }); diff --git a/src/iceberg/avro/avro_writer.h b/src/iceberg/avro/avro_writer.h index e7fb7c30c..57499d8ed 100644 --- a/src/iceberg/avro/avro_writer.h +++ b/src/iceberg/avro/avro_writer.h @@ -43,9 +43,6 @@ class ICEBERG_BUNDLE_EXPORT AvroWriter : public Writer { std::vector split_offsets() final; - /// \brief Register this Avro writer implementation. - static void Register(); - private: class Impl; std::unique_ptr impl_; diff --git a/src/iceberg/avro/constants.h b/src/iceberg/avro/constants.h index b56cc6664..6fdfdc5ed 100644 --- a/src/iceberg/avro/constants.h +++ b/src/iceberg/avro/constants.h @@ -31,4 +31,12 @@ constexpr std::string_view kElement = "element"; constexpr std::string_view kKey = "key"; constexpr std::string_view kValue = "value"; +// Avro custom attributes constants +constexpr std::string_view kIcebergFieldNameProp = "iceberg-field-name"; +constexpr std::string_view kFieldIdProp = "field-id"; +constexpr std::string_view kKeyIdProp = "key-id"; +constexpr std::string_view kValueIdProp = "value-id"; +constexpr std::string_view kElementIdProp = "element-id"; +constexpr std::string_view kAdjustToUtcProp = "adjust-to-utc"; + } // namespace iceberg::avro diff --git a/src/iceberg/parquet/parquet_reader.cc b/src/iceberg/parquet/parquet_reader.cc index 4c216a59c..405b09f03 100644 --- a/src/iceberg/parquet/parquet_reader.cc +++ b/src/iceberg/parquet/parquet_reader.cc @@ -34,6 +34,7 @@ #include "iceberg/arrow/arrow_error_transform_internal.h" #include "iceberg/arrow/arrow_fs_file_io_internal.h" #include "iceberg/parquet/parquet_data_util_internal.h" +#include "iceberg/parquet/parquet_register.h" #include "iceberg/parquet/parquet_schema_util_internal.h" #include "iceberg/result.h" #include "iceberg/schema_internal.h" @@ -254,7 +255,7 @@ Status ParquetReader::Open(const ReaderOptions& options) { Status ParquetReader::Close() { return impl_->Close(); } -void ParquetReader::Register() { +void RegisterReader() { static ReaderFactoryRegistry parquet_reader_register( FileFormatType::kParquet, []() -> Result> { return std::make_unique(); diff --git a/src/iceberg/parquet/parquet_reader.h b/src/iceberg/parquet/parquet_reader.h index d29dacabd..23d34dfa9 100644 --- a/src/iceberg/parquet/parquet_reader.h +++ b/src/iceberg/parquet/parquet_reader.h @@ -39,8 +39,6 @@ class ICEBERG_BUNDLE_EXPORT ParquetReader : public Reader { Result Schema() final; - static void Register(); - private: class Impl; std::unique_ptr impl_; diff --git a/src/iceberg/parquet/parquet_register.cc b/src/iceberg/parquet/parquet_register.cc new file mode 100644 index 000000000..19988cd29 --- /dev/null +++ b/src/iceberg/parquet/parquet_register.cc @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "iceberg/parquet/parquet_register.h" + +namespace iceberg::parquet { + +void RegisterWriter() {} + +void RegisterAll() { + RegisterReader(); + RegisterWriter(); +} + +} // namespace iceberg::parquet diff --git a/src/iceberg/parquet/parquet_register.h b/src/iceberg/parquet/parquet_register.h new file mode 100644 index 000000000..1b8d2b4c5 --- /dev/null +++ b/src/iceberg/parquet/parquet_register.h @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +/// \file iceberg/parquet/parquet_register.h +/// \brief Provide functions to register Parquet implementations. + +#include "iceberg/iceberg_bundle_export.h" + +namespace iceberg::parquet { + +/// \brief Register Parquet reader implementation. +ICEBERG_BUNDLE_EXPORT void RegisterReader(); + +/// \brief Register Parquet writer implementation. +ICEBERG_BUNDLE_EXPORT void RegisterWriter(); + +/// \brief Register Parquet reader and writer implementations. +ICEBERG_BUNDLE_EXPORT void RegisterAll(); + +} // namespace iceberg::parquet diff --git a/test/avro_test.cc b/test/avro_test.cc index fe04f9bf6..cba29da12 100644 --- a/test/avro_test.cc +++ b/test/avro_test.cc @@ -30,7 +30,8 @@ #include #include "iceberg/arrow/arrow_fs_file_io_internal.h" -#include "iceberg/avro/avro_reader.h" +#include "iceberg/avro/avro_register.h" +#include "iceberg/file_reader.h" #include "iceberg/schema.h" #include "iceberg/type.h" #include "matchers.h" @@ -40,7 +41,7 @@ namespace iceberg::avro { class AvroReaderTest : public TempFileTestBase { protected: - static void SetUpTestSuite() { AvroReader::Register(); } + static void SetUpTestSuite() { RegisterAll(); } void SetUp() override { TempFileTestBase::SetUp(); diff --git a/test/manifest_list_reader_test.cc b/test/manifest_list_reader_test.cc index 75254801f..09984f389 100644 --- a/test/manifest_list_reader_test.cc +++ b/test/manifest_list_reader_test.cc @@ -22,7 +22,7 @@ #include #include "iceberg/arrow/arrow_fs_file_io_internal.h" -#include "iceberg/avro/avro_reader.h" +#include "iceberg/avro/avro_register.h" #include "iceberg/manifest_list.h" #include "iceberg/manifest_reader.h" #include "temp_file_test_base.h" @@ -32,7 +32,7 @@ namespace iceberg { class ManifestListReaderTestBase : public TempFileTestBase { protected: - static void SetUpTestSuite() { avro::AvroReader::Register(); } + static void SetUpTestSuite() { avro::RegisterAll(); } void SetUp() override { TempFileTestBase::SetUp(); diff --git a/test/manifest_reader_test.cc b/test/manifest_reader_test.cc index 05224dbf8..55fbdd8e5 100644 --- a/test/manifest_reader_test.cc +++ b/test/manifest_reader_test.cc @@ -25,7 +25,6 @@ #include #include "iceberg/arrow/arrow_fs_file_io_internal.h" -#include "iceberg/avro/avro_reader.h" #include "iceberg/avro/avro_register.h" #include "iceberg/manifest_entry.h" #include "iceberg/schema.h" @@ -36,14 +35,12 @@ namespace iceberg { class ManifestReaderV1Test : public TempFileTestBase { protected: - static void SetUpTestSuite() { avro::AvroReader::Register(); } + static void SetUpTestSuite() { avro::RegisterAll(); } void SetUp() override { TempFileTestBase::SetUp(); local_fs_ = std::make_shared<::arrow::fs::LocalFileSystem>(); file_io_ = std::make_shared(local_fs_); - - avro::RegisterLogicalTypes(); } std::vector PrepareV1ManifestEntries() { @@ -122,14 +119,12 @@ TEST_F(ManifestReaderV1Test, V1PartitionedBasicTest) { class ManifestReaderV2Test : public TempFileTestBase { protected: - static void SetUpTestSuite() { avro::AvroReader::Register(); } + static void SetUpTestSuite() { avro::RegisterAll(); } void SetUp() override { TempFileTestBase::SetUp(); local_fs_ = std::make_shared<::arrow::fs::LocalFileSystem>(); file_io_ = std::make_shared(local_fs_); - - avro::RegisterLogicalTypes(); } std::vector PrepareV2NonPartitionedManifestEntries() { diff --git a/test/parquet_test.cc b/test/parquet_test.cc index 7521558b2..122256bb3 100644 --- a/test/parquet_test.cc +++ b/test/parquet_test.cc @@ -29,6 +29,7 @@ #include "iceberg/arrow/arrow_fs_file_io_internal.h" #include "iceberg/parquet/parquet_reader.h" +#include "iceberg/parquet/parquet_register.h" #include "iceberg/schema.h" #include "iceberg/type.h" #include "iceberg/util/checked_cast.h" @@ -39,7 +40,7 @@ namespace iceberg::parquet { class ParquetReaderTest : public TempFileTestBase { protected: - static void SetUpTestSuite() { ParquetReader::Register(); } + static void SetUpTestSuite() { parquet::RegisterAll(); } void SetUp() override { TempFileTestBase::SetUp();