From 64aa17ad21b5ab03fc947e4359dd27d419f2d0c0 Mon Sep 17 00:00:00 2001 From: Zehua Zou <41586196+HuaHuaY@users.noreply.github.com> Date: Mon, 20 Oct 2025 13:20:25 +0800 Subject: [PATCH] refactor: package once_flag and LazyInitWithCallOnce Co-authored-by: Gang Wu --- src/iceberg/schema.cc | 65 ++++++++++++++++++++------------------- src/iceberg/schema.h | 24 ++++++--------- src/iceberg/type.cc | 61 +++++++++++++++++-------------------- src/iceberg/type.h | 28 ++++++----------- src/iceberg/util/lazy.h | 67 +++++++++++++++++++++++++++++++++++++++++ 5 files changed, 147 insertions(+), 98 deletions(-) create mode 100644 src/iceberg/util/lazy.h diff --git a/src/iceberg/schema.cc b/src/iceberg/schema.cc index 2ab2f7e70..819260c0f 100644 --- a/src/iceberg/schema.cc +++ b/src/iceberg/schema.cc @@ -90,56 +90,55 @@ bool Schema::Equals(const Schema& other) const { Result>> Schema::FindFieldByName( std::string_view name, bool case_sensitive) const { if (case_sensitive) { - ICEBERG_RETURN_UNEXPECTED( - LazyInitWithCallOnce(name_to_id_flag_, [this]() { return InitNameToIdMap(); })); - auto it = name_to_id_.find(name); - if (it == name_to_id_.end()) return std::nullopt; + ICEBERG_ASSIGN_OR_RAISE(auto name_to_id, name_to_id_.Get(*this)); + auto it = name_to_id.get().find(name); + if (it == name_to_id.get().end()) { + return std::nullopt; + }; return FindFieldById(it->second); } - ICEBERG_RETURN_UNEXPECTED(LazyInitWithCallOnce( - lowercase_name_to_id_flag_, [this]() { return InitLowerCaseNameToIdMap(); })); - auto it = lowercase_name_to_id_.find(StringUtils::ToLower(name)); - if (it == lowercase_name_to_id_.end()) return std::nullopt; + ICEBERG_ASSIGN_OR_RAISE(auto lowercase_name_to_id, lowercase_name_to_id_.Get(*this)); + auto it = lowercase_name_to_id.get().find(StringUtils::ToLower(name)); + if (it == lowercase_name_to_id.get().end()) { + return std::nullopt; + } return FindFieldById(it->second); } -Status Schema::InitIdToFieldMap() const { - if (!id_to_field_.empty()) { - return {}; - } - IdToFieldVisitor visitor(id_to_field_); - ICEBERG_RETURN_UNEXPECTED(VisitTypeInline(*this, &visitor)); - return {}; +Result>> +Schema::InitIdToFieldMap(const Schema& self) { + std::unordered_map> id_to_field; + IdToFieldVisitor visitor(id_to_field); + ICEBERG_RETURN_UNEXPECTED(VisitTypeInline(self, &visitor)); + return id_to_field; } -Status Schema::InitNameToIdMap() const { - if (!name_to_id_.empty()) { - return {}; - } - NameToIdVisitor visitor(name_to_id_, /*case_sensitive=*/true); +Result>> +Schema::InitNameToIdMap(const Schema& self) { + std::unordered_map> name_to_id; + NameToIdVisitor visitor(name_to_id, /*case_sensitive=*/true); ICEBERG_RETURN_UNEXPECTED( - VisitTypeInline(*this, &visitor, /*path=*/"", /*short_path=*/"")); + VisitTypeInline(self, &visitor, /*path=*/"", /*short_path=*/"")); visitor.Finish(); - return {}; + return name_to_id; } -Status Schema::InitLowerCaseNameToIdMap() const { - if (!lowercase_name_to_id_.empty()) { - return {}; - } - NameToIdVisitor visitor(lowercase_name_to_id_, /*case_sensitive=*/false); +Result>> +Schema::InitLowerCaseNameToIdMap(const Schema& self) { + std::unordered_map> + lowercase_name_to_id; + NameToIdVisitor visitor(lowercase_name_to_id, /*case_sensitive=*/false); ICEBERG_RETURN_UNEXPECTED( - VisitTypeInline(*this, &visitor, /*path=*/"", /*short_path=*/"")); + VisitTypeInline(self, &visitor, /*path=*/"", /*short_path=*/"")); visitor.Finish(); - return {}; + return lowercase_name_to_id; } Result>> Schema::FindFieldById( int32_t field_id) const { - ICEBERG_RETURN_UNEXPECTED( - LazyInitWithCallOnce(id_to_field_flag_, [this]() { return InitIdToFieldMap(); })); - auto it = id_to_field_.find(field_id); - if (it == id_to_field_.end()) { + ICEBERG_ASSIGN_OR_RAISE(auto id_to_field, id_to_field_.Get(*this)); + auto it = id_to_field.get().find(field_id); + if (it == id_to_field.get().end()) { return std::nullopt; } return it->second; diff --git a/src/iceberg/schema.h b/src/iceberg/schema.h index 81f9aa394..2b30a7dd9 100644 --- a/src/iceberg/schema.h +++ b/src/iceberg/schema.h @@ -24,7 +24,6 @@ /// and any utility functions. See iceberg/type.h and iceberg/field.h as well. #include -#include #include #include #include @@ -34,6 +33,7 @@ #include "iceberg/result.h" #include "iceberg/schema_field.h" #include "iceberg/type.h" +#include "iceberg/util/lazy.h" #include "iceberg/util/string_util.h" namespace iceberg { @@ -99,24 +99,20 @@ class ICEBERG_EXPORT Schema : public StructType { /// \brief Compare two schemas for equality. bool Equals(const Schema& other) const; - Status InitIdToFieldMap() const; - Status InitNameToIdMap() const; - Status InitLowerCaseNameToIdMap() const; + static Result>> + InitIdToFieldMap(const Schema&); + static Result>> + InitNameToIdMap(const Schema&); + static Result>> + InitLowerCaseNameToIdMap(const Schema&); const std::optional schema_id_; /// Mapping from field id to field. - mutable std::unordered_map> - id_to_field_; + Lazy id_to_field_; /// Mapping from field name to field id. - mutable std::unordered_map> - name_to_id_; + Lazy name_to_id_; /// Mapping from lowercased field name to field id - mutable std::unordered_map> - lowercase_name_to_id_; - - mutable std::once_flag id_to_field_flag_; - mutable std::once_flag name_to_id_flag_; - mutable std::once_flag lowercase_name_to_id_flag_; + Lazy lowercase_name_to_id_; }; } // namespace iceberg diff --git a/src/iceberg/type.cc b/src/iceberg/type.cc index ddb328585..104ddad75 100644 --- a/src/iceberg/type.cc +++ b/src/iceberg/type.cc @@ -51,10 +51,9 @@ std::string StructType::ToString() const { std::span StructType::fields() const { return fields_; } Result> StructType::GetFieldById( int32_t field_id) const { - ICEBERG_RETURN_UNEXPECTED( - LazyInitWithCallOnce(field_by_id_flag_, [this]() { return InitFieldById(); })); - auto it = field_by_id_.find(field_id); - if (it == field_by_id_.end()) return std::nullopt; + ICEBERG_ASSIGN_OR_RAISE(auto field_by_id, field_by_id_.Get(*this)); + auto it = field_by_id.get().find(field_id); + if (it == field_by_id.get().end()) return std::nullopt; return it->second; } Result> StructType::GetFieldByIndex( @@ -67,18 +66,17 @@ Result> StructType::GetFieldByInd Result> StructType::GetFieldByName( std::string_view name, bool case_sensitive) const { if (case_sensitive) { - ICEBERG_RETURN_UNEXPECTED(LazyInitWithCallOnce( - field_by_name_flag_, [this]() { return InitFieldByName(); })); - auto it = field_by_name_.find(name); - if (it != field_by_name_.end()) { + ICEBERG_ASSIGN_OR_RAISE(auto field_by_name, field_by_name_.Get(*this)); + auto it = field_by_name.get().find(name); + if (it != field_by_name.get().end()) { return it->second; } return std::nullopt; } - ICEBERG_RETURN_UNEXPECTED(LazyInitWithCallOnce( - field_by_lowercase_name_flag_, [this]() { return InitFieldByLowerCaseName(); })); - auto it = field_by_lowercase_name_.find(StringUtils::ToLower(name)); - if (it != field_by_lowercase_name_.end()) { + ICEBERG_ASSIGN_OR_RAISE(auto field_by_lowercase_name, + field_by_lowercase_name_.Get(*this)); + auto it = field_by_lowercase_name.get().find(StringUtils::ToLower(name)); + if (it != field_by_lowercase_name.get().end()) { return it->second; } return std::nullopt; @@ -90,47 +88,44 @@ bool StructType::Equals(const Type& other) const { const auto& struct_ = static_cast(other); return fields_ == struct_.fields_; } -Status StructType::InitFieldById() const { - if (!field_by_id_.empty()) { - return {}; - } - for (const auto& field : fields_) { - auto it = field_by_id_.try_emplace(field.field_id(), field); +Result> +StructType::InitFieldById(const StructType& self) { + std::unordered_map field_by_id; + for (const auto& field : self.fields_) { + auto it = field_by_id.try_emplace(field.field_id(), field); if (!it.second) { return InvalidSchema("Duplicate field id found: {} (prev name: {}, curr name: {})", field.field_id(), it.first->second.get().name(), field.name()); } } - return {}; + return field_by_id; } -Status StructType::InitFieldByName() const { - if (!field_by_name_.empty()) { - return {}; - } - for (const auto& field : fields_) { - auto it = field_by_name_.try_emplace(field.name(), field); +Result> +StructType::InitFieldByName(const StructType& self) { + std::unordered_map field_by_name; + for (const auto& field : self.fields_) { + auto it = field_by_name.try_emplace(field.name(), field); if (!it.second) { return InvalidSchema("Duplicate field name found: {} (prev id: {}, curr id: {})", it.first->first, it.first->second.get().field_id(), field.field_id()); } } - return {}; + return field_by_name; } -Status StructType::InitFieldByLowerCaseName() const { - if (!field_by_lowercase_name_.empty()) { - return {}; - } - for (const auto& field : fields_) { +Result> +StructType::InitFieldByLowerCaseName(const StructType& self) { + std::unordered_map field_by_lowercase_name; + for (const auto& field : self.fields_) { auto it = - field_by_lowercase_name_.try_emplace(StringUtils::ToLower(field.name()), field); + field_by_lowercase_name.try_emplace(StringUtils::ToLower(field.name()), field); if (!it.second) { return InvalidSchema( "Duplicate lowercase field name found: {} (prev id: {}, curr id: {})", it.first->first, it.first->second.get().field_id(), field.field_id()); } } - return {}; + return field_by_lowercase_name; } ListType::ListType(SchemaField element) : element_(std::move(element)) { diff --git a/src/iceberg/type.h b/src/iceberg/type.h index 256526834..49866c442 100644 --- a/src/iceberg/type.h +++ b/src/iceberg/type.h @@ -26,7 +26,6 @@ #include #include #include -#include #include #include #include @@ -37,16 +36,10 @@ #include "iceberg/result.h" #include "iceberg/schema_field.h" #include "iceberg/util/formattable.h" +#include "iceberg/util/lazy.h" namespace iceberg { -template -Status LazyInitWithCallOnce(std::once_flag& flag, Func&& func) { - Status status; - std::call_once(flag, [&status, &func]() { status = func(); }); - return status; -} - /// \brief Interface for a data type for a field. class ICEBERG_EXPORT Type : public iceberg::util::Formattable { public: @@ -133,18 +126,17 @@ class ICEBERG_EXPORT StructType : public NestedType { protected: bool Equals(const Type& other) const override; - Status InitFieldById() const; - Status InitFieldByName() const; - Status InitFieldByLowerCaseName() const; + static Result> InitFieldById( + const StructType&); + static Result> + InitFieldByName(const StructType&); + static Result> + InitFieldByLowerCaseName(const StructType&); std::vector fields_; - mutable std::unordered_map field_by_id_; - mutable std::unordered_map field_by_name_; - mutable std::unordered_map field_by_lowercase_name_; - - mutable std::once_flag field_by_id_flag_; - mutable std::once_flag field_by_name_flag_; - mutable std::once_flag field_by_lowercase_name_flag_; + Lazy field_by_id_; + Lazy field_by_name_; + Lazy field_by_lowercase_name_; }; /// \brief A data type representing a list of values. diff --git a/src/iceberg/util/lazy.h b/src/iceberg/util/lazy.h new file mode 100644 index 000000000..530fb6cfd --- /dev/null +++ b/src/iceberg/util/lazy.h @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +/// \file iceberg/util/lazy.h +/// Lazy initialization utility. + +#include +#include +#include + +#include "iceberg/result.h" +#include "iceberg/util/macros.h" + +namespace iceberg { + +template +class Lazy { + template + struct Trait; + + template + struct Trait { + using ReturnType = R::value_type; + }; + + using T = Trait::ReturnType; + + public: + template + requires std::invocable && + std::same_as, Result> + Result> Get(Args&&... args) const { + Result result; + std::call_once(flag_, [&result, this, &args...]() { + result = InitFunc(std::forward(args)...); + if (result) { + this->value_ = std::move(result.value()); + } + }); + ICEBERG_RETURN_UNEXPECTED(result); + return std::ref(value_); + } + + private: + mutable T value_; + mutable std::once_flag flag_; +}; + +}; // namespace iceberg