Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 32 additions & 33 deletions src/iceberg/schema.cc
Original file line number Diff line number Diff line change
Expand Up @@ -90,56 +90,55 @@ bool Schema::Equals(const Schema& other) const {
Result<std::optional<std::reference_wrapper<const SchemaField>>> Schema::FindFieldByName(
std::string_view name, bool case_sensitive) const {
if (case_sensitive) {
ICEBERG_RETURN_UNEXPECTED(
LazyInitWithCallOnce(name_to_id_flag_, [this]() { return InitNameToIdMap(); }));
auto it = name_to_id_.find(name);
if (it == name_to_id_.end()) return std::nullopt;
ICEBERG_ASSIGN_OR_RAISE(auto name_to_id, name_to_id_.Get(*this));
auto it = name_to_id.get().find(name);
if (it == name_to_id.get().end()) {
return std::nullopt;
};
return FindFieldById(it->second);
}
ICEBERG_RETURN_UNEXPECTED(LazyInitWithCallOnce(
lowercase_name_to_id_flag_, [this]() { return InitLowerCaseNameToIdMap(); }));
auto it = lowercase_name_to_id_.find(StringUtils::ToLower(name));
if (it == lowercase_name_to_id_.end()) return std::nullopt;
ICEBERG_ASSIGN_OR_RAISE(auto lowercase_name_to_id, lowercase_name_to_id_.Get(*this));
auto it = lowercase_name_to_id.get().find(StringUtils::ToLower(name));
if (it == lowercase_name_to_id.get().end()) {
return std::nullopt;
}
return FindFieldById(it->second);
}

Status Schema::InitIdToFieldMap() const {
if (!id_to_field_.empty()) {
return {};
}
IdToFieldVisitor visitor(id_to_field_);
ICEBERG_RETURN_UNEXPECTED(VisitTypeInline(*this, &visitor));
return {};
Result<std::unordered_map<int32_t, std::reference_wrapper<const SchemaField>>>
Schema::InitIdToFieldMap(const Schema& self) {
std::unordered_map<int32_t, std::reference_wrapper<const SchemaField>> id_to_field;
IdToFieldVisitor visitor(id_to_field);
ICEBERG_RETURN_UNEXPECTED(VisitTypeInline(self, &visitor));
return id_to_field;
}

Status Schema::InitNameToIdMap() const {
if (!name_to_id_.empty()) {
return {};
}
NameToIdVisitor visitor(name_to_id_, /*case_sensitive=*/true);
Result<std::unordered_map<std::string, int32_t, StringHash, std::equal_to<>>>
Schema::InitNameToIdMap(const Schema& self) {
std::unordered_map<std::string, int32_t, StringHash, std::equal_to<>> name_to_id;
NameToIdVisitor visitor(name_to_id, /*case_sensitive=*/true);
ICEBERG_RETURN_UNEXPECTED(
VisitTypeInline(*this, &visitor, /*path=*/"", /*short_path=*/""));
VisitTypeInline(self, &visitor, /*path=*/"", /*short_path=*/""));
visitor.Finish();
return {};
return name_to_id;
}

Status Schema::InitLowerCaseNameToIdMap() const {
if (!lowercase_name_to_id_.empty()) {
return {};
}
NameToIdVisitor visitor(lowercase_name_to_id_, /*case_sensitive=*/false);
Result<std::unordered_map<std::string, int32_t, StringHash, std::equal_to<>>>
Schema::InitLowerCaseNameToIdMap(const Schema& self) {
std::unordered_map<std::string, int32_t, StringHash, std::equal_to<>>
lowercase_name_to_id;
NameToIdVisitor visitor(lowercase_name_to_id, /*case_sensitive=*/false);
ICEBERG_RETURN_UNEXPECTED(
VisitTypeInline(*this, &visitor, /*path=*/"", /*short_path=*/""));
VisitTypeInline(self, &visitor, /*path=*/"", /*short_path=*/""));
visitor.Finish();
return {};
return lowercase_name_to_id;
}

Result<std::optional<std::reference_wrapper<const SchemaField>>> Schema::FindFieldById(
int32_t field_id) const {
ICEBERG_RETURN_UNEXPECTED(
LazyInitWithCallOnce(id_to_field_flag_, [this]() { return InitIdToFieldMap(); }));
auto it = id_to_field_.find(field_id);
if (it == id_to_field_.end()) {
ICEBERG_ASSIGN_OR_RAISE(auto id_to_field, id_to_field_.Get(*this));
auto it = id_to_field.get().find(field_id);
if (it == id_to_field.get().end()) {
return std::nullopt;
}
return it->second;
Expand Down
24 changes: 10 additions & 14 deletions src/iceberg/schema.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
/// and any utility functions. See iceberg/type.h and iceberg/field.h as well.

#include <cstdint>
#include <mutex>
#include <optional>
#include <string>
#include <unordered_set>
Expand All @@ -34,6 +33,7 @@
#include "iceberg/result.h"
#include "iceberg/schema_field.h"
#include "iceberg/type.h"
#include "iceberg/util/lazy.h"
#include "iceberg/util/string_util.h"

namespace iceberg {
Expand Down Expand Up @@ -99,24 +99,20 @@ class ICEBERG_EXPORT Schema : public StructType {
/// \brief Compare two schemas for equality.
bool Equals(const Schema& other) const;

Status InitIdToFieldMap() const;
Status InitNameToIdMap() const;
Status InitLowerCaseNameToIdMap() const;
static Result<std::unordered_map<int32_t, std::reference_wrapper<const SchemaField>>>
InitIdToFieldMap(const Schema&);
static Result<std::unordered_map<std::string, int32_t, StringHash, std::equal_to<>>>
InitNameToIdMap(const Schema&);
static Result<std::unordered_map<std::string, int32_t, StringHash, std::equal_to<>>>
InitLowerCaseNameToIdMap(const Schema&);

const std::optional<int32_t> schema_id_;
/// Mapping from field id to field.
mutable std::unordered_map<int32_t, std::reference_wrapper<const SchemaField>>
id_to_field_;
Lazy<InitIdToFieldMap> id_to_field_;
/// Mapping from field name to field id.
mutable std::unordered_map<std::string, int32_t, StringHash, std::equal_to<>>
name_to_id_;
Lazy<InitNameToIdMap> name_to_id_;
/// Mapping from lowercased field name to field id
mutable std::unordered_map<std::string, int32_t, StringHash, std::equal_to<>>
lowercase_name_to_id_;

mutable std::once_flag id_to_field_flag_;
mutable std::once_flag name_to_id_flag_;
mutable std::once_flag lowercase_name_to_id_flag_;
Lazy<InitLowerCaseNameToIdMap> lowercase_name_to_id_;
};

} // namespace iceberg
61 changes: 28 additions & 33 deletions src/iceberg/type.cc
Original file line number Diff line number Diff line change
Expand Up @@ -51,10 +51,9 @@ std::string StructType::ToString() const {
std::span<const SchemaField> StructType::fields() const { return fields_; }
Result<std::optional<NestedType::SchemaFieldConstRef>> StructType::GetFieldById(
int32_t field_id) const {
ICEBERG_RETURN_UNEXPECTED(
LazyInitWithCallOnce(field_by_id_flag_, [this]() { return InitFieldById(); }));
auto it = field_by_id_.find(field_id);
if (it == field_by_id_.end()) return std::nullopt;
ICEBERG_ASSIGN_OR_RAISE(auto field_by_id, field_by_id_.Get(*this));
auto it = field_by_id.get().find(field_id);
if (it == field_by_id.get().end()) return std::nullopt;
return it->second;
}
Result<std::optional<NestedType::SchemaFieldConstRef>> StructType::GetFieldByIndex(
Expand All @@ -67,18 +66,17 @@ Result<std::optional<NestedType::SchemaFieldConstRef>> StructType::GetFieldByInd
Result<std::optional<NestedType::SchemaFieldConstRef>> StructType::GetFieldByName(
std::string_view name, bool case_sensitive) const {
if (case_sensitive) {
ICEBERG_RETURN_UNEXPECTED(LazyInitWithCallOnce(
field_by_name_flag_, [this]() { return InitFieldByName(); }));
auto it = field_by_name_.find(name);
if (it != field_by_name_.end()) {
ICEBERG_ASSIGN_OR_RAISE(auto field_by_name, field_by_name_.Get(*this));
auto it = field_by_name.get().find(name);
if (it != field_by_name.get().end()) {
return it->second;
}
return std::nullopt;
}
ICEBERG_RETURN_UNEXPECTED(LazyInitWithCallOnce(
field_by_lowercase_name_flag_, [this]() { return InitFieldByLowerCaseName(); }));
auto it = field_by_lowercase_name_.find(StringUtils::ToLower(name));
if (it != field_by_lowercase_name_.end()) {
ICEBERG_ASSIGN_OR_RAISE(auto field_by_lowercase_name,
field_by_lowercase_name_.Get(*this));
auto it = field_by_lowercase_name.get().find(StringUtils::ToLower(name));
if (it != field_by_lowercase_name.get().end()) {
return it->second;
}
return std::nullopt;
Expand All @@ -90,47 +88,44 @@ bool StructType::Equals(const Type& other) const {
const auto& struct_ = static_cast<const StructType&>(other);
return fields_ == struct_.fields_;
}
Status StructType::InitFieldById() const {
if (!field_by_id_.empty()) {
return {};
}
for (const auto& field : fields_) {
auto it = field_by_id_.try_emplace(field.field_id(), field);
Result<std::unordered_map<int32_t, StructType::SchemaFieldConstRef>>
StructType::InitFieldById(const StructType& self) {
std::unordered_map<int32_t, SchemaFieldConstRef> field_by_id;
for (const auto& field : self.fields_) {
auto it = field_by_id.try_emplace(field.field_id(), field);
if (!it.second) {
return InvalidSchema("Duplicate field id found: {} (prev name: {}, curr name: {})",
field.field_id(), it.first->second.get().name(), field.name());
}
}
return {};
return field_by_id;
}
Status StructType::InitFieldByName() const {
if (!field_by_name_.empty()) {
return {};
}
for (const auto& field : fields_) {
auto it = field_by_name_.try_emplace(field.name(), field);
Result<std::unordered_map<std::string_view, StructType::SchemaFieldConstRef>>
StructType::InitFieldByName(const StructType& self) {
std::unordered_map<std::string_view, StructType::SchemaFieldConstRef> field_by_name;
for (const auto& field : self.fields_) {
auto it = field_by_name.try_emplace(field.name(), field);
if (!it.second) {
return InvalidSchema("Duplicate field name found: {} (prev id: {}, curr id: {})",
it.first->first, it.first->second.get().field_id(),
field.field_id());
}
}
return {};
return field_by_name;
}
Status StructType::InitFieldByLowerCaseName() const {
if (!field_by_lowercase_name_.empty()) {
return {};
}
for (const auto& field : fields_) {
Result<std::unordered_map<std::string, StructType::SchemaFieldConstRef>>
StructType::InitFieldByLowerCaseName(const StructType& self) {
std::unordered_map<std::string, SchemaFieldConstRef> field_by_lowercase_name;
for (const auto& field : self.fields_) {
auto it =
field_by_lowercase_name_.try_emplace(StringUtils::ToLower(field.name()), field);
field_by_lowercase_name.try_emplace(StringUtils::ToLower(field.name()), field);
if (!it.second) {
return InvalidSchema(
"Duplicate lowercase field name found: {} (prev id: {}, curr id: {})",
it.first->first, it.first->second.get().field_id(), field.field_id());
}
}
return {};
return field_by_lowercase_name;
}

ListType::ListType(SchemaField element) : element_(std::move(element)) {
Expand Down
28 changes: 10 additions & 18 deletions src/iceberg/type.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@
#include <array>
#include <cstdint>
#include <memory>
#include <mutex>
#include <optional>
#include <span>
#include <string>
Expand All @@ -37,16 +36,10 @@
#include "iceberg/result.h"
#include "iceberg/schema_field.h"
#include "iceberg/util/formattable.h"
#include "iceberg/util/lazy.h"

namespace iceberg {

template <typename Func>
Status LazyInitWithCallOnce(std::once_flag& flag, Func&& func) {
Status status;
std::call_once(flag, [&status, &func]() { status = func(); });
return status;
}

/// \brief Interface for a data type for a field.
class ICEBERG_EXPORT Type : public iceberg::util::Formattable {
public:
Expand Down Expand Up @@ -133,18 +126,17 @@ class ICEBERG_EXPORT StructType : public NestedType {
protected:
bool Equals(const Type& other) const override;

Status InitFieldById() const;
Status InitFieldByName() const;
Status InitFieldByLowerCaseName() const;
static Result<std::unordered_map<int32_t, SchemaFieldConstRef>> InitFieldById(
const StructType&);
static Result<std::unordered_map<std::string_view, SchemaFieldConstRef>>
InitFieldByName(const StructType&);
static Result<std::unordered_map<std::string, SchemaFieldConstRef>>
InitFieldByLowerCaseName(const StructType&);

std::vector<SchemaField> fields_;
mutable std::unordered_map<int32_t, SchemaFieldConstRef> field_by_id_;
mutable std::unordered_map<std::string_view, SchemaFieldConstRef> field_by_name_;
mutable std::unordered_map<std::string, SchemaFieldConstRef> field_by_lowercase_name_;

mutable std::once_flag field_by_id_flag_;
mutable std::once_flag field_by_name_flag_;
mutable std::once_flag field_by_lowercase_name_flag_;
Lazy<InitFieldById> field_by_id_;
Lazy<InitFieldByName> field_by_name_;
Lazy<InitFieldByLowerCaseName> field_by_lowercase_name_;
};

/// \brief A data type representing a list of values.
Expand Down
67 changes: 67 additions & 0 deletions src/iceberg/util/lazy.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

#pragma once

/// \file iceberg/util/lazy.h
/// Lazy initialization utility.

#include <concepts>
#include <functional>
#include <mutex>

#include "iceberg/result.h"
#include "iceberg/util/macros.h"

namespace iceberg {

template <auto InitFunc>
class Lazy {
template <typename R>
struct Trait;

template <typename R, typename... Args>
struct Trait<R (*)(Args...)> {
using ReturnType = R::value_type;
};

using T = Trait<decltype(InitFunc)>::ReturnType;

public:
template <typename... Args>
requires std::invocable<decltype(InitFunc), Args...> &&
std::same_as<std::invoke_result_t<decltype(InitFunc), Args...>, Result<T>>
Result<std::reference_wrapper<T>> Get(Args&&... args) const {
Result<T> result;
std::call_once(flag_, [&result, this, &args...]() {
result = InitFunc(std::forward<Args>(args)...);
if (result) {
this->value_ = std::move(result.value());
}
});
ICEBERG_RETURN_UNEXPECTED(result);
return std::ref(value_);
}

private:
mutable T value_;
mutable std::once_flag flag_;
};

}; // namespace iceberg
Loading