Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 2 additions & 7 deletions include/paimon/global_index/global_index_io_meta.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,17 +25,12 @@
namespace paimon {
/// Metadata describing a single file entry in a global index.
struct PAIMON_EXPORT GlobalIndexIOMeta {
GlobalIndexIOMeta(const std::string& _file_path, int64_t _file_size, int64_t _range_end,
GlobalIndexIOMeta(const std::string& _file_path, int64_t _file_size,
const std::shared_ptr<Bytes>& _metadata)
: file_path(_file_path),
file_size(_file_size),
range_end(_range_end),
metadata(_metadata) {}
: file_path(_file_path), file_size(_file_size), metadata(_metadata) {}

std::string file_path;
int64_t file_size;
/// The inclusive range end covered by this file (i.e., the last local row id).
int64_t range_end;
/// Optional binary metadata associated with the file, such as serialized
/// secondary index structures or inline index bytes.
/// May be null if no additional metadata is available.
Expand Down
3 changes: 2 additions & 1 deletion include/paimon/global_index/global_index_writer.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,10 @@ class PAIMON_EXPORT GlobalIndexWriter {
///
/// @param arrow_array A valid C ArrowArray pointer representing a struct array.
/// Must not be nullptr, and must conform to the expected schema.
/// @param relative_row_ids Local row ids, each calculated as `row_id - range.from`.
/// @return `Status::OK()` on success; otherwise, an error indicating malformed
/// input, I/O failure, or unsupported type, etc.
virtual Status AddBatch(::ArrowArray* arrow_array) = 0;
virtual Status AddBatch(::ArrowArray* arrow_array, std::vector<int64_t>&& relative_row_ids) = 0;

/// Finalizes the index build process and returns metadata for persisted index.
virtual Result<std::vector<GlobalIndexIOMeta>> Finish() = 0;
Expand Down
3 changes: 3 additions & 0 deletions src/paimon/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -408,6 +408,7 @@ if(PAIMON_BUILD_TESTS)
common/file_index/bloomfilter/fast_hash_test.cpp
common/global_index/complete_index_score_batch_reader_test.cpp
common/global_index/global_index_result_test.cpp
common/global_index/global_index_utils_test.cpp
common/global_index/global_indexer_factory_test.cpp
common/global_index/bitmap_global_index_result_test.cpp
common/global_index/bitmap_scored_global_index_result_test.cpp
Expand All @@ -417,6 +418,8 @@ if(PAIMON_BUILD_TESTS)
common/global_index/btree/key_serializer_test.cpp
common/global_index/btree/btree_global_index_integration_test.cpp
common/global_index/btree/btree_compatibility_test.cpp
common/global_index/btree/btree_file_meta_selector_test.cpp
common/global_index/btree/lazy_filtered_btree_reader_test.cpp
common/global_index/rangebitmap/range_bitmap_global_index_test.cpp
common/global_index/wrap/file_index_reader_wrapper_test.cpp
common/io/byte_array_input_stream_test.cpp
Expand Down
2 changes: 2 additions & 0 deletions src/paimon/common/global_index/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,9 @@ set(PAIMON_GLOBAL_INDEX_SRC
btree/btree_global_indexer.cpp
btree/btree_global_index_reader.cpp
btree/btree_global_index_writer.cpp
btree/btree_file_meta_selector.cpp
btree/btree_index_meta.cpp
btree/lazy_filtered_btree_reader.cpp
btree/key_serializer.cpp
rangebitmap/range_bitmap_global_index.cpp
rangebitmap/range_bitmap_global_index_factory.cpp)
Expand Down
4 changes: 2 additions & 2 deletions src/paimon/common/global_index/bitmap/bitmap_global_index.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,9 @@ Result<std::shared_ptr<GlobalIndexReader>> BitmapGlobalIndex::CreateReader(
PAIMON_ASSIGN_OR_RAISE(
std::shared_ptr<FileIndexReader> reader,
index_->CreateReader(arrow_schema, /*start=*/0, meta.file_size, in, pool));
auto transform = [range_end = meta.range_end](const std::shared_ptr<FileIndexResult>& result)
auto transform = [](const std::shared_ptr<FileIndexResult>& result)
-> Result<std::shared_ptr<GlobalIndexResult>> {
return FileIndexReaderWrapper::ToGlobalIndexResult(range_end, result);
return FileIndexReaderWrapper::ToGlobalIndexResult(result);
};
return std::make_shared<BitmapGlobalIndexReader>(reader, transform);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,9 @@ class BitmapGlobalIndexTest : public ::testing::Test {

ArrowArray c_array;
PAIMON_RETURN_NOT_OK_FROM_ARROW(arrow::ExportArray(*array, &c_array));
PAIMON_RETURN_NOT_OK(global_writer->AddBatch(&c_array));
std::vector<int64_t> row_ids(array->length());
std::iota(row_ids.begin(), row_ids.end(), 0);
PAIMON_RETURN_NOT_OK(global_writer->AddBatch(&c_array, std::move(row_ids)));
PAIMON_ASSIGN_OR_RAISE(auto result_metas, global_writer->Finish());
// check meta
if (result_metas.empty()) {
Expand All @@ -85,7 +87,6 @@ class BitmapGlobalIndexTest : public ::testing::Test {
auto file_name = PathUtil::GetName(result_metas[0].file_path);
EXPECT_TRUE(StringUtils::StartsWith(file_name, "bitmap-global-index-"));
EXPECT_TRUE(StringUtils::EndsWith(file_name, ".index"));
EXPECT_EQ(result_metas[0].range_end, expected_range.to);
EXPECT_FALSE(result_metas[0].metadata);
return result_metas[0];
}
Expand Down Expand Up @@ -161,19 +162,19 @@ TEST_F(BitmapGlobalIndexTest, TestStringType) {

// greater than return REMAIN file index result, will convert to all range global index
// result
CheckResult(reader->VisitGreaterThan(lit_c).value(), {0, 1, 2, 3, 4});
ASSERT_FALSE(reader->VisitGreaterThan(lit_c).value());

// test visit vector search
ASSERT_NOK_WITH_MSG(reader->VisitVectorSearch(std::make_shared<VectorSearch>(
"f0", 10, std::vector<float>({1.0f, 2.0f}), nullptr, nullptr,
std::nullopt, std::map<std::string, std::string>())),
"FileIndexReaderWrapper is not supposed to handle vector search query");
// test VisitStartsWith, VisitEndsWith, VisitContains, VisitLike, VisitFullTextSearch
CheckResult(reader->VisitStartsWith(lit_c).value(), {0, 1, 2, 3, 4});
CheckResult(reader->VisitEndsWith(lit_c).value(), {0, 1, 2, 3, 4});
CheckResult(reader->VisitContains(lit_c).value(), {0, 1, 2, 3, 4});
CheckResult(reader->VisitLike(lit_c).value(), {0, 1, 2, 3, 4});
CheckResult(reader->VisitFullTextSearch(nullptr).value(), {0, 1, 2, 3, 4});
ASSERT_FALSE(reader->VisitStartsWith(lit_c).value());
ASSERT_FALSE(reader->VisitEndsWith(lit_c).value());
ASSERT_FALSE(reader->VisitContains(lit_c).value());
ASSERT_FALSE(reader->VisitLike(lit_c).value());
ASSERT_FALSE(reader->VisitFullTextSearch(nullptr).value());
};

{
Expand Down
Loading
Loading