diff --git a/include/paimon/defs.h b/include/paimon/defs.h index c05f53d6d..82e505ecd 100644 --- a/include/paimon/defs.h +++ b/include/paimon/defs.h @@ -434,24 +434,6 @@ struct PAIMON_EXPORT Options { /// "lookup.cache-max-disk-size" - Max disk size for lookup cache, you can use this option /// to limit the use of local disks. Default value is unlimited (INT64_MAX). static const char LOOKUP_CACHE_MAX_DISK_SIZE[]; - /// "btree-index.compression" - The compression algorithm to use for BTreeIndex. - /// Default value is "none". - static const char BTREE_INDEX_COMPRESSION[]; - /// "btree-index.compression-level" - The compression level of the compression algorithm. - /// Default value is 1. - static const char BTREE_INDEX_COMPRESSION_LEVEL[]; - /// "btree-index.block-size" - The block size to use for BTreeIndex. - /// Default value is 64 KB. - static const char BTREE_INDEX_BLOCK_SIZE[]; - /// "btree-index.cache-size" - The cache size to use for BTreeIndex. - /// Default value is 128 MB. - static const char BTREE_INDEX_CACHE_SIZE[]; - /// "btree-index.high-priority-pool-ratio" - The high priority pool ratio to use for BTreeIndex. - /// Default value is 0.1. - static const char BTREE_INDEX_HIGH_PRIORITY_POOL_RATIO[]; - /// "btree-index.records-per-range" - The expected number of records per BTree Index File. - /// Default value is 1000000. 
- static const char BTREE_INDEX_RECORDS_PER_RANGE[]; }; static constexpr int64_t BATCH_WRITE_COMMIT_IDENTIFIER = std::numeric_limits::max(); diff --git a/include/paimon/file_index/file_index_reader.h b/include/paimon/file_index/file_index_reader.h index eb7f6c484..bbbea446c 100644 --- a/include/paimon/file_index/file_index_reader.h +++ b/include/paimon/file_index/file_index_reader.h @@ -62,12 +62,6 @@ class PAIMON_EXPORT FileIndexReader : public FunctionVisitor> VisitContains(const Literal& literal) override; Result> VisitLike(const Literal& literal) override; - - Result> VisitAnd( - const std::vector>>& children) override; - - Result> VisitOr( - const std::vector>>& children) override; }; } // namespace paimon diff --git a/include/paimon/global_index/global_index_reader.h b/include/paimon/global_index/global_index_reader.h index b0da2e725..9325735d2 100644 --- a/include/paimon/global_index/global_index_reader.h +++ b/include/paimon/global_index/global_index_reader.h @@ -48,20 +48,6 @@ class PAIMON_EXPORT GlobalIndexReader : public FunctionVisitor> VisitFullTextSearch( const std::shared_ptr& full_text_search) = 0; - /// VisitAnd performs logical AND across multiple child results. - /// Default implementation returns "not supported" error. - Result> VisitAnd( - const std::vector>>& children) override { - return Status::NotImplemented("AND operations not supported by this index type"); - } - - /// VisitOr performs logical OR across multiple child results. - /// Default implementation returns "not supported" error. - Result> VisitOr( - const std::vector>>& children) override { - return Status::NotImplemented("OR operations not supported by this index type"); - } - /// @return true if the reader is thread-safe; false otherwise. 
virtual bool IsThreadSafe() const = 0; diff --git a/include/paimon/predicate/function_visitor.h b/include/paimon/predicate/function_visitor.h index 80d6d7ee1..3d5954ff4 100644 --- a/include/paimon/predicate/function_visitor.h +++ b/include/paimon/predicate/function_visitor.h @@ -74,41 +74,5 @@ class PAIMON_EXPORT FunctionVisitor { /// Evaluates whether string values like the given string. virtual Result VisitLike(const Literal& literal) = 0; - - /// Evaluates the BETWEEN predicate with the given lower and upper bounds. - virtual Result VisitBetween(const Literal& from, const Literal& to) { - // Default implementation: BETWEEN is equivalent to >= AND <= - auto lower_result = VisitGreaterOrEqual(from); - if (!lower_result.ok()) { - return lower_result.status(); - } - auto upper_result = VisitLessOrEqual(to); - if (!upper_result.ok()) { - return upper_result.status(); - } - return VisitAnd({std::move(lower_result).value(), std::move(upper_result).value()}); - } - - /// Evaluates the NOT BETWEEN predicate with the given lower and upper bounds. - virtual Result VisitNotBetween(const Literal& from, const Literal& to) { - // Default implementation: NOT BETWEEN is equivalent to < OR > - auto lower_result = VisitLessThan(from); - if (!lower_result.ok()) { - return lower_result.status(); - } - auto upper_result = VisitGreaterThan(to); - if (!upper_result.ok()) { - return upper_result.status(); - } - return VisitOr({std::move(lower_result).value(), std::move(upper_result).value()}); - } - - // ----------------- Compound functions ------------------------ - - /// Evaluates the AND predicate across multiple child results. - virtual Result VisitAnd(const std::vector>& children) = 0; - - /// Evaluates the OR predicate across multiple child results. 
- virtual Result VisitOr(const std::vector>& children) = 0; }; } // namespace paimon diff --git a/src/paimon/CMakeLists.txt b/src/paimon/CMakeLists.txt index 9965dbbc7..4f189a618 100644 --- a/src/paimon/CMakeLists.txt +++ b/src/paimon/CMakeLists.txt @@ -413,8 +413,7 @@ if(PAIMON_BUILD_TESTS) common/global_index/bitmap/bitmap_global_index_test.cpp common/global_index/btree/btree_index_meta_test.cpp common/global_index/btree/btree_file_footer_test.cpp - common/global_index/btree/btree_global_indexer_test.cpp - common/global_index/btree/btree_global_index_writer_test.cpp + common/global_index/btree/key_serializer_test.cpp common/global_index/btree/btree_global_index_integration_test.cpp common/global_index/btree/btree_compatibility_test.cpp common/global_index/rangebitmap/range_bitmap_global_index_test.cpp diff --git a/src/paimon/common/data/binary_row.cpp b/src/paimon/common/data/binary_row.cpp index 09ee5da9d..0bba7e482 100644 --- a/src/paimon/common/data/binary_row.cpp +++ b/src/paimon/common/data/binary_row.cpp @@ -18,6 +18,7 @@ #include +#include "fmt/format.h" #include "paimon/common/data/binary_data_read_utils.h" #include "paimon/common/memory/memory_segment.h" #include "paimon/common/memory/memory_segment_utils.h" @@ -300,4 +301,8 @@ int32_t BinaryRow::HashCode() const { return MemorySegmentUtils::HashByWords({segment_}, offset_, size_in_bytes_, nullptr); } +std::string BinaryRow::ToString() const { + return fmt::format("BinaryRow@{:#x}", static_cast(HashCode())); +} + } // namespace paimon diff --git a/src/paimon/common/data/binary_row.h b/src/paimon/common/data/binary_row.h index d6f808a9c..71359d918 100644 --- a/src/paimon/common/data/binary_row.h +++ b/src/paimon/common/data/binary_row.h @@ -112,11 +112,7 @@ class BinaryRow final : public BinarySection, public InternalRow, public DataSet bool operator==(const BinaryRow& other) const; // TODO(liancheng.lsz): single column to be implemented - std::string ToString() const override { - std::stringstream ss; - 
ss << std::hex << static_cast(HashCode()); - return "BinaryRow@" + ss.str(); - } + std::string ToString() const override; int32_t HashCode() const override; diff --git a/src/paimon/common/defs.cpp b/src/paimon/common/defs.cpp index 0adb3dac3..19cbbdf85 100644 --- a/src/paimon/common/defs.cpp +++ b/src/paimon/common/defs.cpp @@ -123,11 +123,4 @@ const char Options::LOOKUP_CACHE_HIGH_PRIO_POOL_RATIO[] = "lookup.cache.high-pri const char Options::BUCKET_FUNCTION_TYPE[] = "bucket-function.type"; const char Options::LOOKUP_CACHE_FILE_RETENTION[] = "lookup.cache-file-retention"; const char Options::LOOKUP_CACHE_MAX_DISK_SIZE[] = "lookup.cache-max-disk-size"; -const char Options::BTREE_INDEX_COMPRESSION[] = "btree-index.compression"; -const char Options::BTREE_INDEX_COMPRESSION_LEVEL[] = "btree-index.compression-level"; -const char Options::BTREE_INDEX_BLOCK_SIZE[] = "btree-index.block-size"; -const char Options::BTREE_INDEX_CACHE_SIZE[] = "btree-index.cache-size"; -const char Options::BTREE_INDEX_HIGH_PRIORITY_POOL_RATIO[] = "btree-index.high-priority-pool-ratio"; -const char Options::BTREE_INDEX_RECORDS_PER_RANGE[] = "btree-index.records-per-range"; - } // namespace paimon diff --git a/src/paimon/common/file_index/empty/empty_file_index_reader.h b/src/paimon/common/file_index/empty/empty_file_index_reader.h index 5c07461a1..6207787a7 100644 --- a/src/paimon/common/file_index/empty/empty_file_index_reader.h +++ b/src/paimon/common/file_index/empty/empty_file_index_reader.h @@ -65,16 +65,6 @@ class EmptyFileIndexReader : public FileIndexReader { const std::vector& literals) override { return FileIndexResult::Skip(); } - - Result> VisitBetween(const Literal& from, - const Literal& to) override { - return FileIndexResult::Skip(); - } - - Result> VisitNotBetween(const Literal& from, - const Literal& to) override { - return FileIndexResult::Remain(); - } }; } // namespace paimon diff --git a/src/paimon/common/file_index/file_index_reader.cpp 
b/src/paimon/common/file_index/file_index_reader.cpp index fa99a479d..6f4cb38b2 100644 --- a/src/paimon/common/file_index/file_index_reader.cpp +++ b/src/paimon/common/file_index/file_index_reader.cpp @@ -98,44 +98,4 @@ Result> FileIndexReader::VisitNotIn( } return file_index_result; } - -Result> FileIndexReader::VisitAnd( - const std::vector>>& children) { - if (children.empty()) { - return Status::Invalid("VisitAnd called with no children"); - } - - // Start with the first child - PAIMON_RETURN_NOT_OK(children[0]); - auto current = children[0].value(); - - // AND with remaining children - for (size_t i = 1; i < children.size(); ++i) { - PAIMON_RETURN_NOT_OK(children[i]); - auto child = children[i].value(); - PAIMON_ASSIGN_OR_RAISE(current, current->And(child)); - } - - return current; -} - -Result> FileIndexReader::VisitOr( - const std::vector>>& children) { - if (children.empty()) { - return Status::Invalid("VisitOr called with no children"); - } - - // Start with the first child - PAIMON_RETURN_NOT_OK(children[0]); - auto current = children[0].value(); - - // OR with remaining children - for (size_t i = 1; i < children.size(); ++i) { - PAIMON_RETURN_NOT_OK(children[i]); - auto child = children[i].value(); - PAIMON_ASSIGN_OR_RAISE(current, current->Or(child)); - } - - return current; -} } // namespace paimon diff --git a/src/paimon/common/global_index/CMakeLists.txt b/src/paimon/common/global_index/CMakeLists.txt index 1778b5686..d61402fbf 100644 --- a/src/paimon/common/global_index/CMakeLists.txt +++ b/src/paimon/common/global_index/CMakeLists.txt @@ -21,6 +21,7 @@ set(PAIMON_GLOBAL_INDEX_SRC btree/btree_global_index_reader.cpp btree/btree_global_index_writer.cpp btree/btree_index_meta.cpp + btree/key_serializer.cpp rangebitmap/range_bitmap_global_index.cpp rangebitmap/range_bitmap_global_index_factory.cpp) diff --git a/src/paimon/common/global_index/btree/btree_compatibility_test.cpp b/src/paimon/common/global_index/btree/btree_compatibility_test.cpp index 
a3444b8c5..56a889780 100644 --- a/src/paimon/common/global_index/btree/btree_compatibility_test.cpp +++ b/src/paimon/common/global_index/btree/btree_compatibility_test.cpp @@ -13,631 +13,413 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include -#include - #include -#include #include #include #include #include -#include "arrow/c/helpers.h" +#include "arrow/c/bridge.h" +#include "gtest/gtest.h" #include "paimon/common/global_index/btree/btree_global_indexer.h" #include "paimon/common/global_index/btree/btree_index_meta.h" +#include "paimon/common/global_index/btree/key_serializer.h" #include "paimon/common/utils/arrow/status_utils.h" +#include "paimon/common/utils/string_utils.h" #include "paimon/fs/file_system.h" +#include "paimon/fs/file_system_factory.h" #include "paimon/global_index/io/global_index_file_reader.h" #include "paimon/memory/memory_pool.h" #include "paimon/predicate/literal.h" #include "paimon/testing/utils/testharness.h" - namespace paimon::test { +class BTreeCompatibilityTest : public ::testing::Test { + protected: + struct CsvRecord { + int64_t row_id; + std::string key; // "NULL" if is_null + bool is_null; + }; -// --------------------------------------------------------------------------- -// Test data directory (relative to project root) -// --------------------------------------------------------------------------- -static constexpr char kTestDataDir[] = "test/test_data/global_index/btree/btree_compatibility_data"; - -// --------------------------------------------------------------------------- -// CSV record parsed from the Java-generated CSV files -// --------------------------------------------------------------------------- -struct CsvRecord { - int64_t row_id; - std::string key; // "NULL" if is_null - bool is_null; -}; - -// --------------------------------------------------------------------------- -// Helper: parse a CSV file into a vector of CsvRecord -// 
--------------------------------------------------------------------------- -static std::vector ParseCsvFile(const std::string& csv_path) { - std::vector records; - std::ifstream ifs(csv_path); - if (!ifs.is_open()) { - return records; + void SetUp() override { + pool_ = GetDefaultPool(); + ASSERT_OK_AND_ASSIGN(fs_, FileSystemFactory::Get("local", "/", {})); + data_dir_ = GetDataDir() + "/global_index/btree/btree_compatibility_data"; } - std::string line; - // Skip header line: "row_id,key,is_null" - std::getline(ifs, line); - - while (std::getline(ifs, line)) { - if (line.empty()) continue; - std::istringstream ss(line); - std::string row_id_str, key_str, is_null_str; - std::getline(ss, row_id_str, ','); - std::getline(ss, key_str, ','); - std::getline(ss, is_null_str, ','); - - CsvRecord rec; - rec.row_id = std::stoll(row_id_str); - rec.key = key_str; - rec.is_null = (is_null_str == "true"); - records.push_back(rec); + std::string ReadFileAsString(const std::string& path) const { + EXPECT_OK_AND_ASSIGN(auto input, fs_->Open(path)); + EXPECT_OK_AND_ASSIGN(auto length, input->Length()); + std::string buffer(static_cast(length), '\0'); + EXPECT_OK_AND_ASSIGN([[maybe_unused]] auto bytes_read, + input->Read(buffer.data(), static_cast(length))); + return buffer; } - return records; -} -// --------------------------------------------------------------------------- -// Helper: read a binary file into a Bytes object -// --------------------------------------------------------------------------- -static std::shared_ptr ReadBinaryFile(const std::string& path, MemoryPool* pool) { - std::ifstream ifs(path, std::ios::binary | std::ios::ate); - if (!ifs.is_open()) return nullptr; - auto size = ifs.tellg(); - ifs.seekg(0, std::ios::beg); - auto bytes = std::make_shared(static_cast(size), pool); - ifs.read(bytes->data(), size); - return bytes; -} - -// --------------------------------------------------------------------------- -// Helper: get file size -// 
--------------------------------------------------------------------------- -static int64_t GetFileSize(const std::string& path) { - std::ifstream ifs(path, std::ios::binary | std::ios::ate); - if (!ifs.is_open()) return -1; - return static_cast(ifs.tellg()); -} + // Parse a CSV file into a vector of CsvRecord + std::vector ParseCsvFile(const std::string& csv_path) const { + std::vector records; + std::string content = ReadFileAsString(csv_path); + if (content.empty()) { + return records; + } -// --------------------------------------------------------------------------- -// Fake GlobalIndexFileReader that reads from local filesystem -// --------------------------------------------------------------------------- -class LocalGlobalIndexFileReader : public GlobalIndexFileReader { - public: - explicit LocalGlobalIndexFileReader(const std::shared_ptr& fs) : fs_(fs) {} + std::istringstream iss(content); + std::string line; + // Skip header line: "row_id,key,is_null" + std::getline(iss, line); - Result> GetInputStream( - const std::string& file_path) const override { - return fs_->Open(file_path); + while (std::getline(iss, line)) { + if (line.empty()) { + continue; + } + std::istringstream ss(line); + std::string row_id_str, key_str, is_null_str; + std::getline(ss, row_id_str, ','); + std::getline(ss, key_str, ','); + std::getline(ss, is_null_str, ','); + + CsvRecord rec; + rec.row_id = std::stoll(row_id_str); + rec.key = key_str; + rec.is_null = (is_null_str == "true"); + records.push_back(rec); + } + return records; } - private: - std::shared_ptr fs_; -}; - -// --------------------------------------------------------------------------- -// Collect all row IDs from a GlobalIndexResult into a set -// --------------------------------------------------------------------------- -static std::set CollectRowIds(const std::shared_ptr& result) { - std::set ids; - auto iter_result = result->CreateIterator(); - if (!iter_result.ok()) return ids; - auto iter = 
std::move(iter_result).value(); - while (iter->HasNext()) { - ids.insert(iter->Next()); + std::set CollectRowIds(const std::shared_ptr& result) const { + std::set ids; + EXPECT_OK_AND_ASSIGN(auto iter, result->CreateIterator()); + while (iter->HasNext()) { + ids.insert(iter->Next()); + } + return ids; } - return ids; -} -// --------------------------------------------------------------------------- -// Test fixture -// --------------------------------------------------------------------------- -class BTreeCompatibilityTest : public ::testing::Test { - protected: - void SetUp() override { - pool_ = GetDefaultPool(); - // Use UniqueTestDirectory to get a FileSystem that can read local files - test_dir_ = UniqueTestDirectory::Create("local"); - fs_ = test_dir_->GetFileSystem(); - - // Resolve the absolute path to test data - // The test data is at project_root/test/test_data/... - // We need to find the project root. Use the current working directory. - char cwd[4096]; - if (getcwd(cwd, sizeof(cwd)) != nullptr) { - project_root_ = std::string(cwd); + std::set CollectMatchingRows(const std::vector& records, + std::function predicate) const { + std::set ids; + for (const auto& rec : records) { + if (predicate(rec)) { + ids.insert(rec.row_id); + } } - data_dir_ = project_root_ + "/" + kTestDataDir; + return ids; } - // Create a BTreeGlobalIndexReader from Java-generated .bin and .bin.meta files Result> CreateReaderFromFiles( const std::string& bin_path, const std::string& meta_path, - const std::shared_ptr& arrow_type, int64_t record_count) { - // Read meta bytes - auto meta_bytes = ReadBinaryFile(meta_path, pool_.get()); - if (!meta_bytes) { - return Status::IOError("Failed to read meta file: " + meta_path); - } - - // Get file size - int64_t file_size = GetFileSize(bin_path); - if (file_size < 0) { - return Status::IOError("Failed to get file size: " + bin_path); - } + const std::shared_ptr& arrow_type, int64_t record_count) const { + auto meta_str = 
ReadFileAsString(meta_path); + std::shared_ptr meta_bytes = Bytes::AllocateBytes(meta_str, pool_.get()); + PAIMON_ASSIGN_OR_RAISE(auto file_status, fs_->GetFileStatus(bin_path)); + auto file_size = static_cast(file_status->GetLen()); - // Build GlobalIndexIOMeta - // range_end = record_count - 1 (inclusive) GlobalIndexIOMeta io_meta(bin_path, file_size, record_count - 1, meta_bytes); std::vector metas = {io_meta}; - // Create ArrowSchema auto schema = arrow::schema({arrow::field("testField", arrow_type)}); auto c_schema = std::make_unique(); - auto export_status = arrow::ExportSchema(*schema, c_schema.get()); - if (!export_status.ok()) { - return Status::Invalid("Failed to export ArrowSchema: " + export_status.ToString()); - } + PAIMON_RETURN_NOT_OK_FROM_ARROW(arrow::ExportSchema(*schema, c_schema.get())); - // Create reader auto file_reader = std::make_shared(fs_); std::map options; BTreeGlobalIndexer indexer(options); - - auto reader_result = indexer.CreateReader(c_schema.get(), file_reader, metas, pool_); - ArrowSchemaRelease(c_schema.get()); - return reader_result; - } - - // Helper: build expected row IDs for null rows from CSV records - std::set GetNullRowIds(const std::vector& records) { - std::set ids; - for (const auto& rec : records) { - if (rec.is_null) ids.insert(rec.row_id); - } - return ids; + return indexer.CreateReader(c_schema.get(), file_reader, metas, pool_); } - // Helper: build expected row IDs for non-null rows from CSV records - std::set GetNonNullRowIds(const std::vector& records) { - std::set ids; - for (const auto& rec : records) { - if (!rec.is_null) ids.insert(rec.row_id); - } - return ids; - } - - // Helper: build expected row IDs for rows with a specific int key - std::set GetRowIdsForIntKey(const std::vector& records, int32_t key) { - std::set ids; - std::string key_str = std::to_string(key); - for (const auto& rec : records) { - if (!rec.is_null && rec.key == key_str) ids.insert(rec.row_id); - } - return ids; - } - - // Helper: build 
expected row IDs for rows with a specific string key - std::set GetRowIdsForStringKey(const std::vector& records, - const std::string& key) { - std::set ids; - for (const auto& rec : records) { - if (!rec.is_null && rec.key == key) ids.insert(rec.row_id); + void RunIntQueries(const std::shared_ptr& reader, + const std::vector& records) const { + // VisitIsNull + { + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitIsNull()); + auto actual_ids = CollectRowIds(result); + auto expected_ids = + CollectMatchingRows(records, [](const CsvRecord& r) { return r.is_null; }); + ASSERT_EQ(actual_ids, expected_ids); } - return ids; - } - // Helper: build expected row IDs for int keys in range [lower, upper] - std::set GetRowIdsForIntRange(const std::vector& records, int32_t lower, - int32_t upper, bool lower_inclusive, - bool upper_inclusive) { - std::set ids; - for (const auto& rec : records) { - if (rec.is_null) continue; - int32_t val = std::stoi(rec.key); - bool above_lower = lower_inclusive ? (val >= lower) : (val > lower); - bool below_upper = upper_inclusive ? (val <= upper) : (val < upper); - if (above_lower && below_upper) ids.insert(rec.row_id); + // VisitIsNotNull + { + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitIsNotNull()); + auto actual_ids = CollectRowIds(result); + auto expected_ids = + CollectMatchingRows(records, [](const CsvRecord& r) { return !r.is_null; }); + ASSERT_EQ(actual_ids, expected_ids); } - return ids; - } - // Helper: build expected row IDs for string keys in range - std::set GetRowIdsForStringRange(const std::vector& records, - const std::string& lower, const std::string& upper, - bool lower_inclusive, bool upper_inclusive) { - std::set ids; + // VisitEqual for the first non-null key for (const auto& rec : records) { - if (rec.is_null) continue; - bool above_lower = lower_inclusive ? (rec.key >= lower) : (rec.key > lower); - bool below_upper = upper_inclusive ? 
(rec.key <= upper) : (rec.key < upper); - if (above_lower && below_upper) ids.insert(rec.row_id); + if (!rec.is_null) { + int32_t key_val = std::stoi(rec.key); + Literal literal(key_val); + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitEqual(literal)); + auto actual_ids = CollectRowIds(result); + auto expected_ids = CollectMatchingRows(records, [key_val](const CsvRecord& r) { + return !r.is_null && std::stoi(r.key) == key_val; + }); + ASSERT_EQ(actual_ids, expected_ids); + break; + } } - return ids; - } - std::shared_ptr pool_; - std::unique_ptr test_dir_; - std::shared_ptr fs_; - std::string project_root_; - std::string data_dir_; -}; - -// =========================================================================== -// Test: Read int type data with various record counts -// =========================================================================== -class BTreeCompatibilityIntTest : public BTreeCompatibilityTest, - public ::testing::WithParamInterface {}; - -TEST_P(BTreeCompatibilityIntTest, ReadAndQueryIntData) { - int count = GetParam(); - std::string prefix = "btree_test_int_" + std::to_string(count); - std::string bin_path = data_dir_ + "/" + prefix + ".bin"; - std::string meta_path = bin_path + ".meta"; - std::string csv_path = data_dir_ + "/" + prefix + ".csv"; - - // Parse CSV to get expected data - auto records = ParseCsvFile(csv_path); - ASSERT_EQ(static_cast(records.size()), count) - << "CSV record count mismatch for " << prefix; - - // Create reader - auto reader_result = CreateReaderFromFiles(bin_path, meta_path, arrow::int32(), count); - ASSERT_OK(reader_result.status()) << "Failed to create reader for " << prefix; - auto reader = reader_result.value(); - - // ---- Test 1: VisitIsNull ---- - { - auto result = reader->VisitIsNull(); - ASSERT_OK(result.status()) << prefix << ": VisitIsNull failed"; - auto actual_ids = CollectRowIds(result.value()); - auto expected_ids = GetNullRowIds(records); - EXPECT_EQ(actual_ids, expected_ids) << prefix << ": 
VisitIsNull mismatch"; - } - - // ---- Test 2: VisitIsNotNull ---- - { - auto result = reader->VisitIsNotNull(); - ASSERT_OK(result.status()) << prefix << ": VisitIsNotNull failed"; - auto actual_ids = CollectRowIds(result.value()); - auto expected_ids = GetNonNullRowIds(records); - EXPECT_EQ(actual_ids, expected_ids) << prefix << ": VisitIsNotNull mismatch"; - } - - // ---- Test 3: VisitEqual for a known key ---- - // Find the first non-null key in the CSV - for (const auto& rec : records) { - if (!rec.is_null) { - int32_t key_val = std::stoi(rec.key); - Literal literal(key_val); - auto result = reader->VisitEqual(literal); - ASSERT_OK(result.status()) << prefix << ": VisitEqual(" << key_val << ") failed"; - auto actual_ids = CollectRowIds(result.value()); - auto expected_ids = GetRowIdsForIntKey(records, key_val); - EXPECT_EQ(actual_ids, expected_ids) - << prefix << ": VisitEqual(" << key_val << ") mismatch"; - break; + // VisitEqual for the last non-null key + for (auto it = records.rbegin(); it != records.rend(); ++it) { + if (!it->is_null) { + int32_t key_val = std::stoi(it->key); + Literal literal(key_val); + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitEqual(literal)); + auto actual_ids = CollectRowIds(result); + auto expected_ids = CollectMatchingRows(records, [key_val](const CsvRecord& r) { + return !r.is_null && std::stoi(r.key) == key_val; + }); + ASSERT_EQ(actual_ids, expected_ids); + break; + } } - } - // ---- Test 4: VisitEqual for the last non-null key ---- - for (auto it = records.rbegin(); it != records.rend(); ++it) { - if (!it->is_null) { - int32_t key_val = std::stoi(it->key); - Literal literal(key_val); - auto result = reader->VisitEqual(literal); - ASSERT_OK(result.status()) << prefix << ": VisitEqual(" << key_val << ") failed"; - auto actual_ids = CollectRowIds(result.value()); - auto expected_ids = GetRowIdsForIntKey(records, key_val); - EXPECT_EQ(actual_ids, expected_ids) - << prefix << ": VisitEqual(" << key_val << ") mismatch (last 
key)"; - break; + // VisitEqual for a non-existent key + { + Literal literal(static_cast(-999)); + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitEqual(literal)); + auto actual_ids = CollectRowIds(result); + ASSERT_TRUE(actual_ids.empty()); } - } - - // ---- Test 5: VisitEqual for a non-existent key (should return empty) ---- - { - Literal literal(static_cast(-999)); - auto result = reader->VisitEqual(literal); - ASSERT_OK(result.status()) << prefix << ": VisitEqual(-999) failed"; - auto actual_ids = CollectRowIds(result.value()); - EXPECT_TRUE(actual_ids.empty()) << prefix << ": VisitEqual(-999) should be empty"; - } - // ---- Test 6: VisitLessThan for a mid-range key ---- - { - // Find a key roughly in the middle - std::vector non_null_keys; - for (const auto& rec : records) { - if (!rec.is_null) non_null_keys.push_back(std::stoi(rec.key)); - } - if (!non_null_keys.empty()) { + int32_t mid_key = -1; + { + std::vector non_null_keys; + for (const auto& rec : records) { + if (!rec.is_null) { + non_null_keys.push_back(std::stoi(rec.key)); + } + } + ASSERT_FALSE(non_null_keys.empty()); std::sort(non_null_keys.begin(), non_null_keys.end()); - int32_t mid_key = non_null_keys[non_null_keys.size() / 2]; - Literal literal(mid_key); - auto result = reader->VisitLessThan(literal); - ASSERT_OK(result.status()) << prefix << ": VisitLessThan(" << mid_key << ") failed"; - auto actual_ids = CollectRowIds(result.value()); - auto expected_ids = - GetRowIdsForIntRange(records, non_null_keys.front(), mid_key, true, false); - EXPECT_EQ(actual_ids, expected_ids) - << prefix << ": VisitLessThan(" << mid_key << ") mismatch"; + mid_key = non_null_keys[non_null_keys.size() / 2]; } - } - - // ---- Test 7: VisitGreaterOrEqual for a mid-range key ---- - { - std::vector non_null_keys; - for (const auto& rec : records) { - if (!rec.is_null) non_null_keys.push_back(std::stoi(rec.key)); - } - if (!non_null_keys.empty()) { - std::sort(non_null_keys.begin(), non_null_keys.end()); - int32_t mid_key 
= non_null_keys[non_null_keys.size() / 2]; + // VisitLessThan for a mid-range key + { Literal literal(mid_key); - auto result = reader->VisitGreaterOrEqual(literal); - ASSERT_OK(result.status()) - << prefix << ": VisitGreaterOrEqual(" << mid_key << ") failed"; - auto actual_ids = CollectRowIds(result.value()); - auto expected_ids = - GetRowIdsForIntRange(records, mid_key, non_null_keys.back(), true, true); - EXPECT_EQ(actual_ids, expected_ids) - << prefix << ": VisitGreaterOrEqual(" << mid_key << ") mismatch"; + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitLessThan(literal)); + auto actual_ids = CollectRowIds(result); + auto expected_ids = CollectMatchingRows(records, [mid_key](const CsvRecord& r) { + return !r.is_null && std::stoi(r.key) < mid_key; + }); + ASSERT_EQ(actual_ids, expected_ids); } - } - - // ---- Test 8: VisitBetween ---- - { - std::vector non_null_keys; - for (const auto& rec : records) { - if (!rec.is_null) non_null_keys.push_back(std::stoi(rec.key)); - } - if (non_null_keys.size() >= 4) { - std::sort(non_null_keys.begin(), non_null_keys.end()); - int32_t lower = non_null_keys[non_null_keys.size() / 4]; - int32_t upper = non_null_keys[non_null_keys.size() * 3 / 4]; - Literal lit_lower(lower); - Literal lit_upper(upper); - auto result = reader->VisitBetween(lit_lower, lit_upper); - ASSERT_OK(result.status()) - << prefix << ": VisitBetween(" << lower << ", " << upper << ") failed"; - auto actual_ids = CollectRowIds(result.value()); - auto expected_ids = GetRowIdsForIntRange(records, lower, upper, true, true); - EXPECT_EQ(actual_ids, expected_ids) - << prefix << ": VisitBetween(" << lower << ", " << upper << ") mismatch"; - } - } - // ---- Test 9: VisitIn for multiple keys ---- - { - std::set unique_keys; - for (const auto& rec : records) { - if (!rec.is_null) unique_keys.insert(std::stoi(rec.key)); - } - if (unique_keys.size() >= 3) { + // VisitGreaterOrEqual for a mid-range key + { + Literal literal(mid_key); + ASSERT_OK_AND_ASSIGN(auto result, 
reader->VisitGreaterOrEqual(literal)); + auto actual_ids = CollectRowIds(result); + auto expected_ids = CollectMatchingRows(records, [mid_key](const CsvRecord& r) { + return !r.is_null && std::stoi(r.key) >= mid_key; + }); + ASSERT_EQ(actual_ids, expected_ids); + } + + // VisitIn for multiple keys + { + std::set unique_keys; + for (const auto& rec : records) { + if (!rec.is_null) { + unique_keys.insert(std::stoi(rec.key)); + } + } + ASSERT_GE(unique_keys.size(), 3); auto it = unique_keys.begin(); int32_t k1 = *it++; int32_t k2 = *it++; int32_t k3 = *it++; std::vector in_literals = {Literal(k1), Literal(k2), Literal(k3)}; - auto result = reader->VisitIn(in_literals); - ASSERT_OK(result.status()) - << prefix << ": VisitIn({" << k1 << "," << k2 << "," << k3 << "}) failed"; - auto actual_ids = CollectRowIds(result.value()); - - std::set expected_ids; - for (auto id : GetRowIdsForIntKey(records, k1)) expected_ids.insert(id); - for (auto id : GetRowIdsForIntKey(records, k2)) expected_ids.insert(id); - for (auto id : GetRowIdsForIntKey(records, k3)) expected_ids.insert(id); - EXPECT_EQ(actual_ids, expected_ids) << prefix << ": VisitIn mismatch"; + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitIn(in_literals)); + auto actual_ids = CollectRowIds(result); + auto expected_ids = CollectMatchingRows(records, [k1, k2, k3](const CsvRecord& r) { + if (r.is_null) { + return false; + } + int32_t v = std::stoi(r.key); + return v == k1 || v == k2 || v == k3; + }); + ASSERT_EQ(actual_ids, expected_ids); } - } - // ---- Test 10: VisitNotEqual ---- - { + // VisitNotEqual for the first non-null key for (const auto& rec : records) { if (!rec.is_null) { int32_t key_val = std::stoi(rec.key); Literal literal(key_val); - auto result = reader->VisitNotEqual(literal); - ASSERT_OK(result.status()) << prefix << ": VisitNotEqual(" << key_val << ") failed"; - auto actual_ids = CollectRowIds(result.value()); - - // Expected: all non-null rows except those with this key - std::set expected_ids; - for 
(const auto& r : records) { - if (!r.is_null && std::stoi(r.key) != key_val) { - expected_ids.insert(r.row_id); - } - } - EXPECT_EQ(actual_ids, expected_ids) - << prefix << ": VisitNotEqual(" << key_val << ") mismatch"; - break; // Test with just the first non-null key + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitNotEqual(literal)); + auto actual_ids = CollectRowIds(result); + auto expected_ids = CollectMatchingRows(records, [key_val](const CsvRecord& r) { + return !r.is_null && std::stoi(r.key) != key_val; + }); + ASSERT_EQ(actual_ids, expected_ids); + break; } } } -} - -INSTANTIATE_TEST_SUITE_P(IntDataSizes, BTreeCompatibilityIntTest, - ::testing::Values(50, 100, 500, 1000, 5000)); - -// =========================================================================== -// Test: Read varchar type data with various record counts -// =========================================================================== -class BTreeCompatibilityVarcharTest : public BTreeCompatibilityTest, - public ::testing::WithParamInterface {}; - -TEST_P(BTreeCompatibilityVarcharTest, ReadAndQueryVarcharData) { - int count = GetParam(); - std::string prefix = "btree_test_varchar_" + std::to_string(count); - std::string bin_path = data_dir_ + "/" + prefix + ".bin"; - std::string meta_path = bin_path + ".meta"; - std::string csv_path = data_dir_ + "/" + prefix + ".csv"; - // Parse CSV - auto records = ParseCsvFile(csv_path); - ASSERT_EQ(static_cast(records.size()), count) - << "CSV record count mismatch for " << prefix; - - // Create reader (varchar -> arrow::utf8()) - auto reader_result = CreateReaderFromFiles(bin_path, meta_path, arrow::utf8(), count); - ASSERT_OK(reader_result.status()) << "Failed to create reader for " << prefix; - auto reader = reader_result.value(); - - // ---- Test 1: VisitIsNull ---- - { - auto result = reader->VisitIsNull(); - ASSERT_OK(result.status()) << prefix << ": VisitIsNull failed"; - auto actual_ids = CollectRowIds(result.value()); - auto expected_ids = 
GetNullRowIds(records); - EXPECT_EQ(actual_ids, expected_ids) << prefix << ": VisitIsNull mismatch"; - } - - // ---- Test 2: VisitIsNotNull ---- - { - auto result = reader->VisitIsNotNull(); - ASSERT_OK(result.status()) << prefix << ": VisitIsNotNull failed"; - auto actual_ids = CollectRowIds(result.value()); - auto expected_ids = GetNonNullRowIds(records); - EXPECT_EQ(actual_ids, expected_ids) << prefix << ": VisitIsNotNull mismatch"; - } + // Run string-type queries against a reader with CSV records as ground truth + void RunStringQueries(const std::shared_ptr& reader, + const std::vector& records) const { + // VisitIsNull + { + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitIsNull()); + auto actual_ids = CollectRowIds(result); + auto expected_ids = + CollectMatchingRows(records, [](const CsvRecord& r) { return r.is_null; }); + ASSERT_EQ(actual_ids, expected_ids); + } - // ---- Test 3: VisitEqual for a known key ---- - for (const auto& rec : records) { - if (!rec.is_null) { - Literal literal(FieldType::STRING, rec.key.c_str(), - static_cast(rec.key.size())); - auto result = reader->VisitEqual(literal); - ASSERT_OK(result.status()) << prefix << ": VisitEqual(" << rec.key << ") failed"; - auto actual_ids = CollectRowIds(result.value()); - auto expected_ids = GetRowIdsForStringKey(records, rec.key); - EXPECT_EQ(actual_ids, expected_ids) - << prefix << ": VisitEqual(" << rec.key << ") mismatch"; - break; + // VisitIsNotNull + { + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitIsNotNull()); + auto actual_ids = CollectRowIds(result); + auto expected_ids = + CollectMatchingRows(records, [](const CsvRecord& r) { return !r.is_null; }); + ASSERT_EQ(actual_ids, expected_ids); } - } - // ---- Test 4: VisitEqual for the last non-null key ---- - for (auto it = records.rbegin(); it != records.rend(); ++it) { - if (!it->is_null) { - Literal literal(FieldType::STRING, it->key.c_str(), - static_cast(it->key.size())); - auto result = reader->VisitEqual(literal); - 
ASSERT_OK(result.status()) << prefix << ": VisitEqual(" << it->key << ") failed"; - auto actual_ids = CollectRowIds(result.value()); - auto expected_ids = GetRowIdsForStringKey(records, it->key); - EXPECT_EQ(actual_ids, expected_ids) - << prefix << ": VisitEqual(" << it->key << ") mismatch (last key)"; - break; + // VisitEqual for the first non-null key + for (const auto& rec : records) { + if (!rec.is_null) { + Literal literal(FieldType::STRING, rec.key.c_str(), + static_cast(rec.key.size())); + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitEqual(literal)); + auto actual_ids = CollectRowIds(result); + auto expected_ids = CollectMatchingRows( + records, [&rec](const CsvRecord& r) { return !r.is_null && r.key == rec.key; }); + ASSERT_EQ(actual_ids, expected_ids); + break; + } } - } - // ---- Test 5: VisitEqual for a non-existent key ---- - { - std::string non_existent = "zzz_non_existent_key"; - Literal literal(FieldType::STRING, non_existent.c_str(), - static_cast(non_existent.size())); - auto result = reader->VisitEqual(literal); - ASSERT_OK(result.status()) << prefix << ": VisitEqual(non_existent) failed"; - auto actual_ids = CollectRowIds(result.value()); - EXPECT_TRUE(actual_ids.empty()) << prefix << ": VisitEqual(non_existent) should be empty"; - } + // VisitEqual for the last non-null key + for (auto it = records.rbegin(); it != records.rend(); ++it) { + if (!it->is_null) { + Literal literal(FieldType::STRING, it->key.c_str(), + static_cast(it->key.size())); + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitEqual(literal)); + auto actual_ids = CollectRowIds(result); + auto expected_ids = CollectMatchingRows( + records, [&it](const CsvRecord& r) { return !r.is_null && r.key == it->key; }); + ASSERT_EQ(actual_ids, expected_ids); + break; + } + } - // ---- Test 6: VisitLessThan for a mid-range key ---- - { - std::vector non_null_keys; - for (const auto& rec : records) { - if (!rec.is_null) non_null_keys.push_back(rec.key); + // VisitEqual for a non-existent key 
+ { + std::string non_existent = "zzz_non_existent_key"; + Literal literal(FieldType::STRING, non_existent.c_str(), + static_cast(non_existent.size())); + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitEqual(literal)); + auto actual_ids = CollectRowIds(result); + ASSERT_TRUE(actual_ids.empty()); } - if (!non_null_keys.empty()) { + + std::string mid_key; + { + std::vector non_null_keys; + for (const auto& rec : records) { + if (!rec.is_null) { + non_null_keys.push_back(rec.key); + } + } + ASSERT_FALSE(non_null_keys.empty()); std::sort(non_null_keys.begin(), non_null_keys.end()); - std::string mid_key = non_null_keys[non_null_keys.size() / 2]; + mid_key = non_null_keys[non_null_keys.size() / 2]; + } + // VisitLessThan for a mid-range key + { Literal literal(FieldType::STRING, mid_key.c_str(), static_cast(mid_key.size())); - auto result = reader->VisitLessThan(literal); - ASSERT_OK(result.status()) << prefix << ": VisitLessThan(" << mid_key << ") failed"; - auto actual_ids = CollectRowIds(result.value()); - auto expected_ids = - GetRowIdsForStringRange(records, non_null_keys.front(), mid_key, true, false); - EXPECT_EQ(actual_ids, expected_ids) - << prefix << ": VisitLessThan(" << mid_key << ") mismatch"; + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitLessThan(literal)); + auto actual_ids = CollectRowIds(result); + auto expected_ids = CollectMatchingRows( + records, [&mid_key](const CsvRecord& r) { return !r.is_null && r.key < mid_key; }); + ASSERT_EQ(actual_ids, expected_ids); } - } - // ---- Test 7: VisitGreaterOrEqual for a mid-range key ---- - { - std::vector non_null_keys; - for (const auto& rec : records) { - if (!rec.is_null) non_null_keys.push_back(rec.key); - } - if (!non_null_keys.empty()) { - std::sort(non_null_keys.begin(), non_null_keys.end()); - std::string mid_key = non_null_keys[non_null_keys.size() / 2]; + // VisitGreaterOrEqual for a mid-range key + { Literal literal(FieldType::STRING, mid_key.c_str(), static_cast(mid_key.size())); - auto result = 
reader->VisitGreaterOrEqual(literal); - ASSERT_OK(result.status()) - << prefix << ": VisitGreaterOrEqual(" << mid_key << ") failed"; - auto actual_ids = CollectRowIds(result.value()); - auto expected_ids = - GetRowIdsForStringRange(records, mid_key, non_null_keys.back(), true, true); - EXPECT_EQ(actual_ids, expected_ids) - << prefix << ": VisitGreaterOrEqual(" << mid_key << ") mismatch"; + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitGreaterOrEqual(literal)); + auto actual_ids = CollectRowIds(result); + auto expected_ids = CollectMatchingRows( + records, [&mid_key](const CsvRecord& r) { return !r.is_null && r.key >= mid_key; }); + ASSERT_EQ(actual_ids, expected_ids); } - } - // ---- Test 8: VisitStartsWith ---- - { - std::string prefix_str = "test_000"; - Literal literal(FieldType::STRING, prefix_str.c_str(), - static_cast(prefix_str.size())); - auto result = reader->VisitStartsWith(literal); - ASSERT_OK(result.status()) << prefix << ": VisitStartsWith(" << prefix_str << ") failed"; - auto actual_ids = CollectRowIds(result.value()); - - // Expected: all non-null rows whose key starts with "test_000" - std::set expected_ids; - for (const auto& rec : records) { - if (!rec.is_null && rec.key.substr(0, prefix_str.size()) == prefix_str) { - expected_ids.insert(rec.row_id); - } + // VisitStartsWith + { + std::string prefix_str = "test_000"; + Literal literal(FieldType::STRING, prefix_str.c_str(), + static_cast(prefix_str.size())); + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitStartsWith(literal)); + auto actual_ids = CollectRowIds(result); + auto expected_ids = CollectMatchingRows(records, [&prefix_str](const CsvRecord& r) { + return !r.is_null && StringUtils::StartsWith(r.key, prefix_str); + }); + ASSERT_EQ(actual_ids, expected_ids); } - EXPECT_EQ(actual_ids, expected_ids) - << prefix << ": VisitStartsWith(" << prefix_str << ") mismatch"; } - // ---- Test 9: VisitBetween ---- - { - std::vector non_null_keys; - for (const auto& rec : records) { - if (!rec.is_null) 
non_null_keys.push_back(rec.key); - } - if (non_null_keys.size() >= 4) { - std::sort(non_null_keys.begin(), non_null_keys.end()); - std::string lower = non_null_keys[non_null_keys.size() / 4]; - std::string upper = non_null_keys[non_null_keys.size() * 3 / 4]; - Literal lit_lower(FieldType::STRING, lower.c_str(), static_cast(lower.size())); - Literal lit_upper(FieldType::STRING, upper.c_str(), static_cast(upper.size())); - auto result = reader->VisitBetween(lit_lower, lit_upper); - ASSERT_OK(result.status()) - << prefix << ": VisitBetween(" << lower << ", " << upper << ") failed"; - auto actual_ids = CollectRowIds(result.value()); - auto expected_ids = GetRowIdsForStringRange(records, lower, upper, true, true); - EXPECT_EQ(actual_ids, expected_ids) - << prefix << ": VisitBetween(" << lower << ", " << upper << ") mismatch"; + class LocalGlobalIndexFileReader : public GlobalIndexFileReader { + public: + explicit LocalGlobalIndexFileReader(const std::shared_ptr& fs) : fs_(fs) {} + + Result> GetInputStream( + const std::string& file_path) const override { + return fs_->Open(file_path); } + + private: + std::shared_ptr fs_; + }; + + std::shared_ptr pool_; + std::shared_ptr fs_; + std::string data_dir_; +}; + +TEST_F(BTreeCompatibilityTest, ReadAndQueryIntData) { + for (int32_t count : {50, 100, 500, 1000, 5000}) { + std::string prefix = "btree_test_int_" + std::to_string(count); + std::string bin_path = data_dir_ + "/" + prefix + ".bin"; + std::string meta_path = bin_path + ".meta"; + std::string csv_path = data_dir_ + "/" + prefix + ".csv"; + + auto records = ParseCsvFile(csv_path); + ASSERT_EQ(static_cast(records.size()), count); + + ASSERT_OK_AND_ASSIGN(auto reader, + CreateReaderFromFiles(bin_path, meta_path, arrow::int32(), count)); + RunIntQueries(reader, records); } } -INSTANTIATE_TEST_SUITE_P(VarcharDataSizes, BTreeCompatibilityVarcharTest, - ::testing::Values(50, 100, 500, 1000, 5000)); +TEST_F(BTreeCompatibilityTest, ReadAndQueryStringData) { + for (int32_t 
count : {50, 100, 500, 1000, 5000}) { + std::string prefix = "btree_test_string_" + std::to_string(count); + std::string bin_path = data_dir_ + "/" + prefix + ".bin"; + std::string meta_path = bin_path + ".meta"; + std::string csv_path = data_dir_ + "/" + prefix + ".csv"; + + auto records = ParseCsvFile(csv_path); + ASSERT_EQ(static_cast(records.size()), count); + + ASSERT_OK_AND_ASSIGN(auto reader, + CreateReaderFromFiles(bin_path, meta_path, arrow::utf8(), count)); + RunStringQueries(reader, records); + } +} -// =========================================================================== -// Test: Edge case - all nulls -// =========================================================================== TEST_F(BTreeCompatibilityTest, AllNulls) { std::string prefix = "btree_test_int_all_nulls"; std::string bin_path = data_dir_ + "/" + prefix + ".bin"; @@ -645,47 +427,38 @@ TEST_F(BTreeCompatibilityTest, AllNulls) { std::string csv_path = data_dir_ + "/" + prefix + ".csv"; auto records = ParseCsvFile(csv_path); - ASSERT_FALSE(records.empty()) << "Failed to parse CSV for " << prefix; - int count = static_cast(records.size()); + ASSERT_FALSE(records.empty()); + auto count = static_cast(records.size()); - auto reader_result = CreateReaderFromFiles(bin_path, meta_path, arrow::int32(), count); - ASSERT_OK(reader_result.status()) << "Failed to create reader for " << prefix; - auto reader = reader_result.value(); + ASSERT_OK_AND_ASSIGN(auto reader, + CreateReaderFromFiles(bin_path, meta_path, arrow::int32(), count)); // All rows should be null { - auto result = reader->VisitIsNull(); - ASSERT_OK(result.status()) << prefix << ": VisitIsNull failed"; - auto actual_ids = CollectRowIds(result.value()); - EXPECT_EQ(static_cast(actual_ids.size()), count) - << prefix << ": VisitIsNull should return all rows"; - // Verify each row ID - for (int i = 0; i < count; ++i) { - EXPECT_TRUE(actual_ids.count(i)) << prefix << ": Missing row_id " << i; + ASSERT_OK_AND_ASSIGN(auto result, 
reader->VisitIsNull()); + auto actual_ids = CollectRowIds(result); + ASSERT_EQ(static_cast(actual_ids.size()), count); + for (int32_t i = 0; i < count; ++i) { + ASSERT_TRUE(actual_ids.count(i)); } } // No rows should be non-null { - auto result = reader->VisitIsNotNull(); - ASSERT_OK(result.status()) << prefix << ": VisitIsNotNull failed"; - auto actual_ids = CollectRowIds(result.value()); - EXPECT_TRUE(actual_ids.empty()) << prefix << ": VisitIsNotNull should be empty"; + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitIsNotNull()); + auto actual_ids = CollectRowIds(result); + ASSERT_TRUE(actual_ids.empty()); } // VisitEqual should return empty for any key { Literal literal(static_cast(42)); - auto result = reader->VisitEqual(literal); - ASSERT_OK(result.status()) << prefix << ": VisitEqual(42) failed"; - auto actual_ids = CollectRowIds(result.value()); - EXPECT_TRUE(actual_ids.empty()) << prefix << ": VisitEqual should be empty for all-nulls"; + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitEqual(literal)); + auto actual_ids = CollectRowIds(result); + ASSERT_TRUE(actual_ids.empty()); } } -// =========================================================================== -// Test: Edge case - no nulls -// =========================================================================== TEST_F(BTreeCompatibilityTest, NoNulls) { std::string prefix = "btree_test_int_no_nulls"; std::string bin_path = data_dir_ + "/" + prefix + ".bin"; @@ -693,31 +466,27 @@ TEST_F(BTreeCompatibilityTest, NoNulls) { std::string csv_path = data_dir_ + "/" + prefix + ".csv"; auto records = ParseCsvFile(csv_path); - ASSERT_FALSE(records.empty()) << "Failed to parse CSV for " << prefix; - int count = static_cast(records.size()); + ASSERT_FALSE(records.empty()); + auto count = static_cast(records.size()); - auto reader_result = CreateReaderFromFiles(bin_path, meta_path, arrow::int32(), count); - ASSERT_OK(reader_result.status()) << "Failed to create reader for " << prefix; - auto reader = 
reader_result.value(); + ASSERT_OK_AND_ASSIGN(auto reader, + CreateReaderFromFiles(bin_path, meta_path, arrow::int32(), count)); // No rows should be null { - auto result = reader->VisitIsNull(); - ASSERT_OK(result.status()) << prefix << ": VisitIsNull failed"; - auto actual_ids = CollectRowIds(result.value()); - EXPECT_TRUE(actual_ids.empty()) << prefix << ": VisitIsNull should be empty"; + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitIsNull()); + auto actual_ids = CollectRowIds(result); + ASSERT_TRUE(actual_ids.empty()); } // All rows should be non-null { - auto result = reader->VisitIsNotNull(); - ASSERT_OK(result.status()) << prefix << ": VisitIsNotNull failed"; - auto actual_ids = CollectRowIds(result.value()); - EXPECT_EQ(static_cast(actual_ids.size()), count) - << prefix << ": VisitIsNotNull should return all rows"; + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitIsNotNull()); + auto actual_ids = CollectRowIds(result); + ASSERT_EQ(static_cast(actual_ids.size()), count); } - // VisitEqual for each unique key should return correct row IDs + // VisitEqual for each unique key { std::set tested_keys; for (const auto& rec : records) { @@ -725,47 +494,40 @@ TEST_F(BTreeCompatibilityTest, NoNulls) { tested_keys.insert(rec.key); int32_t key_val = std::stoi(rec.key); Literal literal(key_val); - auto result = reader->VisitEqual(literal); - ASSERT_OK(result.status()) << prefix << ": VisitEqual(" << key_val << ") failed"; - auto actual_ids = CollectRowIds(result.value()); - auto expected_ids = GetRowIdsForIntKey(records, key_val); - EXPECT_EQ(actual_ids, expected_ids) - << prefix << ": VisitEqual(" << key_val << ") mismatch"; + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitEqual(literal)); + auto actual_ids = CollectRowIds(result); + auto expected_ids = CollectMatchingRows(records, [key_val](const CsvRecord& r) { + return !r.is_null && std::stoi(r.key) == key_val; + }); + ASSERT_EQ(actual_ids, expected_ids); } } } + int32_t max_key = 0; + for (const auto& rec : 
records) { + if (!rec.is_null) { + max_key = std::max(max_key, std::stoi(rec.key)); + } + } + // VisitLessOrEqual for the max key should return all rows { - int32_t max_key = 0; - for (const auto& rec : records) { - if (!rec.is_null) max_key = std::max(max_key, std::stoi(rec.key)); - } Literal literal(max_key); - auto result = reader->VisitLessOrEqual(literal); - ASSERT_OK(result.status()) << prefix << ": VisitLessOrEqual(" << max_key << ") failed"; - auto actual_ids = CollectRowIds(result.value()); - EXPECT_EQ(static_cast(actual_ids.size()), count) - << prefix << ": VisitLessOrEqual(max) should return all rows"; + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitLessOrEqual(literal)); + auto actual_ids = CollectRowIds(result); + ASSERT_EQ(static_cast(actual_ids.size()), count); } // VisitGreaterThan for the max key should return empty { - int32_t max_key = 0; - for (const auto& rec : records) { - if (!rec.is_null) max_key = std::max(max_key, std::stoi(rec.key)); - } Literal literal(max_key); - auto result = reader->VisitGreaterThan(literal); - ASSERT_OK(result.status()) << prefix << ": VisitGreaterThan(" << max_key << ") failed"; - auto actual_ids = CollectRowIds(result.value()); - EXPECT_TRUE(actual_ids.empty()) << prefix << ": VisitGreaterThan(max) should be empty"; + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitGreaterThan(literal)); + auto actual_ids = CollectRowIds(result); + ASSERT_TRUE(actual_ids.empty()); } } -// =========================================================================== -// Test: Edge case - duplicate keys -// =========================================================================== TEST_F(BTreeCompatibilityTest, DuplicateKeys) { std::string prefix = "btree_test_int_duplicates"; std::string bin_path = data_dir_ + "/" + prefix + ".bin"; @@ -773,33 +535,31 @@ TEST_F(BTreeCompatibilityTest, DuplicateKeys) { std::string csv_path = data_dir_ + "/" + prefix + ".csv"; auto records = ParseCsvFile(csv_path); - ASSERT_FALSE(records.empty()) << 
"Failed to parse CSV for " << prefix; - int count = static_cast(records.size()); + ASSERT_FALSE(records.empty()); + auto count = static_cast(records.size()); - auto reader_result = CreateReaderFromFiles(bin_path, meta_path, arrow::int32(), count); - ASSERT_OK(reader_result.status()) << "Failed to create reader for " << prefix; - auto reader = reader_result.value(); + ASSERT_OK_AND_ASSIGN(auto reader, + CreateReaderFromFiles(bin_path, meta_path, arrow::int32(), count)); - // ---- Test: VisitIsNull ---- + // VisitIsNull { - auto result = reader->VisitIsNull(); - ASSERT_OK(result.status()) << prefix << ": VisitIsNull failed"; - auto actual_ids = CollectRowIds(result.value()); - auto expected_ids = GetNullRowIds(records); - EXPECT_EQ(actual_ids, expected_ids) << prefix << ": VisitIsNull mismatch"; + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitIsNull()); + auto actual_ids = CollectRowIds(result); + auto expected_ids = + CollectMatchingRows(records, [](const CsvRecord& r) { return r.is_null; }); + ASSERT_EQ(actual_ids, expected_ids); } - // ---- Test: VisitIsNotNull ---- + // VisitIsNotNull { - auto result = reader->VisitIsNotNull(); - ASSERT_OK(result.status()) << prefix << ": VisitIsNotNull failed"; - auto actual_ids = CollectRowIds(result.value()); - auto expected_ids = GetNonNullRowIds(records); - EXPECT_EQ(actual_ids, expected_ids) << prefix << ": VisitIsNotNull mismatch"; + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitIsNotNull()); + auto actual_ids = CollectRowIds(result); + auto expected_ids = + CollectMatchingRows(records, [](const CsvRecord& r) { return !r.is_null; }); + ASSERT_EQ(actual_ids, expected_ids); } - // ---- Test: VisitEqual for each unique key ---- - // Duplicate keys: key = i/10, so keys are 0,0,...,0,1,1,...,1,...,9,9,...,9 + // VisitEqual for each unique key { std::set tested_keys; for (const auto& rec : records) { @@ -807,151 +567,130 @@ TEST_F(BTreeCompatibilityTest, DuplicateKeys) { tested_keys.insert(rec.key); int32_t key_val = 
std::stoi(rec.key); Literal literal(key_val); - auto result = reader->VisitEqual(literal); - ASSERT_OK(result.status()) << prefix << ": VisitEqual(" << key_val << ") failed"; - auto actual_ids = CollectRowIds(result.value()); - auto expected_ids = GetRowIdsForIntKey(records, key_val); - EXPECT_EQ(actual_ids, expected_ids) - << prefix << ": VisitEqual(" << key_val << ") mismatch"; + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitEqual(literal)); + auto actual_ids = CollectRowIds(result); + auto expected_ids = CollectMatchingRows(records, [key_val](const CsvRecord& r) { + return !r.is_null && std::stoi(r.key) == key_val; + }); + ASSERT_EQ(actual_ids, expected_ids); } } } - // ---- Test: VisitIn for keys 0, 5, 9 ---- + // VisitIn for keys 0, 5, 9 { std::vector in_literals = {Literal(static_cast(0)), Literal(static_cast(5)), Literal(static_cast(9))}; - auto result = reader->VisitIn(in_literals); - ASSERT_OK(result.status()) << prefix << ": VisitIn({0,5,9}) failed"; - auto actual_ids = CollectRowIds(result.value()); - - std::set expected_ids; - for (auto id : GetRowIdsForIntKey(records, 0)) expected_ids.insert(id); - for (auto id : GetRowIdsForIntKey(records, 5)) expected_ids.insert(id); - for (auto id : GetRowIdsForIntKey(records, 9)) expected_ids.insert(id); - EXPECT_EQ(actual_ids, expected_ids) << prefix << ": VisitIn({0,5,9}) mismatch"; - } - - // ---- Test: VisitBetween for keys [2, 7] ---- - { - Literal lit_lower(static_cast(2)); - Literal lit_upper(static_cast(7)); - auto result = reader->VisitBetween(lit_lower, lit_upper); - ASSERT_OK(result.status()) << prefix << ": VisitBetween(2, 7) failed"; - auto actual_ids = CollectRowIds(result.value()); - auto expected_ids = GetRowIdsForIntRange(records, 2, 7, true, true); - EXPECT_EQ(actual_ids, expected_ids) << prefix << ": VisitBetween(2, 7) mismatch"; - } - - // ---- Test: VisitNotBetween for keys [2, 7] ---- - { - Literal lit_lower(static_cast(2)); - Literal lit_upper(static_cast(7)); - auto result = 
reader->VisitNotBetween(lit_lower, lit_upper); - ASSERT_OK(result.status()) << prefix << ": VisitNotBetween(2, 7) failed"; - auto actual_ids = CollectRowIds(result.value()); - - // Expected: non-null rows with key < 2 or key > 7 - std::set expected_ids; - for (const auto& rec : records) { - if (!rec.is_null) { - int32_t val = std::stoi(rec.key); - if (val < 2 || val > 7) expected_ids.insert(rec.row_id); + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitIn(in_literals)); + auto actual_ids = CollectRowIds(result); + auto expected_ids = CollectMatchingRows(records, [](const CsvRecord& r) { + if (r.is_null) { + return false; } - } - EXPECT_EQ(actual_ids, expected_ids) << prefix << ": VisitNotBetween(2, 7) mismatch"; + int32_t v = std::stoi(r.key); + return v == 0 || v == 5 || v == 9; + }); + ASSERT_EQ(actual_ids, expected_ids); } } -// =========================================================================== -// Test: BTreeIndexMeta deserialization from Java-generated meta files -// =========================================================================== TEST_F(BTreeCompatibilityTest, MetaDeserialization) { // Test int_50 meta { std::string meta_path = data_dir_ + "/btree_test_int_50.bin.meta"; - auto meta_bytes = ReadBinaryFile(meta_path, pool_.get()); - ASSERT_NE(meta_bytes, nullptr) << "Failed to read meta file"; + auto meta_str = ReadFileAsString(meta_path); + std::shared_ptr meta_bytes = Bytes::AllocateBytes(meta_str, pool_.get()); auto meta = BTreeIndexMeta::Deserialize(meta_bytes, pool_.get()); - ASSERT_NE(meta, nullptr) << "Failed to deserialize meta"; - - // The int_50 data has nulls (row_id 1, 18, 48, 49) - EXPECT_TRUE(meta->HasNulls()) << "int_50 should have nulls"; - - // First key should be "3" (first non-null key in sorted order) - ASSERT_NE(meta->FirstKey(), nullptr) << "int_50 first key should not be null"; - std::string first_key(meta->FirstKey()->data(), meta->FirstKey()->size()); - // Java writes int keys as string representation - // The first 
non-null key in sorted order is 3 - EXPECT_FALSE(first_key.empty()) << "int_50 first key should not be empty"; - - // Last key should be "143" (last non-null key in sorted order) - ASSERT_NE(meta->LastKey(), nullptr) << "int_50 last key should not be null"; - std::string last_key(meta->LastKey()->data(), meta->LastKey()->size()); - EXPECT_FALSE(last_key.empty()) << "int_50 last key should not be empty"; + ASSERT_TRUE(meta); + + ASSERT_TRUE(meta->HasNulls()); + ASSERT_FALSE(meta->OnlyNulls()); + + ASSERT_TRUE(meta->FirstKey()); + ASSERT_OK_AND_ASSIGN(auto min_key, + KeySerializer::DeserializeKey(MemorySlice::Wrap(meta->FirstKey()), + arrow::int32(), pool_.get())); + ASSERT_EQ(min_key, Literal(3)); + + ASSERT_TRUE(meta->LastKey()); + ASSERT_OK_AND_ASSIGN(auto max_key, + KeySerializer::DeserializeKey(MemorySlice::Wrap(meta->LastKey()), + arrow::int32(), pool_.get())); + ASSERT_EQ(max_key, Literal(143)); } // Test all_nulls meta { std::string meta_path = data_dir_ + "/btree_test_int_all_nulls.bin.meta"; - auto meta_bytes = ReadBinaryFile(meta_path, pool_.get()); - ASSERT_NE(meta_bytes, nullptr) << "Failed to read all_nulls meta file"; + auto meta_str = ReadFileAsString(meta_path); + std::shared_ptr meta_bytes = Bytes::AllocateBytes(meta_str, pool_.get()); auto meta = BTreeIndexMeta::Deserialize(meta_bytes, pool_.get()); - ASSERT_NE(meta, nullptr) << "Failed to deserialize all_nulls meta"; + ASSERT_TRUE(meta); - EXPECT_TRUE(meta->HasNulls()) << "all_nulls should have nulls"; - // For all-nulls data, first_key and last_key should be null - EXPECT_TRUE(meta->OnlyNulls()) << "all_nulls should report OnlyNulls()"; + ASSERT_TRUE(meta->HasNulls()); + ASSERT_TRUE(meta->OnlyNulls()); + ASSERT_FALSE(meta->FirstKey()); + ASSERT_FALSE(meta->LastKey()); } // Test no_nulls meta { std::string meta_path = data_dir_ + "/btree_test_int_no_nulls.bin.meta"; - auto meta_bytes = ReadBinaryFile(meta_path, pool_.get()); - ASSERT_NE(meta_bytes, nullptr) << "Failed to read no_nulls meta file"; 
+ auto meta_str = ReadFileAsString(meta_path); + std::shared_ptr meta_bytes = Bytes::AllocateBytes(meta_str, pool_.get()); auto meta = BTreeIndexMeta::Deserialize(meta_bytes, pool_.get()); - ASSERT_NE(meta, nullptr) << "Failed to deserialize no_nulls meta"; + ASSERT_TRUE(meta); + + ASSERT_TRUE(meta->FirstKey()); + ASSERT_OK_AND_ASSIGN(auto min_key, + KeySerializer::DeserializeKey(MemorySlice::Wrap(meta->FirstKey()), + arrow::int32(), pool_.get())); + ASSERT_EQ(min_key, Literal(4)); - EXPECT_FALSE(meta->HasNulls()) << "no_nulls should not have nulls"; - EXPECT_FALSE(meta->OnlyNulls()) << "no_nulls should not report OnlyNulls()"; - ASSERT_NE(meta->FirstKey(), nullptr) << "no_nulls first key should not be null"; - ASSERT_NE(meta->LastKey(), nullptr) << "no_nulls last key should not be null"; + ASSERT_TRUE(meta->LastKey()); + ASSERT_OK_AND_ASSIGN(auto max_key, + KeySerializer::DeserializeKey(MemorySlice::Wrap(meta->LastKey()), + arrow::int32(), pool_.get())); + ASSERT_EQ(max_key, Literal(158)); } - // Test varchar_50 meta + // Test string_50 meta { - std::string meta_path = data_dir_ + "/btree_test_varchar_50.bin.meta"; - auto meta_bytes = ReadBinaryFile(meta_path, pool_.get()); - ASSERT_NE(meta_bytes, nullptr) << "Failed to read varchar_50 meta file"; + std::string meta_path = data_dir_ + "/btree_test_string_50.bin.meta"; + auto meta_str = ReadFileAsString(meta_path); + std::shared_ptr meta_bytes = Bytes::AllocateBytes(meta_str, pool_.get()); auto meta = BTreeIndexMeta::Deserialize(meta_bytes, pool_.get()); - ASSERT_NE(meta, nullptr) << "Failed to deserialize varchar_50 meta"; + ASSERT_TRUE(meta); + + ASSERT_TRUE(meta->HasNulls()); + ASSERT_FALSE(meta->OnlyNulls()); - // varchar_50 has 1 null (row_id=24 based on 5% null ratio with seed 42) - ASSERT_NE(meta->FirstKey(), nullptr) << "varchar_50 first key should not be null"; - ASSERT_NE(meta->LastKey(), nullptr) << "varchar_50 last key should not be null"; + ASSERT_TRUE(meta->FirstKey()); + ASSERT_OK_AND_ASSIGN(auto 
min_key, + KeySerializer::DeserializeKey(MemorySlice::Wrap(meta->FirstKey()), + arrow::utf8(), pool_.get())); + std::string min_key_str = "test_00000"; + ASSERT_EQ(min_key, Literal(FieldType::STRING, min_key_str.data(), min_key_str.size())); - std::string first_key(meta->FirstKey()->data(), meta->FirstKey()->size()); - std::string last_key(meta->LastKey()->data(), meta->LastKey()->size()); - // Keys are "test_00000" to "test_00049" (excluding nulls) - EXPECT_EQ(first_key, "test_00000") << "varchar_50 first key mismatch"; - EXPECT_EQ(last_key, "test_00049") << "varchar_50 last key mismatch"; + ASSERT_TRUE(meta->LastKey()); + ASSERT_OK_AND_ASSIGN(auto max_key, + KeySerializer::DeserializeKey(MemorySlice::Wrap(meta->LastKey()), + arrow::utf8(), pool_.get())); + std::string max_key_str = "test_00049"; + ASSERT_EQ(max_key, Literal(FieldType::STRING, max_key_str.data(), max_key_str.size())); } } -// =========================================================================== -// Test: Verify total row count consistency -// =========================================================================== TEST_F(BTreeCompatibilityTest, RowCountConsistency) { - // For each test data set, verify that null_count + non_null_count == total_count std::vector>> test_cases = { {"btree_test_int_50", arrow::int32()}, {"btree_test_int_100", arrow::int32()}, - {"btree_test_int_500", arrow::int32()}, {"btree_test_varchar_50", arrow::utf8()}, - {"btree_test_varchar_100", arrow::utf8()}, {"btree_test_int_all_nulls", arrow::int32()}, + {"btree_test_int_500", arrow::int32()}, {"btree_test_string_50", arrow::utf8()}, + {"btree_test_string_100", arrow::utf8()}, {"btree_test_int_all_nulls", arrow::int32()}, {"btree_test_int_no_nulls", arrow::int32()}, {"btree_test_int_duplicates", arrow::int32()}, }; @@ -961,30 +700,25 @@ TEST_F(BTreeCompatibilityTest, RowCountConsistency) { std::string csv_path = data_dir_ + "/" + prefix + ".csv"; auto records = ParseCsvFile(csv_path); - 
ASSERT_FALSE(records.empty()) << "Failed to parse CSV for " << prefix; - int count = static_cast(records.size()); + ASSERT_FALSE(records.empty()); + auto count = static_cast(records.size()); - auto reader_result = CreateReaderFromFiles(bin_path, meta_path, arrow_type, count); - ASSERT_OK(reader_result.status()) << "Failed to create reader for " << prefix; - auto reader = reader_result.value(); + ASSERT_OK_AND_ASSIGN(auto reader, + CreateReaderFromFiles(bin_path, meta_path, arrow_type, count)); - auto null_result = reader->VisitIsNull(); - ASSERT_OK(null_result.status()) << prefix << ": VisitIsNull failed"; - auto null_ids = CollectRowIds(null_result.value()); + ASSERT_OK_AND_ASSIGN(auto null_result, reader->VisitIsNull()); + auto null_ids = CollectRowIds(null_result); - auto non_null_result = reader->VisitIsNotNull(); - ASSERT_OK(non_null_result.status()) << prefix << ": VisitIsNotNull failed"; - auto non_null_ids = CollectRowIds(non_null_result.value()); + ASSERT_OK_AND_ASSIGN(auto non_null_result, reader->VisitIsNotNull()); + auto non_null_ids = CollectRowIds(non_null_result); // Null and non-null should be disjoint for (auto id : null_ids) { - EXPECT_EQ(non_null_ids.count(id), 0u) - << prefix << ": row_id " << id << " is in both null and non-null sets"; + ASSERT_EQ(non_null_ids.count(id), 0u); } // Total should equal record count - EXPECT_EQ(static_cast(null_ids.size() + non_null_ids.size()), count) - << prefix << ": null_count + non_null_count != total_count"; + ASSERT_EQ(static_cast(null_ids.size() + non_null_ids.size()), count); } } diff --git a/src/paimon/common/global_index/btree/btree_defs.h b/src/paimon/common/global_index/btree/btree_defs.h new file mode 100644 index 000000000..2a880d516 --- /dev/null +++ b/src/paimon/common/global_index/btree/btree_defs.h @@ -0,0 +1,50 @@ +/* + * Copyright 2026-present Alibaba Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include + +namespace paimon { +struct BtreeDefs { + BtreeDefs() = delete; + ~BtreeDefs() = delete; + static inline const char kIdentifier[] = "btree"; + /// "btree-index.compression" - The compression algorithm to use for BTreeIndex. + /// Default value is "none". + static inline const char kBtreeIndexCompression[] = "btree-index.compression"; + /// "btree-index.compression-level" - The compression level of the compression algorithm. + /// Default value is 1. + static inline const char kBtreeIndexCompressionLevel[] = "btree-index.compression-level"; + /// "btree-index.block-size" - The block size to use for BTreeIndex. + /// Default value is 64 KB. + static inline const char kBtreeIndexBlockSize[] = "btree-index.block-size"; + /// "btree-index.cache-size" - The cache size to use for BTreeIndex. + /// Default value is 128 MB. + static inline const char kBtreeIndexCacheSize[] = "btree-index.cache-size"; + /// "btree-index.high-priority-pool-ratio" - The high priority pool ratio to use for BTreeIndex. + /// Default value is 0.1. 
+ static inline const char kBtreeIndexHighPriorityPoolRatio[] = + "btree-index.high-priority-pool-ratio"; + + static inline const char kDefaultBtreeIndexBlockSize[] = "64KB"; + static inline const char kDefaultBtreeIndexCompression[] = "none"; + static inline const char kDefaultBtreeIndexCacheSize[] = "128MB"; + static inline const int32_t kDefaultBtreeIndexCompressionLevel = 1; + static inline const double kDefaultBtreeIndexHighPriorityPoolRatio = 0.1; +}; +} // namespace paimon diff --git a/src/paimon/common/global_index/btree/btree_file_footer.cpp b/src/paimon/common/global_index/btree/btree_file_footer.cpp index fd8ca485a..7463975fa 100644 --- a/src/paimon/common/global_index/btree/btree_file_footer.cpp +++ b/src/paimon/common/global_index/btree/btree_file_footer.cpp @@ -22,7 +22,7 @@ namespace paimon { Result> BTreeFileFooter::Read(MemorySliceInput* input) { // read version and verify magic number - PAIMON_RETURN_NOT_OK(input->SetPosition(ENCODED_LENGTH - 8)); + PAIMON_RETURN_NOT_OK(input->SetPosition(kEncodingLength - 8)); int32_t version = input->ReadInt(); int32_t magic_number = input->ReadInt(); @@ -38,22 +38,22 @@ Result> BTreeFileFooter::Read(MemorySliceInput* auto offset = input->ReadLong(); auto size = input->ReadInt(); auto expected_entries = input->ReadLong(); - std::shared_ptr bloom_filter_handle = - std::make_shared(offset, size, expected_entries); + std::optional bloom_filter_handle = + BloomFilterHandle(offset, size, expected_entries); if (bloom_filter_handle->Offset() == 0 && bloom_filter_handle->Size() == 0 && bloom_filter_handle->ExpectedEntries() == 0) { - bloom_filter_handle = nullptr; + bloom_filter_handle = std::nullopt; } offset = input->ReadLong(); size = input->ReadInt(); - std::shared_ptr index_block_handle = std::make_shared(offset, size); + BlockHandle index_block_handle(offset, size); offset = input->ReadLong(); size = input->ReadInt(); - std::shared_ptr null_bitmap_handle = std::make_shared(offset, size); + std::optional 
null_bitmap_handle = BlockHandle(offset, size); if (null_bitmap_handle->Offset() == 0 && null_bitmap_handle->Size() == 0) { - null_bitmap_handle = nullptr; + null_bitmap_handle = std::nullopt; } return std::make_shared(version, bloom_filter_handle, index_block_handle, @@ -62,42 +62,44 @@ Result> BTreeFileFooter::Read(MemorySliceInput* MemorySlice BTreeFileFooter::Write(const std::shared_ptr& footer, MemoryPool* pool) { - MemorySliceOutput output(ENCODED_LENGTH, pool); - return BTreeFileFooter::Write(footer, output); + MemorySliceOutput output(kEncodingLength, pool); + return BTreeFileFooter::Write(footer, &output); } MemorySlice BTreeFileFooter::Write(const std::shared_ptr& footer, - MemorySliceOutput& output) { - // write bloom filter and index handles - auto bloom_filter_handle = footer->GetBloomFilterHandle(); - if (!bloom_filter_handle) { - output.WriteValue(static_cast(0)); - output.WriteValue(static_cast(0)); - output.WriteValue(static_cast(0)); + MemorySliceOutput* output) { + // write bloom filter handle + const auto& bloom_filter_handle = footer->GetBloomFilterHandle(); + if (!bloom_filter_handle.has_value()) { + output->WriteValue(static_cast(0)); + output->WriteValue(static_cast(0)); + output->WriteValue(static_cast(0)); } else { - output.WriteValue(bloom_filter_handle->Offset()); - output.WriteValue(bloom_filter_handle->Size()); - output.WriteValue(bloom_filter_handle->ExpectedEntries()); + output->WriteValue(bloom_filter_handle->Offset()); + output->WriteValue(bloom_filter_handle->Size()); + output->WriteValue(bloom_filter_handle->ExpectedEntries()); } - auto index_block_handle = footer->GetIndexBlockHandle(); - output.WriteValue(index_block_handle->Offset()); - output.WriteValue(index_block_handle->Size()); + // write index block handle + const auto& index_block_handle = footer->GetIndexBlockHandle(); + output->WriteValue(index_block_handle.Offset()); + output->WriteValue(index_block_handle.Size()); - auto null_bitmap_handle = 
footer->GetNullBitmapHandle(); - if (!null_bitmap_handle) { - output.WriteValue(static_cast(0)); - output.WriteValue(static_cast(0)); + // write null bitmap handle + const auto& null_bitmap_handle = footer->GetNullBitmapHandle(); + if (!null_bitmap_handle.has_value()) { + output->WriteValue(static_cast(0)); + output->WriteValue(static_cast(0)); } else { - output.WriteValue(null_bitmap_handle->Offset()); - output.WriteValue(null_bitmap_handle->Size()); + output->WriteValue(null_bitmap_handle->Offset()); + output->WriteValue(null_bitmap_handle->Size()); } // write version and magic number - output.WriteValue(footer->GetVersion()); - output.WriteValue(kMagicNumber); + output->WriteValue(footer->GetVersion()); + output->WriteValue(kMagicNumber); - return output.ToSlice(); + return output->ToSlice(); } } // namespace paimon diff --git a/src/paimon/common/global_index/btree/btree_file_footer.h b/src/paimon/common/global_index/btree/btree_file_footer.h index c963521d2..76273fd22 100644 --- a/src/paimon/common/global_index/btree/btree_file_footer.h +++ b/src/paimon/common/global_index/btree/btree_file_footer.h @@ -16,7 +16,7 @@ #pragma once -#include +#include #include "paimon/common/memory/memory_slice_input.h" #include "paimon/common/memory/memory_slice_output.h" @@ -30,20 +30,18 @@ class BTreeFileFooter { static Result> Read(MemorySliceInput* input); static MemorySlice Write(const std::shared_ptr& footer, MemoryPool* pool); static MemorySlice Write(const std::shared_ptr& footer, - MemorySliceOutput& output); + MemorySliceOutput* output); public: - BTreeFileFooter(const std::shared_ptr& bloom_filter_handle, - const std::shared_ptr& index_block_handle, - const std::shared_ptr& null_bitmap_handle) - : version_(CURRENT_VERSION), - bloom_filter_handle_(bloom_filter_handle), - index_block_handle_(index_block_handle), - null_bitmap_handle_(null_bitmap_handle) {} + BTreeFileFooter(const std::optional& bloom_filter_handle, + const BlockHandle& index_block_handle, + const 
std::optional& null_bitmap_handle) + : BTreeFileFooter(kCurrentVersion, bloom_filter_handle, index_block_handle, + null_bitmap_handle) {} - BTreeFileFooter(int32_t version, const std::shared_ptr& bloom_filter_handle, - const std::shared_ptr& index_block_handle, - const std::shared_ptr& null_bitmap_handle) + BTreeFileFooter(int32_t version, const std::optional& bloom_filter_handle, + const BlockHandle& index_block_handle, + const std::optional& null_bitmap_handle) : version_(version), bloom_filter_handle_(bloom_filter_handle), index_block_handle_(index_block_handle), @@ -53,28 +51,28 @@ class BTreeFileFooter { return version_; } - std::shared_ptr GetBloomFilterHandle() const { + const std::optional& GetBloomFilterHandle() const { return bloom_filter_handle_; } - std::shared_ptr GetIndexBlockHandle() const { + const BlockHandle& GetIndexBlockHandle() const { return index_block_handle_; } - std::shared_ptr GetNullBitmapHandle() const { + const std::optional& GetNullBitmapHandle() const { return null_bitmap_handle_; } public: static constexpr int32_t kMagicNumber = 0x50425449; - static constexpr int32_t CURRENT_VERSION = 1; - static constexpr int32_t ENCODED_LENGTH = 52; + static constexpr int32_t kCurrentVersion = 1; + static constexpr int32_t kEncodingLength = 52; private: int32_t version_; - std::shared_ptr bloom_filter_handle_; - std::shared_ptr index_block_handle_; - std::shared_ptr null_bitmap_handle_; + std::optional bloom_filter_handle_; + BlockHandle index_block_handle_; + std::optional null_bitmap_handle_; }; } // namespace paimon diff --git a/src/paimon/common/global_index/btree/btree_file_footer_test.cpp b/src/paimon/common/global_index/btree/btree_file_footer_test.cpp index 91172253d..04f39bc89 100644 --- a/src/paimon/common/global_index/btree/btree_file_footer_test.cpp +++ b/src/paimon/common/global_index/btree/btree_file_footer_test.cpp @@ -35,112 +35,108 @@ class BTreeFileFooterTest : public ::testing::Test { }; TEST_F(BTreeFileFooterTest, 
ReadWriteRoundTrip) { - auto bloom_filter_handle = std::make_shared(100, 50, 1000); - auto index_block_handle = std::make_shared(200, 80); - auto null_bitmap_handle = std::make_shared(300, 40); + BloomFilterHandle bloom_filter_handle(100, 50, 1000); + BlockHandle index_block_handle(200, 80); + BlockHandle null_bitmap_handle(300, 40); auto footer = std::make_shared(bloom_filter_handle, index_block_handle, null_bitmap_handle); auto serialized = BTreeFileFooter::Write(footer, pool_.get()); - EXPECT_EQ(serialized.Length(), BTreeFileFooter::ENCODED_LENGTH); + ASSERT_EQ(serialized.Length(), BTreeFileFooter::kEncodingLength); auto input = serialized.ToInput(); ASSERT_OK_AND_ASSIGN(auto deserialized_footer, BTreeFileFooter::Read(&input)); - auto bf_handle = deserialized_footer->GetBloomFilterHandle(); - ASSERT_NE(bf_handle, nullptr); - EXPECT_EQ(bf_handle->Offset(), 100); - EXPECT_EQ(bf_handle->Size(), 50); - EXPECT_EQ(bf_handle->ExpectedEntries(), 1000); - - auto ib_handle = deserialized_footer->GetIndexBlockHandle(); - ASSERT_NE(ib_handle, nullptr); - EXPECT_EQ(ib_handle->Offset(), 200); - EXPECT_EQ(ib_handle->Size(), 80); - - auto nb_handle = deserialized_footer->GetNullBitmapHandle(); - ASSERT_NE(nb_handle, nullptr); - EXPECT_EQ(nb_handle->Offset(), 300); - EXPECT_EQ(nb_handle->Size(), 40); + const auto& bf_handle = deserialized_footer->GetBloomFilterHandle(); + ASSERT_TRUE(bf_handle.has_value()); + ASSERT_EQ(bf_handle->Offset(), 100); + ASSERT_EQ(bf_handle->Size(), 50); + ASSERT_EQ(bf_handle->ExpectedEntries(), 1000); + + const auto& ib_handle = deserialized_footer->GetIndexBlockHandle(); + ASSERT_EQ(ib_handle.Offset(), 200); + ASSERT_EQ(ib_handle.Size(), 80); + + const auto& nb_handle = deserialized_footer->GetNullBitmapHandle(); + ASSERT_TRUE(nb_handle.has_value()); + ASSERT_EQ(nb_handle->Offset(), 300); + ASSERT_EQ(nb_handle->Size(), 40); } TEST_F(BTreeFileFooterTest, ReadWriteWithNullBloomFilter) { - auto index_block_handle = std::make_shared(200, 80); - auto 
null_bitmap_handle = std::make_shared(300, 40); + BlockHandle index_block_handle(200, 80); + BlockHandle null_bitmap_handle(300, 40); auto footer = - std::make_shared(nullptr, index_block_handle, null_bitmap_handle); + std::make_shared(std::nullopt, index_block_handle, null_bitmap_handle); auto serialized = BTreeFileFooter::Write(footer, pool_.get()); - EXPECT_EQ(serialized.Length(), BTreeFileFooter::ENCODED_LENGTH); + ASSERT_EQ(serialized.Length(), BTreeFileFooter::kEncodingLength); auto input = serialized.ToInput(); ASSERT_OK_AND_ASSIGN(auto deserialized_footer, BTreeFileFooter::Read(&input)); - EXPECT_EQ(deserialized_footer->GetBloomFilterHandle(), nullptr); + ASSERT_FALSE(deserialized_footer->GetBloomFilterHandle().has_value()); - auto ib_handle = deserialized_footer->GetIndexBlockHandle(); - ASSERT_NE(ib_handle, nullptr); - EXPECT_EQ(ib_handle->Offset(), 200); - EXPECT_EQ(ib_handle->Size(), 80); + const auto& ib_handle = deserialized_footer->GetIndexBlockHandle(); + ASSERT_EQ(ib_handle.Offset(), 200); + ASSERT_EQ(ib_handle.Size(), 80); - auto nb_handle = deserialized_footer->GetNullBitmapHandle(); - ASSERT_NE(nb_handle, nullptr); - EXPECT_EQ(nb_handle->Offset(), 300); - EXPECT_EQ(nb_handle->Size(), 40); + const auto& nb_handle = deserialized_footer->GetNullBitmapHandle(); + ASSERT_TRUE(nb_handle.has_value()); + ASSERT_EQ(nb_handle->Offset(), 300); + ASSERT_EQ(nb_handle->Size(), 40); } TEST_F(BTreeFileFooterTest, ReadWriteWithNullNullBitmap) { - auto bloom_filter_handle = std::make_shared(100, 50, 1000); - auto index_block_handle = std::make_shared(200, 80); + BloomFilterHandle bloom_filter_handle(100, 50, 1000); + BlockHandle index_block_handle(200, 80); auto footer = - std::make_shared(bloom_filter_handle, index_block_handle, nullptr); + std::make_shared(bloom_filter_handle, index_block_handle, std::nullopt); auto serialized = BTreeFileFooter::Write(footer, pool_.get()); - EXPECT_EQ(serialized.Length(), BTreeFileFooter::ENCODED_LENGTH); + 
ASSERT_EQ(serialized.Length(), BTreeFileFooter::kEncodingLength); auto input = serialized.ToInput(); ASSERT_OK_AND_ASSIGN(auto deserialized_footer, BTreeFileFooter::Read(&input)); - auto bf_handle = deserialized_footer->GetBloomFilterHandle(); - ASSERT_NE(bf_handle, nullptr); - EXPECT_EQ(bf_handle->Offset(), 100); - EXPECT_EQ(bf_handle->Size(), 50); - EXPECT_EQ(bf_handle->ExpectedEntries(), 1000); + const auto& bf_handle = deserialized_footer->GetBloomFilterHandle(); + ASSERT_TRUE(bf_handle.has_value()); + ASSERT_EQ(bf_handle->Offset(), 100); + ASSERT_EQ(bf_handle->Size(), 50); + ASSERT_EQ(bf_handle->ExpectedEntries(), 1000); - auto ib_handle = deserialized_footer->GetIndexBlockHandle(); - ASSERT_NE(ib_handle, nullptr); - EXPECT_EQ(ib_handle->Offset(), 200); - EXPECT_EQ(ib_handle->Size(), 80); + const auto& ib_handle = deserialized_footer->GetIndexBlockHandle(); + ASSERT_EQ(ib_handle.Offset(), 200); + ASSERT_EQ(ib_handle.Size(), 80); - EXPECT_EQ(deserialized_footer->GetNullBitmapHandle(), nullptr); + ASSERT_FALSE(deserialized_footer->GetNullBitmapHandle().has_value()); } TEST_F(BTreeFileFooterTest, ReadWriteWithAllNullHandles) { - auto index_block_handle = std::make_shared(200, 80); + BlockHandle index_block_handle(200, 80); - auto footer = std::make_shared(nullptr, index_block_handle, nullptr); + auto footer = std::make_shared(std::nullopt, index_block_handle, std::nullopt); auto serialized = BTreeFileFooter::Write(footer, pool_.get()); - EXPECT_EQ(serialized.Length(), BTreeFileFooter::ENCODED_LENGTH); + ASSERT_EQ(serialized.Length(), BTreeFileFooter::kEncodingLength); auto input = serialized.ToInput(); ASSERT_OK_AND_ASSIGN(auto deserialized_footer, BTreeFileFooter::Read(&input)); - EXPECT_EQ(deserialized_footer->GetBloomFilterHandle(), nullptr); + ASSERT_FALSE(deserialized_footer->GetBloomFilterHandle().has_value()); - auto ib_handle = deserialized_footer->GetIndexBlockHandle(); - ASSERT_NE(ib_handle, nullptr); - EXPECT_EQ(ib_handle->Offset(), 200); - 
EXPECT_EQ(ib_handle->Size(), 80); + const auto& ib_handle = deserialized_footer->GetIndexBlockHandle(); + ASSERT_EQ(ib_handle.Offset(), 200); + ASSERT_EQ(ib_handle.Size(), 80); - EXPECT_EQ(deserialized_footer->GetNullBitmapHandle(), nullptr); + ASSERT_FALSE(deserialized_footer->GetNullBitmapHandle().has_value()); } TEST_F(BTreeFileFooterTest, InvalidMagicNumber) { - MemorySliceOutput output(BTreeFileFooter::ENCODED_LENGTH, pool_.get()); + MemorySliceOutput output(BTreeFileFooter::kEncodingLength, pool_.get()); output.WriteValue(static_cast(0)); output.WriteValue(static_cast(0)); @@ -159,7 +155,7 @@ TEST_F(BTreeFileFooterTest, InvalidMagicNumber) { auto input = serialized.ToInput(); auto deserialized = BTreeFileFooter::Read(&input); - ASSERT_NOK_WITH_MSG(deserialized, "not a btree index file"); + ASSERT_NOK_WITH_MSG(deserialized, "File is not a btree index file (expected magic number"); } } // namespace paimon::test diff --git a/src/paimon/common/global_index/btree/btree_global_index_integration_test.cpp b/src/paimon/common/global_index/btree/btree_global_index_integration_test.cpp index 7d8b77dce..15f7bf5bd 100644 --- a/src/paimon/common/global_index/btree/btree_global_index_integration_test.cpp +++ b/src/paimon/common/global_index/btree/btree_global_index_integration_test.cpp @@ -13,22 +13,24 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - #include "arrow/c/bridge.h" -#include "arrow/c/helpers.h" #include "arrow/ipc/json_simple.h" #include "gtest/gtest.h" #include "paimon/common/compression/block_compression_factory.h" +#include "paimon/common/factories/io_hook.h" #include "paimon/common/global_index/btree/btree_global_index_writer.h" #include "paimon/common/global_index/btree/btree_global_indexer.h" -#include "paimon/common/utils/arrow/status_utils.h" +#include "paimon/common/utils/scope_guard.h" +#include "paimon/data/decimal.h" +#include "paimon/data/timestamp.h" #include "paimon/fs/file_system.h" +#include "paimon/global_index/bitmap_global_index_result.h" #include "paimon/global_index/io/global_index_file_reader.h" #include "paimon/global_index/io/global_index_file_writer.h" #include "paimon/memory/memory_pool.h" #include "paimon/predicate/literal.h" +#include "paimon/testing/utils/io_exception_helper.h" #include "paimon/testing/utils/testharness.h" - namespace paimon::test { class FakeGlobalIndexFileWriter : public GlobalIndexFileWriter { @@ -75,7 +77,8 @@ class FakeGlobalIndexFileReader : public GlobalIndexFileReader { std::string base_path_; }; -class BTreeGlobalIndexIntegrationTest : public ::testing::Test { +class BTreeGlobalIndexIntegrationTest : public ::testing::Test, + public ::testing::WithParamInterface { protected: void SetUp() override { pool_ = GetDefaultPool(); @@ -88,27 +91,23 @@ class BTreeGlobalIndexIntegrationTest : public ::testing::Test { void TearDown() override {} // Helper to create ArrowSchema from arrow type - std::unique_ptr CreateArrowSchema(const std::shared_ptr& type, - const std::string& field_name) { - auto schema = arrow::schema({arrow::field(field_name, type)}); + std::unique_ptr CreateArrowSchema( + const std::shared_ptr& field) const { + auto schema = arrow::schema({field}); auto c_schema = std::make_unique(); EXPECT_TRUE(arrow::ExportSchema(*schema, c_schema.get()).ok()); return c_schema; } - // Helper to check if a row ID is in the result - bool 
ContainsRowId(const std::shared_ptr& result, int64_t row_id) { - auto iterator_result = result->CreateIterator(); - if (!iterator_result.ok()) { - return false; - } - auto iterator = std::move(iterator_result).value(); - while (iterator->HasNext()) { - if (iterator->Next() == row_id) { - return true; - } - } - return false; + void CheckResult(const std::shared_ptr& result, + const std::vector& expected) const { + auto typed_result = std::dynamic_pointer_cast(result); + ASSERT_TRUE(typed_result); + ASSERT_OK_AND_ASSIGN(const RoaringBitmap64* bitmap, typed_result->GetBitmap()); + ASSERT_TRUE(bitmap); + ASSERT_EQ(*bitmap, RoaringBitmap64::From(expected)) + << "result=" << bitmap->ToString() + << ", expected=" << RoaringBitmap64::From(expected).ToString(); } std::shared_ptr pool_; @@ -118,317 +117,1747 @@ class BTreeGlobalIndexIntegrationTest : public ::testing::Test { std::string base_path_; }; -TEST_F(BTreeGlobalIndexIntegrationTest, WriteAndReadIntData) { - // Create file writer +TEST_P(BTreeGlobalIndexIntegrationTest, WriteAndReadIntData) { auto file_writer = std::make_shared(fs_, base_path_); + auto field = arrow::field("int_field", arrow::int32()); + auto c_schema = CreateArrowSchema(field); - // Create ArrowSchema - auto c_schema = CreateArrowSchema(arrow::int32(), "int_field"); - - // Create the BTree global index writer + std::map options = {{BtreeDefs::kBtreeIndexBlockSize, "4096"}, + {BtreeDefs::kBtreeIndexCompression, GetParam()}}; + auto indexer = std::make_shared(options); ASSERT_OK_AND_ASSIGN(auto writer, - BTreeGlobalIndexWriter::Create("int_field", c_schema.get(), file_writer, - compression_factory_, pool_, 4096)); - - // Create an Arrow array with int values - // Row IDs: 0->1, 1->2, 2->3, 3->2, 4->1, 5->4, 6->5, 7->5, 8->5 - auto array = - arrow::ipc::internal::json::ArrayFromJSON(arrow::int32(), "[1, 2, 3, 2, 1, 4, 5, 5, 5]") - .ValueOrDie(); + indexer->CreateWriter("int_field", c_schema.get(), file_writer, pool_)); + auto btree_writer = 
std::dynamic_pointer_cast(writer); + ASSERT_TRUE(btree_writer); + // Data layout (row_id -> value): + // 0->1, 1->1, 2->null, 3->2, 4->2, 5->null, 6->3, 7->4, 8->5, 9->5, 10->5, 11->null + auto array = arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_({field}), R"([ + [1], + [1], + [null], + [2], + [2], + [null], + [3], + [4], + [5], + [5], + [5], + [null] + ])") + .ValueOrDie(); ArrowArray c_array; ASSERT_TRUE(arrow::ExportArray(*array, &c_array).ok()); + std::vector row_ids(array->length()); + std::iota(row_ids.begin(), row_ids.end(), 0); + ASSERT_OK(btree_writer->AddBatch(&c_array, row_ids)); + ASSERT_OK_AND_ASSIGN(auto metas, writer->Finish()); + ASSERT_EQ(metas.size(), 1); + + // Now read back + auto file_reader = std::make_shared(fs_, base_path_); + c_schema = CreateArrowSchema(field); + ASSERT_OK_AND_ASSIGN(auto reader, + indexer->CreateReader(c_schema.get(), file_reader, metas, pool_)); + + // All non-null row ids: {0,1,3,4,6,7,8,9,10} + // Null row ids: {2,5,11} + + // --- VisitIsNull --- + { + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitIsNull()); + CheckResult(result, {2, 5, 11}); + } + + // --- VisitIsNotNull --- + { + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitIsNotNull()); + CheckResult(result, {0, 1, 3, 4, 6, 7, 8, 9, 10}); + } + + // --- VisitEqual --- + { + // Equal to 1 -> rows 0,1 + Literal literal_1(1); + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitEqual(literal_1)); + CheckResult(result, {0, 1}); + + // Equal to 3 -> row 6 + Literal literal_3(3); + ASSERT_OK_AND_ASSIGN(result, reader->VisitEqual(literal_3)); + CheckResult(result, {6}); + + // Equal to 5 -> rows 8,9,10 + Literal literal_5(5); + ASSERT_OK_AND_ASSIGN(result, reader->VisitEqual(literal_5)); + CheckResult(result, {8, 9, 10}); + + // Equal to 99 (not present) -> empty + Literal literal_99(99); + ASSERT_OK_AND_ASSIGN(result, reader->VisitEqual(literal_99)); + CheckResult(result, {}); + } + + // --- VisitNotEqual --- + { + // NotEqual to 3 -> all non-null except row 6 
+ Literal literal_3(3); + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitNotEqual(literal_3)); + CheckResult(result, {0, 1, 3, 4, 7, 8, 9, 10}); + } + + // --- VisitLessThan --- + { + // LessThan 3 -> values 1,2 -> rows 0,1,3,4 + Literal literal_3(3); + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitLessThan(literal_3)); + CheckResult(result, {0, 1, 3, 4}); + + // LessThan 1 -> empty (no value < 1) + Literal literal_1(1); + ASSERT_OK_AND_ASSIGN(result, reader->VisitLessThan(literal_1)); + CheckResult(result, {}); + } + + // --- VisitLessOrEqual --- + { + // LessOrEqual 3 -> values 1,2,3 -> rows 0,1,3,4,6 + Literal literal_3(3); + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitLessOrEqual(literal_3)); + CheckResult(result, {0, 1, 3, 4, 6}); + } + + // --- VisitGreaterThan --- + { + // GreaterThan 3 -> values 4,5 -> rows 7,8,9,10 + Literal literal_3(3); + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitGreaterThan(literal_3)); + CheckResult(result, {7, 8, 9, 10}); + + // GreaterThan 5 -> empty (no value > 5) + Literal literal_5(5); + ASSERT_OK_AND_ASSIGN(result, reader->VisitGreaterThan(literal_5)); + CheckResult(result, {}); + } + + // --- VisitGreaterOrEqual --- + { + // GreaterOrEqual 3 -> values 3,4,5 -> rows 6,7,8,9,10 + Literal literal_3(3); + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitGreaterOrEqual(literal_3)); + CheckResult(result, {6, 7, 8, 9, 10}); + } + + // --- VisitIn --- + { + // In {1, 4} -> rows 0,1,7 + std::vector in_literals = {Literal(1), Literal(4)}; + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitIn(in_literals)); + CheckResult(result, {0, 1, 7}); + + // In {99} (not present) -> empty + std::vector in_missing = {Literal(99)}; + ASSERT_OK_AND_ASSIGN(result, reader->VisitIn(in_missing)); + CheckResult(result, {}); + } + + // --- VisitNotIn --- + { + // NotIn {1, 5} -> all non-null except rows 0,1,8,9,10 -> rows 3,4,6,7 + std::vector not_in_literals = {Literal(1), Literal(5)}; + ASSERT_OK_AND_ASSIGN(auto result, 
reader->VisitNotIn(not_in_literals)); + CheckResult(result, {3, 4, 6, 7}); + } +} + +TEST_P(BTreeGlobalIndexIntegrationTest, WriteAndReadStringData) { + auto file_writer = std::make_shared(fs_, base_path_); + auto field = arrow::field("str_field", arrow::utf8()); + auto c_schema = CreateArrowSchema(field); - // Add batch - ASSERT_OK(writer->AddBatch(&c_array)); + std::map options = {{BtreeDefs::kBtreeIndexBlockSize, "128"}, + {BtreeDefs::kBtreeIndexCompression, GetParam()}}; + auto indexer = std::make_shared(options); + ASSERT_OK_AND_ASSIGN(auto writer, + indexer->CreateWriter("str_field", c_schema.get(), file_writer, pool_)); + auto btree_writer = std::dynamic_pointer_cast(writer); + ASSERT_TRUE(btree_writer); + + // Data layout (row_id -> value): + // 0->"apple", 1->"apricot", 2->null, 3->"banana", 4->"blueberry", + // 5->null, 6->"cherry", 7->"cherry", 8->"date" + auto array = arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_({field}), R"([ + ["apple"], + ["apricot"], + [null], + ["banana"], + ["blueberry"], + [null], + ["cherry"], + ["cherry"], + ["date"] + ])") + .ValueOrDie(); - // Finish writing - auto result = writer->Finish(); - ASSERT_OK(result.status()); - auto metas = result.value(); + ArrowArray c_array; + ASSERT_TRUE(arrow::ExportArray(*array, &c_array).ok()); + std::vector row_ids(array->length()); + std::iota(row_ids.begin(), row_ids.end(), 0); + ASSERT_OK(btree_writer->AddBatch(&c_array, row_ids)); + ASSERT_OK_AND_ASSIGN(auto metas, writer->Finish()); ASSERT_EQ(metas.size(), 1); - // Release ArrowArray - ArrowArrayRelease(&c_array); + auto file_reader = std::make_shared(fs_, base_path_); + c_schema = CreateArrowSchema(field); + ASSERT_OK_AND_ASSIGN(auto reader, + indexer->CreateReader(c_schema.get(), file_reader, metas, pool_)); + + // Non-null rows: {0,1,3,4,6,7,8}, Null rows: {2,5} + + // --- VisitIsNull / VisitIsNotNull --- + { + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitIsNull()); + CheckResult(result, {2, 5}); + } + { + 
ASSERT_OK_AND_ASSIGN(auto result, reader->VisitIsNotNull()); + CheckResult(result, {0, 1, 3, 4, 6, 7, 8}); + } + + // --- VisitEqual --- + { + Literal lit_cherry(FieldType::STRING, "cherry", 6); + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitEqual(lit_cherry)); + CheckResult(result, {6, 7}); + + Literal lit_missing(FieldType::STRING, "fig", 3); + ASSERT_OK_AND_ASSIGN(result, reader->VisitEqual(lit_missing)); + CheckResult(result, {}); + } + + // --- VisitNotEqual --- + { + Literal lit_banana(FieldType::STRING, "banana", 6); + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitNotEqual(lit_banana)); + CheckResult(result, {0, 1, 4, 6, 7, 8}); + } + + // --- VisitLessThan --- + { + // LessThan "cherry" -> "apple","apricot","banana","blueberry" -> rows 0,1,3,4 + Literal lit_cherry(FieldType::STRING, "cherry", 6); + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitLessThan(lit_cherry)); + CheckResult(result, {0, 1, 3, 4}); + } + + // --- VisitLessOrEqual --- + { + Literal lit_banana(FieldType::STRING, "banana", 6); + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitLessOrEqual(lit_banana)); + CheckResult(result, {0, 1, 3}); + } + + // --- VisitGreaterThan --- + { + Literal lit_cherry(FieldType::STRING, "cherry", 6); + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitGreaterThan(lit_cherry)); + CheckResult(result, {8}); + } + + // --- VisitGreaterOrEqual --- + { + Literal lit_cherry(FieldType::STRING, "cherry", 6); + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitGreaterOrEqual(lit_cherry)); + CheckResult(result, {6, 7, 8}); + } + + // --- VisitIn --- + { + std::vector in_literals = {Literal(FieldType::STRING, "apple", 5), + Literal(FieldType::STRING, "date", 4)}; + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitIn(in_literals)); + CheckResult(result, {0, 8}); + } + + // --- VisitNotIn --- + { + std::vector not_in_literals = {Literal(FieldType::STRING, "apple", 5), + Literal(FieldType::STRING, "cherry", 6)}; + ASSERT_OK_AND_ASSIGN(auto result, 
reader->VisitNotIn(not_in_literals)); + CheckResult(result, {1, 3, 4, 8}); + } + + // --- VisitStartsWith --- + { + // StartsWith "ap" -> "apple","apricot" -> rows 0,1 + Literal lit_ap(FieldType::STRING, "ap", 2); + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitStartsWith(lit_ap)); + CheckResult(result, {0, 1}); + + // StartsWith "bl" -> "blueberry" -> row 4 + Literal lit_bl(FieldType::STRING, "bl", 2); + ASSERT_OK_AND_ASSIGN(result, reader->VisitStartsWith(lit_bl)); + CheckResult(result, {4}); + + // StartsWith "z" -> no match + Literal lit_z(FieldType::STRING, "z", 1); + ASSERT_OK_AND_ASSIGN(result, reader->VisitStartsWith(lit_z)); + CheckResult(result, {}); + } + + // --- VisitEndsWith (falls back to AllNonNullRows) --- + { + Literal lit_ry(FieldType::STRING, "ry", 2); + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitEndsWith(lit_ry)); + CheckResult(result, {0, 1, 3, 4, 6, 7, 8}); + } + + // --- VisitContains (falls back to AllNonNullRows) --- + { + Literal lit_an(FieldType::STRING, "an", 2); + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitContains(lit_an)); + CheckResult(result, {0, 1, 3, 4, 6, 7, 8}); + } + + // --- VisitLike --- + { + // "ap%" is a prefix pattern -> delegates to VisitStartsWith("ap") -> rows 0,1 + Literal lit_ap_pct(FieldType::STRING, "ap%", 3); + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitLike(lit_ap_pct)); + CheckResult(result, {0, 1}); + + // "%erry" is not a prefix pattern -> falls back to AllNonNullRows + Literal lit_suffix(FieldType::STRING, "%erry", 5); + ASSERT_OK_AND_ASSIGN(result, reader->VisitLike(lit_suffix)); + CheckResult(result, {0, 1, 3, 4, 6, 7, 8}); + + // "b_nana" contains '_' before '%' -> falls back to AllNonNullRows + Literal lit_underscore(FieldType::STRING, "b_nana", 6); + ASSERT_OK_AND_ASSIGN(result, reader->VisitLike(lit_underscore)); + CheckResult(result, {0, 1, 3, 4, 6, 7, 8}); + } +} + +TEST_P(BTreeGlobalIndexIntegrationTest, WriteAndReadBigIntData) { + auto file_writer = std::make_shared(fs_, base_path_); + 
auto field = arrow::field("bigint_field", arrow::int64()); + auto c_schema = CreateArrowSchema(field); + + std::map options = {{BtreeDefs::kBtreeIndexBlockSize, "128"}, + {BtreeDefs::kBtreeIndexCompression, GetParam()}}; + auto indexer = std::make_shared(options); + ASSERT_OK_AND_ASSIGN(auto writer, + indexer->CreateWriter("bigint_field", c_schema.get(), file_writer, pool_)); + auto btree_writer = std::dynamic_pointer_cast(writer); + ASSERT_TRUE(btree_writer); + + // Data layout (row_id -> value): + // 0->100, 1->null, 2->200, 3->200, 4->300, 5->null, 6->400 + auto array = arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_({field}), R"([ + [100], + [null], + [200], + [200], + [300], + [null], + [400] + ])") + .ValueOrDie(); + + ArrowArray c_array; + ASSERT_TRUE(arrow::ExportArray(*array, &c_array).ok()); + std::vector row_ids(array->length()); + std::iota(row_ids.begin(), row_ids.end(), 0); + ASSERT_OK(btree_writer->AddBatch(&c_array, row_ids)); + ASSERT_OK_AND_ASSIGN(auto metas, writer->Finish()); + ASSERT_EQ(metas.size(), 1); - // Now read back auto file_reader = std::make_shared(fs_, base_path_); - std::map options; - BTreeGlobalIndexer indexer(options); - - // Create a new ArrowSchema for reading (the original was consumed by the writer) - auto c_schema_read = CreateArrowSchema(arrow::int32(), "int_field"); - - // Create reader - auto reader_result = indexer.CreateReader(c_schema_read.get(), file_reader, metas, pool_); - ASSERT_OK(reader_result.status()); - auto reader = reader_result.value(); - - // Test VisitEqual for value 1 (should return row IDs 0 and 4) - Literal literal_1(static_cast(1)); - auto equal_result = reader->VisitEqual(literal_1); - ASSERT_OK(equal_result.status()); - EXPECT_TRUE(ContainsRowId(equal_result.value(), 0)); - EXPECT_TRUE(ContainsRowId(equal_result.value(), 4)); - EXPECT_FALSE(ContainsRowId(equal_result.value(), 1)); - - // Test VisitEqual for value 5 (should return row IDs 6, 7, 8) - Literal literal_5(static_cast(5)); - auto 
equal_result_5 = reader->VisitEqual(literal_5); - ASSERT_OK(equal_result_5.status()); - EXPECT_TRUE(ContainsRowId(equal_result_5.value(), 6)); - EXPECT_TRUE(ContainsRowId(equal_result_5.value(), 7)); - EXPECT_TRUE(ContainsRowId(equal_result_5.value(), 8)); - - // Release ArrowSchema - ArrowSchemaRelease(c_schema.get()); - ArrowSchemaRelease(c_schema_read.get()); + c_schema = CreateArrowSchema(field); + ASSERT_OK_AND_ASSIGN(auto reader, + indexer->CreateReader(c_schema.get(), file_reader, metas, pool_)); + + // Non-null rows: {0,2,3,4,6}, Null rows: {1,5} + + { + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitIsNull()); + CheckResult(result, {1, 5}); + } + { + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitIsNotNull()); + CheckResult(result, {0, 2, 3, 4, 6}); + } + { + Literal lit_200(static_cast(200)); + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitEqual(lit_200)); + CheckResult(result, {2, 3}); + } + { + Literal lit_200(static_cast(200)); + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitNotEqual(lit_200)); + CheckResult(result, {0, 4, 6}); + } + { + Literal lit_300(static_cast(300)); + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitLessThan(lit_300)); + CheckResult(result, {0, 2, 3}); + } + { + Literal lit_300(static_cast(300)); + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitLessOrEqual(lit_300)); + CheckResult(result, {0, 2, 3, 4}); + } + { + Literal lit_200(static_cast(200)); + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitGreaterThan(lit_200)); + CheckResult(result, {4, 6}); + } + { + Literal lit_200(static_cast(200)); + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitGreaterOrEqual(lit_200)); + CheckResult(result, {2, 3, 4, 6}); + } + { + std::vector in_literals = {Literal(static_cast(100)), + Literal(static_cast(400))}; + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitIn(in_literals)); + CheckResult(result, {0, 6}); + } + { + std::vector not_in_literals = {Literal(static_cast(200))}; + ASSERT_OK_AND_ASSIGN(auto result, 
reader->VisitNotIn(not_in_literals)); + CheckResult(result, {0, 4, 6}); + } } -TEST_F(BTreeGlobalIndexIntegrationTest, WriteAndReadStringData) { - // Create file writer +TEST_P(BTreeGlobalIndexIntegrationTest, WriteAndReadFloatData) { auto file_writer = std::make_shared(fs_, base_path_); + auto field = arrow::field("float_field", arrow::float32()); + auto c_schema = CreateArrowSchema(field); - // Create ArrowSchema - auto c_schema = CreateArrowSchema(arrow::utf8(), "string_field"); + std::map options = {{BtreeDefs::kBtreeIndexBlockSize, "128"}, + {BtreeDefs::kBtreeIndexCompression, GetParam()}}; + auto indexer = std::make_shared(options); + ASSERT_OK_AND_ASSIGN(auto writer, + indexer->CreateWriter("float_field", c_schema.get(), file_writer, pool_)); + auto btree_writer = std::dynamic_pointer_cast(writer); + ASSERT_TRUE(btree_writer); + + // Data layout (row_id -> value): + // 0->1.0, 1->null, 2->2.5, 3->2.5, 4->3.0, 5->null, 6->4.5 + auto array = arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_({field}), R"([ + [1.0], + [null], + [2.5], + [2.5], + [3.0], + [null], + [4.5] + ])") + .ValueOrDie(); - // Create the BTree global index writer + ArrowArray c_array; + ASSERT_TRUE(arrow::ExportArray(*array, &c_array).ok()); + std::vector row_ids(array->length()); + std::iota(row_ids.begin(), row_ids.end(), 0); + ASSERT_OK(btree_writer->AddBatch(&c_array, row_ids)); + ASSERT_OK_AND_ASSIGN(auto metas, writer->Finish()); + ASSERT_EQ(metas.size(), 1); + + auto file_reader = std::make_shared(fs_, base_path_); + c_schema = CreateArrowSchema(field); + ASSERT_OK_AND_ASSIGN(auto reader, + indexer->CreateReader(c_schema.get(), file_reader, metas, pool_)); + + // Non-null rows: {0,2,3,4,6}, Null rows: {1,5} + + { + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitIsNull()); + CheckResult(result, {1, 5}); + } + { + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitIsNotNull()); + CheckResult(result, {0, 2, 3, 4, 6}); + } + { + Literal lit_2_5(2.5f); + ASSERT_OK_AND_ASSIGN(auto 
result, reader->VisitEqual(lit_2_5)); + CheckResult(result, {2, 3}); + } + { + Literal lit_2_5(2.5f); + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitNotEqual(lit_2_5)); + CheckResult(result, {0, 4, 6}); + } + { + Literal lit_3_0(3.0f); + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitLessThan(lit_3_0)); + CheckResult(result, {0, 2, 3}); + } + { + Literal lit_3_0(3.0f); + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitLessOrEqual(lit_3_0)); + CheckResult(result, {0, 2, 3, 4}); + } + { + Literal lit_2_5(2.5f); + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitGreaterThan(lit_2_5)); + CheckResult(result, {4, 6}); + } + { + Literal lit_2_5(2.5f); + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitGreaterOrEqual(lit_2_5)); + CheckResult(result, {2, 3, 4, 6}); + } + { + std::vector in_literals = {Literal(1.0f), Literal(4.5f)}; + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitIn(in_literals)); + CheckResult(result, {0, 6}); + } + { + std::vector not_in_literals = {Literal(2.5f)}; + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitNotIn(not_in_literals)); + CheckResult(result, {0, 4, 6}); + } +} + +TEST_P(BTreeGlobalIndexIntegrationTest, WriteAndReadDoubleData) { + auto file_writer = std::make_shared(fs_, base_path_); + auto field = arrow::field("double_field", arrow::float64()); + auto c_schema = CreateArrowSchema(field); + + std::map options = {{BtreeDefs::kBtreeIndexBlockSize, "128"}, + {BtreeDefs::kBtreeIndexCompression, GetParam()}}; + auto indexer = std::make_shared(options); ASSERT_OK_AND_ASSIGN(auto writer, - BTreeGlobalIndexWriter::Create("string_field", c_schema.get(), file_writer, - compression_factory_, pool_, 4096)); + indexer->CreateWriter("double_field", c_schema.get(), file_writer, pool_)); + auto btree_writer = std::dynamic_pointer_cast(writer); + ASSERT_TRUE(btree_writer); + + // Data layout (row_id -> value): + // 0->1.1, 1->null, 2->2.2, 3->2.2, 4->3.3, 5->null, 6->4.4 + auto array = arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_({field}), R"([ + 
[1.1], + [null], + [2.2], + [2.2], + [3.3], + [null], + [4.4] + ])") + .ValueOrDie(); + + ArrowArray c_array; + ASSERT_TRUE(arrow::ExportArray(*array, &c_array).ok()); + std::vector row_ids(array->length()); + std::iota(row_ids.begin(), row_ids.end(), 0); + ASSERT_OK(btree_writer->AddBatch(&c_array, row_ids)); + ASSERT_OK_AND_ASSIGN(auto metas, writer->Finish()); + ASSERT_EQ(metas.size(), 1); + + auto file_reader = std::make_shared(fs_, base_path_); + c_schema = CreateArrowSchema(field); + ASSERT_OK_AND_ASSIGN(auto reader, + indexer->CreateReader(c_schema.get(), file_reader, metas, pool_)); + + // Non-null rows: {0,2,3,4,6}, Null rows: {1,5} + + { + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitIsNull()); + CheckResult(result, {1, 5}); + } + { + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitIsNotNull()); + CheckResult(result, {0, 2, 3, 4, 6}); + } + { + Literal lit_2_2(2.2); + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitEqual(lit_2_2)); + CheckResult(result, {2, 3}); + } + { + Literal lit_2_2(2.2); + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitNotEqual(lit_2_2)); + CheckResult(result, {0, 4, 6}); + } + { + Literal lit_3_3(3.3); + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitLessThan(lit_3_3)); + CheckResult(result, {0, 2, 3}); + } + { + Literal lit_3_3(3.3); + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitLessOrEqual(lit_3_3)); + CheckResult(result, {0, 2, 3, 4}); + } + { + Literal lit_2_2(2.2); + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitGreaterThan(lit_2_2)); + CheckResult(result, {4, 6}); + } + { + Literal lit_2_2(2.2); + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitGreaterOrEqual(lit_2_2)); + CheckResult(result, {2, 3, 4, 6}); + } + { + std::vector in_literals = {Literal(1.1), Literal(4.4)}; + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitIn(in_literals)); + CheckResult(result, {0, 6}); + } + { + std::vector not_in_literals = {Literal(2.2)}; + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitNotIn(not_in_literals)); + CheckResult(result, {0, 4, 
6}); + } +} + +TEST_P(BTreeGlobalIndexIntegrationTest, WriteAndReadAllNonNull) { + auto file_writer = std::make_shared(fs_, base_path_); + auto field = arrow::field("int_field", arrow::int32()); + auto c_schema = CreateArrowSchema(field); - // Create an Arrow array with string values - auto array = arrow::ipc::internal::json::ArrayFromJSON( - arrow::utf8(), R"(["apple", "banana", "cherry", "apple", "banana"])") + std::map options = {{BtreeDefs::kBtreeIndexBlockSize, "128"}, + {BtreeDefs::kBtreeIndexCompression, GetParam()}}; + auto indexer = std::make_shared(options); + ASSERT_OK_AND_ASSIGN(auto writer, + indexer->CreateWriter("int_field", c_schema.get(), file_writer, pool_)); + auto btree_writer = std::dynamic_pointer_cast(writer); + ASSERT_TRUE(btree_writer); + + // All values are non-null + // Data layout (row_id -> value): 0->10, 1->20, 2->20, 3->30, 4->40, 5->50 + auto array = arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_({field}), R"([ + [10], + [20], + [20], + [30], + [40], + [50] + ])") .ValueOrDie(); ArrowArray c_array; ASSERT_TRUE(arrow::ExportArray(*array, &c_array).ok()); + std::vector row_ids(array->length()); + std::iota(row_ids.begin(), row_ids.end(), 0); + ASSERT_OK(btree_writer->AddBatch(&c_array, row_ids)); + ASSERT_OK_AND_ASSIGN(auto metas, writer->Finish()); + ASSERT_EQ(metas.size(), 1); + + auto file_reader = std::make_shared(fs_, base_path_); + c_schema = CreateArrowSchema(field); + ASSERT_OK_AND_ASSIGN(auto reader, + indexer->CreateReader(c_schema.get(), file_reader, metas, pool_)); + + // All rows: {0,1,2,3,4,5}, No null rows - // Add batch - ASSERT_OK(writer->AddBatch(&c_array)); + // --- VisitIsNull -> empty --- + { + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitIsNull()); + CheckResult(result, {}); + } - // Finish writing - auto result = writer->Finish(); - ASSERT_OK(result.status()); - auto metas = result.value(); + // --- VisitIsNotNull -> all rows --- + { + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitIsNotNull()); + 
CheckResult(result, {0, 1, 2, 3, 4, 5}); + } + + // --- VisitEqual --- + { + Literal lit_20(20); + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitEqual(lit_20)); + CheckResult(result, {1, 2}); + } + + // --- VisitNotEqual --- + { + Literal lit_20(20); + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitNotEqual(lit_20)); + CheckResult(result, {0, 3, 4, 5}); + } + + // --- VisitLessThan --- + { + Literal lit_30(30); + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitLessThan(lit_30)); + CheckResult(result, {0, 1, 2}); + } + + // --- VisitLessOrEqual --- + { + Literal lit_30(30); + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitLessOrEqual(lit_30)); + CheckResult(result, {0, 1, 2, 3}); + } + + // --- VisitGreaterThan --- + { + Literal lit_30(30); + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitGreaterThan(lit_30)); + CheckResult(result, {4, 5}); + } + + // --- VisitGreaterOrEqual --- + { + Literal lit_30(30); + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitGreaterOrEqual(lit_30)); + CheckResult(result, {3, 4, 5}); + } + + // --- VisitIn --- + { + std::vector in_literals = {Literal(10), Literal(50)}; + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitIn(in_literals)); + CheckResult(result, {0, 5}); + } + + // --- VisitNotIn --- + { + std::vector not_in_literals = {Literal(10), Literal(20)}; + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitNotIn(not_in_literals)); + CheckResult(result, {3, 4, 5}); + } +} + +TEST_P(BTreeGlobalIndexIntegrationTest, WriteAndReadBoolData) { + auto file_writer = std::make_shared(fs_, base_path_); + auto field = arrow::field("bool_field", arrow::boolean()); + auto c_schema = CreateArrowSchema(field); + + std::map options = {{BtreeDefs::kBtreeIndexBlockSize, "128"}, + {BtreeDefs::kBtreeIndexCompression, GetParam()}}; + auto indexer = std::make_shared(options); + ASSERT_OK_AND_ASSIGN(auto writer, + indexer->CreateWriter("bool_field", c_schema.get(), file_writer, pool_)); + auto btree_writer = std::dynamic_pointer_cast(writer); + 
ASSERT_TRUE(btree_writer); + + // Data layout (row_id -> value), sorted by key (false < true): + // 0->false, 1->false, 2->null, 3->false, 4->true, 5->null, 6->true, 7->true + auto array = arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_({field}), R"([ + [false], + [false], + [null], + [false], + [true], + [null], + [true], + [true] + ])") + .ValueOrDie(); + + ArrowArray c_array; + ASSERT_TRUE(arrow::ExportArray(*array, &c_array).ok()); + std::vector row_ids(array->length()); + std::iota(row_ids.begin(), row_ids.end(), 0); + ASSERT_OK(btree_writer->AddBatch(&c_array, row_ids)); + ASSERT_OK_AND_ASSIGN(auto metas, writer->Finish()); ASSERT_EQ(metas.size(), 1); - // Release ArrowArray - ArrowArrayRelease(&c_array); + auto file_reader = std::make_shared(fs_, base_path_); + c_schema = CreateArrowSchema(field); + ASSERT_OK_AND_ASSIGN(auto reader, + indexer->CreateReader(c_schema.get(), file_reader, metas, pool_)); + + // Non-null rows: {0,1,3,4,6,7}, Null rows: {2,5} + + { + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitIsNull()); + CheckResult(result, {2, 5}); + } + { + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitIsNotNull()); + CheckResult(result, {0, 1, 3, 4, 6, 7}); + } + { + Literal lit_true(true); + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitEqual(lit_true)); + CheckResult(result, {4, 6, 7}); + } + { + Literal lit_false(false); + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitEqual(lit_false)); + CheckResult(result, {0, 1, 3}); + } + { + Literal lit_true(true); + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitNotEqual(lit_true)); + CheckResult(result, {0, 1, 3}); + } + { + std::vector in_literals = {Literal(true)}; + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitIn(in_literals)); + CheckResult(result, {4, 6, 7}); + } + { + std::vector not_in_literals = {Literal(true)}; + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitNotIn(not_in_literals)); + CheckResult(result, {0, 1, 3}); + } +} + +TEST_P(BTreeGlobalIndexIntegrationTest, WriteAndReadTinyIntData) 
{ + auto file_writer = std::make_shared(fs_, base_path_); + auto field = arrow::field("tinyint_field", arrow::int8()); + auto c_schema = CreateArrowSchema(field); + + std::map options = {{BtreeDefs::kBtreeIndexBlockSize, "128"}, + {BtreeDefs::kBtreeIndexCompression, GetParam()}}; + auto indexer = std::make_shared(options); + ASSERT_OK_AND_ASSIGN( + auto writer, indexer->CreateWriter("tinyint_field", c_schema.get(), file_writer, pool_)); + auto btree_writer = std::dynamic_pointer_cast(writer); + ASSERT_TRUE(btree_writer); + + // Data layout (row_id -> value): + // 0->-10, 1->null, 2->0, 3->10, 4->10, 5->null, 6->20 + auto array = arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_({field}), R"([ + [-10], + [null], + [0], + [10], + [10], + [null], + [20] + ])") + .ValueOrDie(); + + ArrowArray c_array; + ASSERT_TRUE(arrow::ExportArray(*array, &c_array).ok()); + std::vector row_ids(array->length()); + std::iota(row_ids.begin(), row_ids.end(), 0); + ASSERT_OK(btree_writer->AddBatch(&c_array, row_ids)); + ASSERT_OK_AND_ASSIGN(auto metas, writer->Finish()); + ASSERT_EQ(metas.size(), 1); - // Now read back auto file_reader = std::make_shared(fs_, base_path_); - std::map options; - BTreeGlobalIndexer indexer(options); - - // Create a new ArrowSchema for reading (the original was consumed by the writer) - auto c_schema_read = CreateArrowSchema(arrow::utf8(), "string_field"); - - // Create reader - auto reader_result = indexer.CreateReader(c_schema_read.get(), file_reader, metas, pool_); - ASSERT_OK(reader_result.status()); - auto reader = reader_result.value(); - - // Test VisitEqual for "apple" (should return row IDs 0 and 3) - Literal literal_apple(FieldType::STRING, "apple", 5); - auto equal_result = reader->VisitEqual(literal_apple); - ASSERT_OK(equal_result.status()); - EXPECT_TRUE(ContainsRowId(equal_result.value(), 0)); - EXPECT_TRUE(ContainsRowId(equal_result.value(), 3)); - - // Release ArrowSchema - ArrowSchemaRelease(c_schema.get()); - 
ArrowSchemaRelease(c_schema_read.get()); + c_schema = CreateArrowSchema(field); + ASSERT_OK_AND_ASSIGN(auto reader, + indexer->CreateReader(c_schema.get(), file_reader, metas, pool_)); + + // Non-null rows: {0,2,3,4,6}, Null rows: {1,5} + + { + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitIsNull()); + CheckResult(result, {1, 5}); + } + { + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitIsNotNull()); + CheckResult(result, {0, 2, 3, 4, 6}); + } + { + Literal lit_10(static_cast(10)); + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitEqual(lit_10)); + CheckResult(result, {3, 4}); + } + { + Literal lit_10(static_cast(10)); + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitNotEqual(lit_10)); + CheckResult(result, {0, 2, 6}); + } + { + Literal lit_10(static_cast(10)); + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitLessThan(lit_10)); + CheckResult(result, {0, 2}); + } + { + Literal lit_10(static_cast(10)); + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitGreaterOrEqual(lit_10)); + CheckResult(result, {3, 4, 6}); + } + { + std::vector in_literals = {Literal(static_cast(-10)), + Literal(static_cast(20))}; + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitIn(in_literals)); + CheckResult(result, {0, 6}); + } + { + std::vector not_in_literals = {Literal(static_cast(10))}; + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitNotIn(not_in_literals)); + CheckResult(result, {0, 2, 6}); + } } -TEST_F(BTreeGlobalIndexIntegrationTest, WriteAndReadWithNulls) { - // Create file writer +TEST_P(BTreeGlobalIndexIntegrationTest, WriteAndReadSmallIntData) { auto file_writer = std::make_shared(fs_, base_path_); + auto field = arrow::field("smallint_field", arrow::int16()); + auto c_schema = CreateArrowSchema(field); + + std::map options = {{BtreeDefs::kBtreeIndexBlockSize, "128"}, + {BtreeDefs::kBtreeIndexCompression, GetParam()}}; + auto indexer = std::make_shared(options); + ASSERT_OK_AND_ASSIGN( + auto writer, indexer->CreateWriter("smallint_field", c_schema.get(), file_writer, pool_)); + 
auto btree_writer = std::dynamic_pointer_cast(writer); + ASSERT_TRUE(btree_writer); + + // Data layout (row_id -> value): + // 0->-100, 1->null, 2->0, 3->100, 4->100, 5->null, 6->200 + auto array = arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_({field}), R"([ + [-100], + [null], + [0], + [100], + [100], + [null], + [200] + ])") + .ValueOrDie(); + + ArrowArray c_array; + ASSERT_TRUE(arrow::ExportArray(*array, &c_array).ok()); + std::vector row_ids(array->length()); + std::iota(row_ids.begin(), row_ids.end(), 0); + ASSERT_OK(btree_writer->AddBatch(&c_array, row_ids)); + ASSERT_OK_AND_ASSIGN(auto metas, writer->Finish()); + ASSERT_EQ(metas.size(), 1); - // Create ArrowSchema - auto c_schema = CreateArrowSchema(arrow::int32(), "int_field"); + auto file_reader = std::make_shared(fs_, base_path_); + c_schema = CreateArrowSchema(field); + ASSERT_OK_AND_ASSIGN(auto reader, + indexer->CreateReader(c_schema.get(), file_reader, metas, pool_)); - // Create the BTree global index writer - ASSERT_OK_AND_ASSIGN(auto writer, - BTreeGlobalIndexWriter::Create("int_field", c_schema.get(), file_writer, - compression_factory_, pool_, 4096)); + // Non-null rows: {0,2,3,4,6}, Null rows: {1,5} + + { + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitIsNull()); + CheckResult(result, {1, 5}); + } + { + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitIsNotNull()); + CheckResult(result, {0, 2, 3, 4, 6}); + } + { + Literal lit_100(static_cast(100)); + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitEqual(lit_100)); + CheckResult(result, {3, 4}); + } + { + Literal lit_100(static_cast(100)); + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitNotEqual(lit_100)); + CheckResult(result, {0, 2, 6}); + } + { + Literal lit_100(static_cast(100)); + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitLessThan(lit_100)); + CheckResult(result, {0, 2}); + } + { + Literal lit_100(static_cast(100)); + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitGreaterOrEqual(lit_100)); + CheckResult(result, {3, 4, 6}); + } 
+ { + std::vector in_literals = {Literal(static_cast(-100)), + Literal(static_cast(200))}; + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitIn(in_literals)); + CheckResult(result, {0, 6}); + } + { + std::vector not_in_literals = {Literal(static_cast(100))}; + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitNotIn(not_in_literals)); + CheckResult(result, {0, 2, 6}); + } +} + +TEST_P(BTreeGlobalIndexIntegrationTest, WriteAndReadTimestampCompactData) { + // Compact timestamp: precision <= 3 (millisecond) + auto file_writer = std::make_shared(fs_, base_path_); + auto field = arrow::field("ts_field", arrow::timestamp(arrow::TimeUnit::MILLI)); + auto c_schema = CreateArrowSchema(field); - // Create an Arrow array with null values - // Row IDs: 0->1, 1->null, 2->3, 3->null, 4->5 - auto array = arrow::ipc::internal::json::ArrayFromJSON(arrow::int32(), "[1, null, 3, null, 5]") + std::map options = {{BtreeDefs::kBtreeIndexBlockSize, "128"}, + {BtreeDefs::kBtreeIndexCompression, GetParam()}}; + auto indexer = std::make_shared(options); + ASSERT_OK_AND_ASSIGN(auto writer, + indexer->CreateWriter("ts_field", c_schema.get(), file_writer, pool_)); + auto btree_writer = std::dynamic_pointer_cast(writer); + ASSERT_TRUE(btree_writer); + + // Data layout (row_id -> value in millis): + // 0->1000, 1->null, 2->2000, 3->2000, 4->3000, 5->null, 6->4000 + auto array = arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_({field}), R"([ + [1000], + [null], + [2000], + [2000], + [3000], + [null], + [4000] + ])") .ValueOrDie(); ArrowArray c_array; ASSERT_TRUE(arrow::ExportArray(*array, &c_array).ok()); + std::vector row_ids(array->length()); + std::iota(row_ids.begin(), row_ids.end(), 0); + ASSERT_OK(btree_writer->AddBatch(&c_array, row_ids)); + ASSERT_OK_AND_ASSIGN(auto metas, writer->Finish()); + ASSERT_EQ(metas.size(), 1); + + auto file_reader = std::make_shared(fs_, base_path_); + c_schema = CreateArrowSchema(field); + ASSERT_OK_AND_ASSIGN(auto reader, + 
indexer->CreateReader(c_schema.get(), file_reader, metas, pool_)); + + // Non-null rows: {0,2,3,4,6}, Null rows: {1,5} + + { + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitIsNull()); + CheckResult(result, {1, 5}); + } + { + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitIsNotNull()); + CheckResult(result, {0, 2, 3, 4, 6}); + } + { + Literal lit_2000(Timestamp::FromEpochMillis(2000)); + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitEqual(lit_2000)); + CheckResult(result, {2, 3}); + } + { + Literal lit_2000(Timestamp::FromEpochMillis(2000)); + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitNotEqual(lit_2000)); + CheckResult(result, {0, 4, 6}); + } + { + Literal lit_3000(Timestamp::FromEpochMillis(3000)); + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitLessThan(lit_3000)); + CheckResult(result, {0, 2, 3}); + } + { + Literal lit_2000(Timestamp::FromEpochMillis(2000)); + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitGreaterOrEqual(lit_2000)); + CheckResult(result, {2, 3, 4, 6}); + } + { + std::vector in_literals = {Literal(Timestamp::FromEpochMillis(1000)), + Literal(Timestamp::FromEpochMillis(4000))}; + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitIn(in_literals)); + CheckResult(result, {0, 6}); + } + { + std::vector not_in_literals = {Literal(Timestamp::FromEpochMillis(2000))}; + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitNotIn(not_in_literals)); + CheckResult(result, {0, 4, 6}); + } +} - // Add batch - ASSERT_OK(writer->AddBatch(&c_array)); +TEST_P(BTreeGlobalIndexIntegrationTest, WriteAndReadTimestampNonCompactData) { + // Non-compact timestamp: precision > 3 (microsecond, precision=6) + auto file_writer = std::make_shared(fs_, base_path_); + auto field = arrow::field("ts_field", arrow::timestamp(arrow::TimeUnit::MICRO)); + auto c_schema = CreateArrowSchema(field); + + std::map options = {{BtreeDefs::kBtreeIndexBlockSize, "128"}, + {BtreeDefs::kBtreeIndexCompression, GetParam()}}; + auto indexer = std::make_shared(options); + ASSERT_OK_AND_ASSIGN(auto 
writer, + indexer->CreateWriter("ts_field", c_schema.get(), file_writer, pool_)); + auto btree_writer = std::dynamic_pointer_cast(writer); + ASSERT_TRUE(btree_writer); + + // Data layout (row_id -> value in micros): + // 0->1000000 (1s), 1->null, 2->2000123 (2s+123us), 3->2000123, 4->3000456, 5->null, + // 6->4000789 + auto array = arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_({field}), R"([ + [1000000], + [null], + [2000123], + [2000123], + [3000456], + [null], + [4000789] + ])") + .ValueOrDie(); - // Finish writing - auto result = writer->Finish(); - ASSERT_OK(result.status()); - auto metas = result.value(); + ArrowArray c_array; + ASSERT_TRUE(arrow::ExportArray(*array, &c_array).ok()); + std::vector row_ids(array->length()); + std::iota(row_ids.begin(), row_ids.end(), 0); + ASSERT_OK(btree_writer->AddBatch(&c_array, row_ids)); + ASSERT_OK_AND_ASSIGN(auto metas, writer->Finish()); ASSERT_EQ(metas.size(), 1); - // Release ArrowArray - ArrowArrayRelease(&c_array); + auto file_reader = std::make_shared(fs_, base_path_); + c_schema = CreateArrowSchema(field); + ASSERT_OK_AND_ASSIGN(auto reader, + indexer->CreateReader(c_schema.get(), file_reader, metas, pool_)); + + // Non-null rows: {0,2,3,4,6}, Null rows: {1,5} + // micros: 1000000 -> millis=1000, nanos_of_millis=0 + // 2000123 -> millis=2000, nanos_of_millis=123000 + // 3000456 -> millis=3000, nanos_of_millis=456000 + // 4000789 -> millis=4000, nanos_of_millis=789000 + + { + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitIsNull()); + CheckResult(result, {1, 5}); + } + { + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitIsNotNull()); + CheckResult(result, {0, 2, 3, 4, 6}); + } + { + Literal lit_ts(Timestamp(2000, 123000)); + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitEqual(lit_ts)); + CheckResult(result, {2, 3}); + } + { + Literal lit_ts(Timestamp(2000, 123000)); + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitNotEqual(lit_ts)); + CheckResult(result, {0, 4, 6}); + } + { + Literal 
lit_ts(Timestamp(3000, 456000)); + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitLessThan(lit_ts)); + CheckResult(result, {0, 2, 3}); + } + { + Literal lit_ts(Timestamp(2000, 123000)); + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitGreaterOrEqual(lit_ts)); + CheckResult(result, {2, 3, 4, 6}); + } + { + std::vector in_literals = {Literal(Timestamp(1000, 0)), + Literal(Timestamp(4000, 789000))}; + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitIn(in_literals)); + CheckResult(result, {0, 6}); + } + { + std::vector not_in_literals = {Literal(Timestamp(2000, 123000))}; + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitNotIn(not_in_literals)); + CheckResult(result, {0, 4, 6}); + } +} + +TEST_P(BTreeGlobalIndexIntegrationTest, WriteAndReadDecimalCompactData) { + // Compact decimal: precision <= 18 + auto file_writer = std::make_shared(fs_, base_path_); + auto field = arrow::field("decimal_field", arrow::decimal128(10, 2)); + auto c_schema = CreateArrowSchema(field); + + std::map options = {{BtreeDefs::kBtreeIndexBlockSize, "128"}, + {BtreeDefs::kBtreeIndexCompression, GetParam()}}; + auto indexer = std::make_shared(options); + ASSERT_OK_AND_ASSIGN( + auto writer, indexer->CreateWriter("decimal_field", c_schema.get(), file_writer, pool_)); + auto btree_writer = std::dynamic_pointer_cast(writer); + ASSERT_TRUE(btree_writer); + + // Data layout (row_id -> value, stored as unscaled int with scale=2): + // 0->1.00, 1->null, 2->2.50, 3->2.50, 4->3.00, + // 5->null, 6->4.50 + auto array = arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_({field}), R"([ + ["1.00"], + [null], + ["2.50"], + ["2.50"], + ["3.00"], + [null], + ["4.50"] + ])") + .ValueOrDie(); + + ArrowArray c_array; + ASSERT_TRUE(arrow::ExportArray(*array, &c_array).ok()); + std::vector row_ids(array->length()); + std::iota(row_ids.begin(), row_ids.end(), 0); + ASSERT_OK(btree_writer->AddBatch(&c_array, row_ids)); + ASSERT_OK_AND_ASSIGN(auto metas, writer->Finish()); + ASSERT_EQ(metas.size(), 1); - // Now 
read back auto file_reader = std::make_shared(fs_, base_path_); - std::map options; - BTreeGlobalIndexer indexer(options); - - // Create a new ArrowSchema for reading (the original was consumed by the writer) - auto c_schema_read = CreateArrowSchema(arrow::int32(), "int_field"); - - // Create reader - auto reader_result = indexer.CreateReader(c_schema_read.get(), file_reader, metas, pool_); - ASSERT_OK(reader_result.status()); - auto reader = reader_result.value(); - - // Test VisitIsNull (should return row IDs 1 and 3) - auto is_null_result = reader->VisitIsNull(); - ASSERT_OK(is_null_result.status()); - EXPECT_TRUE(ContainsRowId(is_null_result.value(), 1)); - EXPECT_TRUE(ContainsRowId(is_null_result.value(), 3)); - EXPECT_FALSE(ContainsRowId(is_null_result.value(), 0)); - - // Test VisitIsNotNull (should return row IDs 0, 2, 4) - auto is_not_null_result = reader->VisitIsNotNull(); - ASSERT_OK(is_not_null_result.status()); - EXPECT_TRUE(ContainsRowId(is_not_null_result.value(), 0)); - EXPECT_TRUE(ContainsRowId(is_not_null_result.value(), 2)); - EXPECT_TRUE(ContainsRowId(is_not_null_result.value(), 4)); - EXPECT_FALSE(ContainsRowId(is_not_null_result.value(), 1)); - - // Release ArrowSchema - ArrowSchemaRelease(c_schema.get()); - ArrowSchemaRelease(c_schema_read.get()); + c_schema = CreateArrowSchema(field); + ASSERT_OK_AND_ASSIGN(auto reader, + indexer->CreateReader(c_schema.get(), file_reader, metas, pool_)); + + // Non-null rows: {0,2,3,4,6}, Null rows: {1,5} + + { + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitIsNull()); + CheckResult(result, {1, 5}); + } + { + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitIsNotNull()); + CheckResult(result, {0, 2, 3, 4, 6}); + } + { + Literal lit_250(Decimal::FromUnscaledLong(250, 10, 2)); + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitEqual(lit_250)); + CheckResult(result, {2, 3}); + } + { + Literal lit_250(Decimal::FromUnscaledLong(250, 10, 2)); + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitNotEqual(lit_250)); + 
CheckResult(result, {0, 4, 6}); + } + { + Literal lit_300(Decimal::FromUnscaledLong(300, 10, 2)); + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitLessThan(lit_300)); + CheckResult(result, {0, 2, 3}); + } + { + Literal lit_250(Decimal::FromUnscaledLong(250, 10, 2)); + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitGreaterOrEqual(lit_250)); + CheckResult(result, {2, 3, 4, 6}); + } + { + std::vector in_literals = {Literal(Decimal::FromUnscaledLong(100, 10, 2)), + Literal(Decimal::FromUnscaledLong(450, 10, 2))}; + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitIn(in_literals)); + CheckResult(result, {0, 6}); + } + { + std::vector not_in_literals = {Literal(Decimal::FromUnscaledLong(250, 10, 2))}; + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitNotIn(not_in_literals)); + CheckResult(result, {0, 4, 6}); + } } -TEST_F(BTreeGlobalIndexIntegrationTest, WriteAndReadRangeQuery) { - // Create file writer +TEST_P(BTreeGlobalIndexIntegrationTest, WriteAndReadDecimalNonCompactData) { + // Non-compact decimal: precision > 18 auto file_writer = std::make_shared(fs_, base_path_); + auto field = arrow::field("decimal_field", arrow::decimal128(25, 3)); + auto c_schema = CreateArrowSchema(field); + + std::map options = {{BtreeDefs::kBtreeIndexBlockSize, "128"}, + {BtreeDefs::kBtreeIndexCompression, GetParam()}}; + auto indexer = std::make_shared(options); + ASSERT_OK_AND_ASSIGN( + auto writer, indexer->CreateWriter("decimal_field", c_schema.get(), file_writer, pool_)); + auto btree_writer = std::dynamic_pointer_cast(writer); + ASSERT_TRUE(btree_writer); + + // Data layout (row_id -> unscaled value with scale=3): + // 0->1.000, 1->null, 2->2.500, 3->2.500, + // 4->3.000, 5->null, 6->4.500 + // For non-compact decimal (precision=25), Arrow JSON uses string representation of unscaled + auto array = arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_({field}), R"([ + ["1.000"], + [null], + ["2.500"], + ["2.500"], + ["3.000"], + [null], + ["4.500"] + ])") + .ValueOrDie(); + + 
ArrowArray c_array; + ASSERT_TRUE(arrow::ExportArray(*array, &c_array).ok()); + std::vector row_ids(array->length()); + std::iota(row_ids.begin(), row_ids.end(), 0); + ASSERT_OK(btree_writer->AddBatch(&c_array, row_ids)); + ASSERT_OK_AND_ASSIGN(auto metas, writer->Finish()); + ASSERT_EQ(metas.size(), 1); - // Create ArrowSchema - auto c_schema = CreateArrowSchema(arrow::int32(), "int_field"); + auto file_reader = std::make_shared(fs_, base_path_); + c_schema = CreateArrowSchema(field); + ASSERT_OK_AND_ASSIGN(auto reader, + indexer->CreateReader(c_schema.get(), file_reader, metas, pool_)); - // Create the BTree global index writer - ASSERT_OK_AND_ASSIGN(auto writer, - BTreeGlobalIndexWriter::Create("int_field", c_schema.get(), file_writer, - compression_factory_, pool_, 4096)); + // Non-null rows: {0,2,3,4,6}, Null rows: {1,5} + + { + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitIsNull()); + CheckResult(result, {1, 5}); + } + { + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitIsNotNull()); + CheckResult(result, {0, 2, 3, 4, 6}); + } + { + Literal lit_2500(Decimal(25, 3, 2500)); + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitEqual(lit_2500)); + CheckResult(result, {2, 3}); + } + { + Literal lit_2500(Decimal(25, 3, 2500)); + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitNotEqual(lit_2500)); + CheckResult(result, {0, 4, 6}); + } + { + Literal lit_3000(Decimal(25, 3, 3000)); + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitLessThan(lit_3000)); + CheckResult(result, {0, 2, 3}); + } + { + Literal lit_2500(Decimal(25, 3, 2500)); + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitGreaterOrEqual(lit_2500)); + CheckResult(result, {2, 3, 4, 6}); + } + { + std::vector in_literals = {Literal(Decimal(25, 3, 1000)), + Literal(Decimal(25, 3, 4500))}; + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitIn(in_literals)); + CheckResult(result, {0, 6}); + } + { + std::vector not_in_literals = {Literal(Decimal(25, 3, 2500))}; + ASSERT_OK_AND_ASSIGN(auto result, 
reader->VisitNotIn(not_in_literals)); + CheckResult(result, {0, 4, 6}); + } +} + +TEST_P(BTreeGlobalIndexIntegrationTest, WriteAndReadAllNull) { + auto file_writer = std::make_shared(fs_, base_path_); + auto field = arrow::field("int_field", arrow::int32()); + auto c_schema = CreateArrowSchema(field); - // Create an Arrow array with int values - auto array = - arrow::ipc::internal::json::ArrayFromJSON(arrow::int32(), "[1, 2, 3, 4, 5]").ValueOrDie(); + std::map options = {{BtreeDefs::kBtreeIndexBlockSize, "128"}, + {BtreeDefs::kBtreeIndexCompression, GetParam()}}; + auto indexer = std::make_shared(options); + ASSERT_OK_AND_ASSIGN(auto writer, + indexer->CreateWriter("int_field", c_schema.get(), file_writer, pool_)); + auto btree_writer = std::dynamic_pointer_cast(writer); + ASSERT_TRUE(btree_writer); + + // All values are null + auto array = arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_({field}), R"([ + [null], + [null], + [null], + [null] + ])") + .ValueOrDie(); ArrowArray c_array; ASSERT_TRUE(arrow::ExportArray(*array, &c_array).ok()); + std::vector row_ids(array->length()); + std::iota(row_ids.begin(), row_ids.end(), 0); + ASSERT_OK(btree_writer->AddBatch(&c_array, row_ids)); + ASSERT_OK_AND_ASSIGN(auto metas, writer->Finish()); + ASSERT_EQ(metas.size(), 1); - // Add batch - ASSERT_OK(writer->AddBatch(&c_array)); + auto file_reader = std::make_shared(fs_, base_path_); + c_schema = CreateArrowSchema(field); + ASSERT_OK_AND_ASSIGN(auto reader, + indexer->CreateReader(c_schema.get(), file_reader, metas, pool_)); - // Finish writing - auto result = writer->Finish(); - ASSERT_OK(result.status()); - auto metas = result.value(); + // All rows are null: {0,1,2,3}, No non-null rows - // Release ArrowArray - ArrowArrayRelease(&c_array); + // --- VisitIsNull -> all rows --- + { + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitIsNull()); + CheckResult(result, {0, 1, 2, 3}); + } + + // --- VisitIsNotNull -> empty --- + { + ASSERT_OK_AND_ASSIGN(auto result, 
reader->VisitIsNotNull()); + CheckResult(result, {}); + } + + // --- VisitEqual -> empty --- + { + Literal lit_1(1); + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitEqual(lit_1)); + CheckResult(result, {}); + } + + // --- VisitNotEqual -> empty (no non-null rows) --- + { + Literal lit_1(1); + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitNotEqual(lit_1)); + CheckResult(result, {}); + } + + // --- VisitLessThan -> empty --- + { + Literal lit_1(1); + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitLessThan(lit_1)); + CheckResult(result, {}); + } + + // --- VisitIn -> empty --- + { + std::vector in_literals = {Literal(1), Literal(2)}; + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitIn(in_literals)); + CheckResult(result, {}); + } + + // --- VisitNotIn -> empty --- + { + std::vector not_in_literals = {Literal(1)}; + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitNotIn(not_in_literals)); + CheckResult(result, {}); + } +} + +TEST_P(BTreeGlobalIndexIntegrationTest, WriteAndReadLargeDataWithSmallBlocks) { + // Use very small block size and cache size to force multiple block evictions + auto file_writer = std::make_shared(fs_, base_path_); + auto field = arrow::field("int_field", arrow::int32()); + auto c_schema = CreateArrowSchema(field); + + std::map options = { + {BtreeDefs::kBtreeIndexBlockSize, "256"}, + {BtreeDefs::kBtreeIndexCacheSize, "1024"}, + }; + auto indexer = std::make_shared(options); + ASSERT_OK_AND_ASSIGN(auto writer, + indexer->CreateWriter("int_field", c_schema.get(), file_writer, pool_)); + auto btree_writer = std::dynamic_pointer_cast(writer); + ASSERT_TRUE(btree_writer); + + // Generate 50000 sorted int values with some nulls and duplicates. + // Pattern: every 100th row is null, values increase by 1 every 3 rows (duplicates). + // Data is written in multiple batches of 1000 rows each. 
+ constexpr int32_t total_rows = 50000; + constexpr int32_t batch_size = 1000; + + std::vector null_row_ids; + std::vector non_null_row_ids; + int32_t current_value = 0; + + for (int32_t batch_start = 0; batch_start < total_rows; batch_start += batch_size) { + int32_t batch_end = std::min(batch_start + batch_size, total_rows); + int32_t batch_len = batch_end - batch_start; + + arrow::Int32Builder value_builder; + ASSERT_TRUE(value_builder.Reserve(batch_len).ok()); + + std::vector batch_row_ids; + batch_row_ids.reserve(batch_len); + + for (int32_t i = batch_start; i < batch_end; ++i) { + batch_row_ids.push_back(i); + if (i % 100 == 99) { + ASSERT_TRUE(value_builder.AppendNull().ok()); + null_row_ids.push_back(i); + } else { + ASSERT_TRUE(value_builder.Append(current_value).ok()); + non_null_row_ids.push_back(i); + if (i % 3 == 2) { + ++current_value; + } + } + } + + std::shared_ptr value_array; + ASSERT_TRUE(value_builder.Finish(&value_array).ok()); + auto struct_array = arrow::StructArray::Make({value_array}, {field}).ValueOrDie(); + + ArrowArray c_array; + ASSERT_TRUE(arrow::ExportArray(*struct_array, &c_array).ok()); + ASSERT_OK(btree_writer->AddBatch(&c_array, batch_row_ids)); + } + + ASSERT_OK_AND_ASSIGN(auto metas, writer->Finish()); + ASSERT_EQ(metas.size(), 1); - // Now read back auto file_reader = std::make_shared(fs_, base_path_); - std::map options; - BTreeGlobalIndexer indexer(options); - - // Create a new ArrowSchema for reading (the original was consumed by the writer) - auto c_schema_read = CreateArrowSchema(arrow::int32(), "int_field"); - - // Create reader - auto reader_result = indexer.CreateReader(c_schema_read.get(), file_reader, metas, pool_); - ASSERT_OK(reader_result.status()); - auto reader = reader_result.value(); - - // Test VisitLessThan for value 3 (should return row IDs 0, 1) - Literal literal_3(static_cast(3)); - auto lt_result = reader->VisitLessThan(literal_3); - ASSERT_OK(lt_result.status()); - 
EXPECT_TRUE(ContainsRowId(lt_result.value(), 0)); - EXPECT_TRUE(ContainsRowId(lt_result.value(), 1)); - EXPECT_FALSE(ContainsRowId(lt_result.value(), 2)); - - // Test VisitGreaterOrEqual for value 3 (should return row IDs 2, 3, 4) - auto gte_result = reader->VisitGreaterOrEqual(literal_3); - ASSERT_OK(gte_result.status()); - EXPECT_TRUE(ContainsRowId(gte_result.value(), 2)); - EXPECT_TRUE(ContainsRowId(gte_result.value(), 3)); - EXPECT_TRUE(ContainsRowId(gte_result.value(), 4)); - EXPECT_FALSE(ContainsRowId(gte_result.value(), 1)); - - // Release ArrowSchema - ArrowSchemaRelease(c_schema.get()); - ArrowSchemaRelease(c_schema_read.get()); + c_schema = CreateArrowSchema(field); + ASSERT_OK_AND_ASSIGN(auto reader, + indexer->CreateReader(c_schema.get(), file_reader, metas, pool_)); + + // Helper lambda: given a predicate on value, collect matching row ids + auto collect_rows = [total_rows = total_rows](std::function predicate) { + std::vector result; + int32_t val = 0; + for (int32_t i = 0; i < total_rows; ++i) { + if (i % 100 == 99) { + continue; + } + if (predicate(val)) { + result.push_back(i); + } + if (i % 3 == 2) { + ++val; + } + } + return result; + }; + + // --- VisitIsNull --- + { + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitIsNull()); + CheckResult(result, null_row_ids); + } + + // --- VisitIsNotNull --- + { + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitIsNotNull()); + CheckResult(result, non_null_row_ids); + } + + // --- VisitEqual for value 0 --- + { + Literal lit_0(0); + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitEqual(lit_0)); + CheckResult(result, collect_rows([](int32_t v) { return v == 0; })); + } + + // --- VisitEqual for a value in the middle --- + { + Literal lit_100(100); + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitEqual(lit_100)); + CheckResult(result, collect_rows([](int32_t v) { return v == 100; })); + } + + // --- VisitEqual for non-existent value --- + { + Literal lit_neg(static_cast(-1)); + ASSERT_OK_AND_ASSIGN(auto result, 
reader->VisitEqual(lit_neg)); + CheckResult(result, {}); + } + + // --- VisitLessThan for value 5 --- + { + Literal lit_5(5); + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitLessThan(lit_5)); + CheckResult(result, collect_rows([](int32_t v) { return v < 5; })); + } + + // --- VisitGreaterOrEqual for a high value near max --- + { + int32_t max_val = + static_cast(collect_rows([](int32_t) { return true; }).size()) / 3; + int32_t threshold = max_val - 2; + Literal lit_threshold(threshold); + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitGreaterOrEqual(lit_threshold)); + CheckResult(result, collect_rows([threshold](int32_t v) { return v >= threshold; })); + } + + // --- VisitIn for scattered values --- + { + std::vector in_literals = {Literal(0), Literal(500), Literal(10000)}; + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitIn(in_literals)); + CheckResult(result, + collect_rows([](int32_t v) { return v == 0 || v == 500 || v == 10000; })); + } + + // --- VisitNotEqual for value 0 --- + { + Literal lit_0(0); + ASSERT_OK_AND_ASSIGN(auto result, reader->VisitNotEqual(lit_0)); + CheckResult(result, collect_rows([](int32_t v) { return v != 0; })); + } +} + +TEST_P(BTreeGlobalIndexIntegrationTest, CreateWriterWithNonStructSchema) { + auto file_writer = std::make_shared(fs_, base_path_); + std::map options = {{BtreeDefs::kBtreeIndexBlockSize, "128"}, + {BtreeDefs::kBtreeIndexCompression, GetParam()}}; + auto indexer = std::make_shared(options); + + // Export a plain int32 type (not struct) as ArrowSchema + auto plain_type = arrow::int32(); + ArrowSchema c_schema; + ASSERT_TRUE(arrow::ExportType(*plain_type, &c_schema).ok()); + + ASSERT_NOK_WITH_MSG(indexer->CreateWriter("int_field", &c_schema, file_writer, pool_), + "arrow schema must be struct type"); +} + +TEST_P(BTreeGlobalIndexIntegrationTest, CreateReaderWithMultipleMetas) { + auto file_reader = std::make_shared(fs_, base_path_); + auto field = arrow::field("int_field", arrow::int32()); + auto c_schema = 
CreateArrowSchema(field); + + std::map options = {{BtreeDefs::kBtreeIndexBlockSize, "128"}, + {BtreeDefs::kBtreeIndexCompression, GetParam()}}; + auto indexer = std::make_shared(options); + + // Provide two fake metas + GlobalIndexIOMeta meta1("fake_path_1", 100, 10, nullptr); + GlobalIndexIOMeta meta2("fake_path_2", 200, 20, nullptr); + std::vector metas = {meta1, meta2}; + + ASSERT_NOK_WITH_MSG(indexer->CreateReader(c_schema.get(), file_reader, metas, pool_), + "exist multiple metas"); +} + +TEST_P(BTreeGlobalIndexIntegrationTest, CreateReaderWithMultiFieldSchema) { + auto file_reader = std::make_shared(fs_, base_path_); + + // Create a schema with two fields + auto schema = arrow::schema( + {arrow::field("field1", arrow::int32()), arrow::field("field2", arrow::int64())}); + auto c_schema = std::make_unique(); + ASSERT_TRUE(arrow::ExportSchema(*schema, c_schema.get()).ok()); + + std::map options = {{BtreeDefs::kBtreeIndexBlockSize, "128"}, + {BtreeDefs::kBtreeIndexCompression, GetParam()}}; + auto indexer = std::make_shared(options); + + GlobalIndexIOMeta meta("fake_path", 100, 10, nullptr); + std::vector metas = {meta}; + + ASSERT_NOK_WITH_MSG(indexer->CreateReader(c_schema.get(), file_reader, metas, pool_), + "supposed to have single field"); } -TEST_F(BTreeGlobalIndexIntegrationTest, WriteAndReadInQuery) { - // Create file writer +TEST_P(BTreeGlobalIndexIntegrationTest, CreateWriterWithMissingField) { auto file_writer = std::make_shared(fs_, base_path_); + auto type = arrow::struct_({arrow::field("existing_field", arrow::int32())}); + auto struct_type = std::dynamic_pointer_cast(type); + ASSERT_TRUE(struct_type); + ASSERT_NOK_WITH_MSG( + BTreeGlobalIndexWriter::Create("nonexistent_field", struct_type, file_writer, 4096, + compression_factory_, pool_), + "not in arrow_array when Create BTreeGlobalIndexWriter"); +} - // Create ArrowSchema - auto c_schema = CreateArrowSchema(arrow::int32(), "int_field"); +TEST_P(BTreeGlobalIndexIntegrationTest, 
AddBatchWithNullArray) { + auto file_writer = std::make_shared(fs_, base_path_); + auto field = arrow::field("int_field", arrow::int32()); + auto c_schema = CreateArrowSchema(field); - // Create the BTree global index writer + std::map options = {{BtreeDefs::kBtreeIndexBlockSize, "128"}, + {BtreeDefs::kBtreeIndexCompression, GetParam()}}; + auto indexer = std::make_shared(options); ASSERT_OK_AND_ASSIGN(auto writer, - BTreeGlobalIndexWriter::Create("int_field", c_schema.get(), file_writer, - compression_factory_, pool_, 4096)); + indexer->CreateWriter("int_field", c_schema.get(), file_writer, pool_)); + auto btree_writer = std::dynamic_pointer_cast(writer); + ASSERT_TRUE(btree_writer); - // Create an Arrow array with int values - auto array = - arrow::ipc::internal::json::ArrayFromJSON(arrow::int32(), "[1, 2, 3, 4, 5]").ValueOrDie(); + std::vector row_ids = {0, 1, 2}; + ASSERT_NOK_WITH_MSG(btree_writer->AddBatch(nullptr, row_ids), "ArrowArray is null"); +} + +TEST_P(BTreeGlobalIndexIntegrationTest, AddBatchWithMismatchedRowIds) { + auto file_writer = std::make_shared(fs_, base_path_); + auto field = arrow::field("int_field", arrow::int32()); + auto c_schema = CreateArrowSchema(field); + + std::map options = {{BtreeDefs::kBtreeIndexBlockSize, "128"}, + {BtreeDefs::kBtreeIndexCompression, GetParam()}}; + auto indexer = std::make_shared(options); + ASSERT_OK_AND_ASSIGN(auto writer, + indexer->CreateWriter("int_field", c_schema.get(), file_writer, pool_)); + auto btree_writer = std::dynamic_pointer_cast(writer); + ASSERT_TRUE(btree_writer); + + auto array = arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_({field}), R"([ + [1], + [2], + [3] + ])") + .ValueOrDie(); ArrowArray c_array; ASSERT_TRUE(arrow::ExportArray(*array, &c_array).ok()); - // Add batch - ASSERT_OK(writer->AddBatch(&c_array)); + // Provide wrong number of row_ids (2 instead of 3) + std::vector row_ids = {0, 1}; + ASSERT_NOK_WITH_MSG(btree_writer->AddBatch(&c_array, row_ids), + "row_ids length 2 
mismatch arrow_array length 3 when AddBatch"); +} - // Finish writing - auto result = writer->Finish(); - ASSERT_OK(result.status()); - auto metas = result.value(); +TEST_P(BTreeGlobalIndexIntegrationTest, AddBatchWithNonMonotonicKeys) { + auto file_writer = std::make_shared(fs_, base_path_); + auto field = arrow::field("int_field", arrow::int32()); + auto c_schema = CreateArrowSchema(field); - // Release ArrowArray - ArrowArrayRelease(&c_array); + std::map options = {{BtreeDefs::kBtreeIndexBlockSize, "128"}, + {BtreeDefs::kBtreeIndexCompression, GetParam()}}; + auto indexer = std::make_shared(options); + ASSERT_OK_AND_ASSIGN(auto writer, + indexer->CreateWriter("int_field", c_schema.get(), file_writer, pool_)); + auto btree_writer = std::dynamic_pointer_cast(writer); + ASSERT_TRUE(btree_writer); + + // Write decreasing keys: 3, 2, 1 + auto array = arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_({field}), R"([ + [3], + [2], + [1] + ])") + .ValueOrDie(); - // Now read back - auto file_reader = std::make_shared(fs_, base_path_); - std::map options; - BTreeGlobalIndexer indexer(options); - - // Create a new ArrowSchema for reading (the original was consumed by the writer) - auto c_schema_read = CreateArrowSchema(arrow::int32(), "int_field"); - - // Create reader - auto reader_result = indexer.CreateReader(c_schema_read.get(), file_reader, metas, pool_); - ASSERT_OK(reader_result.status()); - auto reader = reader_result.value(); - - // Test VisitIn for values 1, 3, 5 (should return row IDs 0, 2, 4) - std::vector in_literals = {Literal(static_cast(1)), - Literal(static_cast(3)), - Literal(static_cast(5))}; - auto in_result = reader->VisitIn(in_literals); - ASSERT_OK(in_result.status()); - EXPECT_TRUE(ContainsRowId(in_result.value(), 0)); - EXPECT_TRUE(ContainsRowId(in_result.value(), 2)); - EXPECT_TRUE(ContainsRowId(in_result.value(), 4)); - EXPECT_FALSE(ContainsRowId(in_result.value(), 1)); - EXPECT_FALSE(ContainsRowId(in_result.value(), 3)); - - // Release 
ArrowSchema - ArrowSchemaRelease(c_schema.get()); - ArrowSchemaRelease(c_schema_read.get()); + ArrowArray c_array; + ASSERT_TRUE(arrow::ExportArray(*array, &c_array).ok()); + std::vector row_ids = {0, 1, 2}; + ASSERT_NOK_WITH_MSG(btree_writer->AddBatch(&c_array, row_ids), + "Users must keep written keys monotonically incremental"); } +TEST_P(BTreeGlobalIndexIntegrationTest, FinishWithEmptyData) { + auto file_writer = std::make_shared(fs_, base_path_); + auto field = arrow::field("int_field", arrow::int32()); + auto c_schema = CreateArrowSchema(field); + + std::map options = {{BtreeDefs::kBtreeIndexBlockSize, "128"}, + {BtreeDefs::kBtreeIndexCompression, GetParam()}}; + auto indexer = std::make_shared(options); + ASSERT_OK_AND_ASSIGN(auto writer, + indexer->CreateWriter("int_field", c_schema.get(), file_writer, pool_)); + auto btree_writer = std::dynamic_pointer_cast(writer); + ASSERT_TRUE(btree_writer); + + // Finish without adding any data + ASSERT_NOK_WITH_MSG(writer->Finish(), "Should never write an empty btree index file"); +} + +TEST_P(BTreeGlobalIndexIntegrationTest, TestIOException) { + bool run_complete = false; + auto io_hook = paimon::IOHook::GetInstance(); + for (size_t i = 0; i < 200; i++) { + auto test_dir = UniqueTestDirectory::Create("local"); + ASSERT_TRUE(test_dir); + auto local_fs = test_dir->GetFileSystem(); + auto local_base = test_dir->Str(); + paimon::ScopeGuard guard([&io_hook]() { io_hook->Clear(); }); + io_hook->Reset(i, paimon::IOHook::Mode::RETURN_ERROR); + + auto file_writer = std::make_shared(local_fs, local_base); + auto field = arrow::field("int_field", arrow::int32()); + auto c_schema = CreateArrowSchema(field); + + std::map options = { + {BtreeDefs::kBtreeIndexBlockSize, "128"}, + {BtreeDefs::kBtreeIndexCompression, GetParam()}}; + auto indexer = std::make_shared(options); + + // write + auto writer_result = indexer->CreateWriter("int_field", c_schema.get(), file_writer, pool_); + CHECK_HOOK_STATUS(writer_result.status(), i); + auto 
writer = std::move(writer_result).value(); + auto btree_writer = std::dynamic_pointer_cast(writer); + + auto array = arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_({field}), R"([ + [1], [2], [null], [3], [4], [5] + ])") + .ValueOrDie(); + ArrowArray c_array; + ASSERT_TRUE(arrow::ExportArray(*array, &c_array).ok()); + std::vector row_ids = {0, 1, 2, 3, 4, 5}; + + CHECK_HOOK_STATUS(btree_writer->AddBatch(&c_array, row_ids), i); + auto finish_result = writer->Finish(); + CHECK_HOOK_STATUS(finish_result.status(), i); + auto metas = std::move(finish_result).value(); + + // read + auto file_reader = std::make_shared(local_fs, local_base); + c_schema = CreateArrowSchema(field); + auto reader_result = indexer->CreateReader(c_schema.get(), file_reader, metas, pool_); + CHECK_HOOK_STATUS(reader_result.status(), i); + auto reader = std::move(reader_result).value(); + + auto equal_result = reader->VisitEqual(Literal(3)); + CHECK_HOOK_STATUS(equal_result.status(), i); + + auto typed_result = + std::dynamic_pointer_cast(equal_result.value()); + ASSERT_TRUE(typed_result); + auto bitmap_result = typed_result->GetBitmap(); + CHECK_HOOK_STATUS(bitmap_result.status(), i); + ASSERT_TRUE(bitmap_result.value()); + ASSERT_EQ(*bitmap_result.value(), RoaringBitmap64::From({3})); + + run_complete = true; + break; + } + ASSERT_TRUE(run_complete); +} + +INSTANTIATE_TEST_SUITE_P(Compression, BTreeGlobalIndexIntegrationTest, + ::testing::ValuesIn(std::vector({"none", "zstd", "lz4"}))); + } // namespace paimon::test diff --git a/src/paimon/common/global_index/btree/btree_global_index_reader.cpp b/src/paimon/common/global_index/btree/btree_global_index_reader.cpp index 21603656b..d4a589a68 100644 --- a/src/paimon/common/global_index/btree/btree_global_index_reader.cpp +++ b/src/paimon/common/global_index/btree/btree_global_index_reader.cpp @@ -16,216 +16,106 @@ #include "paimon/common/global_index/btree/btree_global_index_reader.h" -#include - +#include "fmt/format.h" +#include 
"paimon/common/global_index/btree/key_serializer.h" #include "paimon/common/memory/memory_slice.h" #include "paimon/common/memory/memory_slice_input.h" -#include "paimon/common/memory/memory_slice_output.h" -#include "paimon/common/utils/date_time_utils.h" -#include "paimon/common/utils/field_type_utils.h" -#include "paimon/data/decimal.h" -#include "paimon/data/timestamp.h" #include "paimon/global_index/bitmap_global_index_result.h" #include "paimon/memory/bytes.h" #include "paimon/predicate/literal.h" namespace paimon { - -// Helper function to convert Literal to MemorySlice -static Result LiteralToMemorySlice(const Literal& literal, MemoryPool* pool, - int32_t ts_precision) { - if (literal.IsNull()) { - return Status::Invalid("Cannot convert null literal to MemorySlice for btree index query"); - } - - auto type = literal.GetType(); - - // Handle string/binary types - if (type == FieldType::STRING || type == FieldType::BINARY) { - auto str_value = literal.GetValue(); - auto bytes = std::make_shared(str_value, pool); - return MemorySlice::Wrap(bytes); - } - - // Handle integer types - if (type == FieldType::BIGINT) { - auto value = literal.GetValue(); - auto bytes = std::make_shared(8, pool); - memcpy(bytes->data(), &value, sizeof(int64_t)); - return MemorySlice::Wrap(bytes); - } - - if (type == FieldType::INT) { - auto value = literal.GetValue(); - auto bytes = std::make_shared(4, pool); - memcpy(bytes->data(), &value, sizeof(int32_t)); - return MemorySlice::Wrap(bytes); - } - - if (type == FieldType::TINYINT) { - auto value = literal.GetValue(); - auto bytes = std::make_shared(1, pool); - bytes->data()[0] = static_cast(value); - return MemorySlice::Wrap(bytes); - } - - if (type == FieldType::SMALLINT) { - auto value = literal.GetValue(); - auto bytes = std::make_shared(2, pool); - memcpy(bytes->data(), &value, sizeof(int16_t)); - return MemorySlice::Wrap(bytes); - } - - if (type == FieldType::BOOLEAN) { - bool value = literal.GetValue(); - auto bytes = 
std::make_shared(1, pool); - bytes->data()[0] = value ? 1 : 0; - return MemorySlice::Wrap(bytes); - } - - if (type == FieldType::FLOAT) { - auto value = literal.GetValue(); - auto bytes = std::make_shared(sizeof(float), pool); - memcpy(bytes->data(), &value, sizeof(float)); - return MemorySlice::Wrap(bytes); - } - - if (type == FieldType::DOUBLE) { - auto value = literal.GetValue(); - auto bytes = std::make_shared(sizeof(double), pool); - memcpy(bytes->data(), &value, sizeof(double)); - return MemorySlice::Wrap(bytes); - } - - if (type == FieldType::DATE) { - // DATE is stored as int32_t to match Java's writeInt - auto value = literal.GetValue(); - auto bytes = std::make_shared(sizeof(int32_t), pool); - memcpy(bytes->data(), &value, sizeof(int32_t)); - return MemorySlice::Wrap(bytes); - } - - if (type == FieldType::TIMESTAMP) { - auto ts = literal.GetValue(); - if (Timestamp::IsCompact(ts_precision)) { - // compact: writeLong(millisecond) - int64_t value = ts.GetMillisecond(); - auto bytes = std::make_shared(sizeof(int64_t), pool); - memcpy(bytes->data(), &value, sizeof(int64_t)); - return MemorySlice::Wrap(bytes); - } else { - // non-compact: writeLong(millisecond) + writeVarLenInt(nanoOfMillisecond) - MemorySliceOutput ts_out(13, pool); - ts_out.WriteValue(ts.GetMillisecond()); - PAIMON_RETURN_NOT_OK(ts_out.WriteVarLenInt(ts.GetNanoOfMillisecond())); - return ts_out.ToSlice(); - } - } - - if (type == FieldType::DECIMAL) { - auto decimal_value = literal.GetValue(); - auto bytes = std::make_shared(16, pool); - uint64_t high_bits = decimal_value.HighBits(); - uint64_t low_bits = decimal_value.LowBits(); - for (int i = 0; i < 8; ++i) { - bytes->data()[i] = static_cast((high_bits >> (56 - i * 8)) & 0xFF); - } - for (int i = 0; i < 8; ++i) { - bytes->data()[8 + i] = static_cast((low_bits >> (56 - i * 8)) & 0xFF); - } - return MemorySlice::Wrap(bytes); - } - - return Status::NotImplemented("Literal type " + FieldTypeUtils::FieldTypeToString(type) + - " not yet supported 
in btree index"); -} - BTreeGlobalIndexReader::BTreeGlobalIndexReader( - const std::shared_ptr& sst_file_reader, - const std::shared_ptr& null_bitmap, const MemorySlice& min_key, - const MemorySlice& max_key, bool has_min_key, const std::vector& files, - const std::shared_ptr& pool, - std::function comparator, int32_t ts_precision) - : sst_file_reader_(sst_file_reader), - null_bitmap_(null_bitmap), + const std::shared_ptr& sst_file_reader, RoaringBitmap64&& null_bitmap, + const std::optional& min_key, const std::optional& max_key, + const std::shared_ptr& key_type, const std::shared_ptr& pool) + : pool_(pool), + sst_file_reader_(sst_file_reader), + null_bitmap_(std::move(null_bitmap)), min_key_(min_key), max_key_(max_key), - has_min_key_(has_min_key), - files_(files), - pool_(pool), - comparator_(std::move(comparator)), - ts_precision_(ts_precision) {} + key_type_(key_type) {} Result> BTreeGlobalIndexReader::VisitIsNotNull() { - return std::make_shared([this]() -> Result { - PAIMON_ASSIGN_OR_RAISE(RoaringBitmap64 result, AllNonNullRows()); - return result; - }); + return std::make_shared( + [reader = shared_from_this()]() -> Result { + return reader->AllNonNullRows(); + }); } Result> BTreeGlobalIndexReader::VisitIsNull() { return std::make_shared( - [this]() -> Result { return *null_bitmap_; }); + [reader = shared_from_this()]() -> Result { + return reader->null_bitmap_; + }); } Result> BTreeGlobalIndexReader::VisitStartsWith( const Literal& prefix) { - return std::make_shared([this, &prefix]() -> Result { - PAIMON_ASSIGN_OR_RAISE(auto prefix_slice, - LiteralToMemorySlice(prefix, pool_.get(), ts_precision_)); - - auto prefix_type = prefix.GetType(); - - if (prefix_type == FieldType::STRING || prefix_type == FieldType::BINARY) { - auto prefix_bytes = prefix_slice.GetHeapMemory(); - if (!prefix_bytes || prefix_bytes->size() == 0) { - PAIMON_ASSIGN_OR_RAISE(RoaringBitmap64 result, AllNonNullRows()); - return result; - } + if (prefix.IsNull()) { + return 
Status::Invalid("StartsWith pattern cannot be null"); + } - std::string upper_bound_str(prefix_bytes->data(), prefix_bytes->size()); - bool overflow = true; - for (int i = static_cast(upper_bound_str.size()) - 1; i >= 0 && overflow; --i) { - auto c = static_cast(upper_bound_str[i]); - if (c < 0xFF) { - upper_bound_str[i] = c + 1; - overflow = false; - } else { - upper_bound_str[i] = 0x00; - } - } + if (prefix.GetType() == FieldType::STRING) { + auto prefix_str = prefix.GetValue(); + if (prefix_str.empty()) { + return std::make_shared( + [reader = shared_from_this()]() -> Result { + return reader->AllNonNullRows(); + }); + } - if (!overflow) { - auto upper_bytes = std::make_shared(upper_bound_str, pool_.get()); - auto upper_bound_slice = MemorySlice::Wrap(upper_bytes); - PAIMON_ASSIGN_OR_RAISE(RoaringBitmap64 result, - RangeQuery(prefix_slice, upper_bound_slice, true, false)); - return result; + // Compute the exclusive upper bound for the prefix range. + // Increment the last byte; carry over if it overflows 0xFF. 
+ std::string upper_str = prefix_str; + bool overflow = true; + for (int32_t i = static_cast(upper_str.size()) - 1; i >= 0 && overflow; --i) { + auto c = static_cast(upper_str[i]); + if (c < 0xFF) { + upper_str[i] = static_cast(c + 1); + overflow = false; } else { - // If overflow (all bytes were 0xFF), use max_key_ as upper bound - PAIMON_ASSIGN_OR_RAISE(RoaringBitmap64 result, - RangeQuery(prefix_slice, max_key_, true, false)); - return result; + upper_str[i] = 0x00; } } - return RoaringBitmap64(); - }); + if (!overflow) { + Literal upper_bound(FieldType::STRING, upper_str.data(), upper_str.size()); + return std::make_shared( + [reader = shared_from_this(), prefix = prefix, + upper_bound = std::move(upper_bound)]() -> Result { + return reader->RangeQuery(prefix, upper_bound, /*from_inclusive=*/true, + /*to_inclusive=*/false); + }); + } + + // All bytes were 0xFF, use max_key_ as upper bound + return std::make_shared( + [reader = shared_from_this(), prefix = prefix]() -> Result { + return reader->RangeQuery(prefix, reader->max_key_, /*from_inclusive=*/true, + /*to_inclusive=*/true); + }); + } + + return std::make_shared( + [reader = shared_from_this()]() -> Result { + return reader->AllNonNullRows(); + }); } Result> BTreeGlobalIndexReader::VisitEndsWith( const Literal& suffix) { - return std::make_shared([this]() -> Result { - PAIMON_ASSIGN_OR_RAISE(RoaringBitmap64 result, AllNonNullRows()); - return result; - }); + return std::make_shared( + [reader = shared_from_this()]() -> Result { + return reader->AllNonNullRows(); + }); } Result> BTreeGlobalIndexReader::VisitContains( const Literal& literal) { - return std::make_shared([this]() -> Result { - PAIMON_ASSIGN_OR_RAISE(RoaringBitmap64 result, AllNonNullRows()); - return result; - }); + return std::make_shared( + [reader = shared_from_this()]() -> Result { + return reader->AllNonNullRows(); + }); } Result> BTreeGlobalIndexReader::VisitLike( @@ -233,119 +123,98 @@ Result> BTreeGlobalIndexReader::VisitLike( if 
(literal.IsNull()) { return Status::Invalid("LIKE pattern cannot be null"); } + if (literal.GetType() == FieldType::STRING) { + auto pattern = literal.GetValue(); - auto pattern = literal.GetValue(); + bool is_prefix_pattern = false; + std::string prefix; - bool is_prefix_pattern = false; - std::string prefix; + size_t first_wildcard = pattern.find_first_of("_%"); - size_t first_wildcard = pattern.find_first_of("_%"); - - if (first_wildcard != std::string::npos) { - if (pattern[first_wildcard] == '%' && first_wildcard == pattern.length() - 1) { - bool has_wildcard_in_prefix = false; - for (size_t i = 0; i < first_wildcard; ++i) { - if (pattern[i] == '_' || pattern[i] == '%') { - has_wildcard_in_prefix = true; - break; - } - } - if (!has_wildcard_in_prefix) { - is_prefix_pattern = true; - prefix = pattern.substr(0, first_wildcard); - } + if (first_wildcard != std::string::npos && pattern[first_wildcard] == '%' && + first_wildcard == pattern.length() - 1) { + is_prefix_pattern = true; + prefix = pattern.substr(0, first_wildcard); } - } - if (is_prefix_pattern) { - Literal prefix_literal(FieldType::STRING, prefix.c_str(), prefix.length()); - return VisitStartsWith(prefix_literal); + if (is_prefix_pattern) { + Literal prefix_literal(FieldType::STRING, prefix.data(), prefix.length()); + return VisitStartsWith(prefix_literal); + } } - - return std::make_shared([this]() -> Result { - PAIMON_ASSIGN_OR_RAISE(RoaringBitmap64 result, AllNonNullRows()); - return result; - }); + return std::make_shared( + [reader = shared_from_this()]() -> Result { + return reader->AllNonNullRows(); + }); } Result> BTreeGlobalIndexReader::VisitLessThan( const Literal& literal) { - return std::make_shared([this, &literal]() -> Result { - PAIMON_ASSIGN_OR_RAISE(auto literal_slice, - LiteralToMemorySlice(literal, pool_.get(), ts_precision_)); - PAIMON_ASSIGN_OR_RAISE(RoaringBitmap64 result, - RangeQuery(min_key_, literal_slice, true, false)); - return result; - }); + return std::make_shared( + 
[reader = shared_from_this(), literal = literal]() -> Result { + return reader->RangeQuery(reader->min_key_, literal, /*from_inclusive=*/true, + /*to_inclusive=*/false); + }); } Result> BTreeGlobalIndexReader::VisitGreaterOrEqual( const Literal& literal) { - return std::make_shared([this, &literal]() -> Result { - PAIMON_ASSIGN_OR_RAISE(auto literal_slice, - LiteralToMemorySlice(literal, pool_.get(), ts_precision_)); - PAIMON_ASSIGN_OR_RAISE(RoaringBitmap64 result, - RangeQuery(literal_slice, max_key_, true, true)); - return result; - }); + return std::make_shared( + [reader = shared_from_this(), literal = literal]() -> Result { + return reader->RangeQuery(literal, reader->max_key_, /*from_inclusive=*/true, + /*to_inclusive=*/true); + }); } Result> BTreeGlobalIndexReader::VisitNotEqual( const Literal& literal) { - return std::make_shared([this, &literal]() -> Result { - PAIMON_ASSIGN_OR_RAISE(RoaringBitmap64 result, AllNonNullRows()); - PAIMON_ASSIGN_OR_RAISE(auto literal_slice, - LiteralToMemorySlice(literal, pool_.get(), ts_precision_)); - PAIMON_ASSIGN_OR_RAISE(RoaringBitmap64 equal_result, - RangeQuery(literal_slice, literal_slice, true, true)); - result -= equal_result; - return result; - }); + return std::make_shared( + [reader = shared_from_this(), literal = literal]() -> Result { + PAIMON_ASSIGN_OR_RAISE(RoaringBitmap64 result, reader->AllNonNullRows()); + PAIMON_ASSIGN_OR_RAISE(RoaringBitmap64 equal_result, + reader->RangeQuery(literal, literal, /*from_inclusive=*/true, + /*to_inclusive=*/true)); + result -= equal_result; + return result; + }); } Result> BTreeGlobalIndexReader::VisitLessOrEqual( const Literal& literal) { - return std::make_shared([this, &literal]() -> Result { - PAIMON_ASSIGN_OR_RAISE(auto literal_slice, - LiteralToMemorySlice(literal, pool_.get(), ts_precision_)); - PAIMON_ASSIGN_OR_RAISE(RoaringBitmap64 result, - RangeQuery(min_key_, literal_slice, true, true)); - return result; - }); + return std::make_shared( + [reader = 
shared_from_this(), literal = literal]() -> Result { + return reader->RangeQuery(reader->min_key_, literal, /*from_inclusive=*/true, + /*to_inclusive=*/true); + }); } Result> BTreeGlobalIndexReader::VisitEqual( const Literal& literal) { - return std::make_shared([this, &literal]() -> Result { - PAIMON_ASSIGN_OR_RAISE(auto literal_slice, - LiteralToMemorySlice(literal, pool_.get(), ts_precision_)); - PAIMON_ASSIGN_OR_RAISE(RoaringBitmap64 result, - RangeQuery(literal_slice, literal_slice, true, true)); - return result; - }); + return std::make_shared( + [reader = shared_from_this(), literal = literal]() -> Result { + return reader->RangeQuery(literal, literal, /*from_inclusive=*/true, + /*to_inclusive=*/true); + }); } Result> BTreeGlobalIndexReader::VisitGreaterThan( const Literal& literal) { - return std::make_shared([this, &literal]() -> Result { - PAIMON_ASSIGN_OR_RAISE(auto literal_slice, - LiteralToMemorySlice(literal, pool_.get(), ts_precision_)); - PAIMON_ASSIGN_OR_RAISE(RoaringBitmap64 result, - RangeQuery(literal_slice, max_key_, false, true)); - return result; - }); + return std::make_shared( + [reader = shared_from_this(), literal = literal]() -> Result { + return reader->RangeQuery(literal, reader->max_key_, /*from_inclusive=*/false, + /*to_inclusive=*/true); + }); } Result> BTreeGlobalIndexReader::VisitIn( const std::vector& literals) { return std::make_shared( - [this, &literals]() -> Result { + [reader = shared_from_this(), literals = literals]() -> Result { RoaringBitmap64 result; for (const auto& literal : literals) { - PAIMON_ASSIGN_OR_RAISE(auto literal_slice, - LiteralToMemorySlice(literal, pool_.get(), ts_precision_)); PAIMON_ASSIGN_OR_RAISE(RoaringBitmap64 literal_result, - RangeQuery(literal_slice, literal_slice, true, true)); + reader->RangeQuery(literal, literal, /*from_inclusive=*/true, + /*to_inclusive=*/true)); result |= literal_result; } return result; @@ -355,150 +224,47 @@ Result> BTreeGlobalIndexReader::VisitIn( Result> 
BTreeGlobalIndexReader::VisitNotIn( const std::vector& literals) { return std::make_shared( - [this, &literals]() -> Result { - PAIMON_ASSIGN_OR_RAISE(RoaringBitmap64 result, AllNonNullRows()); - - PAIMON_ASSIGN_OR_RAISE(auto in_result_ptr, VisitIn(literals)); - PAIMON_ASSIGN_OR_RAISE(auto in_iterator, in_result_ptr->CreateIterator()); - - RoaringBitmap64 in_bitmap; - while (in_iterator->HasNext()) { - in_bitmap.Add(in_iterator->Next()); + [reader = shared_from_this(), literals = literals]() -> Result { + PAIMON_ASSIGN_OR_RAISE(RoaringBitmap64 result, reader->AllNonNullRows()); + PAIMON_ASSIGN_OR_RAISE(std::shared_ptr in_result, + reader->VisitIn(literals)); + auto* typed_in_result = dynamic_cast(in_result.get()); + if (!typed_in_result) { + return Status::Invalid( + "VisitIn should return BitmapGlobalIndexResult in BTreeGlobalIndexReader"); } - - result -= in_bitmap; + PAIMON_ASSIGN_OR_RAISE(const RoaringBitmap64* in_bitmap, typed_in_result->GetBitmap()); + result -= (*in_bitmap); return result; }); } -Result> BTreeGlobalIndexReader::VisitBetween(const Literal& from, - const Literal& to) { - return std::make_shared([this, &from, - &to]() -> Result { - PAIMON_ASSIGN_OR_RAISE(auto from_slice, - LiteralToMemorySlice(from, pool_.get(), ts_precision_)); - PAIMON_ASSIGN_OR_RAISE(auto to_slice, LiteralToMemorySlice(to, pool_.get(), ts_precision_)); - PAIMON_ASSIGN_OR_RAISE(RoaringBitmap64 result, - RangeQuery(from_slice, to_slice, true, true)); - return result; - }); -} - -Result> BTreeGlobalIndexReader::VisitNotBetween( - const Literal& from, const Literal& to) { - return std::make_shared([this, &from, - &to]() -> Result { - PAIMON_ASSIGN_OR_RAISE(auto from_slice, - LiteralToMemorySlice(from, pool_.get(), ts_precision_)); - PAIMON_ASSIGN_OR_RAISE(auto to_slice, LiteralToMemorySlice(to, pool_.get(), ts_precision_)); - PAIMON_ASSIGN_OR_RAISE(RoaringBitmap64 lower_result, - RangeQuery(min_key_, from_slice, true, false)); - PAIMON_ASSIGN_OR_RAISE(RoaringBitmap64 
upper_result, - RangeQuery(to_slice, max_key_, false, true)); - lower_result |= upper_result; - return lower_result; - }); -} - -Result> BTreeGlobalIndexReader::VisitAnd( - const std::vector>>& children) { - return std::make_shared([&children]() -> Result { - if (children.empty()) { - return Status::Invalid("VisitAnd called with no children"); - } - - auto first_result_status = children[0]; - if (!first_result_status.ok()) { - return first_result_status.status(); - } - auto first_result = std::move(first_result_status).value(); - PAIMON_ASSIGN_OR_RAISE(auto first_iterator, first_result->CreateIterator()); - - RoaringBitmap64 result_bitmap; - while (first_iterator->HasNext()) { - result_bitmap.Add(first_iterator->Next()); - } - - for (size_t i = 1; i < children.size(); ++i) { - auto child_status = children[i]; - if (!child_status.ok()) { - return child_status.status(); - } - auto child = std::move(child_status).value(); - PAIMON_ASSIGN_OR_RAISE(auto child_iterator, child->CreateIterator()); - - RoaringBitmap64 child_bitmap; - while (child_iterator->HasNext()) { - child_bitmap.Add(child_iterator->Next()); - } - - result_bitmap &= child_bitmap; - } - - return result_bitmap; - }); -} - -Result> BTreeGlobalIndexReader::VisitOr( - const std::vector>>& children) { - return std::make_shared([&children]() -> Result { - RoaringBitmap64 result_bitmap; - - for (const auto& child_status : children) { - if (!child_status.ok()) { - return child_status.status(); - } - auto child = std::move(child_status).value(); - PAIMON_ASSIGN_OR_RAISE(auto child_iterator, child->CreateIterator()); - - while (child_iterator->HasNext()) { - result_bitmap.Add(child_iterator->Next()); - } - } - - return result_bitmap; - }); -} - Result> BTreeGlobalIndexReader::VisitVectorSearch( const std::shared_ptr& vector_search) { - return Status::NotImplemented("Vector search not supported in BTree index"); + return Status::Invalid("Vector search not supported in BTree index"); } Result> 
BTreeGlobalIndexReader::VisitFullTextSearch( const std::shared_ptr& full_text_search) { - return Status::NotImplemented("Full text search not supported in BTree index"); + return Status::Invalid("Full text search not supported in BTree index"); } -Result BTreeGlobalIndexReader::RangeQuery(const MemorySlice& lower_bound, - const MemorySlice& upper_bound, - bool lower_inclusive, - bool upper_inclusive) { +Result BTreeGlobalIndexReader::RangeQuery(const std::optional& from, + const std::optional& to, + bool from_inclusive, bool to_inclusive) { RoaringBitmap64 result; + if (!from || !to) { + return result; + } // Create an index block iterator to iterate through data blocks + PAIMON_ASSIGN_OR_RAISE(std::shared_ptr from_bytes, + KeySerializer::SerializeKey(from.value(), key_type_, pool_.get())); auto index_iterator = sst_file_reader_->CreateIndexIterator(); - - // Seek iterator to the lower bound - auto lower_bytes = lower_bound.GetHeapMemory(); - - if (lower_bytes) { - PAIMON_ASSIGN_OR_RAISE([[maybe_unused]] bool seek_result, - index_iterator->SeekTo(lower_bound)); - } - - // Check if there are any blocks to read - if (!index_iterator->HasNext()) { - return result; - } + PAIMON_ASSIGN_OR_RAISE([[maybe_unused]] bool seek_result, + index_iterator->SeekTo(MemorySlice::Wrap(from_bytes))); bool first_block = true; - - // Compare key with bounds using the comparator - if (!comparator_) { - return Status::Invalid("Comparator is not set for BTreeGlobalIndexReader"); - } - while (index_iterator->HasNext()) { // Get the next data block PAIMON_ASSIGN_OR_RAISE(std::unique_ptr data_iterator, @@ -509,76 +275,61 @@ Result BTreeGlobalIndexReader::RangeQuery(const MemorySlice& lo } // For the first block, we need to seek within the block to the exact position - if (first_block && lower_bytes) { - PAIMON_ASSIGN_OR_RAISE([[maybe_unused]] bool found, data_iterator->SeekTo(lower_bound)); + if (first_block) { + PAIMON_ASSIGN_OR_RAISE([[maybe_unused]] bool found, + 
data_iterator->SeekTo(MemorySlice::Wrap(from_bytes))); first_block = false; - - if (!data_iterator->HasNext()) { - continue; - } } // Iterate through entries in the data block while (data_iterator->HasNext()) { PAIMON_ASSIGN_OR_RAISE(std::unique_ptr entry, data_iterator->Next()); - int cmp_lower = comparator_(entry->key, lower_bound); + PAIMON_ASSIGN_OR_RAISE( + Literal key, KeySerializer::DeserializeKey(entry->key, key_type_, pool_.get())); + PAIMON_ASSIGN_OR_RAISE(int32_t cmp_from, key.CompareTo(from.value())); // Check lower bound - if (!lower_inclusive && cmp_lower == 0) { + if (!from_inclusive && cmp_from == 0) { continue; } // Check upper bound - int cmp_upper = comparator_(entry->key, upper_bound); + PAIMON_ASSIGN_OR_RAISE(int32_t cmp_to, key.CompareTo(to.value())); - if (cmp_upper > 0 || (!upper_inclusive && cmp_upper == 0)) { + if (cmp_to > 0 || (!to_inclusive && cmp_to == 0)) { return result; } - // Deserialize row IDs from the value - auto value_bytes = entry->value.CopyBytes(pool_.get()); - auto value_slice = MemorySlice::Wrap(value_bytes); - auto value_input = value_slice.ToInput(); - - // Read row IDs. The format is: [num_row_ids (VarLenLong)][row_id1 (VarLenLong)]... 
- // Use VarLenLong to match Java's DataOutputStream.writeVarLong format - PAIMON_ASSIGN_OR_RAISE(int64_t num_row_ids, value_input.ReadVarLenLong()); - - for (int64_t i = 0; i < num_row_ids; i++) { - PAIMON_ASSIGN_OR_RAISE(int64_t row_id, value_input.ReadVarLenLong()); - result.Add(row_id); - } + PAIMON_RETURN_NOT_OK(DeserializeRowIds(entry->value, &result)); } } - return result; } -Result BTreeGlobalIndexReader::AllNonNullRows() { - if (files_.empty()) { - return RoaringBitmap64(); +Status BTreeGlobalIndexReader::DeserializeRowIds(const MemorySlice& slice, + RoaringBitmap64* result) const { + auto input = slice.ToInput(); + PAIMON_ASSIGN_OR_RAISE(int32_t num_row_ids, input.ReadVarLenInt()); + if (num_row_ids <= 0) { + return Status::Invalid(fmt::format( + "Invalid row id length {} in DeserializeRowIds for BTreeGlobalIndexReader, must > 0", + num_row_ids)); } - - int64_t total_rows = files_[0].range_end + 1; - uint64_t null_count = null_bitmap_->Cardinality(); - - const double NULL_RATIO_THRESHOLD = 0.1; - const int64_t MAX_ROWS_FOR_SUBTRACTION = 10000000; - - bool use_subtraction = (total_rows <= MAX_ROWS_FOR_SUBTRACTION) && - (null_count < static_cast(total_rows * NULL_RATIO_THRESHOLD)); - - if (use_subtraction) { - RoaringBitmap64 result; - result.AddRange(0, total_rows); - result -= *null_bitmap_; - return result; + for (int32_t i = 0; i < num_row_ids; i++) { + PAIMON_ASSIGN_OR_RAISE(int64_t row_id, input.ReadVarLenLong()); + result->Add(row_id); } + return Status::OK(); +} - if (!has_min_key_) { +Result BTreeGlobalIndexReader::AllNonNullRows() { + // Traverse all data to avoid returning null values, which is very advantageous in + // situations where there are many null values + // TODO(xinyu.lxy) do not traverse all data if less null values + if (!min_key_) { return RoaringBitmap64(); } - return RangeQuery(min_key_, max_key_, true, true); + return RangeQuery(min_key_, max_key_, /*from_inclusive=*/true, /*to_inclusive=*/true); } } // namespace paimon diff 
--git a/src/paimon/common/global_index/btree/btree_global_index_reader.h b/src/paimon/common/global_index/btree/btree_global_index_reader.h index 1a62ac731..e994c986a 100644 --- a/src/paimon/common/global_index/btree/btree_global_index_reader.h +++ b/src/paimon/common/global_index/btree/btree_global_index_reader.h @@ -18,28 +18,29 @@ #include #include +#include #include #include +#include "arrow/api.h" +#include "paimon/common/global_index/btree/btree_defs.h" #include "paimon/common/sst/sst_file_reader.h" #include "paimon/global_index/global_index_io_meta.h" #include "paimon/global_index/global_index_reader.h" #include "paimon/utils/roaring_bitmap64.h" - namespace paimon { /// Reader for BTree Global Index files. /// This reader evaluates filter predicates against a BTree-based SST file /// where each key maps to a list of row IDs. -class BTreeGlobalIndexReader : public GlobalIndexReader { +class BTreeGlobalIndexReader : public GlobalIndexReader, + public std::enable_shared_from_this { public: - BTreeGlobalIndexReader( - const std::shared_ptr& sst_file_reader, - const std::shared_ptr& null_bitmap, const MemorySlice& min_key, - const MemorySlice& max_key, bool has_min_key, const std::vector& files, - const std::shared_ptr& pool, - std::function comparator, - int32_t ts_precision); + BTreeGlobalIndexReader(const std::shared_ptr& sst_file_reader, + RoaringBitmap64&& null_bitmap, const std::optional& min_key, + const std::optional& max_key, + const std::shared_ptr& key_type, + const std::shared_ptr& pool); Result> VisitIsNotNull() override; @@ -63,12 +64,6 @@ class BTreeGlobalIndexReader : public GlobalIndexReader { Result> VisitNotIn( const std::vector& literals) override; - Result> VisitBetween(const Literal& from, - const Literal& to) override; - - Result> VisitNotBetween(const Literal& from, - const Literal& to) override; - Result> VisitStartsWith(const Literal& prefix) override; Result> VisitEndsWith(const Literal& suffix) override; @@ -77,12 +72,6 @@ class 
BTreeGlobalIndexReader : public GlobalIndexReader { Result> VisitLike(const Literal& literal) override; - Result> VisitAnd( - const std::vector>>& children) override; - - Result> VisitOr( - const std::vector>>& children) override; - Result> VisitVectorSearch( const std::shared_ptr& vector_search) override; @@ -94,25 +83,24 @@ class BTreeGlobalIndexReader : public GlobalIndexReader { } std::string GetIndexType() const override { - return "btree"; + return BtreeDefs::kIdentifier; } private: - Result RangeQuery(const MemorySlice& lower_bound, - const MemorySlice& upper_bound, bool lower_inclusive, - bool upper_inclusive); + Result RangeQuery(const std::optional& from, + const std::optional& to, bool from_inclusive, + bool to_inclusive); + + Status DeserializeRowIds(const MemorySlice& slice, RoaringBitmap64* result) const; Result AllNonNullRows(); - std::shared_ptr sst_file_reader_; - std::shared_ptr null_bitmap_; - MemorySlice min_key_; - MemorySlice max_key_; - bool has_min_key_; - std::vector files_; std::shared_ptr pool_; - std::function comparator_; - int32_t ts_precision_; + std::shared_ptr sst_file_reader_; + RoaringBitmap64 null_bitmap_; + std::optional min_key_; + std::optional max_key_; + std::shared_ptr key_type_; }; } // namespace paimon diff --git a/src/paimon/common/global_index/btree/btree_global_index_writer.cpp b/src/paimon/common/global_index/btree/btree_global_index_writer.cpp index 1e1310a6a..ea7b5baf4 100644 --- a/src/paimon/common/global_index/btree/btree_global_index_writer.cpp +++ b/src/paimon/common/global_index/btree/btree_global_index_writer.cpp @@ -16,307 +16,193 @@ #include "paimon/common/global_index/btree/btree_global_index_writer.h" -#include - #include +#include "arrow/c/bridge.h" +#include "fmt/format.h" #include "paimon/common/compression/block_compression_factory.h" +#include "paimon/common/global_index/btree/btree_defs.h" +#include "paimon/common/global_index/btree/key_serializer.h" #include 
"paimon/common/memory/memory_slice_output.h" +#include "paimon/common/predicate/literal_converter.h" #include "paimon/common/utils/arrow/status_utils.h" #include "paimon/common/utils/crc32c.h" -#include "paimon/common/utils/date_time_utils.h" -#include "paimon/common/utils/field_type_utils.h" +#include "paimon/common/utils/preconditions.h" #include "paimon/memory/bytes.h" - namespace paimon { - Result> BTreeGlobalIndexWriter::Create( - const std::string& field_name, ::ArrowSchema* arrow_schema, - const std::shared_ptr& file_writer, + const std::string& field_name, const std::shared_ptr& arrow_type, + const std::shared_ptr& file_writer, int32_t block_size, const std::shared_ptr& compression_factory, - const std::shared_ptr& pool, int32_t block_size) { - // Import schema to get the field type - std::shared_ptr arrow_type; - if (arrow_schema) { - PAIMON_ASSIGN_OR_RAISE_FROM_ARROW(std::shared_ptr schema, - arrow::ImportSchema(arrow_schema)); - if (schema->num_fields() > 0) { - arrow_type = schema->field(0)->type(); - } - } - - auto writer = std::shared_ptr(new BTreeGlobalIndexWriter( - field_name, std::move(arrow_type), file_writer, compression_factory, pool, block_size)); - - // Initialize SST writer - if (!file_writer) { - return Status::Invalid("file_writer is null"); - } - PAIMON_ASSIGN_OR_RAISE(writer->file_name_, file_writer->NewFileName("btree")); - PAIMON_ASSIGN_OR_RAISE(writer->output_stream_, - file_writer->NewOutputStream(writer->file_name_)); - writer->sst_writer_ = std::make_unique(writer->output_stream_, nullptr, - block_size, compression_factory, pool); - - return writer; + const std::shared_ptr& pool) { + auto key_field = arrow_type->GetFieldByName(field_name); + PAIMON_RETURN_NOT_OK(Preconditions::CheckNotNull( + key_field, + fmt::format("field {} not in arrow_array when Create BTreeGlobalIndexWriter", field_name))); + PAIMON_ASSIGN_OR_RAISE(std::string index_file_name, + file_writer->NewFileName(BtreeDefs::kIdentifier)); + 
PAIMON_ASSIGN_OR_RAISE(std::shared_ptr output_stream, + file_writer->NewOutputStream(index_file_name)); + auto sst_file_writer = std::make_unique(output_stream, /*bloom_filter=*/nullptr, + block_size, compression_factory, pool); + return std::shared_ptr(new BTreeGlobalIndexWriter( + field_name, arrow_type, key_field->type(), file_writer, index_file_name, output_stream, + std::move(sst_file_writer), pool)); } BTreeGlobalIndexWriter::BTreeGlobalIndexWriter( - const std::string& field_name, std::shared_ptr arrow_type, - const std::shared_ptr& file_writer, - const std::shared_ptr& compression_factory, - const std::shared_ptr& pool, int32_t block_size) + const std::string& field_name, const std::shared_ptr& arrow_type, + const std::shared_ptr& key_type, + const std::shared_ptr& file_writer, const std::string& index_file_name, + const std::shared_ptr& output_stream, std::unique_ptr&& sst_writer, + const std::shared_ptr& pool) : field_name_(field_name), - arrow_type_(std::move(arrow_type)), + arrow_type_(arrow_type), + key_type_(key_type), pool_(pool), file_writer_(file_writer), - null_bitmap_(std::make_shared()), - has_nulls_(false), - current_row_id_(0) {} + index_file_name_(index_file_name), + output_stream_(output_stream), + sst_writer_(std::move(sst_writer)) {} -Status BTreeGlobalIndexWriter::AddBatch(::ArrowArray* arrow_array) { +Status BTreeGlobalIndexWriter::AddBatch(::ArrowArray* arrow_array, + const std::vector& row_ids) { if (!arrow_array) { return Status::Invalid("ArrowArray is null"); } - - if (!arrow_type_) { - return Status::Invalid( - "Arrow type is not set. 
Please provide a valid ArrowSchema in constructor."); - } - - // Import Arrow array with the correct type PAIMON_ASSIGN_OR_RAISE_FROM_ARROW(std::shared_ptr array, arrow::ImportArray(arrow_array, arrow_type_)); - - // Group row IDs by key value - // Use std::map with custom comparator for binary keys - // Keys are stored in binary format to match Java's serialization - std::map, std::vector, - std::function&, const std::shared_ptr&)>> - key_to_row_ids([this](const std::shared_ptr& a, const std::shared_ptr& b) { - return CompareBinaryKeys(a, b) < 0; - }); + if (static_cast(row_ids.size()) != array->length()) { + return Status::Invalid( + fmt::format("row_ids length {} mismatch arrow_array length {} when AddBatch to " + "BTreeGlobalIndexWriter", + row_ids.size(), array->length())); + } + auto struct_array = std::dynamic_pointer_cast(array); + PAIMON_RETURN_NOT_OK(Preconditions::CheckNotNull( + struct_array, "arrow array must be struct array when AddBatch to BTreeGlobalIndexWriter")); + auto value_array = struct_array->GetFieldByName(field_name_); + PAIMON_RETURN_NOT_OK(Preconditions::CheckNotNull( + value_array, + fmt::format("field {} not in arrow_array when AddBatch to BTreeGlobalIndexWriter", + field_name_))); // Process each element in the array - for (int64_t i = 0; i < array->length(); ++i) { - int64_t row_id = current_row_id_ + i; - - if (array->IsNull(i)) { + PAIMON_ASSIGN_OR_RAISE(std::vector literals, + LiteralConverter::ConvertLiteralsFromArray(*value_array, + /*own_data=*/true)); + for (size_t i = 0; i < literals.size(); ++i) { + int64_t row_id = row_ids[i]; + const auto& literal = literals[i]; + max_row_id_ = std::max(max_row_id_, row_id); + if (literal.IsNull()) { // Track null values - null_bitmap_->Add(row_id); - has_nulls_ = true; + null_bitmap_.Add(row_id); continue; } - - // Convert array element to binary key - // Use type-specific binary serialization to match Java format - std::shared_ptr key_bytes; - - // Get the value as binary based on array 
type - auto type_id = array->type_id(); - - switch (type_id) { - case arrow::Type::STRING: - case arrow::Type::BINARY: { - auto str_array = std::static_pointer_cast(array); - auto view = str_array->GetView(i); - key_bytes = std::make_shared(view.size(), pool_.get()); - memcpy(key_bytes->data(), view.data(), view.size()); - break; - } - case arrow::Type::INT32: { - auto int_array = std::static_pointer_cast(array); - int32_t value = int_array->Value(i); - // Store as 4-byte little-endian to match Java's DataOutputStream.writeInt - key_bytes = std::make_shared(sizeof(int32_t), pool_.get()); - memcpy(key_bytes->data(), &value, sizeof(int32_t)); - break; - } - case arrow::Type::INT64: { - auto int_array = std::static_pointer_cast(array); - int64_t value = int_array->Value(i); - // Store as 8-byte little-endian to match Java's DataOutputStream.writeLong - key_bytes = std::make_shared(sizeof(int64_t), pool_.get()); - memcpy(key_bytes->data(), &value, sizeof(int64_t)); - break; - } - case arrow::Type::FLOAT: { - auto float_array = std::static_pointer_cast(array); - float value = float_array->Value(i); - // Store as 4-byte IEEE 754 to match Java's DataOutputStream.writeFloat - key_bytes = std::make_shared(sizeof(float), pool_.get()); - memcpy(key_bytes->data(), &value, sizeof(float)); - break; - } - case arrow::Type::DOUBLE: { - auto double_array = std::static_pointer_cast(array); - double value = double_array->Value(i); - // Store as 8-byte IEEE 754 to match Java's DataOutputStream.writeDouble - key_bytes = std::make_shared(sizeof(double), pool_.get()); - memcpy(key_bytes->data(), &value, sizeof(double)); - break; - } - case arrow::Type::BOOL: { - auto bool_array = std::static_pointer_cast(array); - bool value = bool_array->Value(i); - // Store as single byte (0 or 1) - key_bytes = std::make_shared(1, pool_.get()); - key_bytes->data()[0] = value ? 
1 : 0; - break; - } - case arrow::Type::DATE32: { - auto date_array = std::static_pointer_cast(array); - int32_t value = date_array->Value(i); - // Store as 4-byte int32 to match Java's writeInt for DATE type - key_bytes = std::make_shared(sizeof(int32_t), pool_.get()); - memcpy(key_bytes->data(), &value, sizeof(int32_t)); - break; - } - case arrow::Type::TIMESTAMP: { - auto ts_array = std::static_pointer_cast(array); - auto ts_type = std::static_pointer_cast(array->type()); - int32_t precision = DateTimeUtils::GetPrecisionFromType(ts_type); - auto time_type = DateTimeUtils::GetTimeTypeFromArrowType(ts_type); - int64_t raw_value = ts_array->Value(i); - auto [milli, nano] = DateTimeUtils::TimestampConverter( - raw_value, time_type, DateTimeUtils::TimeType::MILLISECOND, - DateTimeUtils::TimeType::NANOSECOND); - if (Timestamp::IsCompact(precision)) { - // compact: writeLong(millisecond) — 8 bytes - key_bytes = std::make_shared(sizeof(int64_t), pool_.get()); - memcpy(key_bytes->data(), &milli, sizeof(int64_t)); - } else { - // non-compact: writeLong(millisecond) + writeVarLenInt(nanoOfMillisecond) - MemorySliceOutput ts_out(13, pool_.get()); - ts_out.WriteValue(milli); - PAIMON_RETURN_NOT_OK(ts_out.WriteVarLenInt(static_cast(nano))); - auto slice = ts_out.ToSlice(); - key_bytes = slice.GetHeapMemory(); - } - break; + if (last_key_) { + PAIMON_ASSIGN_OR_RAISE(int32_t cmp, literal.CompareTo(last_key_.value())); + if (cmp > 0) { + PAIMON_RETURN_NOT_OK(Flush()); + } else if (cmp < 0) { + return Status::Invalid( + fmt::format("Users must keep written keys monotonically incremental in " + "BTreeGlobalIndexWriter, current literal {}, last_key {}", + literal.ToString(), last_key_.value().ToString())); } - default: - return Status::NotImplemented("Unsupported arrow type for BTree index: " + - array->type()->ToString()); } - - key_to_row_ids[key_bytes].push_back(row_id); - } - - // Write each key and its row IDs to the SST file - for (const auto& [key_bytes, row_ids] : 
key_to_row_ids) { - // Track first and last keys + last_key_ = literal; + current_row_ids_.push_back(row_id); if (!first_key_) { - first_key_ = key_bytes; + first_key_ = literal; } - last_key_ = key_bytes; - - // Write key-value pair - PAIMON_RETURN_NOT_OK(WriteKeyValue(key_bytes, row_ids)); } - - current_row_id_ += array->length(); return Status::OK(); } -Status BTreeGlobalIndexWriter::WriteKeyValue(std::shared_ptr key, - const std::vector& row_ids) { - PAIMON_ASSIGN_OR_RAISE(std::shared_ptr value, SerializeRowIds(row_ids)); - - return sst_writer_->Write(std::move(key), std::move(value)); -} - -Result> BTreeGlobalIndexWriter::SerializeRowIds( - const std::vector& row_ids) { - // Format: [num_row_ids (VarLenLong)][row_id1 (VarLenLong)][row_id2]... - // Use VarLenLong for row IDs to match Java's MemorySliceOutput.writeVarLenLong - int32_t estimated_size = 10 + row_ids.size() * 10; // Conservative estimate - auto output = std::make_shared(estimated_size, pool_.get()); - - PAIMON_RETURN_NOT_OK(output->WriteVarLenLong(static_cast(row_ids.size()))); - for (int64_t row_id : row_ids) { - PAIMON_RETURN_NOT_OK(output->WriteVarLenLong(row_id)); +Status BTreeGlobalIndexWriter::Flush() { + if (current_row_ids_.empty()) { + return Status::OK(); } - - auto slice = output->ToSlice(); - return slice.CopyBytes(pool_.get()); -} - -int32_t BTreeGlobalIndexWriter::CompareBinaryKeys(const std::shared_ptr& a, - const std::shared_ptr& b) const { - if (!a || !b) return 0; - size_t min_len = std::min(a->size(), b->size()); - int cmp = memcmp(a->data(), b->data(), min_len); - if (cmp != 0) return cmp < 0 ? 
-1 : 1; - if (a->size() < b->size()) return -1; - if (a->size() > b->size()) return 1; - return 0; + MemorySliceOutput output(current_row_ids_.size() * 9 + 5, pool_.get()); + PAIMON_RETURN_NOT_OK(output.WriteVarLenInt(current_row_ids_.size())); + for (int64_t row_id : current_row_ids_) { + PAIMON_RETURN_NOT_OK(output.WriteVarLenLong(row_id)); + } + current_row_ids_.clear(); + assert(last_key_); + PAIMON_ASSIGN_OR_RAISE(std::shared_ptr key_bytes, + KeySerializer::SerializeKey(last_key_.value(), key_type_, pool_.get())); + return sst_writer_->Write(std::move(key_bytes), output.ToSlice().CopyBytes(pool_.get())); } -Result> BTreeGlobalIndexWriter::WriteNullBitmap( +Result> BTreeGlobalIndexWriter::WriteNullBitmap( const std::shared_ptr& out) { - if (!has_nulls_ || null_bitmap_->IsEmpty()) { - return std::shared_ptr(nullptr); + if (null_bitmap_.IsEmpty()) { + return std::optional(); } + std::shared_ptr bitmap_bytes = null_bitmap_.Serialize(pool_.get()); + uint32_t crc = CRC32C::calculate(bitmap_bytes->data(), bitmap_bytes->size()); - // Serialize null bitmap - auto bitmap_bytes = null_bitmap_->Serialize(pool_.get()); - if (!bitmap_bytes || bitmap_bytes->size() == 0) { - return std::shared_ptr(nullptr); - } + MemorySliceOutput slice_out(bitmap_bytes->size() + 4, pool_.get()); + slice_out.WriteBytes(bitmap_bytes); + slice_out.WriteValue(static_cast(crc)); // Get current position for the block handle PAIMON_ASSIGN_OR_RAISE(int64_t offset, out->GetPos()); - - // Write bitmap data - PAIMON_RETURN_NOT_OK(out->Write(bitmap_bytes->data(), bitmap_bytes->size())); - - // Calculate and write CRC32C - uint32_t crc = CRC32C::calculate(bitmap_bytes->data(), bitmap_bytes->size()); - PAIMON_RETURN_NOT_OK(out->Write(reinterpret_cast(&crc), sizeof(crc))); - - return std::make_shared(offset, bitmap_bytes->size()); + PAIMON_RETURN_NOT_OK(sst_writer_->WriteSlice(slice_out.ToSlice())); + return std::optional(BlockHandle(offset, bitmap_bytes->size())); } Result> 
BTreeGlobalIndexWriter::Finish() { - if (current_row_id_ == 0) { - // No data was written, return empty metadata - return std::vector(); - } + // write remaining row ids + PAIMON_RETURN_NOT_OK(Flush()); // Flush any remaining data in the data block writer PAIMON_RETURN_NOT_OK(sst_writer_->Flush()); - // Write null bitmap first (matches Java write order: null bitmap → bloom filter → index block) - PAIMON_ASSIGN_OR_RAISE(std::shared_ptr null_bitmap_handle, + // Write null bitmap first + PAIMON_ASSIGN_OR_RAISE(std::optional null_bitmap_handle, WriteNullBitmap(output_stream_)); - + // write bloom filter (currently is always null, but we could add it for equal + // and in condition.) + PAIMON_ASSIGN_OR_RAISE(std::optional bloom_filter_handle, + sst_writer_->WriteBloomFilter()); // Write index block PAIMON_ASSIGN_OR_RAISE(BlockHandle index_block_handle, sst_writer_->WriteIndexBlock()); - // Write BTree file footer (no bloom filter) - auto index_block_handle_ptr = - std::make_shared(index_block_handle.Offset(), index_block_handle.Size()); - auto footer = - std::make_shared(nullptr, index_block_handle_ptr, null_bitmap_handle); + // Write BTree file footer + auto footer = std::make_shared(bloom_filter_handle, index_block_handle, + null_bitmap_handle); auto footer_slice = BTreeFileFooter::Write(footer, pool_.get()); - auto footer_bytes = footer_slice.CopyBytes(pool_.get()); - PAIMON_RETURN_NOT_OK(output_stream_->Write(footer_bytes->data(), footer_bytes->size())); + PAIMON_RETURN_NOT_OK(sst_writer_->WriteSlice(footer_slice)); - // Close the output stream PAIMON_RETURN_NOT_OK(output_stream_->Close()); - // Get file size - PAIMON_ASSIGN_OR_RAISE(int64_t file_size, file_writer_->GetFileSize(file_name_)); + if (!first_key_ && null_bitmap_.IsEmpty()) { + return Status::Invalid("Should never write an empty btree index file."); + } + // Get file size + std::shared_ptr first_key_bytes; + std::shared_ptr last_key_bytes; + if (first_key_) { + PAIMON_ASSIGN_OR_RAISE(first_key_bytes, 
KeySerializer::SerializeKey( + first_key_.value(), key_type_, pool_.get())); + } + if (last_key_) { + PAIMON_ASSIGN_OR_RAISE( + last_key_bytes, KeySerializer::SerializeKey(last_key_.value(), key_type_, pool_.get())); + } // Create index meta - auto index_meta = std::make_shared(first_key_, last_key_, has_nulls_); + auto index_meta = + std::make_shared(first_key_bytes, last_key_bytes, !null_bitmap_.IsEmpty()); auto meta_bytes = index_meta->Serialize(pool_.get()); // Create GlobalIndexIOMeta - std::string file_path = file_writer_->ToPath(file_name_); - GlobalIndexIOMeta io_meta(file_path, file_size, current_row_id_ - 1, meta_bytes); - + std::string file_path = file_writer_->ToPath(index_file_name_); + PAIMON_ASSIGN_OR_RAISE(int64_t file_size, file_writer_->GetFileSize(index_file_name_)); + GlobalIndexIOMeta io_meta(file_path, file_size, max_row_id_, meta_bytes); return std::vector{io_meta}; } diff --git a/src/paimon/common/global_index/btree/btree_global_index_writer.h b/src/paimon/common/global_index/btree/btree_global_index_writer.h index 96035ad37..de72773d7 100644 --- a/src/paimon/common/global_index/btree/btree_global_index_writer.h +++ b/src/paimon/common/global_index/btree/btree_global_index_writer.h @@ -25,72 +25,89 @@ #include "paimon/common/sst/sst_file_writer.h" #include "paimon/global_index/global_index_writer.h" #include "paimon/global_index/io/global_index_file_writer.h" +#include "paimon/predicate/literal.h" #include "paimon/utils/roaring_bitmap64.h" - namespace paimon { /// Writer for BTree Global Index files. /// This writer builds an SST file where each key maps to a list of row IDs. +/// Note that users must keep written keys monotonically incremental. All null keys are stored in a +/// separate bitmap, which will be serialized and appended to the file end on close. 
The layout is +/// as below: +/// +/// +-----------------------------------+------+ +/// | Footer | | +/// +-----------------------------------+ | +/// | Index Block | +--> Loaded on open +/// +-----------------------------------+ | +/// | Bloom Filter Block | | +/// +-----------------------------------+------+ +/// | Null Bitmap Block | | +/// +-----------------------------------+ | +/// | Data Block | | +/// +-----------------------------------+ +--> Loaded on requested +/// | ...... | | +/// +-----------------------------------+ | +/// | Data Block | | +/// +-----------------------------------+------+ +/// +/// For efficiency, we combine entries with the same keys and store a compact list of row ids for +/// each key. class BTreeGlobalIndexWriter : public GlobalIndexWriter { public: /// Factory method that may fail during initialization (e.g., /// Arrow schema import). Use this instead of the constructor. static Result> Create( - const std::string& field_name, ::ArrowSchema* arrow_schema, - const std::shared_ptr& file_writer, + const std::string& field_name, const std::shared_ptr& arrow_type, + const std::shared_ptr& file_writer, int32_t block_size, const std::shared_ptr& compression_factory, - const std::shared_ptr& pool, int32_t block_size); + const std::shared_ptr& pool); ~BTreeGlobalIndexWriter() override = default; - /// Add a batch of data from an Arrow array. - /// The Arrow array should contain a single column of the indexed field. - Status AddBatch(::ArrowArray* arrow_array) override; + Status AddBatch(::ArrowArray* arrow_array) override { + // TODO(xinyu.lxy): refactor AddBatch with relative row ids + return Status::Invalid("BTreeGlobalIndexWriter not support AddBatch without row_ids"); + } + + Status AddBatch(::ArrowArray* arrow_array, const std::vector& row_ids); /// Finish writing and return the index metadata. 
Result> Finish() override; private: - BTreeGlobalIndexWriter( - const std::string& field_name, std::shared_ptr arrow_type, - const std::shared_ptr& file_writer, - const std::shared_ptr& compression_factory, - const std::shared_ptr& pool, int32_t block_size); - - // Helper method to write a key-value pair to the SST file - Status WriteKeyValue(std::shared_ptr key, const std::vector& row_ids); - - // Helper method to serialize row IDs into a Bytes object - Result> SerializeRowIds(const std::vector& row_ids); + BTreeGlobalIndexWriter(const std::string& field_name, + const std::shared_ptr& arrow_type, + const std::shared_ptr& key_type, + const std::shared_ptr& file_writer, + const std::string& index_file_name, + const std::shared_ptr& output_stream, + std::unique_ptr&& sst_writer, + const std::shared_ptr& pool); - // Helper method to write null bitmap to the output stream - Result> WriteNullBitmap(const std::shared_ptr& out); + Status Flush(); - // Helper method to compare binary keys for std::map ordering - int32_t CompareBinaryKeys(const std::shared_ptr& a, - const std::shared_ptr& b) const; + Result> WriteNullBitmap(const std::shared_ptr& out); private: std::string field_name_; std::shared_ptr arrow_type_; + std::shared_ptr key_type_; std::shared_ptr pool_; - std::shared_ptr file_writer_; - // SST file writer (declared after pool_ to ensure correct destruction order) - std::unique_ptr sst_writer_; + std::shared_ptr file_writer_; + std::string index_file_name_; std::shared_ptr output_stream_; - std::string file_name_; + std::unique_ptr sst_writer_; - // Track first and last keys for index meta - std::shared_ptr first_key_; - std::shared_ptr last_key_; + // TODO(xinyu.lxy): remove it when GlobalIndexIOMeta is updated + int64_t max_row_id_ = -1; + std::optional first_key_; + std::optional last_key_; // Null bitmap tracking - std::shared_ptr null_bitmap_; - bool has_nulls_; - - // Current row ID counter - int64_t current_row_id_; + RoaringBitmap64 null_bitmap_; + 
std::vector current_row_ids_; }; } // namespace paimon diff --git a/src/paimon/common/global_index/btree/btree_global_index_writer_test.cpp b/src/paimon/common/global_index/btree/btree_global_index_writer_test.cpp deleted file mode 100644 index bc4289563..000000000 --- a/src/paimon/common/global_index/btree/btree_global_index_writer_test.cpp +++ /dev/null @@ -1,354 +0,0 @@ -/* - * Copyright 2026-present Alibaba Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "paimon/common/global_index/btree/btree_global_index_writer.h" - -#include "arrow/c/bridge.h" -#include "arrow/c/helpers.h" -#include "arrow/ipc/json_simple.h" -#include "gtest/gtest.h" -#include "paimon/common/compression/block_compression_factory.h" -#include "paimon/fs/file_system.h" -#include "paimon/global_index/io/global_index_file_writer.h" -#include "paimon/memory/memory_pool.h" -#include "paimon/testing/utils/testharness.h" - -namespace paimon::test { - -class FakeGlobalIndexFileWriter : public GlobalIndexFileWriter { - public: - FakeGlobalIndexFileWriter(const std::shared_ptr& fs, const std::string& base_path) - : fs_(fs), base_path_(base_path) {} - - Result NewFileName(const std::string& prefix) const override { - return prefix + "_" + std::to_string(file_counter_++); - } - - Result> NewOutputStream( - const std::string& file_name) const override { - return fs_->Create(base_path_ + "/" + file_name, true); - } - - Result GetFileSize(const std::string& file_name) const 
override { - PAIMON_ASSIGN_OR_RAISE(auto file_status, fs_->GetFileStatus(base_path_ + "/" + file_name)); - return static_cast(file_status->GetLen()); - } - - std::string ToPath(const std::string& file_name) const override { - return base_path_ + "/" + file_name; - } - - private: - std::shared_ptr fs_; - std::string base_path_; - mutable int64_t file_counter_{0}; -}; - -class BTreeGlobalIndexWriterTest : public ::testing::Test { - protected: - void SetUp() override { - pool_ = GetDefaultPool(); - test_dir_ = UniqueTestDirectory::Create("local"); - fs_ = test_dir_->GetFileSystem(); - base_path_ = test_dir_->Str(); - compression_factory_ = BlockCompressionFactory::Create(BlockCompressionType::NONE).value(); - } - - void TearDown() override {} - - // Helper to create ArrowSchema from arrow type - std::unique_ptr CreateArrowSchema(const std::shared_ptr& type, - const std::string& field_name) { - auto schema = arrow::schema({arrow::field(field_name, type)}); - auto c_schema = std::make_unique(); - EXPECT_TRUE(arrow::ExportSchema(*schema, c_schema.get()).ok()); - return c_schema; - } - - std::shared_ptr pool_; - std::shared_ptr compression_factory_; - std::unique_ptr test_dir_; - std::shared_ptr fs_; - std::string base_path_; -}; - -TEST_F(BTreeGlobalIndexWriterTest, WriteIntData) { - // Create a fake file writer - auto file_writer = std::make_shared(fs_, base_path_); - - // Create ArrowSchema - auto c_schema = CreateArrowSchema(arrow::int32(), "int_field"); - - // Create the BTree global index writer - ASSERT_OK_AND_ASSIGN(auto writer, - BTreeGlobalIndexWriter::Create("int_field", c_schema.get(), file_writer, - compression_factory_, pool_, 4096)); - - // Create an Arrow array with int values - auto array = - arrow::ipc::internal::json::ArrayFromJSON(arrow::int32(), "[1, 2, 3, 2, 1, 4, 5, 5, 5]") - .ValueOrDie(); - - // Export to ArrowArray - ArrowArray c_array; - ASSERT_TRUE(arrow::ExportArray(*array, &c_array).ok()); - - // Add batch - auto status = 
writer->AddBatch(&c_array); - ASSERT_OK(status); - - // Finish writing - ASSERT_OK_AND_ASSIGN(auto metas, writer->Finish()); - ASSERT_EQ(metas.size(), 1); - - // Verify metadata - const auto& meta = metas[0]; - EXPECT_FALSE(meta.file_path.empty()); - EXPECT_GT(meta.file_size, 0); - EXPECT_EQ(meta.range_end, 8); // 9 elements, 0-indexed - - // Release the ArrowArray - ArrowArrayRelease(&c_array); - - // Release the ArrowSchema - ArrowSchemaRelease(c_schema.get()); -} - -TEST_F(BTreeGlobalIndexWriterTest, WriteStringData) { - // Create a fake file writer - auto file_writer = std::make_shared(fs_, base_path_); - - // Create ArrowSchema - auto c_schema = CreateArrowSchema(arrow::utf8(), "string_field"); - - // Create the BTree global index writer - ASSERT_OK_AND_ASSIGN(auto writer, - BTreeGlobalIndexWriter::Create("string_field", c_schema.get(), file_writer, - compression_factory_, pool_, 4096)); - - // Create an Arrow array with string values - auto array = arrow::ipc::internal::json::ArrayFromJSON( - arrow::utf8(), R"(["apple", "banana", "cherry", "apple", "banana"])") - .ValueOrDie(); - - // Export to ArrowArray - ArrowArray c_array; - ASSERT_TRUE(arrow::ExportArray(*array, &c_array).ok()); - - // Add batch - auto status = writer->AddBatch(&c_array); - ASSERT_OK(status); - - // Finish writing - ASSERT_OK_AND_ASSIGN(auto metas, writer->Finish()); - ASSERT_EQ(metas.size(), 1); - - // Verify metadata - const auto& meta = metas[0]; - EXPECT_FALSE(meta.file_path.empty()); - EXPECT_GT(meta.file_size, 0); - - // Release the ArrowArray - ArrowArrayRelease(&c_array); - - // Release the ArrowSchema - ArrowSchemaRelease(c_schema.get()); -} - -TEST_F(BTreeGlobalIndexWriterTest, WriteWithNulls) { - // Create a fake file writer - auto file_writer = std::make_shared(fs_, base_path_); - - // Create ArrowSchema - auto c_schema = CreateArrowSchema(arrow::int32(), "int_field"); - - // Create the BTree global index writer - ASSERT_OK_AND_ASSIGN(auto writer, - 
BTreeGlobalIndexWriter::Create("int_field", c_schema.get(), file_writer, - compression_factory_, pool_, 4096)); - - // Create an Arrow array with null values - auto array = arrow::ipc::internal::json::ArrayFromJSON(arrow::int32(), "[1, null, 3, null, 5]") - .ValueOrDie(); - - // Export to ArrowArray - ArrowArray c_array; - ASSERT_TRUE(arrow::ExportArray(*array, &c_array).ok()); - - // Add batch - auto status = writer->AddBatch(&c_array); - ASSERT_OK(status); - - // Finish writing - ASSERT_OK_AND_ASSIGN(auto metas, writer->Finish()); - ASSERT_EQ(metas.size(), 1); - - // Verify metadata - const auto& meta = metas[0]; - EXPECT_FALSE(meta.file_path.empty()); - EXPECT_GT(meta.file_size, 0); - EXPECT_NE(meta.metadata, nullptr); - - // Release the ArrowArray - ArrowArrayRelease(&c_array); - - // Release the ArrowSchema - ArrowSchemaRelease(c_schema.get()); -} - -TEST_F(BTreeGlobalIndexWriterTest, WriteMultipleBatches) { - // Create a fake file writer - auto file_writer = std::make_shared(fs_, base_path_); - - // Create ArrowSchema - auto c_schema = CreateArrowSchema(arrow::int32(), "int_field"); - - // Create the BTree global index writer - ASSERT_OK_AND_ASSIGN(auto writer, - BTreeGlobalIndexWriter::Create("int_field", c_schema.get(), file_writer, - compression_factory_, pool_, 4096)); - - // Create first batch - auto array1 = - arrow::ipc::internal::json::ArrayFromJSON(arrow::int32(), "[1, 2, 3]").ValueOrDie(); - - ArrowArray c_array1; - ASSERT_TRUE(arrow::ExportArray(*array1, &c_array1).ok()); - - // Add first batch - auto status1 = writer->AddBatch(&c_array1); - ASSERT_OK(status1); - ArrowArrayRelease(&c_array1); - - // Create second batch - auto array2 = - arrow::ipc::internal::json::ArrayFromJSON(arrow::int32(), "[4, 5, 6]").ValueOrDie(); - - ArrowArray c_array2; - ASSERT_TRUE(arrow::ExportArray(*array2, &c_array2).ok()); - - // Add second batch - auto status2 = writer->AddBatch(&c_array2); - ASSERT_OK(status2); - ArrowArrayRelease(&c_array2); - - // Finish writing - 
ASSERT_OK_AND_ASSIGN(auto metas, writer->Finish()); - ASSERT_EQ(metas.size(), 1); - - // Verify metadata - const auto& meta = metas[0]; - EXPECT_EQ(meta.range_end, 5); // 6 elements, 0-indexed - - // Release the ArrowSchema - ArrowSchemaRelease(c_schema.get()); -} - -TEST_F(BTreeGlobalIndexWriterTest, WriteEmptyData) { - // Create a fake file writer - auto file_writer = std::make_shared(fs_, base_path_); - - // Create ArrowSchema - auto c_schema = CreateArrowSchema(arrow::int32(), "int_field"); - - // Create the BTree global index writer - ASSERT_OK_AND_ASSIGN(auto writer, - BTreeGlobalIndexWriter::Create("int_field", c_schema.get(), file_writer, - compression_factory_, pool_, 4096)); - - // Finish without adding any data - ASSERT_OK_AND_ASSIGN(auto metas, writer->Finish()); - ASSERT_EQ(metas.size(), 0); // No data, no metadata - - // Release the ArrowSchema - ArrowSchemaRelease(c_schema.get()); -} - -TEST_F(BTreeGlobalIndexWriterTest, WriteAllNulls) { - // Create a fake file writer - auto file_writer = std::make_shared(fs_, base_path_); - - // Create ArrowSchema - auto c_schema = CreateArrowSchema(arrow::int32(), "int_field"); - - // Create the BTree global index writer - ASSERT_OK_AND_ASSIGN(auto writer, - BTreeGlobalIndexWriter::Create("int_field", c_schema.get(), file_writer, - compression_factory_, pool_, 4096)); - - // Create an Arrow array with all null values - auto array = arrow::ipc::internal::json::ArrayFromJSON(arrow::int32(), "[null, null, null]") - .ValueOrDie(); - - // Export to ArrowArray - ArrowArray c_array; - ASSERT_TRUE(arrow::ExportArray(*array, &c_array).ok()); - - // Add batch - auto status = writer->AddBatch(&c_array); - ASSERT_OK(status); - - // Finish writing - ASSERT_OK_AND_ASSIGN(auto metas, writer->Finish()); - ASSERT_EQ(metas.size(), 1); - - // Verify metadata - should have null bitmap but no keys - const auto& meta = metas[0]; - EXPECT_NE(meta.metadata, nullptr); - - // Release the ArrowArray - ArrowArrayRelease(&c_array); - - // 
Release the ArrowSchema - ArrowSchemaRelease(c_schema.get()); -} - -TEST_F(BTreeGlobalIndexWriterTest, WriteDoubleData) { - // Create a fake file writer - auto file_writer = std::make_shared(fs_, base_path_); - - // Create ArrowSchema - auto c_schema = CreateArrowSchema(arrow::float64(), "double_field"); - - // Create the BTree global index writer - ASSERT_OK_AND_ASSIGN(auto writer, - BTreeGlobalIndexWriter::Create("double_field", c_schema.get(), file_writer, - compression_factory_, pool_, 4096)); - - // Create an Arrow array with double values - auto array = arrow::ipc::internal::json::ArrayFromJSON(arrow::float64(), "[1.5, 2.5, 3.5, 1.5]") - .ValueOrDie(); - - // Export to ArrowArray - ArrowArray c_array; - ASSERT_TRUE(arrow::ExportArray(*array, &c_array).ok()); - - // Add batch - auto status = writer->AddBatch(&c_array); - ASSERT_OK(status); - - // Finish writing - ASSERT_OK_AND_ASSIGN(auto metas, writer->Finish()); - ASSERT_EQ(metas.size(), 1); - - // Release the ArrowArray - ArrowArrayRelease(&c_array); - - // Release the ArrowSchema - ArrowSchemaRelease(c_schema.get()); -} - -} // namespace paimon::test diff --git a/src/paimon/common/global_index/btree/btree_global_indexer.cpp b/src/paimon/common/global_index/btree/btree_global_indexer.cpp index e72f4392a..9e96acc23 100644 --- a/src/paimon/common/global_index/btree/btree_global_indexer.cpp +++ b/src/paimon/common/global_index/btree/btree_global_indexer.cpp @@ -15,7 +15,6 @@ */ #include "paimon/common/global_index/btree/btree_global_indexer.h" -#include #include #include @@ -25,329 +24,163 @@ #include "paimon/common/global_index/btree/btree_global_index_reader.h" #include "paimon/common/global_index/btree/btree_global_index_writer.h" #include "paimon/common/global_index/btree/btree_index_meta.h" +#include "paimon/common/global_index/btree/key_serializer.h" #include "paimon/common/memory/memory_slice.h" #include "paimon/common/memory/memory_slice_input.h" #include "paimon/common/options/memory_size.h" #include 
"paimon/common/utils/arrow/status_utils.h" #include "paimon/common/utils/crc32c.h" -#include "paimon/common/utils/date_time_utils.h" -#include "paimon/common/utils/field_type_utils.h" #include "paimon/common/utils/options_utils.h" +#include "paimon/common/utils/preconditions.h" #include "paimon/core/options/compress_options.h" -#include "paimon/data/timestamp.h" -#include "paimon/defs.h" -#include "paimon/file_index/bitmap_index_result.h" #include "paimon/global_index/bitmap_global_index_result.h" #include "paimon/memory/bytes.h" #include "paimon/utils/roaring_bitmap64.h" - namespace paimon { - -// Helper function to get cache size from options with default value -static Result GetBTreeIndexCacheSize(const std::map& options) { - auto str_result = - OptionsUtils::GetValueFromMap(options, Options::BTREE_INDEX_CACHE_SIZE); - if (!str_result.ok()) { - return 128 * 1024 * 1024; - } - return MemorySize::ParseBytes(str_result.value()); -} - -// Helper function to get high priority pool ratio from options with default value -static Result GetBTreeIndexHighPriorityPoolRatio( - const std::map& options) { - return OptionsUtils::GetValueFromMap( - options, Options::BTREE_INDEX_HIGH_PRIORITY_POOL_RATIO, 0.1); -} - Result> BTreeGlobalIndexer::CreateWriter( const std::string& field_name, ::ArrowSchema* arrow_schema, const std::shared_ptr& file_writer, const std::shared_ptr& pool) const { - // Read block size from options (default: 64 KB) - auto block_size_str_result = - OptionsUtils::GetValueFromMap(options_, Options::BTREE_INDEX_BLOCK_SIZE); - int32_t block_size = 64 * 1024; // default 64 KB - if (block_size_str_result.ok()) { - PAIMON_ASSIGN_OR_RAISE(int64_t parsed_size, - MemorySize::ParseBytes(block_size_str_result.value())); - block_size = static_cast(parsed_size); - } - // Read compression options - auto compress_str = OptionsUtils::GetValueFromMap( - options_, Options::BTREE_INDEX_COMPRESSION, "none"); - auto compress_level = - OptionsUtils::GetValueFromMap(options_, 
Options::BTREE_INDEX_COMPRESSION_LEVEL, 1); - CompressOptions compress_options{compress_str.value(), compress_level.value()}; + PAIMON_ASSIGN_OR_RAISE_FROM_ARROW(std::shared_ptr arrow_type, + arrow::ImportType(arrow_schema)); + // check data type + auto struct_type = std::dynamic_pointer_cast(arrow_type); + PAIMON_RETURN_NOT_OK(Preconditions::CheckNotNull( + struct_type, "arrow schema must be struct type when create BTreeGlobalIndexWriter")); + + // parse options + PAIMON_ASSIGN_OR_RAISE( + std::string block_size_str, + OptionsUtils::GetValueFromMap(options_, BtreeDefs::kBtreeIndexBlockSize, + BtreeDefs::kDefaultBtreeIndexBlockSize)); + PAIMON_ASSIGN_OR_RAISE(int32_t block_size, MemorySize::ParseBytes(block_size_str)); + PAIMON_ASSIGN_OR_RAISE( + std::string compress_str, + OptionsUtils::GetValueFromMap(options_, BtreeDefs::kBtreeIndexCompression, + BtreeDefs::kDefaultBtreeIndexCompression)); + PAIMON_ASSIGN_OR_RAISE( + int32_t compress_level, + OptionsUtils::GetValueFromMap(options_, BtreeDefs::kBtreeIndexCompressionLevel, + BtreeDefs::kDefaultBtreeIndexCompressionLevel)); + CompressOptions compress_options{compress_str, compress_level}; PAIMON_ASSIGN_OR_RAISE(std::shared_ptr compression_factory, BlockCompressionFactory::Create(compress_options)); - - PAIMON_ASSIGN_OR_RAISE(auto writer, - BTreeGlobalIndexWriter::Create(field_name, arrow_schema, file_writer, - compression_factory, pool, block_size)); - return writer; + return BTreeGlobalIndexWriter::Create(field_name, struct_type, file_writer, block_size, + compression_factory, pool); } -// Create a comparator function based on field type -// Keys are stored in binary format to match Java's DataOutputStream format -static std::function CreateComparator( - FieldType field_type, const std::shared_ptr& arrow_type) { - // For numeric types, compare as binary values in little-endian format - // to match Java's DataOutputStream.writeInt/writeLong format - switch (field_type) { - case FieldType::INT: - case 
FieldType::DATE: - return [](const MemorySlice& a, const MemorySlice& b) -> int32_t { - int32_t a_val = a.ReadInt(0); - int32_t b_val = b.ReadInt(0); - if (a_val < b_val) return -1; - if (a_val > b_val) return 1; - return 0; - }; - case FieldType::BIGINT: - return [](const MemorySlice& a, const MemorySlice& b) -> int32_t { - int64_t a_val = a.ReadLong(0); - int64_t b_val = b.ReadLong(0); - if (a_val < b_val) return -1; - if (a_val > b_val) return 1; - return 0; - }; - case FieldType::TIMESTAMP: { - int32_t precision = Timestamp::MILLIS_PRECISION; - if (arrow_type->id() == arrow::Type::TIMESTAMP) { - auto ts_type = std::static_pointer_cast(arrow_type); - precision = DateTimeUtils::GetPrecisionFromType(ts_type); - } - if (Timestamp::IsCompact(precision)) { - // compact: compare as int64 (millisecond only) - return [](const MemorySlice& a, const MemorySlice& b) -> int32_t { - int64_t a_val = a.ReadLong(0); - int64_t b_val = b.ReadLong(0); - if (a_val < b_val) return -1; - if (a_val > b_val) return 1; - return 0; - }; - } else { - // non-compact: compare millisecond first, then nanoOfMillisecond - return [](const MemorySlice& a, const MemorySlice& b) -> int32_t { - auto a_input = a.ToInput(); - auto b_input = b.ToInput(); - int64_t a_milli = a_input.ReadLong(); - int64_t b_milli = b_input.ReadLong(); - if (a_milli < b_milli) return -1; - if (a_milli > b_milli) return 1; - auto a_nano = a_input.ReadVarLenInt(); - auto b_nano = b_input.ReadVarLenInt(); - if (a_nano.ok() && b_nano.ok()) { - if (a_nano.value() < b_nano.value()) return -1; - if (a_nano.value() > b_nano.value()) return 1; - } - return 0; - }; - } - } - case FieldType::SMALLINT: - return [](const MemorySlice& a, const MemorySlice& b) -> int32_t { - int16_t a_val = a.ReadShort(0); - int16_t b_val = b.ReadShort(0); - if (a_val < b_val) return -1; - if (a_val > b_val) return 1; - return 0; - }; - case FieldType::TINYINT: - return [](const MemorySlice& a, const MemorySlice& b) -> int32_t { - int8_t a_val = 
a.ReadByte(0); - int8_t b_val = b.ReadByte(0); - if (a_val < b_val) return -1; - if (a_val > b_val) return 1; - return 0; - }; - case FieldType::FLOAT: - return [](const MemorySlice& a, const MemorySlice& b) -> int32_t { - // Read float from bytes (little-endian) - float a_val, b_val; - std::memcpy(&a_val, a.ReadStringView().data(), sizeof(float)); - std::memcpy(&b_val, b.ReadStringView().data(), sizeof(float)); - if (a_val < b_val) return -1; - if (a_val > b_val) return 1; - return 0; - }; - case FieldType::DOUBLE: - return [](const MemorySlice& a, const MemorySlice& b) -> int32_t { - // Read double from bytes (little-endian) - double a_val, b_val; - std::memcpy(&a_val, a.ReadStringView().data(), sizeof(double)); - std::memcpy(&b_val, b.ReadStringView().data(), sizeof(double)); - if (a_val < b_val) return -1; - if (a_val > b_val) return 1; - return 0; - }; - case FieldType::BOOLEAN: - return [](const MemorySlice& a, const MemorySlice& b) -> int32_t { - if (a.Length() == 0 || b.Length() == 0) return 0; - int8_t a_val = a.ReadByte(0); - int8_t b_val = b.ReadByte(0); - if (a_val < b_val) return -1; - if (a_val > b_val) return 1; - return 0; - }; - case FieldType::STRING: - case FieldType::BINARY: - default: - // For string/binary types, use lexicographic comparison - return [](const MemorySlice& a, const MemorySlice& b) -> int32_t { - size_t min_len = - std::min(static_cast(a.Length()), static_cast(b.Length())); - int cmp = memcmp(a.ReadStringView().data(), b.ReadStringView().data(), min_len); - if (cmp != 0) return cmp < 0 ? 
-1 : 1; - if (a.Length() < b.Length()) return -1; - if (a.Length() > b.Length()) return 1; - return 0; - }; - } -} Result> BTreeGlobalIndexer::CreateReader( ::ArrowSchema* arrow_schema, const std::shared_ptr& file_reader, const std::vector& files, const std::shared_ptr& pool) const { + // Get field type from arrow schema + PAIMON_ASSIGN_OR_RAISE_FROM_ARROW(std::shared_ptr schema, + arrow::ImportSchema(arrow_schema)); if (files.size() != 1) { return Status::Invalid( "invalid GlobalIndexIOMeta for BTreeGlobalIndex, exist multiple metas"); } const auto& meta = files[0]; - PAIMON_ASSIGN_OR_RAISE(std::shared_ptr in, - file_reader->GetInputStream(meta.file_path)); - - // Get field type from arrow schema - PAIMON_ASSIGN_OR_RAISE_FROM_ARROW(std::shared_ptr schema, - arrow::ImportSchema(arrow_schema)); if (schema->num_fields() != 1) { return Status::Invalid( "invalid schema for BTreeGlobalIndexReader, supposed to have single field."); } - auto arrow_type = schema->field(0)->type(); - PAIMON_ASSIGN_OR_RAISE(FieldType field_type, - FieldTypeUtils::ConvertToFieldType(arrow_type->id())); - + auto key_type = schema->field(0)->type(); // Create comparator based on field type - auto comparator = CreateComparator(field_type, arrow_type); + auto comparator = KeySerializer::CreateComparator(key_type, pool); + // get min, max key from meta data + auto index_meta = BTreeIndexMeta::Deserialize(meta.metadata, pool.get()); + std::optional min_key; + std::optional max_key; + if (index_meta->FirstKey()) { + PAIMON_ASSIGN_OR_RAISE( + min_key, KeySerializer::DeserializeKey(MemorySlice::Wrap(index_meta->FirstKey()), + key_type, pool.get())); + } + if (index_meta->LastKey()) { + PAIMON_ASSIGN_OR_RAISE( + max_key, KeySerializer::DeserializeKey(MemorySlice::Wrap(index_meta->LastKey()), + key_type, pool.get())); + } - // Wrap the comparator to return Result - MemorySlice::SliceComparator result_comparator = - [comparator](const MemorySlice& a, const MemorySlice& b) -> Result { - return 
comparator(a, b); - }; + // parse read options + PAIMON_ASSIGN_OR_RAISE( + std::string cache_size_str, + OptionsUtils::GetValueFromMap(options_, BtreeDefs::kBtreeIndexCacheSize, + BtreeDefs::kDefaultBtreeIndexCacheSize)); + PAIMON_ASSIGN_OR_RAISE(int64_t cache_size, MemorySize::ParseBytes(cache_size_str)); - // Read BTree file footer first - PAIMON_ASSIGN_OR_RAISE(int64_t cache_size, GetBTreeIndexCacheSize(options_)); - PAIMON_ASSIGN_OR_RAISE(double high_priority_pool_ratio, - GetBTreeIndexHighPriorityPoolRatio(options_)); + PAIMON_ASSIGN_OR_RAISE( + double high_priority_pool_ratio, + OptionsUtils::GetValueFromMap(options_, BtreeDefs::kBtreeIndexHighPriorityPoolRatio, + BtreeDefs::kDefaultBtreeIndexHighPriorityPoolRatio)); + // TODO(xinyu.lxy): pass cache_manager from param. auto cache_manager = std::make_shared(cache_size, high_priority_pool_ratio); + PAIMON_ASSIGN_OR_RAISE(std::shared_ptr in, + file_reader->GetInputStream(meta.file_path)); auto block_cache = std::make_shared(meta.file_path, in, cache_manager, pool); + // read footer PAIMON_ASSIGN_OR_RAISE(MemorySegment segment, - block_cache->GetBlock(meta.file_size - BTreeFileFooter::ENCODED_LENGTH, - BTreeFileFooter::ENCODED_LENGTH, true, + block_cache->GetBlock(meta.file_size - BTreeFileFooter::kEncodingLength, + BTreeFileFooter::kEncodingLength, true, /*decompress_func=*/nullptr)); auto footer_slice = MemorySlice::Wrap(segment); auto footer_input = footer_slice.ToInput(); PAIMON_ASSIGN_OR_RAISE(std::shared_ptr footer, BTreeFileFooter::Read(&footer_input)); - - // Create SST file reader with footer information - PAIMON_ASSIGN_OR_RAISE( - std::shared_ptr sst_file_reader, - SstFileReader::Create(in, *footer->GetIndexBlockHandle(), footer->GetBloomFilterHandle(), - result_comparator, cache_manager, pool)); - // prepare null_bitmap - PAIMON_ASSIGN_OR_RAISE(std::shared_ptr null_bitmap, - ReadNullBitmap(block_cache, footer->GetNullBitmapHandle())); - - auto index_meta = BTreeIndexMeta::Deserialize(meta.metadata, 
pool.get()); - - // Convert Bytes to MemorySlice for keys - MemorySlice min_key_slice(MemorySegment(), 0, 0); - MemorySlice max_key_slice(MemorySegment(), 0, 0); - bool has_min_key = false; - if (index_meta->FirstKey()) { - min_key_slice = MemorySlice::Wrap(index_meta->FirstKey()); - has_min_key = true; - } - if (index_meta->LastKey()) { - max_key_slice = MemorySlice::Wrap(index_meta->LastKey()); - } + PAIMON_ASSIGN_OR_RAISE(RoaringBitmap64 null_bitmap, + ReadNullBitmap(block_cache, footer->GetNullBitmapHandle(), pool.get())); - // Get timestamp precision if applicable - int32_t ts_precision = Timestamp::MILLIS_PRECISION; - if (arrow_type->id() == arrow::Type::TIMESTAMP) { - auto ts_type = std::static_pointer_cast(arrow_type); - ts_precision = DateTimeUtils::GetPrecisionFromType(ts_type); - } + // Close the temporary block_cache to remove its entries from the shared LRU cache. + // This prevents use-after-free: the eviction callback captures `this` (the BlockCache), + // and if the BlockCache is destroyed without closing, a later eviction would invoke + // the callback on a dangling pointer. 
+ block_cache->Close(); - return std::make_shared(sst_file_reader, null_bitmap, min_key_slice, - max_key_slice, has_min_key, files, pool, - comparator, ts_precision); -} + // create SST file reader with footer information + // TODO(xinyu.lxy): pass block cache to SstFileReader rather than cache_manager + PAIMON_ASSIGN_OR_RAISE( + std::shared_ptr sst_file_reader, + SstFileReader::Create(in, footer->GetIndexBlockHandle(), footer->GetBloomFilterHandle(), + comparator, cache_manager, pool)); -Result> BTreeGlobalIndexer::ToGlobalIndexResult( - int64_t range_end, const std::shared_ptr& result) { - if (auto remain = std::dynamic_pointer_cast(result)) { - return std::make_shared([range_end]() -> Result { - RoaringBitmap64 bitmap; - bitmap.AddRange(0, range_end + 1); - return bitmap; - }); - } else if (auto skip = std::dynamic_pointer_cast(result)) { - return std::make_shared( - []() -> Result { return RoaringBitmap64(); }); - } else if (auto bitmap_result = std::dynamic_pointer_cast(result)) { - return std::make_shared( - [bitmap_result]() -> Result { - PAIMON_ASSIGN_OR_RAISE(const RoaringBitmap32* bitmap, bitmap_result->GetBitmap()); - return RoaringBitmap64(*bitmap); - }); - } - return Status::Invalid( - "invalid FileIndexResult, supposed to be Remain or Skip or BitmapIndexResult"); + return std::make_shared(sst_file_reader, std::move(null_bitmap), + min_key, max_key, key_type, pool); } -Result> BTreeGlobalIndexer::ReadNullBitmap( - const std::shared_ptr& cache, const std::shared_ptr& block_handle) { - auto null_bitmap = std::make_shared(); - if (block_handle == nullptr) { +Result BTreeGlobalIndexer::ReadNullBitmap( + const std::shared_ptr& cache, const std::optional& block_handle, + MemoryPool* pool) { + RoaringBitmap64 null_bitmap; + if (!block_handle.has_value()) { return null_bitmap; } - // Read bytes and crc value - PAIMON_ASSIGN_OR_RAISE(auto segment, - cache->GetBlock(block_handle->Offset(), block_handle->Size() + 4, false, - /*decompress_func=*/nullptr)); + // 
read bytes and crc value + PAIMON_ASSIGN_OR_RAISE( + MemorySegment segment, + cache->GetBlock(block_handle->Offset(), block_handle->Size() + 4, /*is_index=*/false, + /*decompress_func=*/nullptr)); auto slice = MemorySlice::Wrap(segment); auto slice_input = slice.ToInput(); - // Read null bitmap data - auto null_bitmap_slice = slice_input.ReadSlice(block_handle->Size()); - auto null_bitmap_view = null_bitmap_slice.ReadStringView(); - + // read null bitmap data + auto null_bitmap_bytes = slice_input.ReadSlice(block_handle->Size()).CopyBytes(pool); // Calculate CRC32C checksum - uint32_t crc_value = CRC32C::calculate(null_bitmap_view.data(), null_bitmap_view.size()); - - // Read expected CRC value (stored as native uint32_t) - auto crc_slice = slice_input.ReadSlice(sizeof(uint32_t)); - uint32_t expected_crc_value; - std::memcpy(&expected_crc_value, crc_slice.ReadStringView().data(), sizeof(expected_crc_value)); + uint32_t crc_value = CRC32C::calculate(null_bitmap_bytes->data(), null_bitmap_bytes->size()); + int32_t expected_crc_value = slice_input.ReadInt(); // Verify CRC checksum - if (crc_value != expected_crc_value) { - return Status::Invalid("CRC check failure during decoding null bitmap. Expected: " + - std::to_string(expected_crc_value) + - ", Calculated: " + std::to_string(crc_value)); + if (crc_value != static_cast(expected_crc_value)) { + return Status::Invalid(fmt::format( + "CRC check failure during decoding null bitmap. 
Expected: {}, Calculated: {}", + expected_crc_value, crc_value)); } - // Deserialize null bitmap + // deserialize null bitmap PAIMON_RETURN_NOT_OK( - null_bitmap->Deserialize(null_bitmap_view.data(), null_bitmap_view.size())); - + null_bitmap.Deserialize(null_bitmap_bytes->data(), null_bitmap_bytes->size())); return null_bitmap; } diff --git a/src/paimon/common/global_index/btree/btree_global_indexer.h b/src/paimon/common/global_index/btree/btree_global_indexer.h index 835ae9613..e1eda2478 100644 --- a/src/paimon/common/global_index/btree/btree_global_indexer.h +++ b/src/paimon/common/global_index/btree/btree_global_indexer.h @@ -17,18 +17,37 @@ #pragma once #include +#include #include #include +#include "paimon/common/global_index/btree/btree_defs.h" #include "paimon/common/global_index/btree/btree_global_index_reader.h" #include "paimon/common/sst/block_cache.h" #include "paimon/common/sst/block_handle.h" -#include "paimon/file_index/file_index_result.h" #include "paimon/global_index/global_indexer.h" #include "paimon/global_index/io/global_index_file_reader.h" #include "paimon/utils/roaring_bitmap64.h" - namespace paimon { +/// The indexer for btree index. We do not build a B-tree directly in memory, instead, we form a +/// logical B-tree via multi-level metadata over SST files that store the actual data, as below: +/// +/// BTree-Index +/// / | +/// / ... | +/// / | +/// +--------------------------------------+ +------------+ +/// | SST File | | | +/// +--------------------------------------+ | | +/// | Root Index | | | +/// | / ... | | ... | SST File | +/// | Leaf Index ... Leaf Index | | | +/// | / ... | / ... | | | | +/// | DataBlock ... DataBlock | | | +/// +--------------------------------------+ +------------+ +/// +/// This approach significantly reduces memory pressure during index reads. 
+ class BTreeGlobalIndexer : public GlobalIndexer { public: explicit BTreeGlobalIndexer(const std::map& options) @@ -45,11 +64,9 @@ class BTreeGlobalIndexer : public GlobalIndexer { const std::shared_ptr& pool) const override; private: - static Result> ToGlobalIndexResult( - int64_t range_end, const std::shared_ptr& result); - - static Result> ReadNullBitmap( - const std::shared_ptr& cache, const std::shared_ptr& block_handle); + static Result ReadNullBitmap(const std::shared_ptr& cache, + const std::optional& block_handle, + MemoryPool* pool); private: std::map options_; diff --git a/src/paimon/common/global_index/btree/btree_global_indexer_test.cpp b/src/paimon/common/global_index/btree/btree_global_indexer_test.cpp deleted file mode 100644 index f01ab798c..000000000 --- a/src/paimon/common/global_index/btree/btree_global_indexer_test.cpp +++ /dev/null @@ -1,238 +0,0 @@ -/* - * Copyright 2026-present Alibaba Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "paimon/common/global_index/btree/btree_global_indexer.h" - -#include - -#include "paimon/common/memory/memory_slice.h" -#include "paimon/common/utils/field_type_utils.h" -#include "paimon/memory/memory_pool.h" -#include "paimon/predicate/literal.h" - -namespace paimon::test { - -class BTreeGlobalIndexerTest : public ::testing::Test { - protected: - void SetUp() override { - pool_ = GetDefaultPool(); - } - - std::shared_ptr pool_; -}; - -// Test CreateComparator for STRING type -TEST_F(BTreeGlobalIndexerTest, CreateComparatorString) { - // Create two MemorySlices for comparison - auto slice_a = MemorySlice::Wrap(std::make_shared("apple", pool_.get())); - auto slice_b = MemorySlice::Wrap(std::make_shared("banana", pool_.get())); - auto slice_same = MemorySlice::Wrap(std::make_shared("apple", pool_.get())); - - // Lexicographic comparison: "apple" < "banana" - auto bytes_a = slice_a.GetHeapMemory(); - auto bytes_b = slice_b.GetHeapMemory(); - ASSERT_NE(bytes_a, nullptr); - ASSERT_NE(bytes_b, nullptr); - - size_t min_len = std::min(bytes_a->size(), bytes_b->size()); - int cmp = memcmp(bytes_a->data(), bytes_b->data(), min_len); - EXPECT_LT(cmp, 0); // "apple" < "banana" - - // Same strings should be equal - auto bytes_same = slice_same.GetHeapMemory(); - EXPECT_EQ(bytes_a->size(), bytes_same->size()); - EXPECT_EQ(memcmp(bytes_a->data(), bytes_same->data(), bytes_a->size()), 0); -} - -// Test CreateComparator for INT type -TEST_F(BTreeGlobalIndexerTest, CreateComparatorInt) { - int32_t val1 = 100; - int32_t val2 = 200; - int32_t val3 = 100; - - auto bytes1 = std::make_shared(sizeof(int32_t), pool_.get()); - memcpy(bytes1->data(), &val1, sizeof(int32_t)); - auto slice1 = MemorySlice::Wrap(bytes1); - - auto bytes2 = std::make_shared(sizeof(int32_t), pool_.get()); - memcpy(bytes2->data(), &val2, sizeof(int32_t)); - auto slice2 = MemorySlice::Wrap(bytes2); - - auto bytes3 = std::make_shared(sizeof(int32_t), pool_.get()); - memcpy(bytes3->data(), &val3, 
sizeof(int32_t)); - auto slice3 = MemorySlice::Wrap(bytes3); - - // Compare values - EXPECT_LT(val1, val2); - EXPECT_EQ(val1, val3); -} - -// Test CreateComparator for BIGINT type -TEST_F(BTreeGlobalIndexerTest, CreateComparatorBigInt) { - int64_t val1 = 10000000000LL; - int64_t val2 = 20000000000LL; - - EXPECT_LT(val1, val2); -} - -// Test CreateComparator for FLOAT type -TEST_F(BTreeGlobalIndexerTest, CreateComparatorFloat) { - float val1 = 1.5f; - float val2 = 2.5f; - - EXPECT_LT(val1, val2); -} - -// Test CreateComparator for DOUBLE type -TEST_F(BTreeGlobalIndexerTest, CreateComparatorDouble) { - double val1 = 1.5; - double val2 = 2.5; - - EXPECT_LT(val1, val2); -} - -// Test LiteralToMemorySlice for STRING type -TEST_F(BTreeGlobalIndexerTest, LiteralToMemorySliceString) { - Literal literal(FieldType::STRING, "test_value", 10); - EXPECT_FALSE(literal.IsNull()); - EXPECT_EQ(literal.GetType(), FieldType::STRING); - - auto value = literal.GetValue(); - EXPECT_EQ(value, "test_value"); -} - -// Test LiteralToMemorySlice for INT type -TEST_F(BTreeGlobalIndexerTest, LiteralToMemorySliceInt) { - Literal literal(static_cast(42)); - EXPECT_FALSE(literal.IsNull()); - EXPECT_EQ(literal.GetType(), FieldType::INT); - - auto value = literal.GetValue(); - EXPECT_EQ(value, 42); -} - -// Test LiteralToMemorySlice for BIGINT type -TEST_F(BTreeGlobalIndexerTest, LiteralToMemorySliceBigInt) { - Literal literal(static_cast(12345678901234LL)); - EXPECT_FALSE(literal.IsNull()); - EXPECT_EQ(literal.GetType(), FieldType::BIGINT); - - auto value = literal.GetValue(); - EXPECT_EQ(value, 12345678901234LL); -} - -// Test LiteralToMemorySlice for FLOAT type -TEST_F(BTreeGlobalIndexerTest, LiteralToMemorySliceFloat) { - Literal literal(3.14f); - EXPECT_FALSE(literal.IsNull()); - EXPECT_EQ(literal.GetType(), FieldType::FLOAT); - - auto value = literal.GetValue(); - EXPECT_FLOAT_EQ(value, 3.14f); -} - -// Test LiteralToMemorySlice for DOUBLE type -TEST_F(BTreeGlobalIndexerTest, 
LiteralToMemorySliceDouble) { - Literal literal(3.14159265358979); - EXPECT_FALSE(literal.IsNull()); - EXPECT_EQ(literal.GetType(), FieldType::DOUBLE); - - auto value = literal.GetValue(); - EXPECT_DOUBLE_EQ(value, 3.14159265358979); -} - -// Test LiteralToMemorySlice for BOOLEAN type -TEST_F(BTreeGlobalIndexerTest, LiteralToMemorySliceBoolean) { - Literal literal_true(true); - Literal literal_false(false); - - EXPECT_FALSE(literal_true.IsNull()); - EXPECT_EQ(literal_true.GetType(), FieldType::BOOLEAN); - EXPECT_TRUE(literal_true.GetValue()); - EXPECT_FALSE(literal_false.GetValue()); -} - -// Test LiteralToMemorySlice for null literal -TEST_F(BTreeGlobalIndexerTest, LiteralNull) { - Literal literal(FieldType::STRING); - EXPECT_TRUE(literal.IsNull()); - EXPECT_EQ(literal.GetType(), FieldType::STRING); -} - -// Test BTreeGlobalIndexer creation -TEST_F(BTreeGlobalIndexerTest, CreateIndexer) { - std::map options; - BTreeGlobalIndexer indexer(options); - - // CreateWriter with nullptr file_writer should fail - auto writer_result = indexer.CreateWriter("test_field", nullptr, nullptr, pool_); - EXPECT_FALSE(writer_result.ok()); -} - -// Test RangeQuery boundary conditions conceptually -TEST_F(BTreeGlobalIndexerTest, RangeQueryBoundaries) { - // This test verifies the boundary condition logic conceptually - // Inclusive lower bound: key >= lower_bound - // Exclusive lower bound: key > lower_bound - // Inclusive upper bound: key <= upper_bound - // Exclusive upper bound: key < upper_bound - - // For a range query [lower, upper] (both inclusive): - // - We include keys where key >= lower AND key <= upper - - // For a range query (lower, upper) (both exclusive): - // - We include keys where key > lower AND key < upper - - // The actual range query is tested in integration tests - SUCCEED(); -} - -// Test ToGlobalIndexResult with different result types -TEST_F(BTreeGlobalIndexerTest, ToGlobalIndexResultConcept) { - // This test verifies the concept of converting 
FileIndexResult to GlobalIndexResult - // - Remain: all rows match -> full bitmap - // - Skip: no rows match -> empty bitmap - // - BitmapIndexResult: specific rows match -> bitmap from result - - // The actual conversion is tested in integration tests - SUCCEED(); -} - -// Test Visit methods conceptually -TEST_F(BTreeGlobalIndexerTest, VisitMethodsConcept) { - // This test verifies the concept of various visit methods: - // - VisitEqual: exact match - // - VisitNotEqual: all rows except exact match - // - VisitLessThan: keys < literal - // - VisitLessOrEqual: keys <= literal - // - VisitGreaterThan: keys > literal - // - VisitGreaterOrEqual: keys >= literal - // - VisitIn: keys in set of literals - // - VisitNotIn: keys not in set of literals - // - VisitBetween: keys in [from, to] - // - VisitNotBetween: keys not in [from, to] - // - VisitIsNull: null rows from null_bitmap - // - VisitIsNotNull: non-null rows - // - VisitStartsWith: keys starting with prefix - // - VisitEndsWith: all non-null rows (fallback) - // - VisitContains: all non-null rows (fallback) - // - VisitLike: all non-null rows (fallback, TODO: optimize for prefix%) - - // The actual visit methods are tested in integration tests - SUCCEED(); -} - -} // namespace paimon::test diff --git a/src/paimon/common/global_index/btree/btree_index_meta.cpp b/src/paimon/common/global_index/btree/btree_index_meta.cpp index 78c6897de..2b2bf8f9f 100644 --- a/src/paimon/common/global_index/btree/btree_index_meta.cpp +++ b/src/paimon/common/global_index/btree/btree_index_meta.cpp @@ -34,7 +34,7 @@ std::shared_ptr BTreeIndexMeta::Deserialize(const std::shared_pt if (last_key_len) { last_key = input.ReadSlice(last_key_len).CopyBytes(pool); } - auto has_nulls = static_cast(input.ReadByte()) == 1; + auto has_nulls = input.ReadByte() == static_cast(1); return std::make_shared(first_key, last_key, has_nulls); } diff --git a/src/paimon/common/global_index/btree/btree_index_meta.h 
b/src/paimon/common/global_index/btree/btree_index_meta.h index f6507ad2c..d8794e8fd 100644 --- a/src/paimon/common/global_index/btree/btree_index_meta.h +++ b/src/paimon/common/global_index/btree/btree_index_meta.h @@ -23,8 +23,7 @@ namespace paimon { /// Index Meta of each BTree index file. The first key and last key of this meta could be null if -/// the -/// entire btree index file only contains nulls. +/// the entire btree index file only contains nulls. class BTreeIndexMeta { public: static std::shared_ptr Deserialize(const std::shared_ptr& meta, diff --git a/src/paimon/common/global_index/btree/btree_index_meta_test.cpp b/src/paimon/common/global_index/btree/btree_index_meta_test.cpp index d4ffde8f5..7229c8b19 100644 --- a/src/paimon/common/global_index/btree/btree_index_meta_test.cpp +++ b/src/paimon/common/global_index/btree/btree_index_meta_test.cpp @@ -16,12 +16,10 @@ #include "paimon/common/global_index/btree/btree_index_meta.h" -#include - +#include "gtest/gtest.h" #include "paimon/memory/memory_pool.h" namespace paimon::test { - class BTreeIndexMetaTest : public ::testing::Test { protected: void SetUp() override { @@ -32,32 +30,29 @@ class BTreeIndexMetaTest : public ::testing::Test { }; TEST_F(BTreeIndexMetaTest, SerializeDeserializeNormalKeys) { - // Create a BTreeIndexMeta with normal keys - // Use std::make_shared to create shared_ptr with proper memory management - // Bytes constructor uses pool->Malloc() for internal data, and destructor uses pool->Free() auto first_key = std::make_shared("first_key_data", pool_.get()); auto last_key = std::make_shared("last_key_data", pool_.get()); auto meta = std::make_shared(first_key, last_key, true); // Serialize auto serialized = meta->Serialize(pool_.get()); - ASSERT_NE(serialized, nullptr); + ASSERT_TRUE(serialized); ASSERT_GT(serialized->size(), 0u); // Deserialize auto deserialized = BTreeIndexMeta::Deserialize(serialized, pool_.get()); - ASSERT_NE(deserialized, nullptr); + ASSERT_TRUE(deserialized); 
// Verify first_key auto deserialized_first = deserialized->FirstKey(); - ASSERT_NE(deserialized_first, nullptr); - EXPECT_EQ(std::string(deserialized_first->data(), deserialized_first->size()), + ASSERT_TRUE(deserialized_first); + ASSERT_EQ(std::string(deserialized_first->data(), deserialized_first->size()), "first_key_data"); // Verify last_key auto deserialized_last = deserialized->LastKey(); - ASSERT_NE(deserialized_last, nullptr); - EXPECT_EQ(std::string(deserialized_last->data(), deserialized_last->size()), "last_key_data"); + ASSERT_TRUE(deserialized_last); + ASSERT_EQ(std::string(deserialized_last->data(), deserialized_last->size()), "last_key_data"); // Verify has_nulls ASSERT_TRUE(deserialized->HasNulls()); @@ -69,15 +64,15 @@ TEST_F(BTreeIndexMetaTest, SerializeDeserializeEmptyKeys) { // Serialize auto serialized = meta->Serialize(pool_.get()); - ASSERT_NE(serialized, nullptr); + ASSERT_TRUE(serialized); // Deserialize auto deserialized = BTreeIndexMeta::Deserialize(serialized, pool_.get()); - ASSERT_NE(deserialized, nullptr); + ASSERT_TRUE(deserialized); // Verify keys are null - ASSERT_EQ(deserialized->FirstKey(), nullptr); - ASSERT_EQ(deserialized->LastKey(), nullptr); + ASSERT_FALSE(deserialized->FirstKey()); + ASSERT_FALSE(deserialized->LastKey()); // Verify has_nulls ASSERT_TRUE(deserialized->HasNulls()); @@ -91,25 +86,25 @@ TEST_F(BTreeIndexMetaTest, HasNullsAndOnlyNulls) { auto meta1 = std::make_shared(std::make_shared("key", pool_.get()), std::make_shared("key", pool_.get()), true); - EXPECT_TRUE(meta1->HasNulls()); - EXPECT_FALSE(meta1->OnlyNulls()); + ASSERT_TRUE(meta1->HasNulls()); + ASSERT_FALSE(meta1->OnlyNulls()); // Case 2: No nulls with keys auto meta2 = std::make_shared(std::make_shared("key", pool_.get()), std::make_shared("key", pool_.get()), false); - EXPECT_FALSE(meta2->HasNulls()); - EXPECT_FALSE(meta2->OnlyNulls()); + ASSERT_FALSE(meta2->HasNulls()); + ASSERT_FALSE(meta2->OnlyNulls()); // Case 3: Only nulls (no keys) auto meta3 = 
std::make_shared(nullptr, nullptr, true); - EXPECT_TRUE(meta3->HasNulls()); - EXPECT_TRUE(meta3->OnlyNulls()); + ASSERT_TRUE(meta3->HasNulls()); + ASSERT_TRUE(meta3->OnlyNulls()); // Case 4: No nulls and no keys (edge case) auto meta4 = std::make_shared(nullptr, nullptr, false); - EXPECT_FALSE(meta4->HasNulls()); - EXPECT_TRUE(meta4->OnlyNulls()); + ASSERT_FALSE(meta4->HasNulls()); + ASSERT_TRUE(meta4->OnlyNulls()); } TEST_F(BTreeIndexMetaTest, SerializeDeserializeNoNulls) { @@ -120,14 +115,14 @@ TEST_F(BTreeIndexMetaTest, SerializeDeserializeNoNulls) { // Serialize auto serialized = meta->Serialize(pool_.get()); - ASSERT_NE(serialized, nullptr); + ASSERT_TRUE(serialized); // Deserialize auto deserialized = BTreeIndexMeta::Deserialize(serialized, pool_.get()); - ASSERT_NE(deserialized, nullptr); + ASSERT_TRUE(deserialized); // Verify has_nulls is false - EXPECT_FALSE(deserialized->HasNulls()); + ASSERT_FALSE(deserialized->HasNulls()); } TEST_F(BTreeIndexMetaTest, SerializeDeserializeWithOnlyFirstKey) { @@ -137,19 +132,19 @@ TEST_F(BTreeIndexMetaTest, SerializeDeserializeWithOnlyFirstKey) { // Serialize auto serialized = meta->Serialize(pool_.get()); - ASSERT_NE(serialized, nullptr); + ASSERT_TRUE(serialized); // Deserialize auto deserialized = BTreeIndexMeta::Deserialize(serialized, pool_.get()); - ASSERT_NE(deserialized, nullptr); + ASSERT_TRUE(deserialized); // Verify first_key auto deserialized_first = deserialized->FirstKey(); - ASSERT_NE(deserialized_first, nullptr); - EXPECT_EQ(std::string(deserialized_first->data(), deserialized_first->size()), "first"); + ASSERT_TRUE(deserialized_first); + ASSERT_EQ(std::string(deserialized_first->data(), deserialized_first->size()), "first"); // Verify last_key is null - EXPECT_EQ(deserialized->LastKey(), nullptr); + ASSERT_FALSE(deserialized->LastKey()); } TEST_F(BTreeIndexMetaTest, SerializeDeserializeWithOnlyLastKey) { @@ -159,19 +154,19 @@ TEST_F(BTreeIndexMetaTest, SerializeDeserializeWithOnlyLastKey) { // Serialize 
auto serialized = meta->Serialize(pool_.get()); - ASSERT_NE(serialized, nullptr); + ASSERT_TRUE(serialized); // Deserialize auto deserialized = BTreeIndexMeta::Deserialize(serialized, pool_.get()); - ASSERT_NE(deserialized, nullptr); + ASSERT_TRUE(deserialized); // Verify first_key is null - EXPECT_EQ(deserialized->FirstKey(), nullptr); + ASSERT_FALSE(deserialized->FirstKey()); // Verify last_key auto deserialized_last = deserialized->LastKey(); - ASSERT_NE(deserialized_last, nullptr); - EXPECT_EQ(std::string(deserialized_last->data(), deserialized_last->size()), "last"); + ASSERT_TRUE(deserialized_last); + ASSERT_EQ(std::string(deserialized_last->data(), deserialized_last->size()), "last"); } TEST_F(BTreeIndexMetaTest, SerializeDeserializeBinaryKeys) { @@ -184,21 +179,21 @@ TEST_F(BTreeIndexMetaTest, SerializeDeserializeBinaryKeys) { // Serialize auto serialized = meta->Serialize(pool_.get()); - ASSERT_NE(serialized, nullptr); + ASSERT_TRUE(serialized); // Deserialize auto deserialized = BTreeIndexMeta::Deserialize(serialized, pool_.get()); - ASSERT_NE(deserialized, nullptr); + ASSERT_TRUE(deserialized); // Verify first_key auto deserialized_first = deserialized->FirstKey(); - ASSERT_NE(deserialized_first, nullptr); - EXPECT_EQ(std::string(deserialized_first->data(), deserialized_first->size()), binary_first); + ASSERT_TRUE(deserialized_first); + ASSERT_EQ(std::string(deserialized_first->data(), deserialized_first->size()), binary_first); // Verify last_key auto deserialized_last = deserialized->LastKey(); - ASSERT_NE(deserialized_last, nullptr); - EXPECT_EQ(std::string(deserialized_last->data(), deserialized_last->size()), binary_last); + ASSERT_TRUE(deserialized_last); + ASSERT_EQ(std::string(deserialized_last->data(), deserialized_last->size()), binary_last); } } // namespace paimon::test diff --git a/src/paimon/common/global_index/btree/key_serializer.cpp b/src/paimon/common/global_index/btree/key_serializer.cpp new file mode 100644 index 000000000..1aab2951a 
--- /dev/null +++ b/src/paimon/common/global_index/btree/key_serializer.cpp @@ -0,0 +1,207 @@ +/* + * Copyright 2026-present Alibaba Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "paimon/common/global_index/btree/key_serializer.h" + +#include "fmt/format.h" +#include "paimon/common/memory/memory_slice_input.h" +#include "paimon/common/memory/memory_slice_output.h" +#include "paimon/common/utils/date_time_utils.h" +#include "paimon/common/utils/field_type_utils.h" +#include "paimon/common/utils/preconditions.h" +#include "paimon/data/decimal.h" +#include "paimon/data/timestamp.h" +#include "paimon/status.h" +namespace paimon { +Result> KeySerializer::SerializeKey( + const Literal& literal, const std::shared_ptr& type, MemoryPool* pool) { + if (literal.IsNull()) { + return Status::Invalid("cannot serialize null in KeySerializer"); + } + switch (literal.GetType()) { + case FieldType::BOOLEAN: { + MemorySliceOutput output(1, pool); + output.Reset(); + output.WriteValue(literal.GetValue() ? 
static_cast(1) + : static_cast(0)); + return output.ToSlice().CopyBytes(pool); + } + case FieldType::TINYINT: { + MemorySliceOutput output(1, pool); + output.Reset(); + output.WriteValue(literal.GetValue()); + return output.ToSlice().CopyBytes(pool); + } + case FieldType::SMALLINT: { + MemorySliceOutput output(2, pool); + output.Reset(); + output.WriteValue(literal.GetValue()); + return output.ToSlice().CopyBytes(pool); + } + case FieldType::INT: + case FieldType::DATE: { + MemorySliceOutput output(4, pool); + output.Reset(); + output.WriteValue(literal.GetValue()); + return output.ToSlice().CopyBytes(pool); + } + case FieldType::BIGINT: { + MemorySliceOutput output(8, pool); + output.Reset(); + output.WriteValue(literal.GetValue()); + return output.ToSlice().CopyBytes(pool); + } + case FieldType::FLOAT: { + MemorySliceOutput output(4, pool); + output.Reset(); + // TODO(xinyu): check java floatToIntBits + auto fvalue = literal.GetValue(); + int32_t ivalue; + memcpy(&ivalue, &fvalue, sizeof(float)); + output.WriteValue(ivalue); + return output.ToSlice().CopyBytes(pool); + } + case FieldType::DOUBLE: { + MemorySliceOutput output(8, pool); + output.Reset(); + // TODO(xinyu): check java doubleToLongBits + auto dvalue = literal.GetValue(); + int64_t ivalue; + memcpy(&ivalue, &dvalue, sizeof(double)); + output.WriteValue(ivalue); + return output.ToSlice().CopyBytes(pool); + } + case FieldType::STRING: { + auto svalue = literal.GetValue(); + std::shared_ptr bytes = Bytes::AllocateBytes(svalue, pool); + return bytes; + } + case FieldType::TIMESTAMP: { + auto ts_type = std::dynamic_pointer_cast(type); + PAIMON_RETURN_NOT_OK(Preconditions::CheckNotNull( + ts_type, "ts type cannot cast to arrow::TimestampType in BTreeGlobalIndex")); + MemorySliceOutput output(8, pool); + output.Reset(); + auto ts = literal.GetValue(); + if (Timestamp::IsCompact(DateTimeUtils::GetPrecisionFromType(ts_type))) { + output.WriteValue(ts.GetMillisecond()); + } else { + 
output.WriteValue(ts.GetMillisecond()); + PAIMON_RETURN_NOT_OK(output.WriteVarLenInt(ts.GetNanoOfMillisecond())); + } + return output.ToSlice().CopyBytes(pool); + } + case FieldType::DECIMAL: { + auto decimal_type = std::dynamic_pointer_cast(type); + PAIMON_RETURN_NOT_OK(Preconditions::CheckNotNull( + decimal_type, + "decimal type cannot cast to arrow::Decimal128Type in BTreeGlobalIndex")); + + auto decimal = literal.GetValue(); + if (Decimal::IsCompact(decimal_type->precision())) { + MemorySliceOutput output(8, pool); + output.Reset(); + output.WriteValue(decimal.ToUnscaledLong()); + return output.ToSlice().CopyBytes(pool); + } else { + std::vector decimal_bytes = decimal.ToUnscaledBytes(); + std::shared_ptr bytes = Bytes::AllocateBytes(decimal_bytes.size(), pool); + memcpy(bytes->data(), decimal_bytes.data(), decimal_bytes.size()); + return bytes; + } + } + default: + return Status::Invalid( + fmt::format("Not support serialize {} type in BTreeGlobalIndex", + FieldTypeUtils::FieldTypeToString(literal.GetType()))); + } +} + +Result KeySerializer::DeserializeKey(const MemorySlice& slice, + const std::shared_ptr& type, + MemoryPool* pool) { + switch (type->id()) { + case arrow::Type::type::BOOL: + return Literal(slice.ReadByte(0) == 1 ? 
true : false); + case arrow::Type::type::INT8: + return Literal(slice.ReadByte(0)); + case arrow::Type::type::INT16: + return Literal(slice.ReadShort(0)); + case arrow::Type::type::INT32: + return Literal(slice.ReadInt(0)); + case arrow::Type::type::DATE32: + return Literal(FieldType::DATE, slice.ReadInt(0)); + case arrow::Type::type::INT64: + return Literal(slice.ReadLong(0)); + case arrow::Type::type::FLOAT: { + int32_t ivalue = slice.ReadInt(0); + float fvalue; + memcpy(&fvalue, &ivalue, sizeof(fvalue)); + return Literal(fvalue); + } + case arrow::Type::type::DOUBLE: { + int64_t ivalue = slice.ReadLong(0); + double dvalue; + memcpy(&dvalue, &ivalue, sizeof(dvalue)); + return Literal(dvalue); + } + case arrow::Type::type::STRING: { + auto bytes = slice.CopyBytes(pool); + return Literal(FieldType::STRING, bytes->data(), bytes->size()); + } + case arrow::Type::type::TIMESTAMP: { + auto ts_type = std::dynamic_pointer_cast(type); + PAIMON_RETURN_NOT_OK(Preconditions::CheckNotNull( + ts_type, "ts type cannot cast to arrow::TimestampType in BTreeGlobalIndex")); + if (Timestamp::IsCompact(DateTimeUtils::GetPrecisionFromType(ts_type))) { + return Literal(Timestamp::FromEpochMillis(slice.ReadLong(0))); + } else { + auto input = slice.ToInput(); + int64_t millis = input.ReadLong(); + PAIMON_ASSIGN_OR_RAISE(int32_t nanos, input.ReadVarLenInt()); + return Literal(Timestamp(millis, nanos)); + } + } + case arrow::Type::type::DECIMAL128: { + auto decimal_type = std::dynamic_pointer_cast(type); + PAIMON_RETURN_NOT_OK(Preconditions::CheckNotNull( + decimal_type, + "decimal type cannot cast to arrow::Decimal128Type in BTreeGlobalIndex")); + if (Decimal::IsCompact(decimal_type->precision())) { + return Literal(Decimal::FromUnscaledLong( + slice.ReadLong(0), decimal_type->precision(), decimal_type->scale())); + } else { + auto bytes = slice.CopyBytes(pool); + return Literal(Decimal::FromUnscaledBytes(decimal_type->precision(), + decimal_type->scale(), bytes.get())); + } + } + 
default: + return Status::Invalid(fmt::format( + "Not support deserialize {} type in BTreeGlobalIndex", type->ToString())); + } +} + +MemorySlice::SliceComparator KeySerializer::CreateComparator( + const std::shared_ptr& type, const std::shared_ptr& pool) { + return + [pool = pool, type = type](const MemorySlice& a, const MemorySlice& b) -> Result { + PAIMON_ASSIGN_OR_RAISE(Literal la, DeserializeKey(a, type, pool.get())); + PAIMON_ASSIGN_OR_RAISE(Literal lb, DeserializeKey(b, type, pool.get())); + return la.CompareTo(lb); + }; +} +} // namespace paimon diff --git a/src/paimon/common/global_index/btree/key_serializer.h b/src/paimon/common/global_index/btree/key_serializer.h new file mode 100644 index 000000000..ade1a92a6 --- /dev/null +++ b/src/paimon/common/global_index/btree/key_serializer.h @@ -0,0 +1,41 @@ +/* + * Copyright 2026-present Alibaba Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include "arrow/api.h" +#include "paimon/common/memory/memory_slice.h" +#include "paimon/memory/bytes.h" +#include "paimon/memory/memory_pool.h" +#include "paimon/predicate/literal.h" +namespace paimon { +class KeySerializer { + public: + KeySerializer() = delete; + ~KeySerializer() = delete; + + static Result> SerializeKey(const Literal& literal, + const std::shared_ptr& type, + MemoryPool* pool); + + static Result DeserializeKey(const MemorySlice& slice, + const std::shared_ptr& type, + MemoryPool* pool); + + static MemorySlice::SliceComparator CreateComparator( + const std::shared_ptr& type, const std::shared_ptr& pool); +}; +} // namespace paimon diff --git a/src/paimon/common/global_index/btree/key_serializer_test.cpp b/src/paimon/common/global_index/btree/key_serializer_test.cpp new file mode 100644 index 000000000..b3c38fee9 --- /dev/null +++ b/src/paimon/common/global_index/btree/key_serializer_test.cpp @@ -0,0 +1,258 @@ +/* + * Copyright 2026-present Alibaba Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "paimon/common/global_index/btree/key_serializer.h" + +#include "gtest/gtest.h" +#include "paimon/data/decimal.h" +#include "paimon/data/timestamp.h" +#include "paimon/testing/utils/testharness.h" + +namespace paimon::test { + +class KeySerializerTest : public ::testing::Test { + protected: + void SetUp() override { + pool_ = GetDefaultPool(); + } + + std::shared_ptr pool_; +}; + +TEST_F(KeySerializerTest, SerializeAndDeserializeAllTypes) { + // BOOLEAN + { + Literal literal(true); + ASSERT_OK_AND_ASSIGN(auto bytes, + KeySerializer::SerializeKey(literal, arrow::boolean(), pool_.get())); + auto slice = MemorySlice::Wrap(MemorySegment::Wrap(bytes)); + ASSERT_OK_AND_ASSIGN(auto result, + KeySerializer::DeserializeKey(slice, arrow::boolean(), pool_.get())); + ASSERT_EQ(result.GetValue(), true); + + Literal literal_false(false); + ASSERT_OK_AND_ASSIGN( + bytes, KeySerializer::SerializeKey(literal_false, arrow::boolean(), pool_.get())); + slice = MemorySlice::Wrap(MemorySegment::Wrap(bytes)); + ASSERT_OK_AND_ASSIGN(result, + KeySerializer::DeserializeKey(slice, arrow::boolean(), pool_.get())); + ASSERT_EQ(result.GetValue(), false); + } + + // TINYINT (int8) + { + Literal literal(static_cast(-42)); + ASSERT_OK_AND_ASSIGN(auto bytes, + KeySerializer::SerializeKey(literal, arrow::int8(), pool_.get())); + auto slice = MemorySlice::Wrap(MemorySegment::Wrap(bytes)); + ASSERT_OK_AND_ASSIGN(auto result, + KeySerializer::DeserializeKey(slice, arrow::int8(), pool_.get())); + ASSERT_EQ(result.GetValue(), -42); + } + + // SMALLINT (int16) + { + Literal literal(static_cast(12345)); + ASSERT_OK_AND_ASSIGN(auto bytes, + KeySerializer::SerializeKey(literal, arrow::int16(), pool_.get())); + auto slice = MemorySlice::Wrap(MemorySegment::Wrap(bytes)); + ASSERT_OK_AND_ASSIGN(auto result, + KeySerializer::DeserializeKey(slice, arrow::int16(), pool_.get())); + ASSERT_EQ(result.GetValue(), 12345); + } + + // INT (int32) + { + Literal literal(42); + ASSERT_OK_AND_ASSIGN(auto 
bytes, + KeySerializer::SerializeKey(literal, arrow::int32(), pool_.get())); + auto slice = MemorySlice::Wrap(MemorySegment::Wrap(bytes)); + ASSERT_OK_AND_ASSIGN(auto result, + KeySerializer::DeserializeKey(slice, arrow::int32(), pool_.get())); + ASSERT_EQ(result.GetValue(), 42); + } + + // DATE (stored as int32) + { + Literal literal(FieldType::DATE, 18000); + ASSERT_OK_AND_ASSIGN(auto bytes, + KeySerializer::SerializeKey(literal, arrow::date32(), pool_.get())); + auto slice = MemorySlice::Wrap(MemorySegment::Wrap(bytes)); + ASSERT_OK_AND_ASSIGN(auto result, + KeySerializer::DeserializeKey(slice, arrow::date32(), pool_.get())); + ASSERT_EQ(result.GetType(), FieldType::DATE); + ASSERT_EQ(result.GetValue(), 18000); + } + + // BIGINT (int64) + { + Literal literal(static_cast(123456789012345LL)); + ASSERT_OK_AND_ASSIGN(auto bytes, + KeySerializer::SerializeKey(literal, arrow::int64(), pool_.get())); + auto slice = MemorySlice::Wrap(MemorySegment::Wrap(bytes)); + ASSERT_OK_AND_ASSIGN(auto result, + KeySerializer::DeserializeKey(slice, arrow::int64(), pool_.get())); + ASSERT_EQ(result.GetValue(), 123456789012345LL); + } + + // FLOAT + { + Literal literal(3.14f); + ASSERT_OK_AND_ASSIGN(auto bytes, + KeySerializer::SerializeKey(literal, arrow::float32(), pool_.get())); + auto slice = MemorySlice::Wrap(MemorySegment::Wrap(bytes)); + ASSERT_OK_AND_ASSIGN(auto result, + KeySerializer::DeserializeKey(slice, arrow::float32(), pool_.get())); + ASSERT_FLOAT_EQ(result.GetValue(), 3.14f); + } + + // DOUBLE + { + Literal literal(2.718281828); + ASSERT_OK_AND_ASSIGN(auto bytes, + KeySerializer::SerializeKey(literal, arrow::float64(), pool_.get())); + auto slice = MemorySlice::Wrap(MemorySegment::Wrap(bytes)); + ASSERT_OK_AND_ASSIGN(auto result, + KeySerializer::DeserializeKey(slice, arrow::float64(), pool_.get())); + ASSERT_DOUBLE_EQ(result.GetValue(), 2.718281828); + } + + // STRING + { + Literal literal(FieldType::STRING, "hello world", 11); + ASSERT_OK_AND_ASSIGN(auto bytes, + 
KeySerializer::SerializeKey(literal, arrow::utf8(), pool_.get())); + auto slice = MemorySlice::Wrap(MemorySegment::Wrap(bytes)); + ASSERT_OK_AND_ASSIGN(auto result, + KeySerializer::DeserializeKey(slice, arrow::utf8(), pool_.get())); + ASSERT_EQ(result.GetValue(), "hello world"); + } + + // TIMESTAMP (compact, millis precision) + { + auto ts_type = arrow::timestamp(arrow::TimeUnit::MILLI); + Literal literal(Timestamp::FromEpochMillis(1234567890)); + ASSERT_OK_AND_ASSIGN(auto bytes, + KeySerializer::SerializeKey(literal, ts_type, pool_.get())); + auto slice = MemorySlice::Wrap(MemorySegment::Wrap(bytes)); + ASSERT_OK_AND_ASSIGN(auto result, + KeySerializer::DeserializeKey(slice, ts_type, pool_.get())); + ASSERT_EQ(result.GetValue().GetMillisecond(), 1234567890); + } + + // TIMESTAMP (non-compact, nano precision) + { + auto ts_type = arrow::timestamp(arrow::TimeUnit::NANO); + Literal literal(Timestamp(5000, 123456)); + ASSERT_OK_AND_ASSIGN(auto bytes, + KeySerializer::SerializeKey(literal, ts_type, pool_.get())); + auto slice = MemorySlice::Wrap(MemorySegment::Wrap(bytes)); + ASSERT_OK_AND_ASSIGN(auto result, + KeySerializer::DeserializeKey(slice, ts_type, pool_.get())); + ASSERT_EQ(result.GetValue().GetMillisecond(), 5000); + ASSERT_EQ(result.GetValue().GetNanoOfMillisecond(), 123456); + } + + // DECIMAL (compact, precision <= 18) + { + auto decimal_type = arrow::decimal128(10, 2); + Literal literal(Decimal::FromUnscaledLong(12345, 10, 2)); + ASSERT_OK_AND_ASSIGN(auto bytes, + KeySerializer::SerializeKey(literal, decimal_type, pool_.get())); + auto slice = MemorySlice::Wrap(MemorySegment::Wrap(bytes)); + ASSERT_OK_AND_ASSIGN(auto result, + KeySerializer::DeserializeKey(slice, decimal_type, pool_.get())); + ASSERT_EQ(result.GetValue().ToUnscaledLong(), 12345); + } + + // DECIMAL (non-compact, precision > 18) + { + auto decimal_type = arrow::decimal128(25, 3); + Literal literal(Decimal(25, 3, 9999999)); + ASSERT_OK_AND_ASSIGN(auto bytes, + 
KeySerializer::SerializeKey(literal, decimal_type, pool_.get())); + auto slice = MemorySlice::Wrap(MemorySegment::Wrap(bytes)); + ASSERT_OK_AND_ASSIGN(auto result, + KeySerializer::DeserializeKey(slice, decimal_type, pool_.get())); + ASSERT_EQ(result.GetValue().ToString(), literal.GetValue().ToString()); + } + + // NULL should fail + { + Literal null_literal(FieldType::INT); + ASSERT_NOK_WITH_MSG(KeySerializer::SerializeKey(null_literal, arrow::int32(), pool_.get()), + "cannot serialize null in KeySerializer"); + } + + // unsupported type + { + Literal literal(FieldType::BINARY, "ab", 2); + ASSERT_NOK_WITH_MSG(KeySerializer::SerializeKey(literal, arrow::binary(), pool_.get()), + "Not support serialize BINARY type in BTreeGlobalIndex"); + } +} + +TEST_F(KeySerializerTest, CreateComparator) { + // INT comparator + { + auto comparator = KeySerializer::CreateComparator(arrow::int32(), pool_); + Literal literal_1(1); + Literal literal_2(2); + Literal literal_3(1); + ASSERT_OK_AND_ASSIGN(auto bytes_1, + KeySerializer::SerializeKey(literal_1, arrow::int32(), pool_.get())); + ASSERT_OK_AND_ASSIGN(auto bytes_2, + KeySerializer::SerializeKey(literal_2, arrow::int32(), pool_.get())); + ASSERT_OK_AND_ASSIGN(auto bytes_3, + KeySerializer::SerializeKey(literal_3, arrow::int32(), pool_.get())); + auto slice_1 = MemorySlice::Wrap(MemorySegment::Wrap(bytes_1)); + auto slice_2 = MemorySlice::Wrap(MemorySegment::Wrap(bytes_2)); + auto slice_3 = MemorySlice::Wrap(MemorySegment::Wrap(bytes_3)); + + ASSERT_OK_AND_ASSIGN(auto cmp_result, comparator(slice_1, slice_2)); + ASSERT_LT(cmp_result, 0); + ASSERT_OK_AND_ASSIGN(cmp_result, comparator(slice_2, slice_1)); + ASSERT_GT(cmp_result, 0); + ASSERT_OK_AND_ASSIGN(cmp_result, comparator(slice_1, slice_3)); + ASSERT_EQ(cmp_result, 0); + } + + // STRING comparator + { + auto comparator = KeySerializer::CreateComparator(arrow::utf8(), pool_); + Literal literal_a(FieldType::STRING, "apple", 5); + Literal literal_b(FieldType::STRING, "banana", 
6); + Literal literal_c(FieldType::STRING, "apple", 5); + ASSERT_OK_AND_ASSIGN(auto bytes_a, + KeySerializer::SerializeKey(literal_a, arrow::utf8(), pool_.get())); + ASSERT_OK_AND_ASSIGN(auto bytes_b, + KeySerializer::SerializeKey(literal_b, arrow::utf8(), pool_.get())); + ASSERT_OK_AND_ASSIGN(auto bytes_c, + KeySerializer::SerializeKey(literal_c, arrow::utf8(), pool_.get())); + auto slice_a = MemorySlice::Wrap(MemorySegment::Wrap(bytes_a)); + auto slice_b = MemorySlice::Wrap(MemorySegment::Wrap(bytes_b)); + auto slice_c = MemorySlice::Wrap(MemorySegment::Wrap(bytes_c)); + + ASSERT_OK_AND_ASSIGN(auto cmp_result, comparator(slice_a, slice_b)); + ASSERT_LT(cmp_result, 0); + ASSERT_OK_AND_ASSIGN(cmp_result, comparator(slice_b, slice_a)); + ASSERT_GT(cmp_result, 0); + ASSERT_OK_AND_ASSIGN(cmp_result, comparator(slice_a, slice_c)); + ASSERT_EQ(cmp_result, 0); + } +} + +} // namespace paimon::test diff --git a/src/paimon/common/global_index/wrap/file_index_reader_wrapper.h b/src/paimon/common/global_index/wrap/file_index_reader_wrapper.h index e217d61f7..91427172c 100644 --- a/src/paimon/common/global_index/wrap/file_index_reader_wrapper.h +++ b/src/paimon/common/global_index/wrap/file_index_reader_wrapper.h @@ -123,30 +123,6 @@ class FileIndexReaderWrapper : public GlobalIndexReader { return transform_(file_index_result); } - Result> VisitBetween(const Literal& from, - const Literal& to) override { - PAIMON_ASSIGN_OR_RAISE(std::shared_ptr file_index_result, - reader_->VisitBetween(from, to)); - return transform_(file_index_result); - } - - Result> VisitNotBetween(const Literal& from, - const Literal& to) override { - PAIMON_ASSIGN_OR_RAISE(std::shared_ptr file_index_result, - reader_->VisitNotBetween(from, to)); - return transform_(file_index_result); - } - - Result> VisitAnd( - const std::vector>>& children) override { - return Status::Invalid("FileIndexReaderWrapper is not supposed to handle AND operations"); - } - - Result> VisitOr( - const std::vector>>& children) 
override { - return Status::Invalid("FileIndexReaderWrapper is not supposed to handle OR operations"); - } - Result> VisitVectorSearch( const std::shared_ptr& vector_search) override { return Status::Invalid( diff --git a/src/paimon/common/io/cache/cache.h b/src/paimon/common/io/cache/cache.h index 6bcf22184..76aaae405 100644 --- a/src/paimon/common/io/cache/cache.h +++ b/src/paimon/common/io/cache/cache.h @@ -51,7 +51,7 @@ class PAIMON_EXPORT Cache { class CacheValue { public: - explicit CacheValue(const MemorySegment& segment, CacheCallback callback) + CacheValue(const MemorySegment& segment, CacheCallback callback) : segment_(segment), callback_(std::move(callback)) {} const MemorySegment& GetSegment() const { diff --git a/src/paimon/common/lookup/sort/sort_lookup_store_factory.cpp b/src/paimon/common/lookup/sort/sort_lookup_store_factory.cpp index 2d6bcf918..e81acdf1d 100644 --- a/src/paimon/common/lookup/sort/sort_lookup_store_factory.cpp +++ b/src/paimon/common/lookup/sort/sort_lookup_store_factory.cpp @@ -36,8 +36,9 @@ Result> SortLookupStoreFactory::CreateReader( const std::shared_ptr& fs, const std::string& file_path, const std::shared_ptr& pool) const { PAIMON_ASSIGN_OR_RAISE(std::shared_ptr in, fs->Open(file_path)); - PAIMON_ASSIGN_OR_RAISE(std::shared_ptr reader, - SstFileReader::CreateFromStream(in, comparator_, cache_manager_, pool)); + PAIMON_ASSIGN_OR_RAISE( + std::shared_ptr reader, + SstFileReader::CreateForSortLookupStore(in, comparator_, cache_manager_, pool)); return std::make_unique(in, reader); } @@ -48,7 +49,7 @@ Status SortLookupStoreReader::Close() { Status SortLookupStoreWriter::Close() { PAIMON_RETURN_NOT_OK(writer_->Flush()); - PAIMON_ASSIGN_OR_RAISE(std::shared_ptr bloom_filter_handle, + PAIMON_ASSIGN_OR_RAISE(std::optional bloom_filter_handle, writer_->WriteBloomFilter()); PAIMON_ASSIGN_OR_RAISE(BlockHandle index_block_handle, writer_->WriteIndexBlock()); SortLookupStoreFooter footer(index_block_handle, bloom_filter_handle); diff 
--git a/src/paimon/common/lookup/sort/sort_lookup_store_footer.cpp b/src/paimon/common/lookup/sort/sort_lookup_store_footer.cpp index 98b49844e..57c6212a5 100644 --- a/src/paimon/common/lookup/sort/sort_lookup_store_footer.cpp +++ b/src/paimon/common/lookup/sort/sort_lookup_store_footer.cpp @@ -26,9 +26,9 @@ Result> SortLookupStoreFooter::ReadSortLo auto offset = input->ReadLong(); auto size = input->ReadInt(); auto expected_entries = input->ReadLong(); - std::shared_ptr bloom_filter_handle = nullptr; + std::optional bloom_filter_handle; if (offset || size || expected_entries) { - bloom_filter_handle = std::make_shared(offset, size, expected_entries); + bloom_filter_handle.emplace(offset, size, expected_entries); } auto index_offset = input->ReadLong(); auto index_size = input->ReadInt(); @@ -48,7 +48,7 @@ Result> SortLookupStoreFooter::ReadSortLo MemorySlice SortLookupStoreFooter::WriteSortLookupStoreFooter(MemoryPool* pool) { MemorySliceOutput output(ENCODED_LENGTH, pool); // 20 bytes - if (!bloom_filter_handle_.get()) { + if (!bloom_filter_handle_) { output.WriteValue(static_cast(0)); output.WriteValue(static_cast(0)); output.WriteValue(static_cast(0)); diff --git a/src/paimon/common/lookup/sort/sort_lookup_store_footer.h b/src/paimon/common/lookup/sort/sort_lookup_store_footer.h index 433c2e0b7..25cc011fc 100644 --- a/src/paimon/common/lookup/sort/sort_lookup_store_footer.h +++ b/src/paimon/common/lookup/sort/sort_lookup_store_footer.h @@ -16,7 +16,7 @@ #pragma once -#include +#include #include "paimon/common/memory/memory_slice_input.h" #include "paimon/common/sst/block_handle.h" @@ -34,7 +34,7 @@ class PAIMON_EXPORT SortLookupStoreFooter { public: SortLookupStoreFooter(const BlockHandle& index_block_handle, - const std::shared_ptr& bloom_filter_handle) + const std::optional& bloom_filter_handle) : index_block_handle_(index_block_handle), bloom_filter_handle_(bloom_filter_handle) {} ~SortLookupStoreFooter() = default; @@ -42,7 +42,7 @@ class PAIMON_EXPORT 
SortLookupStoreFooter { const BlockHandle& GetIndexBlockHandle() const { return index_block_handle_; } - std::shared_ptr GetBloomFilterHandle() const { + const std::optional& GetBloomFilterHandle() const { return bloom_filter_handle_; } @@ -55,6 +55,6 @@ class PAIMON_EXPORT SortLookupStoreFooter { private: BlockHandle index_block_handle_; - std::shared_ptr bloom_filter_handle_; + std::optional bloom_filter_handle_; }; } // namespace paimon diff --git a/src/paimon/common/predicate/literal.cpp b/src/paimon/common/predicate/literal.cpp index c29398d80..bc37a1b4d 100644 --- a/src/paimon/common/predicate/literal.cpp +++ b/src/paimon/common/predicate/literal.cpp @@ -26,6 +26,7 @@ #include "fmt/format.h" #include "paimon/common/utils/field_type_utils.h" +#include "paimon/common/utils/fields_comparator.h" #include "paimon/data/decimal.h" #include "paimon/data/timestamp.h" #include "paimon/status.h" @@ -329,13 +330,11 @@ Result Literal::CompareTo(const Literal& other) const { ? 0 : ((impl_->value_.BigIntVal < other.impl_->value_.BigIntVal) ? -1 : 1); case FieldType::FLOAT: - return impl_->value_.FloatVal == other.impl_->value_.FloatVal - ? 0 - : ((impl_->value_.FloatVal < other.impl_->value_.FloatVal) ? -1 : 1); + return FieldsComparator::CompareFloatingPoint(impl_->value_.FloatVal, + other.impl_->value_.FloatVal); case FieldType::DOUBLE: - return impl_->value_.DoubleVal == other.impl_->value_.DoubleVal - ? 0 - : ((impl_->value_.DoubleVal < other.impl_->value_.DoubleVal) ? 
-1 : 1); + return FieldsComparator::CompareFloatingPoint(impl_->value_.DoubleVal, + other.impl_->value_.DoubleVal); case FieldType::STRING: case FieldType::BINARY: { std::string_view v1(impl_->value_.Buffer, impl_->size_); diff --git a/src/paimon/common/sst/block_cache.h b/src/paimon/common/sst/block_cache.h index f46ebb5cf..a5c3554f1 100644 --- a/src/paimon/common/sst/block_cache.h +++ b/src/paimon/common/sst/block_cache.h @@ -34,6 +34,10 @@ class PAIMON_EXPORT BlockCache { const std::shared_ptr& pool) : pool_(pool), file_path_(file_path), in_(in), cache_manager_(cache_manager) {} + ~BlockCache() { + Close(); + } + Result GetBlock( int64_t position, int32_t length, bool is_index, std::function(const MemorySegment&)> decompress_func) { @@ -80,7 +84,9 @@ class PAIMON_EXPORT BlockCache { for (const auto& [key, _] : copied_blocks) { cache_manager_->InvalidPage(key); } - assert(blocks_.empty()); + // Some entries may remain in blocks_ if they were already evicted from the + // LRU cache (InvalidPage is a no-op for missing keys), so clear explicitly. 
+ blocks_.clear(); } private: diff --git a/src/paimon/common/sst/block_iterator.cpp b/src/paimon/common/sst/block_iterator.cpp index 43650c4dc..db8b1564a 100644 --- a/src/paimon/common/sst/block_iterator.cpp +++ b/src/paimon/common/sst/block_iterator.cpp @@ -83,7 +83,7 @@ Result BlockIterator::SeekTo(const MemorySlice& target_key) { } // If we exit the loop without finding exact match, polled_position_ points to - // the first entry with key > target_key (if any), or -1 if all keys < target_key + // the first entry with key >= target_key (if any), or -1 if all keys < target_key return false; } diff --git a/src/paimon/common/sst/block_trailer.cpp b/src/paimon/common/sst/block_trailer.cpp index c1f545858..ac60b734a 100644 --- a/src/paimon/common/sst/block_trailer.cpp +++ b/src/paimon/common/sst/block_trailer.cpp @@ -16,6 +16,7 @@ #include "paimon/common/sst/block_trailer.h" +#include "fmt/format.h" #include "paimon/common/memory/memory_slice_output.h" namespace paimon { @@ -35,10 +36,8 @@ int8_t BlockTrailer::CompressionType() const { } std::string BlockTrailer::ToString() const { - std::stringstream sstream; - sstream << std::hex << crc32c_; - return "BlockTrailer{compression_type=" + std::to_string(compression_type_) + ", crc32c_=0x" + - sstream.str() + "}"; + return fmt::format("BlockTrailer{{compression_type={}, crc32c_={:#x}}}", + std::to_string(compression_type_), static_cast(crc32c_)); } MemorySlice BlockTrailer::WriteBlockTrailer(MemoryPool* pool) { diff --git a/src/paimon/common/sst/sst_file_io_test.cpp b/src/paimon/common/sst/sst_file_io_test.cpp index a1ef71b7e..dc1305e9c 100644 --- a/src/paimon/common/sst/sst_file_io_test.cpp +++ b/src/paimon/common/sst/sst_file_io_test.cpp @@ -144,8 +144,8 @@ TEST_P(SstFileIOTest, TestSimple) { // test read ASSERT_OK_AND_ASSIGN(in, fs_->Open(index_path)); - ASSERT_OK_AND_ASSIGN(auto reader, - SstFileReader::CreateFromStream(in, comparator_, cache_manager_, pool_)); + ASSERT_OK_AND_ASSIGN(auto reader, 
SstFileReader::CreateForSortLookupStore( + in, comparator_, cache_manager_, pool_)); // not exist key std::string k0 = "k0"; @@ -178,8 +178,8 @@ TEST_P(SstFileIOTest, TestJavaCompatibility) { ASSERT_OK_AND_ASSIGN(std::shared_ptr in, fs_->Open(file)); // test read - ASSERT_OK_AND_ASSIGN(auto reader, - SstFileReader::CreateFromStream(in, comparator_, cache_manager_, pool_)); + ASSERT_OK_AND_ASSIGN(auto reader, SstFileReader::CreateForSortLookupStore( + in, comparator_, cache_manager_, pool_)); // not exist key std::string k0 = "10000"; ASSERT_FALSE(reader->Lookup(std::make_shared(k0, pool_.get())).value()); @@ -271,7 +271,7 @@ TEST_F(SstFileIOTest, TestIOException) { std::shared_ptr in = std::move(in_result).value(); auto reader_result = - SstFileReader::CreateFromStream(in, comparator_, cache_manager_, pool_); + SstFileReader::CreateForSortLookupStore(in, comparator_, cache_manager_, pool_); CHECK_HOOK_STATUS(reader_result.status(), i); std::shared_ptr reader = std::move(reader_result).value(); diff --git a/src/paimon/common/sst/sst_file_reader.cpp b/src/paimon/common/sst/sst_file_reader.cpp index d93829baf..9b2434f4b 100644 --- a/src/paimon/common/sst/sst_file_reader.cpp +++ b/src/paimon/common/sst/sst_file_reader.cpp @@ -24,7 +24,7 @@ namespace paimon { Result> SstFileReader::Create( const std::shared_ptr& in, const BlockHandle& index_block_handle, - const std::shared_ptr& bloom_filter_handle, + const std::optional& bloom_filter_handle, MemorySlice::SliceComparator comparator, const std::shared_ptr& cache_manager, const std::shared_ptr& pool) { PAIMON_ASSIGN_OR_RAISE(std::string file_path, in->GetUri()); @@ -32,8 +32,9 @@ Result> SstFileReader::Create( // read bloom filter directly now std::shared_ptr bloom_filter = nullptr; - if (bloom_filter_handle && (bloom_filter_handle->ExpectedEntries() || - bloom_filter_handle->Size() || bloom_filter_handle->Offset())) { + if (bloom_filter_handle.has_value() && + (bloom_filter_handle->ExpectedEntries() || 
bloom_filter_handle->Size() || + bloom_filter_handle->Offset())) { bloom_filter = std::make_shared(bloom_filter_handle->ExpectedEntries(), bloom_filter_handle->Size()); PAIMON_ASSIGN_OR_RAISE( @@ -64,7 +65,7 @@ Result> SstFileReader::Create( new SstFileReader(pool, block_cache, bloom_filter, reader, comparator)); } -Result> SstFileReader::CreateFromStream( +Result> SstFileReader::CreateForSortLookupStore( const std::shared_ptr& in, MemorySlice::SliceComparator comparator, const std::shared_ptr& cache_manager, const std::shared_ptr& pool) { PAIMON_ASSIGN_OR_RAISE(uint64_t file_len, in->Length()); @@ -93,10 +94,6 @@ SstFileReader::SstFileReader(const std::shared_ptr& pool, index_block_reader_(index_block_reader), comparator_(std::move(comparator)) {} -std::unique_ptr SstFileReader::CreateIterator() { - return std::make_unique(this, index_block_reader_->Iterator()); -} - std::unique_ptr SstFileReader::CreateIndexIterator() { return index_block_reader_->Iterator(); } @@ -161,9 +158,8 @@ Result SstFileReader::DecompressBlock(const MemorySegment& compre static_cast(static_cast(trailer->CompressionType()) & 0xFF); crc32c_code = CRC32C::calculate(&compression_val, 1, crc32c_code); if (trailer->Crc32c() != static_cast(crc32c_code)) { - return Status::Invalid(fmt::format("Expected crc32c({}) but found crc32c({})", - SstFileUtils::ToHexString(trailer->Crc32c()), - SstFileUtils::ToHexString(crc32c_code))); + return Status::Invalid(fmt::format("Expected crc32c({:#x}) but found crc32c({:#x})", + static_cast<uint32_t>(trailer->Crc32c()), static_cast<uint32_t>(crc32c_code))); } // decompress data @@ -199,24 +195,4 @@ Status SstFileReader::Close() { return Status::OK(); } -SstFileIterator::SstFileIterator(SstFileReader* reader, - std::unique_ptr index_iterator) - : reader_(reader), index_iterator_(std::move(index_iterator)) {} - -Status SstFileIterator::SeekTo(const std::shared_ptr& key) { - auto key_slice = MemorySlice::Wrap(key); - PAIMON_ASSIGN_OR_RAISE([[maybe_unused]] bool index_success, 
index_iterator_->SeekTo(key_slice)); - if (index_iterator_->HasNext()) { - PAIMON_ASSIGN_OR_RAISE(data_iterator_, reader_->GetNextBlock(index_iterator_)); - // The index block entry key is the last key of the corresponding data block. - // If there is some index entry key >= target key, the related data block must - // also contain some key >= target key, which means seeked_data_block.HasNext() - // must be true - PAIMON_ASSIGN_OR_RAISE([[maybe_unused]] bool data_success, - data_iterator_->SeekTo(key_slice)); - } else { - data_iterator_.reset(); - } - return Status::OK(); -} } // namespace paimon diff --git a/src/paimon/common/sst/sst_file_reader.h b/src/paimon/common/sst/sst_file_reader.h index 607f26f08..982e9e30f 100644 --- a/src/paimon/common/sst/sst_file_reader.h +++ b/src/paimon/common/sst/sst_file_reader.h @@ -41,20 +41,18 @@ class PAIMON_EXPORT SstFileReader { public: static Result> Create( const std::shared_ptr& input, const BlockHandle& index_block_handle, - const std::shared_ptr& bloom_filter_handle, + const std::optional& bloom_filter_handle, MemorySlice::SliceComparator comparator, const std::shared_ptr& cache_manager, const std::shared_ptr& pool); /// Create an SstFileReader by reading the SortLookupStoreFooter from the given InputStream. /// This method encapsulates the common pattern of reading the footer, parsing it, and /// creating the reader, which avoids code duplication across callers. - static Result> CreateFromStream( + static Result> CreateForSortLookupStore( const std::shared_ptr& input, MemorySlice::SliceComparator comparator, const std::shared_ptr& cache_manager, const std::shared_ptr& pool); - std::unique_ptr CreateIterator(); - /// Create an iterator for the index block. 
std::unique_ptr CreateIndexIterator(); @@ -93,18 +91,4 @@ class PAIMON_EXPORT SstFileReader { MemorySlice::SliceComparator comparator_; }; -class PAIMON_EXPORT SstFileIterator { - public: - SstFileIterator(SstFileReader* reader, std::unique_ptr index_iterator); - - /// Seek to the position of the record whose key is exactly equal to or greater than the - /// specified key. - Status SeekTo(const std::shared_ptr& key); - - private: - SstFileReader* reader_; - std::unique_ptr index_iterator_; - std::unique_ptr data_iterator_; -}; - } // namespace paimon diff --git a/src/paimon/common/sst/sst_file_utils.h b/src/paimon/common/sst/sst_file_utils.h index e84651dbc..37e0a08b9 100644 --- a/src/paimon/common/sst/sst_file_utils.h +++ b/src/paimon/common/sst/sst_file_utils.h @@ -37,12 +37,6 @@ class SstFileUtils { return Status::Invalid( fmt::format("not support compression type code {}", static_cast(v))); } - - static std::string ToHexString(int32_t crc32c) { - std::stringstream sstream; - sstream << std::hex << crc32c; - return sstream.str(); - } }; } // namespace paimon diff --git a/src/paimon/common/sst/sst_file_writer.cpp b/src/paimon/common/sst/sst_file_writer.cpp index fdef142dc..fa9ae435e 100644 --- a/src/paimon/common/sst/sst_file_writer.cpp +++ b/src/paimon/common/sst/sst_file_writer.cpp @@ -67,19 +67,18 @@ Result SstFileWriter::WriteIndexBlock() { return FlushBlockWriter(index_block_writer_.get()); } -Result> SstFileWriter::WriteBloomFilter() { +Result> SstFileWriter::WriteBloomFilter() { if (!bloom_filter_) { - return std::shared_ptr(); + return std::optional(); } auto bf_slice = bloom_filter_->GetBitSet()->ToSlice(); auto data = bf_slice.ReadStringView(); PAIMON_ASSIGN_OR_RAISE(int64_t bloom_filter_pos, out_->GetPos()); - auto handle = std::make_shared(bloom_filter_pos, data.size(), - bloom_filter_->ExpectedEntries()); + BloomFilterHandle handle(bloom_filter_pos, data.size(), bloom_filter_->ExpectedEntries()); PAIMON_RETURN_NOT_OK(WriteBytes(data.data(), 
data.size())); - return handle; + return std::optional(handle); } Status SstFileWriter::WriteSlice(const MemorySlice& slice) { diff --git a/src/paimon/common/sst/sst_file_writer.h b/src/paimon/common/sst/sst_file_writer.h index a04226b3e..7d627f212 100644 --- a/src/paimon/common/sst/sst_file_writer.h +++ b/src/paimon/common/sst/sst_file_writer.h @@ -17,6 +17,7 @@ #pragma once #include +#include #include "paimon/common/compression/block_compression_factory.h" #include "paimon/common/sst/block_handle.h" @@ -54,7 +55,7 @@ class PAIMON_EXPORT SstFileWriter { Result WriteIndexBlock(); - // When bloom-filter is disabled, return nullptr. + // When bloom-filter is disabled, return std::nullopt. - Result> WriteBloomFilter(); + Result> WriteBloomFilter(); Status WriteSlice(const MemorySlice& slice); diff --git a/src/paimon/core/stats/simple_stats.h b/src/paimon/core/stats/simple_stats.h index 0a445218b..cd94fb84e 100644 --- a/src/paimon/core/stats/simple_stats.h +++ b/src/paimon/core/stats/simple_stats.h @@ -22,11 +22,11 @@ #include #include "arrow/api.h" +#include "fmt/format.h" #include "paimon/common/data/binary_array.h" #include "paimon/common/data/binary_row.h" #include "paimon/common/data/binary_row_writer.h" #include "paimon/result.h" - namespace arrow { class ArrayBuilder; } // namespace arrow @@ -71,9 +71,7 @@ class SimpleStats { static Result FromRow(const InternalRow* row, MemoryPool* pool); std::string ToString() const { - std::stringstream ss; - ss << std::hex << static_cast(HashCode()); - return "SimpleStats@" + ss.str(); + return fmt::format("SimpleStats@{:#x}", static_cast(HashCode())); } int32_t HashCode() const; diff --git a/src/paimon/core/table/sink/commit_message_impl_test.cpp b/src/paimon/core/table/sink/commit_message_impl_test.cpp index a42ef7cb6..22d59b1aa 100644 --- a/src/paimon/core/table/sink/commit_message_impl_test.cpp +++ b/src/paimon/core/table/sink/commit_message_impl_test.cpp @@ -54,7 +54,7 @@ TEST(CommitMessageImplTest, TestToString) { auto msg_impl = std::dynamic_pointer_cast(commit_messages[0]); 
ASSERT_TRUE(msg_impl); std::string expect = - "FileCommittable {partition = BinaryRow@9c67b85d, bucket = 0, totalBuckets = null, " + "FileCommittable {partition = BinaryRow@0x9c67b85d, bucket = 0, totalBuckets = null, " "newFilesIncrement = " "DataIncrement {newFiles = data-64d93fc3-eaf2-4253-9cff-a9faa701e207-0.orc, deletedFiles = " ", changelogFiles = , newIndexFiles = , deletedIndexFiles = }, compactIncrement = " diff --git a/test/test_data/global_index/btree/btree_compatibility_data/btree_test_varchar_100.bin b/test/test_data/global_index/btree/btree_compatibility_data/btree_test_string_100.bin similarity index 100% rename from test/test_data/global_index/btree/btree_compatibility_data/btree_test_varchar_100.bin rename to test/test_data/global_index/btree/btree_compatibility_data/btree_test_string_100.bin diff --git a/test/test_data/global_index/btree/btree_compatibility_data/btree_test_varchar_100.bin.meta b/test/test_data/global_index/btree/btree_compatibility_data/btree_test_string_100.bin.meta similarity index 100% rename from test/test_data/global_index/btree/btree_compatibility_data/btree_test_varchar_100.bin.meta rename to test/test_data/global_index/btree/btree_compatibility_data/btree_test_string_100.bin.meta diff --git a/test/test_data/global_index/btree/btree_compatibility_data/btree_test_varchar_100.csv b/test/test_data/global_index/btree/btree_compatibility_data/btree_test_string_100.csv similarity index 100% rename from test/test_data/global_index/btree/btree_compatibility_data/btree_test_varchar_100.csv rename to test/test_data/global_index/btree/btree_compatibility_data/btree_test_string_100.csv diff --git a/test/test_data/global_index/btree/btree_compatibility_data/btree_test_varchar_1000.bin b/test/test_data/global_index/btree/btree_compatibility_data/btree_test_string_1000.bin similarity index 100% rename from test/test_data/global_index/btree/btree_compatibility_data/btree_test_varchar_1000.bin rename to 
test/test_data/global_index/btree/btree_compatibility_data/btree_test_string_1000.bin diff --git a/test/test_data/global_index/btree/btree_compatibility_data/btree_test_varchar_1000.bin.meta b/test/test_data/global_index/btree/btree_compatibility_data/btree_test_string_1000.bin.meta similarity index 100% rename from test/test_data/global_index/btree/btree_compatibility_data/btree_test_varchar_1000.bin.meta rename to test/test_data/global_index/btree/btree_compatibility_data/btree_test_string_1000.bin.meta diff --git a/test/test_data/global_index/btree/btree_compatibility_data/btree_test_varchar_1000.csv b/test/test_data/global_index/btree/btree_compatibility_data/btree_test_string_1000.csv similarity index 100% rename from test/test_data/global_index/btree/btree_compatibility_data/btree_test_varchar_1000.csv rename to test/test_data/global_index/btree/btree_compatibility_data/btree_test_string_1000.csv diff --git a/test/test_data/global_index/btree/btree_compatibility_data/btree_test_varchar_50.bin b/test/test_data/global_index/btree/btree_compatibility_data/btree_test_string_50.bin similarity index 100% rename from test/test_data/global_index/btree/btree_compatibility_data/btree_test_varchar_50.bin rename to test/test_data/global_index/btree/btree_compatibility_data/btree_test_string_50.bin diff --git a/test/test_data/global_index/btree/btree_compatibility_data/btree_test_varchar_50.bin.meta b/test/test_data/global_index/btree/btree_compatibility_data/btree_test_string_50.bin.meta similarity index 100% rename from test/test_data/global_index/btree/btree_compatibility_data/btree_test_varchar_50.bin.meta rename to test/test_data/global_index/btree/btree_compatibility_data/btree_test_string_50.bin.meta diff --git a/test/test_data/global_index/btree/btree_compatibility_data/btree_test_varchar_50.csv b/test/test_data/global_index/btree/btree_compatibility_data/btree_test_string_50.csv similarity index 100% rename from 
test/test_data/global_index/btree/btree_compatibility_data/btree_test_varchar_50.csv rename to test/test_data/global_index/btree/btree_compatibility_data/btree_test_string_50.csv diff --git a/test/test_data/global_index/btree/btree_compatibility_data/btree_test_varchar_500.bin b/test/test_data/global_index/btree/btree_compatibility_data/btree_test_string_500.bin similarity index 100% rename from test/test_data/global_index/btree/btree_compatibility_data/btree_test_varchar_500.bin rename to test/test_data/global_index/btree/btree_compatibility_data/btree_test_string_500.bin diff --git a/test/test_data/global_index/btree/btree_compatibility_data/btree_test_varchar_500.bin.meta b/test/test_data/global_index/btree/btree_compatibility_data/btree_test_string_500.bin.meta similarity index 100% rename from test/test_data/global_index/btree/btree_compatibility_data/btree_test_varchar_500.bin.meta rename to test/test_data/global_index/btree/btree_compatibility_data/btree_test_string_500.bin.meta diff --git a/test/test_data/global_index/btree/btree_compatibility_data/btree_test_varchar_500.csv b/test/test_data/global_index/btree/btree_compatibility_data/btree_test_string_500.csv similarity index 100% rename from test/test_data/global_index/btree/btree_compatibility_data/btree_test_varchar_500.csv rename to test/test_data/global_index/btree/btree_compatibility_data/btree_test_string_500.csv diff --git a/test/test_data/global_index/btree/btree_compatibility_data/btree_test_varchar_5000.bin b/test/test_data/global_index/btree/btree_compatibility_data/btree_test_string_5000.bin similarity index 100% rename from test/test_data/global_index/btree/btree_compatibility_data/btree_test_varchar_5000.bin rename to test/test_data/global_index/btree/btree_compatibility_data/btree_test_string_5000.bin diff --git a/test/test_data/global_index/btree/btree_compatibility_data/btree_test_varchar_5000.bin.meta 
b/test/test_data/global_index/btree/btree_compatibility_data/btree_test_string_5000.bin.meta similarity index 100% rename from test/test_data/global_index/btree/btree_compatibility_data/btree_test_varchar_5000.bin.meta rename to test/test_data/global_index/btree/btree_compatibility_data/btree_test_string_5000.bin.meta diff --git a/test/test_data/global_index/btree/btree_compatibility_data/btree_test_varchar_5000.csv b/test/test_data/global_index/btree/btree_compatibility_data/btree_test_string_5000.csv similarity index 100% rename from test/test_data/global_index/btree/btree_compatibility_data/btree_test_varchar_5000.csv rename to test/test_data/global_index/btree/btree_compatibility_data/btree_test_string_5000.csv