Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 58 additions & 0 deletions include/paimon/utils/row_range_index.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
/*
* Copyright 2026-present Alibaba Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <cstdint>
#include <vector>

#include "paimon/result.h"
#include "paimon/utils/range.h"
#include "paimon/visibility.h"

namespace paimon {

/// Index for row ranges. Provides efficient intersection queries over a sorted, non-overlapping
/// collection of ranges using binary search.
///
/// Instances are obtained through Create(); the constructor is private.
class PAIMON_EXPORT RowRangeIndex {
public:
/// Creates a RowRangeIndex from the given ranges. The ranges will be sorted and merged
/// (overlapping and adjacent ranges are combined) before indexing.
///
/// @param ranges Input ranges to index.
/// @return The constructed index, or an invalid status when `ranges` is empty.
static Result<RowRangeIndex> Create(const std::vector<Range>& ranges);

/// Returns the sorted, non-overlapping ranges held by this index.
/// The reference refers to storage owned by this index.
const std::vector<Range>& Ranges() const;

/// Returns true if any range in this index intersects with the interval [start, end].
/// Both `start` and `end` are treated as inclusive bounds.
bool Intersects(int64_t start, int64_t end) const;

/// Returns the sub-ranges of this index that intersect with the interval [start, end].
/// Each returned range is clipped to lie within [start, end].
/// The result is empty when no range of this index intersects the interval.
std::vector<Range> IntersectedRanges(int64_t start, int64_t end) const;

private:
/// Takes ownership of `ranges` and caches the `from` / `to` bound of every range.
/// NOTE(review): presumably expects `ranges` to be already sorted and merged, as Create()
/// passes it the output of Range::SortAndMergeOverlap -- confirm before adding other callers.
explicit RowRangeIndex(std::vector<Range> ranges);

/// Finds the first index in `ends_` whose value is >= target (lower bound).
/// Returns `ends_.size()` when every stored end is smaller than `target`.
int32_t LowerBound(int64_t target) const;

private:
/// The indexed ranges, in the order they were handed to the constructor.
std::vector<Range> ranges_;
/// `from` of each entry in `ranges_`, at the same position.
std::vector<int64_t> starts_;
/// `to` of each entry in `ranges_`, at the same position; searched by LowerBound().
std::vector<int64_t> ends_;
};

} // namespace paimon
2 changes: 2 additions & 0 deletions src/paimon/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,7 @@ set(PAIMON_COMMON_SRCS
common/utils/byte_range_combiner.cpp
common/utils/roaring_bitmap32.cpp
common/utils/roaring_bitmap64.cpp
common/utils/row_range_index.cpp
common/utils/status.cpp
Comment thread
lxy-9602 marked this conversation as resolved.
common/utils/string_utils.cpp)

Expand Down Expand Up @@ -453,6 +454,7 @@ if(PAIMON_BUILD_TESTS)
common/types/data_type_json_parser_test.cpp
common/types/row_kind_test.cpp
common/types/data_type_test.cpp
common/utils/row_range_index_test.cpp
common/utils/var_length_int_utils_test.cpp
common/utils/arrow/arrow_utils_test.cpp
common/utils/arrow/arrow_stream_adapter_test.cpp
Expand Down
100 changes: 100 additions & 0 deletions src/paimon/common/utils/row_range_index.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
/*
* Copyright 2026-present Alibaba Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "paimon/utils/row_range_index.h"

#include <algorithm>
#include <cassert>
Comment thread
lxy-9602 marked this conversation as resolved.

namespace paimon {

// Stores `ranges` and mirrors each range's `from` / `to` bound into the parallel
// `starts_` / `ends_` arrays, so lookups can work on plain integer vectors.
RowRangeIndex::RowRangeIndex(std::vector<Range> ranges) : ranges_(std::move(ranges)) {
    const auto count = ranges_.size();
    starts_.reserve(count);
    ends_.reserve(count);
    for (auto it = ranges_.cbegin(); it != ranges_.cend(); ++it) {
        starts_.push_back(it->from);
        ends_.push_back(it->to);
    }
}

// Factory for RowRangeIndex. An empty input is rejected with an invalid status; otherwise
// the input is passed through Range::SortAndMergeOverlap (with adjacent merging enabled)
// and the outcome is wrapped in a new index.
Result<RowRangeIndex> RowRangeIndex::Create(const std::vector<Range>& ranges) {
    const bool has_input = !ranges.empty();
    if (has_input) {
        return RowRangeIndex(Range::SortAndMergeOverlap(ranges, /*adjacent=*/true));
    }
    return Status::Invalid("Ranges cannot be empty in RowRangeIndex");
}

// Read-only access to the ranges stored in this index. The returned reference points at
// the member vector itself; no copy is made.
const std::vector<Range>& RowRangeIndex::Ranges() const {
return ranges_;
}

// Reports whether at least one indexed range overlaps the inclusive interval [start, end].
//
// The first range whose upper bound is >= `start` is the only candidate that needs to be
// inspected: every earlier range ends before `start`, and every later range begins after
// the candidate does. The candidate overlaps iff it begins at or before `end`.
bool RowRangeIndex::Intersects(int64_t start, int64_t end) const {
    // An inverted interval contains no rows, so it cannot intersect anything. Without this
    // guard a query such as (7, 6) against the range [5, 10] would be reported as a hit.
    if (start > end) {
        return false;
    }

    const int32_t candidate = LowerBound(start);
    return candidate < static_cast<int32_t>(starts_.size()) && starts_[candidate] <= end;
}

std::vector<Range> RowRangeIndex::IntersectedRanges(int64_t start, int64_t end) const {
Comment thread
lxy-9602 marked this conversation as resolved.
int32_t left = LowerBound(start);
if (left >= static_cast<int32_t>(ranges_.size())) {
return {};
}

int32_t right = LowerBound(end);
if (right >= static_cast<int32_t>(ranges_.size())) {
right = static_cast<int32_t>(ranges_.size()) - 1;
}

Comment thread
lxy-9602 marked this conversation as resolved.
if (starts_[left] > end) {
return {};
}

std::vector<Range> expected;

// Add the first intersecting range, clipped to [start, end].
const Range& first_range = ranges_[left];
expected.emplace_back(std::max(start, first_range.from), std::min(end, first_range.to));

// Add all fully contained ranges between first and last.
for (int32_t i = left + 1; i < right; ++i) {
expected.push_back(ranges_[i]);
}

// Add the last intersecting range (if different from the first), clipped to [start, end].
if (right != left) {
const Range& last_range = ranges_[right];
if (last_range.from <= end) {
expected.emplace_back(std::max(start, last_range.from), std::min(end, last_range.to));
}
}

return expected;
Comment thread
lxy-9602 marked this conversation as resolved.
}

// Returns the position of the first element of `ends_` that is >= `target`, or
// `ends_.size()` when every element is smaller.
//
// Delegates to std::lower_bound, which performs the binary search; this relies on `ends_`
// being in ascending order.
int32_t RowRangeIndex::LowerBound(int64_t target) const {
    const auto position = std::lower_bound(ends_.begin(), ends_.end(), target);
    return static_cast<int32_t>(position - ends_.begin());
}

} // namespace paimon
Loading
Loading