Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 70 additions & 0 deletions include/paimon/utils/range.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

#pragma once
#include <cstdint>
#include <optional>
#include <string>
#include <vector>

#include "paimon/visibility.h"

namespace paimon {
/// A closed interval of 64-bit integers: both `from` and `to` are inclusive.
struct PAIMON_EXPORT Range {
/// Builds the range [_from, _to]. The `_from <= _to` precondition is checked only with
/// `assert`, so it is not enforced in builds that define NDEBUG.
Range(int64_t _from, int64_t _to);

/// Returns the number of integers in the range [from, to], computed as `to - from + 1`.
/// NOTE(review): the arithmetic is plain int64_t, so a span wider than int64_t can represent
/// would overflow.
int64_t Count() const;

/// Computes the intersection of two ranges.
/// @return The overlapping range, or std::nullopt when the two ranges share no integer.
static std::optional<Range> Intersection(const Range& left, const Range& right);

/// Checks whether two ranges have any overlap, i.e. share at least one integer.
static bool HasIntersection(const Range& left, const Range& right);

/// Sorts a list of ranges by `from`, then merges overlapping or adjacent ranges.
/// @param ranges Input vector of ranges to merge.
/// @param adjacent If true, also merges ranges that are adjacent (e.g., [1,3] and [4,5] become
/// [1,5]).
/// If false, only merges strictly overlapping ranges.
/// @return A new vector of non-overlapping, sorted ranges. An input holding fewer than two
/// ranges is returned unchanged.
static std::vector<Range> SortAndMergeOverlap(const std::vector<Range>& ranges, bool adjacent);

/// Computes the set intersection of two collections of disjoint, sorted ranges.
/// Both inputs are expected to already be sorted and free of overlaps; they are not
/// normalized here.
static std::vector<Range> And(const std::vector<Range>& left, const std::vector<Range>& right);

/// Excludes the given ranges from this range and returns the remaining ranges.
///
/// For example, if this range is [0, 10000] and ranges to exclude are [1000, 2000], [3000,
/// 4000], [5000, 6000], then the result is [0, 999], [2001, 2999], [4001, 4999], [6001, 10000].
///
/// @param ranges The ranges to exclude (can be unsorted and overlapping).
/// @return The remaining ranges after exclusion, in ascending order. When `ranges` is empty
/// the result is a single copy of this range.
std::vector<Range> Exclude(const std::vector<Range>& ranges) const;

/// Two ranges are equal when both endpoints match.
bool operator==(const Range& other) const;
/// Orders ranges by `from`, breaking ties by `to`.
bool operator<(const Range& other) const;

/// Formats the range as "[from, to]".
std::string ToString() const;

/// Inclusive lower bound.
int64_t from;
/// Inclusive upper bound.
int64_t to;
};

} // namespace paimon
61 changes: 61 additions & 0 deletions include/paimon/utils/row_range_index.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

#pragma once

#include <cstdint>
#include <vector>

#include "paimon/result.h"
#include "paimon/utils/range.h"
#include "paimon/visibility.h"

namespace paimon {

/// Index for row ranges. Provides efficient intersection queries over a sorted, non-overlapping
/// collection of ranges using binary search.
class PAIMON_EXPORT RowRangeIndex {
public:
/// Creates a RowRangeIndex from the given ranges. The ranges will be sorted and merged
/// (overlapping and adjacent ranges are combined) before indexing.
/// NOTE(review): the conditions under which the returned Result carries an error are not
/// visible from this header; confirm against the implementation.
static Result<RowRangeIndex> Create(const std::vector<Range>& ranges);

/// Returns the sorted, non-overlapping ranges held by this index.
const std::vector<Range>& Ranges() const;

/// Returns true if any range in this index intersects with the interval [start, end].
/// Both `start` and `end` are inclusive.
bool Intersects(int64_t start, int64_t end) const;

/// Returns the sub-ranges of this index that intersect with the interval [start, end].
/// Each returned range is clipped to lie within [start, end].
std::vector<Range> IntersectedRanges(int64_t start, int64_t end) const;

private:
/// Private so that instances are obtained through Create().
explicit RowRangeIndex(std::vector<Range> ranges);

/// Finds the first index in `ends_` whose value is >= target (lower bound).
int32_t LowerBound(int64_t target) const;

private:
/// The ranges held by the index, as exposed by Ranges().
std::vector<Range> ranges_;
/// Presumably the `from` value of each entry of `ranges_`, in the same order; TODO confirm.
std::vector<int64_t> starts_;
/// Presumably the `to` value of each entry of `ranges_`; this is the array searched by
/// LowerBound(). TODO confirm.
std::vector<int64_t> ends_;
};

} // namespace paimon
162 changes: 162 additions & 0 deletions src/paimon/common/utils/range.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
#include "paimon/utils/range.h"

#include <algorithm>
#include <cassert>

#include "fmt/format.h"
namespace paimon {
Range::Range(int64_t _from, int64_t _to) : from(_from), to(_to) {
assert(from <= to);
}

// Number of integers covered by [from, to]. Both bounds are inclusive, hence the +1.
int64_t Range::Count() const {
    const int64_t distance = to - from;
    return distance + 1;
}

// Sorts `ranges` by `from` and coalesces entries that overlap; when `adjacent` is true,
// entries that merely touch (e.g. [1,3] and [4,5]) are coalesced as well.
//
// @param ranges   Ranges to merge; may be unsorted and overlapping.
// @param adjacent Whether touching-but-not-overlapping ranges are merged too.
// @return Ranges in ascending order of `from`, none of which overlap. An input holding
//         fewer than two ranges is returned unchanged.
std::vector<Range> Range::SortAndMergeOverlap(const std::vector<Range>& ranges, bool adjacent) {
    if (ranges.size() <= 1) {
        return ranges;
    }

    std::vector<Range> sorted_ranges = ranges;
    std::sort(sorted_ranges.begin(), sorted_ranges.end(),
              [](const Range& left, const Range& right) { return left.from < right.from; });

    std::vector<Range> results;
    Range current = sorted_ranges[0];

    for (size_t i = 1; i < sorted_ranges.size(); ++i) {
        const Range& next = sorted_ranges[i];
        const bool overlaps = next.from <= current.to;
        // Adjacency is tested as `next.from - 1 == current.to` rather than
        // `current.to + 1 >= next.from`: the latter is signed overflow (undefined behaviour)
        // when current.to is the largest int64_t. The subtraction is evaluated only when
        // next.from > current.to, so it cannot underflow.
        const bool touches = adjacent && !overlaps && next.from - 1 == current.to;
        if (overlaps || touches) {
            // Merge: extend the running range to cover `next`.
            current.to = std::max(current.to, next.to);
        } else {
            // Gap between the two: the running range is final, start a new one.
            results.push_back(current);
            current = next;
        }
    }
    // The running range is never flushed inside the loop for the last group.
    results.push_back(current);
    return results;
}

std::vector<Range> Range::And(const std::vector<Range>& left, const std::vector<Range>& right) {
if (left.empty() || right.empty()) {
return {};
}
std::vector<Range> results;
size_t i = 0;
size_t j = 0;

while (i < left.size() && j < right.size()) {
const Range& lhs = left[i];
const Range& rhs = right[j];

// Compute intersection of current ranges
std::optional<Range> intersect = Range::Intersection(lhs, rhs);
if (intersect) {
results.push_back(intersect.value());
}

// Advance the pointer of the range that ends earlier
if (lhs.to <= rhs.to) {
i++;
} else {
j++;
}
}

return results;
}

// Returns the overlap of `left` and `right`, or std::nullopt when they share no integer.
std::optional<Range> Range::Intersection(const Range& left, const Range& right) {
    const int64_t lower = std::max(left.from, right.from);
    const int64_t upper = std::min(left.to, right.to);
    if (lower <= upper) {
        return Range(lower, upper);
    }
    return std::nullopt;
}

// True when `left` and `right` share at least one integer: the larger of the two lower
// bounds must not exceed the smaller of the two upper bounds.
bool Range::HasIntersection(const Range& left, const Range& right) {
    const int64_t lower = std::max(left.from, right.from);
    const int64_t upper = std::min(left.to, right.to);
    return !(lower > upper);
}

std::vector<Range> Range::Exclude(const std::vector<Range>& ranges) const {
if (ranges.empty()) {
return {*this};
}

// Sort ranges by from
std::vector<Range> sorted = ranges;
std::sort(sorted.begin(), sorted.end(),
[](const Range& left, const Range& right) { return left.from < right.from; });

std::vector<Range> result;
int64_t current = from;

for (const auto& exclude : sorted) {
// Compute intersection with the current range
auto intersect = Range::Intersection(Range(current, to), exclude);
if (!intersect) {
continue;
}
// Add the part before the intersection (if any)
if (current < intersect.value().from) {
result.emplace_back(current, intersect.value().from - 1);
}
// Move current position past the intersection
current = intersect.value().to + 1;
if (current > to) {
break;
}
}
// Add the remaining part after all exclusions (if any)
if (current <= to) {
result.emplace_back(current, to);
}

return result;
}

// Ranges compare equal when both endpoints match; the identity check is only a shortcut.
bool Range::operator==(const Range& other) const {
    return this == &other || (from == other.from && to == other.to);
}

// Lexicographic order: compare `from` first and fall back to `to` on a tie.
bool Range::operator<(const Range& other) const {
    return from != other.from ? from < other.from : to < other.to;
}

// Renders the range as "[from, to]".
std::string Range::ToString() const {
    std::string text = fmt::format("[{}, {}]", from, to);
    return text;
}

} // namespace paimon
Loading