Skip to content

Commit

Permalink
Fix lead/lag for zero offset (#9026)
Browse files Browse the repository at this point in the history
Summary:
The lead and lag return incorrect result if the offset is zero and ignore nulls is true. The reason is that, the `rowNumberIgnoreNull` assumes the offset is bigger than zero.

This pr does two changes:
- if the offset is constant, then make a fast path to return the rowNumbers
- if the offset is not constant, then add a pre-condition check if the offset is 0 and return current row

Pull Request resolved: #9026

Reviewed By: Yuhta

Differential Revision: D55158362

Pulled By: kagamiori

fbshipit-source-id: 28082a1fee98c372f672bbd64db10af40fa685c8
  • Loading branch information
ulysses-you authored and facebook-github-bot committed Mar 21, 2024
1 parent 9b4a210 commit c4d3f6f
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 0 deletions.
15 changes: 15 additions & 0 deletions velox/functions/prestosql/window/LeadLag.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,9 @@ class LeadLagFunction : public exec::WindowFunction {
constantOffset->as<ConstantVector<int64_t>>()->valueAt(0);
VELOX_USER_CHECK_GE(
constantOffset_.value(), 0, "Offset must be at least 0");
if (constantOffset_.value() == 0) {
isConstantOffsetZero_ = true;
}
}
} else {
offsetIndex_ = offsetArg.index.value();
Expand Down Expand Up @@ -144,6 +147,11 @@ class LeadLagFunction : public exec::WindowFunction {
std::fill(rowNumbers_.begin(), rowNumbers_.end(), kNullRow);
return;
}
// If the offset is 0 then it means always return the current row.
if (isConstantOffsetZero_) {
std::iota(rowNumbers_.begin(), rowNumbers_.end(), partitionOffset_);
return;
}

auto constantOffsetValue = constantOffset_.value();
// Set row number to kDefaultValueRow for out of range offset.
Expand Down Expand Up @@ -175,6 +183,11 @@ class LeadLagFunction : public exec::WindowFunction {
rowNumbers_[i] = kDefaultValueRow;
continue;
}
// If the offset is 0 then it means always return the current row.
if (offset == 0) {
rowNumbers_[i] = partitionOffset_ + i;
continue;
}

if constexpr (isLag) {
if constexpr (ignoreNulls) {
Expand Down Expand Up @@ -204,6 +217,7 @@ class LeadLagFunction : public exec::WindowFunction {
}
}

// This method assumes the input offset > 0
vector_size_t rowNumberIgnoreNull(
const uint64_t* rawNulls,
vector_size_t offset,
Expand Down Expand Up @@ -282,6 +296,7 @@ class LeadLagFunction : public exec::WindowFunction {
// Value of the 'offset' if constant.
std::optional<int64_t> constantOffset_;
bool isConstantOffsetNull_ = false;
bool isConstantOffsetZero_ = false;

// Index of the 'default_value' argument if default value is specified and not
// constant.
Expand Down
29 changes: 29 additions & 0 deletions velox/functions/prestosql/window/tests/LeadLagTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,35 @@ TEST_P(LeadLagTest, ignoreNullsInt64Offset) {
assertResults(fn(fmt::format("c0, {} IGNORE NULLS", largeOffset)));
}

TEST_P(LeadLagTest, zeroOffset) {
auto data = makeRowVector({
// Values with null.
makeNullableFlatVector<int32_t>(
{1, std::nullopt, 2, std::nullopt, std::nullopt}),
// Values without null.
makeFlatVector<int32_t>({1, 2, 3, 4, 5}),
// Offsets.
makeFlatVector<int64_t>({0, 0, 0, 0, 0}),
});
createDuckDbTable({data});

auto assertResults = [&](const std::string& functionSql) {
auto queryInfo = buildWindowQuery({data}, functionSql, "order by c0", "");
SCOPED_TRACE(queryInfo.functionSql);
assertQuery(queryInfo.planNode, queryInfo.querySql);
};

assertResults(fn("c0, 0"));
assertResults(fn("c0, c2"));
assertResults(fn("c0, 0 IGNORE NULLS"));
assertResults(fn("c0, c2 IGNORE NULLS"));

assertResults(fn("c1, 0"));
assertResults(fn("c1, c2"));
assertResults(fn("c1, 0 IGNORE NULLS"));
assertResults(fn("c1, c2 IGNORE NULLS"));
}

TEST_P(LeadLagTest, defaultValue) {
auto data = makeRowVector({
// Values.
Expand Down

0 comments on commit c4d3f6f

Please sign in to comment.