diff --git a/pinot-query-planner/src/main/java/org/apache/pinot/calcite/sql/fun/PinotOperatorTable.java b/pinot-query-planner/src/main/java/org/apache/pinot/calcite/sql/fun/PinotOperatorTable.java index 283b644c8410..ebac46a9a9f9 100644 --- a/pinot-query-planner/src/main/java/org/apache/pinot/calcite/sql/fun/PinotOperatorTable.java +++ b/pinot-query-planner/src/main/java/org/apache/pinot/calcite/sql/fun/PinotOperatorTable.java @@ -38,7 +38,6 @@ import org.apache.calcite.sql.SqlPostfixOperator; import org.apache.calcite.sql.SqlSplittableAggFunction; import org.apache.calcite.sql.SqlSyntax; -import org.apache.calcite.sql.fun.SqlLeadLagAggFunction; import org.apache.calcite.sql.fun.SqlMonotonicBinaryOperator; import org.apache.calcite.sql.fun.SqlNtileAggFunction; import org.apache.calcite.sql.fun.SqlStdOperatorTable; @@ -214,10 +213,8 @@ public static PinotOperatorTable instance(boolean nullHandlingEnabled) { // WINDOW Functions (non-aggregate) SqlStdOperatorTable.LAST_VALUE, SqlStdOperatorTable.FIRST_VALUE, - // TODO: Replace these with SqlStdOperatorTable.LEAD and SqlStdOperatorTable.LAG when the function implementations - // are updated to support the IGNORE NULLS option. - PinotLeadWindowFunction.INSTANCE, - PinotLagWindowFunction.INSTANCE, + SqlStdOperatorTable.LEAD, + SqlStdOperatorTable.LAG, // SPECIAL OPERATORS SqlStdOperatorTable.IGNORE_NULLS, @@ -448,32 +445,6 @@ public List getOperatorList() { return _operatorList; } - private static class PinotLeadWindowFunction extends SqlLeadLagAggFunction { - static final SqlOperator INSTANCE = new PinotLeadWindowFunction(); - - public PinotLeadWindowFunction() { - super(SqlKind.LEAD); - } - - @Override - public boolean allowsNullTreatment() { - return false; - } - } - - private static class PinotLagWindowFunction extends SqlLeadLagAggFunction { - static final SqlOperator INSTANCE = new PinotLagWindowFunction(); - - public PinotLagWindowFunction() { - super(SqlKind.LAG); - } - - @Override - public boolean allowsNullTreatment() { - return false; - } - } - private static final class PinotNtileWindowFunction extends SqlNtileAggFunction { static final SqlOperator INSTANCE = new PinotNtileWindowFunction(); diff --git a/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/operator/window/WindowFunction.java b/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/operator/window/WindowFunction.java index 19aabcfe9dd4..55c1970aeae6 100644 --- a/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/operator/window/WindowFunction.java +++ b/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/operator/window/WindowFunction.java @@ -19,6 +19,7 @@ package org.apache.pinot.query.runtime.operator.window; import java.util.List; +import javax.annotation.Nullable; import org.apache.calcite.rel.RelFieldCollation; import org.apache.pinot.common.utils.DataSchema; import org.apache.pinot.query.planner.logical.RexExpression; @@ -63,6 +64,7 @@ public WindowFunction(RexExpression.FunctionCall aggCall, DataSchema inputSchema */ public abstract List processRows(List rows); + @Nullable protected Object extractValueFromRow(Object[] row) { return _inputRef == -1 ? _literal : (row == null ? null : row[_inputRef]); } diff --git a/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/operator/window/value/LagValueWindowFunction.java b/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/operator/window/value/LagValueWindowFunction.java index 1e583bf11c6f..e11848538107 100644 --- a/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/operator/window/value/LagValueWindowFunction.java +++ b/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/operator/window/value/LagValueWindowFunction.java @@ -19,6 +19,7 @@ package org.apache.pinot.query.runtime.operator.window.value; import com.google.common.base.Preconditions; +import java.util.ArrayDeque; import java.util.Arrays; import java.util.List; import org.apache.calcite.rel.RelFieldCollation; @@ -28,9 +29,10 @@ import org.apache.pinot.query.runtime.operator.window.WindowFrame; -/** - * The LAG window function doesn't allow custom window frames (and this is enforced by Calcite). - */ +/// Window function that returns the value of a column from a preceding row within the partition. +/// Supports an optional offset (default 1), an optional default value for when no row exists at +/// that offset, and IGNORE NULLS mode which skips null values when scanning backward. +/// Custom window frames are not allowed (enforced by Calcite). public class LagValueWindowFunction extends ValueWindowFunction { private final int _offset; private final Object _defaultValue; @@ -75,6 +77,9 @@ public LagValueWindowFunction(RexExpression.FunctionCall aggCall, DataSchema inp @Override public List processRows(List rows) { + if (_ignoreNulls) { + return processRowsIgnoreNulls(rows); + } int numRows = rows.size(); Object[] result = new Object[numRows]; if (_defaultValue != null) { @@ -88,4 +93,28 @@ public List processRows(List rows) { } return Arrays.asList(result); } + + /** + * LAG with IGNORE NULLS: for each row, find the offset-th non-null value scanning backward. + * Uses a bounded deque of size {@code _offset} for O(N) time and O(offset) memory. + * Scans left-to-right, maintaining a sliding window of preceding non-null values. The oldest + * element in the deque (peekFirst) is always the offset-th non-null value behind the current + * row. + */ + private List processRowsIgnoreNulls(List rows) { + int numRows = rows.size(); + Object[] result = new Object[numRows]; + ArrayDeque window = new ArrayDeque<>(_offset); + for (int i = 0; i < numRows; i++) { + result[i] = (window.size() == _offset) ? window.peekFirst() : _defaultValue; + Object val = extractValueFromRow(rows.get(i)); + if (val != null) { + window.addLast(val); + if (window.size() > _offset) { + window.pollFirst(); + } + } + } + return Arrays.asList(result); + } } diff --git a/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/operator/window/value/LeadValueWindowFunction.java b/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/operator/window/value/LeadValueWindowFunction.java index 4ec6c641e4b4..a4981cfc4dbb 100644 --- a/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/operator/window/value/LeadValueWindowFunction.java +++ b/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/operator/window/value/LeadValueWindowFunction.java @@ -19,6 +19,7 @@ package org.apache.pinot.query.runtime.operator.window.value; import com.google.common.base.Preconditions; +import java.util.ArrayDeque; import java.util.Arrays; import java.util.List; import org.apache.calcite.rel.RelFieldCollation; @@ -28,9 +29,10 @@ import org.apache.pinot.query.runtime.operator.window.WindowFrame; -/** - * The LAG window function doesn't allow custom window frames (and this is enforced by Calcite). - */ +/// Window function that returns the value of a column from a subsequent row within the partition. +/// Supports an optional offset (default 1), an optional default value for when no row exists at +/// that offset, and IGNORE NULLS mode which skips null values when scanning forward. +/// Custom window frames are not allowed (enforced by Calcite). public class LeadValueWindowFunction extends ValueWindowFunction { private final int _offset; @@ -75,6 +77,9 @@ public LeadValueWindowFunction(RexExpression.FunctionCall aggCall, DataSchema in @Override public List processRows(List rows) { + if (_ignoreNulls) { + return processRowsIgnoreNulls(rows); + } int numRows = rows.size(); Object[] result = new Object[numRows]; for (int i = 0; i < numRows - _offset; i++) { @@ -88,4 +93,28 @@ public List processRows(List rows) { } return Arrays.asList(result); } + + /** + * LEAD with IGNORE NULLS: for each row, find the offset-th non-null value scanning forward. + * Uses a bounded deque of size {@code _offset} for O(N) time and O(offset) memory. + * Scans right-to-left, maintaining a sliding window of upcoming non-null values. The oldest + * element in the deque (peekFirst) is always the offset-th non-null value ahead of the current + * row. + */ + private List processRowsIgnoreNulls(List rows) { + int numRows = rows.size(); + Object[] result = new Object[numRows]; + ArrayDeque window = new ArrayDeque<>(_offset); + for (int i = numRows - 1; i >= 0; i--) { + result[i] = (window.size() == _offset) ? window.peekFirst() : _defaultValue; + Object val = extractValueFromRow(rows.get(i)); + if (val != null) { + window.addLast(val); + if (window.size() > _offset) { + window.pollFirst(); + } + } + } + return Arrays.asList(result); + } } diff --git a/pinot-query-runtime/src/test/java/org/apache/pinot/query/runtime/operator/WindowAggregateOperatorTest.java b/pinot-query-runtime/src/test/java/org/apache/pinot/query/runtime/operator/WindowAggregateOperatorTest.java index 0ab3cab16ae6..326b7a05c153 100644 --- a/pinot-query-runtime/src/test/java/org/apache/pinot/query/runtime/operator/WindowAggregateOperatorTest.java +++ b/pinot-query-runtime/src/test/java/org/apache/pinot/query/runtime/operator/WindowAggregateOperatorTest.java @@ -705,6 +705,302 @@ public void testLeadLagWindowFunctionWithOffsetGreaterThanNumberOfRows() { assertTrue(operator.nextBlock().isSuccess(), "Second block is EOS (done processing)"); } + @Test + public void testLeadIgnoreNullsWithDefaultOffset() { + // Given: LEAD(value) IGNORE NULLS - should find next non-null value + DataSchema inputSchema = new DataSchema(new String[]{"group", "value"}, new ColumnDataType[]{INT, INT}); + MultiStageOperator input = new BlockListMultiStageOperator.Builder(inputSchema) + .addRow(1, null) + .addRow(1, null) + .addRow(1, 10) + .addRow(1, 20) + .addRow(1, null) + .addRow(2, 10) + .addRow(2, null) + .addRow(2, 20) + .addRow(3, null) + .addRow(3, null) + .buildWithEos(); + DataSchema resultSchema = + new DataSchema(new String[]{"group", "value", "lead"}, new ColumnDataType[]{INT, INT, INT}); + List keys = List.of(0); + List collations = + List.of(new RelFieldCollation(1, RelFieldCollation.Direction.ASCENDING, RelFieldCollation.NullDirection.LAST)); + List aggCalls = List.of( + new RexExpression.FunctionCall(ColumnDataType.INT, SqlKind.LEAD.name(), + List.of(new RexExpression.InputRef(1)), false, true)); + WindowAggregateOperator operator = + getOperator(inputSchema, resultSchema, keys, collations, aggCalls, WindowNode.WindowFrameType.RANGE, + Integer.MIN_VALUE, 0, input); + + // When: + List resultRows = ((MseBlock.Data) operator.nextBlock()).asRowHeap().getRows(); + + // Then: + verifyResultRows(resultRows, keys, Map.of( + 1, List.of( + new Object[]{1, null, 10}, + new Object[]{1, null, 10}, + new Object[]{1, 10, 20}, + new Object[]{1, 20, null}, + new Object[]{1, null, null}), + 2, List.of( + new Object[]{2, 10, 20}, + new Object[]{2, null, 20}, + new Object[]{2, 20, null}), + 3, List.of( + new Object[]{3, null, null}, + new Object[]{3, null, null}) + )); + assertTrue(operator.nextBlock().isSuccess(), "Second block is EOS (done processing)"); + } + + @Test + public void testLeadIgnoreNullsWithOffset() { + // Given: LEAD(value, 2) IGNORE NULLS - should find 2nd non-null value ahead + DataSchema inputSchema = new DataSchema(new String[]{"group", "value"}, new ColumnDataType[]{INT, INT}); + MultiStageOperator input = new BlockListMultiStageOperator.Builder(inputSchema) + .addRow(1, 10) + .addRow(1, null) + .addRow(1, 20) + .addRow(1, null) + .addRow(1, 30) + .addRow(1, null) + .buildWithEos(); + DataSchema resultSchema = + new DataSchema(new String[]{"group", "value", "lead"}, new ColumnDataType[]{INT, INT, INT}); + List keys = List.of(0); + List collations = + List.of(new RelFieldCollation(1, RelFieldCollation.Direction.ASCENDING, RelFieldCollation.NullDirection.LAST)); + List aggCalls = List.of( + new RexExpression.FunctionCall(ColumnDataType.INT, SqlKind.LEAD.name(), + List.of(new RexExpression.InputRef(1), new RexExpression.Literal(ColumnDataType.INT, 2)), false, true)); + WindowAggregateOperator operator = + getOperator(inputSchema, resultSchema, keys, collations, aggCalls, WindowNode.WindowFrameType.RANGE, + Integer.MIN_VALUE, 0, input); + + // When: + List resultRows = ((MseBlock.Data) operator.nextBlock()).asRowHeap().getRows(); + + // Then: + verifyResultRows(resultRows, keys, Map.of( + 1, List.of( + new Object[]{1, 10, 30}, + new Object[]{1, null, 30}, + new Object[]{1, 20, null}, + new Object[]{1, null, null}, + new Object[]{1, 30, null}, + new Object[]{1, null, null}) + )); + assertTrue(operator.nextBlock().isSuccess(), "Second block is EOS (done processing)"); + } + + @Test + public void testLeadIgnoreNullsWithOffsetAndDefault() { + // Given: LEAD(value, 2, 99) IGNORE NULLS - 2nd non-null value ahead, or 99 if not enough non-nulls + DataSchema inputSchema = new DataSchema(new String[]{"group", "value"}, new ColumnDataType[]{INT, INT}); + MultiStageOperator input = new BlockListMultiStageOperator.Builder(inputSchema) + .addRow(1, 10) + .addRow(1, null) + .addRow(1, 20) + .addRow(1, null) + .addRow(1, 30) + .addRow(1, null) + .buildWithEos(); + DataSchema resultSchema = + new DataSchema(new String[]{"group", "value", "lead"}, new ColumnDataType[]{INT, INT, INT}); + List keys = List.of(0); + List collations = + List.of(new RelFieldCollation(1, RelFieldCollation.Direction.ASCENDING, RelFieldCollation.NullDirection.LAST)); + List aggCalls = List.of( + new RexExpression.FunctionCall(ColumnDataType.INT, SqlKind.LEAD.name(), + List.of(new RexExpression.InputRef(1), new RexExpression.Literal(ColumnDataType.INT, 2), + new RexExpression.Literal(ColumnDataType.INT, 99)), false, true)); + WindowAggregateOperator operator = + getOperator(inputSchema, resultSchema, keys, collations, aggCalls, WindowNode.WindowFrameType.RANGE, + Integer.MIN_VALUE, 0, input); + + // When: + List resultRows = ((MseBlock.Data) operator.nextBlock()).asRowHeap().getRows(); + + // Then: + verifyResultRows(resultRows, keys, Map.of( + 1, List.of( + new Object[]{1, 10, 30}, + new Object[]{1, null, 30}, + new Object[]{1, 20, 99}, + new Object[]{1, null, 99}, + new Object[]{1, 30, 99}, + new Object[]{1, null, 99}) + )); + assertTrue(operator.nextBlock().isSuccess(), "Second block is EOS (done processing)"); + } + + @Test + public void testLagIgnoreNullsWithDefaultOffset() { + // Given: LAG(value) IGNORE NULLS - should find previous non-null value + DataSchema inputSchema = new DataSchema(new String[]{"group", "value"}, new ColumnDataType[]{INT, INT}); + MultiStageOperator input = new BlockListMultiStageOperator.Builder(inputSchema) + .addRow(1, null) + .addRow(1, null) + .addRow(1, 10) + .addRow(1, 20) + .addRow(1, null) + .addRow(2, 10) + .addRow(2, null) + .addRow(2, 20) + .addRow(3, null) + .addRow(3, null) + .buildWithEos(); + DataSchema resultSchema = + new DataSchema(new String[]{"group", "value", "lag"}, new ColumnDataType[]{INT, INT, INT}); + List keys = List.of(0); + List collations = + List.of(new RelFieldCollation(1, RelFieldCollation.Direction.ASCENDING, RelFieldCollation.NullDirection.LAST)); + List aggCalls = List.of( + new RexExpression.FunctionCall(ColumnDataType.INT, SqlKind.LAG.name(), + List.of(new RexExpression.InputRef(1)), false, true)); + WindowAggregateOperator operator = + getOperator(inputSchema, resultSchema, keys, collations, aggCalls, WindowNode.WindowFrameType.RANGE, + Integer.MIN_VALUE, 0, input); + + // When: + List resultRows = ((MseBlock.Data) operator.nextBlock()).asRowHeap().getRows(); + + // Then: + verifyResultRows(resultRows, keys, Map.of( + 1, List.of( + new Object[]{1, null, null}, + new Object[]{1, null, null}, + new Object[]{1, 10, null}, + new Object[]{1, 20, 10}, + new Object[]{1, null, 20}), + 2, List.of( + new Object[]{2, 10, null}, + new Object[]{2, null, 10}, + new Object[]{2, 20, 10}), + 3, List.of( + new Object[]{3, null, null}, + new Object[]{3, null, null}) + )); + assertTrue(operator.nextBlock().isSuccess(), "Second block is EOS (done processing)"); + } + + @Test + public void testLagIgnoreNullsWithOffset() { + // Given: LAG(value, 2) IGNORE NULLS - should find 2nd non-null value behind + DataSchema inputSchema = new DataSchema(new String[]{"group", "value"}, new ColumnDataType[]{INT, INT}); + MultiStageOperator input = new BlockListMultiStageOperator.Builder(inputSchema) + .addRow(1, 10) + .addRow(1, null) + .addRow(1, 20) + .addRow(1, null) + .addRow(1, 30) + .addRow(1, null) + .buildWithEos(); + DataSchema resultSchema = + new DataSchema(new String[]{"group", "value", "lag"}, new ColumnDataType[]{INT, INT, INT}); + List keys = List.of(0); + List collations = + List.of(new RelFieldCollation(1, RelFieldCollation.Direction.ASCENDING, RelFieldCollation.NullDirection.LAST)); + List aggCalls = List.of( + new RexExpression.FunctionCall(ColumnDataType.INT, SqlKind.LAG.name(), + List.of(new RexExpression.InputRef(1), new RexExpression.Literal(ColumnDataType.INT, 2)), false, true)); + WindowAggregateOperator operator = + getOperator(inputSchema, resultSchema, keys, collations, aggCalls, WindowNode.WindowFrameType.RANGE, + Integer.MIN_VALUE, 0, input); + + // When: + List resultRows = ((MseBlock.Data) operator.nextBlock()).asRowHeap().getRows(); + + // Then: + verifyResultRows(resultRows, keys, Map.of( + 1, List.of( + new Object[]{1, 10, null}, + new Object[]{1, null, null}, + new Object[]{1, 20, null}, + new Object[]{1, null, 10}, + new Object[]{1, 30, 10}, + new Object[]{1, null, 20}) + )); + assertTrue(operator.nextBlock().isSuccess(), "Second block is EOS (done processing)"); + } + + @Test + public void testLagIgnoreNullsWithOffsetAndDefault() { + // Given: LAG(value, 2, 99) IGNORE NULLS - 2nd non-null value behind, or 99 if not enough non-nulls + DataSchema inputSchema = new DataSchema(new String[]{"group", "value"}, new ColumnDataType[]{INT, INT}); + MultiStageOperator input = new BlockListMultiStageOperator.Builder(inputSchema) + .addRow(1, 10) + .addRow(1, null) + .addRow(1, 20) + .addRow(1, null) + .addRow(1, 30) + .addRow(1, null) + .buildWithEos(); + DataSchema resultSchema = + new DataSchema(new String[]{"group", "value", "lag"}, new ColumnDataType[]{INT, INT, INT}); + List keys = List.of(0); + List collations = + List.of(new RelFieldCollation(1, RelFieldCollation.Direction.ASCENDING, RelFieldCollation.NullDirection.LAST)); + List aggCalls = List.of( + new RexExpression.FunctionCall(ColumnDataType.INT, SqlKind.LAG.name(), + List.of(new RexExpression.InputRef(1), new RexExpression.Literal(ColumnDataType.INT, 2), + new RexExpression.Literal(ColumnDataType.INT, 99)), false, true)); + WindowAggregateOperator operator = + getOperator(inputSchema, resultSchema, keys, collations, aggCalls, WindowNode.WindowFrameType.RANGE, + Integer.MIN_VALUE, 0, input); + + // When: + List resultRows = ((MseBlock.Data) operator.nextBlock()).asRowHeap().getRows(); + + // Then: + verifyResultRows(resultRows, keys, Map.of( + 1, List.of( + new Object[]{1, 10, 99}, + new Object[]{1, null, 99}, + new Object[]{1, 20, 99}, + new Object[]{1, null, 10}, + new Object[]{1, 30, 10}, + new Object[]{1, null, 20}) + )); + assertTrue(operator.nextBlock().isSuccess(), "Second block is EOS (done processing)"); + } + + @Test + public void testLeadIgnoreNullsAllNulls() { + // Given: LEAD(value) IGNORE NULLS where all values are null + DataSchema inputSchema = new DataSchema(new String[]{"group", "value"}, new ColumnDataType[]{INT, INT}); + MultiStageOperator input = new BlockListMultiStageOperator.Builder(inputSchema) + .addRow(1, null) + .addRow(1, null) + .addRow(1, null) + .buildWithEos(); + DataSchema resultSchema = + new DataSchema(new String[]{"group", "value", "lead"}, new ColumnDataType[]{INT, INT, INT}); + List keys = List.of(0); + List collations = + List.of(new RelFieldCollation(1, RelFieldCollation.Direction.ASCENDING, RelFieldCollation.NullDirection.LAST)); + List aggCalls = List.of( + new RexExpression.FunctionCall(ColumnDataType.INT, SqlKind.LEAD.name(), + List.of(new RexExpression.InputRef(1)), false, true)); + WindowAggregateOperator operator = + getOperator(inputSchema, resultSchema, keys, collations, aggCalls, WindowNode.WindowFrameType.RANGE, + Integer.MIN_VALUE, 0, input); + + // When: + List resultRows = ((MseBlock.Data) operator.nextBlock()).asRowHeap().getRows(); + + // Then: + verifyResultRows(resultRows, keys, Map.of( + 1, List.of( + new Object[]{1, null, null}, + new Object[]{1, null, null}, + new Object[]{1, null, null}) + )); + assertTrue(operator.nextBlock().isSuccess(), "Second block is EOS (done processing)"); + } + @Test(dataProvider = "windowFrameTypes") public void testSumWithUnboundedPrecedingLowerAndUnboundedFollowingUpper(WindowNode.WindowFrameType frameType) { // Given: diff --git a/pinot-query-runtime/src/test/resources/queries/WindowFunctions.json b/pinot-query-runtime/src/test/resources/queries/WindowFunctions.json index 76f15a6f104f..74477d5c66c9 100644 --- a/pinot-query-runtime/src/test/resources/queries/WindowFunctions.json +++ b/pinot-query-runtime/src/test/resources/queries/WindowFunctions.json @@ -5473,6 +5473,116 @@ ["h", -1.53, null, null] ] }, + { + "description": "LEAD with IGNORE NULLS and default offset", + "sql": "SELECT string_col, double_col, nullable_int_col, LEAD(nullable_int_col) IGNORE NULLS OVER(PARTITION BY string_col ORDER BY double_col) FROM {tbl} ORDER BY string_col", + "outputs": [ + ["a", 42.0, 4, 5], + ["a", 50.5, null, 5], + ["a", 75.0, 5, 1], + ["a", 300.0, 1, null], + ["a", 400.0, null, null], + ["b", 1.0, 1, null], + ["b", 100.0, null, null], + ["c", 1.01, 7, 6], + ["c", 1.5, 6, 3], + ["c", 100.0, 3, null], + ["c", 400.0, null, null], + ["d", 42.0, null, null], + ["e", 42.0, null, 2], + ["e", 50.5, 2, null], + ["g", 100.0, 10, null], + ["h", -1.53, null, null] + ] + }, + { + "description": "LAG with IGNORE NULLS and default offset", + "sql": "SELECT string_col, double_col, nullable_int_col, LAG(nullable_int_col) IGNORE NULLS OVER(PARTITION BY string_col ORDER BY double_col) FROM {tbl} ORDER BY string_col", + "outputs": [ + ["a", 42.0, 4, null], + ["a", 50.5, null, 4], + ["a", 75.0, 5, 4], + ["a", 300.0, 1, 5], + ["a", 400.0, null, 1], + ["b", 1.0, 1, null], + ["b", 100.0, null, 1], + ["c", 1.01, 7, null], + ["c", 1.5, 6, 7], + ["c", 100.0, 3, 6], + ["c", 400.0, null, 3], + ["d", 42.0, null, null], + ["e", 42.0, null, null], + ["e", 50.5, 2, null], + ["g", 100.0, 10, null], + ["h", -1.53, null, null] + ] + }, + { + "description": "LEAD with IGNORE NULLS, offset 2, and default value 0", + "sql": "SELECT string_col, double_col, nullable_int_col, LEAD(nullable_int_col, 2, 0) IGNORE NULLS OVER(PARTITION BY string_col ORDER BY double_col) FROM {tbl} ORDER BY string_col", + "outputs": [ + ["a", 42.0, 4, 1], + ["a", 50.5, null, 1], + ["a", 75.0, 5, 0], + ["a", 300.0, 1, 0], + ["a", 400.0, null, 0], + ["b", 1.0, 1, 0], + ["b", 100.0, null, 0], + ["c", 1.01, 7, 3], + ["c", 1.5, 6, 0], + ["c", 100.0, 3, 0], + ["c", 400.0, null, 0], + ["d", 42.0, null, 0], + ["e", 42.0, null, 0], + ["e", 50.5, 2, 0], + ["g", 100.0, 10, 0], + ["h", -1.53, null, 0] + ] + }, + { + "description": "LAG with IGNORE NULLS, offset 2, and default value -1", + "sql": "SELECT string_col, double_col, nullable_int_col, LAG(nullable_int_col, 2, -1) IGNORE NULLS OVER(PARTITION BY string_col ORDER BY double_col) FROM {tbl} ORDER BY string_col", + "outputs": [ + ["a", 42.0, 4, -1], + ["a", 50.5, null, -1], + ["a", 75.0, 5, -1], + ["a", 300.0, 1, 4], + ["a", 400.0, null, 5], + ["b", 1.0, 1, -1], + ["b", 100.0, null, -1], + ["c", 1.01, 7, -1], + ["c", 1.5, 6, -1], + ["c", 100.0, 3, 7], + ["c", 400.0, null, 6], + ["d", 42.0, null, -1], + ["e", 42.0, null, -1], + ["e", 50.5, 2, -1], + ["g", 100.0, 10, -1], + ["h", -1.53, null, -1] + ] + }, + { + "description": "LEAD with RESPECT NULLS (explicit, same as default behavior)", + "sql": "SELECT string_col, double_col, nullable_int_col, LEAD(nullable_int_col) RESPECT NULLS OVER(PARTITION BY string_col ORDER BY double_col) FROM {tbl} ORDER BY string_col", + "outputs": [ + ["a", 42.0, 4, null], + ["a", 50.5, null, 5], + ["a", 75.0, 5, 1], + ["a", 300.0, 1, null], + ["a", 400.0, null, null], + ["b", 1.0, 1, null], + ["b", 100.0, null, null], + ["c", 1.01, 7, 6], + ["c", 1.5, 6, 3], + ["c", 100.0, 3, null], + ["c", 400.0, null, null], + ["d", 42.0, null, null], + ["e", 42.0, null, 2], + ["e", 50.5, 2, null], + ["g", 100.0, 10, null], + ["h", -1.53, null, null] + ] + }, { "description": "NTILE with 2 buckets", "sql": "SELECT string_col, int_col, NTILE(2) OVER(PARTITION BY string_col ORDER BY int_col) FROM {tbl} ORDER BY string_col, int_col",