From d724d1c607cf854cb59d47e3873d041c374ff0b9 Mon Sep 17 00:00:00 2001 From: Nhat Nguyen Date: Mon, 24 Feb 2025 08:49:54 -0800 Subject: [PATCH] Fix early termination in LuceneSourceOperator (#123197) The LuceneSourceOperator is supposed to terminate when it reaches the limit; unfortunately, we don't have a test to cover this. Due to this bug, we continue scanning all segments, even though we discard the results as the limit was reached. This can cause performance issues for simple queries like FROM .. | LIMIT 10, when Lucene indices are on the warm or cold tier. I will submit a follow-up PR to ensure we only collect up to the limit across multiple drivers. --- docs/changelog/123197.yaml | 5 +++++ .../compute/lucene/LuceneSourceOperator.java | 2 +- .../lucene/LuceneSourceOperatorTests.java | 22 +++++++++++++++++++ 3 files changed, 28 insertions(+), 1 deletion(-) create mode 100644 docs/changelog/123197.yaml diff --git a/docs/changelog/123197.yaml b/docs/changelog/123197.yaml new file mode 100644 index 0000000000000..ffb4bab79fe8c --- /dev/null +++ b/docs/changelog/123197.yaml @@ -0,0 +1,5 @@ +pr: 123197 +summary: Fix early termination in `LuceneSourceOperator` +area: ES|QL +type: bug +issues: [] diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneSourceOperator.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneSourceOperator.java index 3d34067e1a839..61a7cbad3e8af 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneSourceOperator.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneSourceOperator.java @@ -140,7 +140,7 @@ public void collect(int doc) throws IOException { @Override public boolean isFinished() { - return doneCollecting; + return doneCollecting || remainingDocs <= 0; } @Override diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneSourceOperatorTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneSourceOperatorTests.java index b7114bb4e9b54..cba8722a384e3 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneSourceOperatorTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneSourceOperatorTests.java @@ -25,6 +25,7 @@ import org.elasticsearch.compute.operator.Driver; import org.elasticsearch.compute.operator.DriverContext; import org.elasticsearch.compute.operator.Operator; +import org.elasticsearch.compute.operator.SourceOperator; import org.elasticsearch.compute.test.AnyOperatorTestCase; import org.elasticsearch.compute.test.OperatorTestCase; import org.elasticsearch.compute.test.TestResultPageSinkOperator; @@ -117,6 +118,27 @@ public void testShardDataPartitioning() { testSimple(driverContext(), size, limit); } + public void testEarlyTermination() { + int size = between(1_000, 20_000); + int limit = between(10, size); + LuceneSourceOperator.Factory factory = simple(randomFrom(DataPartitioning.values()), size, limit, scoring); + try (SourceOperator sourceOperator = factory.get(driverContext())) { + assertFalse(sourceOperator.isFinished()); + int collected = 0; + while (sourceOperator.isFinished() == false) { + Page page = sourceOperator.getOutput(); + if (page != null) { + collected += page.getPositionCount(); + page.releaseBlocks(); + } + if (collected >= limit) { + assertTrue("source operator is not finished after reaching limit", sourceOperator.isFinished()); + assertThat(collected, equalTo(limit)); + } + } + } + } + public void testEmpty() { testSimple(driverContext(), 0, between(10, 10_000)); }