Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix concurrency bug in AbstractStringScriptFieldAutomatonQuery #106678

Merged
merged 6 commits into from
Mar 26, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
6 changes: 6 additions & 0 deletions docs/changelog/106678.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 106678
summary: Fix concurrency bug in `AbstractStringScriptFieldAutomatonQuery`
area: Search
type: bug
issues:
- 105911
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ abstract class AbstractBooleanScriptFieldQuery extends AbstractScriptFieldQuery<
}

@Override
protected boolean matches(BooleanFieldScript scriptContext, int docId) {
protected final boolean matches(BooleanFieldScript scriptContext, int docId) {
scriptContext.runForDoc(docId);
return matches(scriptContext.trues(), scriptContext.falses());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ abstract class AbstractDoubleScriptFieldQuery extends AbstractScriptFieldQuery<D
}

@Override
protected boolean matches(DoubleFieldScript scriptContext, int docId) {
protected final boolean matches(DoubleFieldScript scriptContext, int docId) {
scriptContext.runForDoc(docId);
return matches(scriptContext.values(), scriptContext.count());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ abstract class AbstractGeoPointScriptFieldQuery extends AbstractScriptFieldQuery
}

@Override
protected boolean matches(GeoPointFieldScript scriptContext, int docId) {
protected final boolean matches(GeoPointFieldScript scriptContext, int docId) {
scriptContext.runForDoc(docId);
return matches(scriptContext.lats(), scriptContext.lons(), scriptContext.count());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ abstract class AbstractIpScriptFieldQuery extends AbstractScriptFieldQuery<IpFie
}

@Override
protected boolean matches(IpFieldScript scriptContext, int docId) {
protected final boolean matches(IpFieldScript scriptContext, int docId) {
scriptContext.runForDoc(docId);
return matches(scriptContext.values(), scriptContext.count());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ abstract class AbstractLongScriptFieldQuery extends AbstractScriptFieldQuery<Abs
}

@Override
protected boolean matches(AbstractLongFieldScript scriptContext, int docId) {
protected final boolean matches(AbstractLongFieldScript scriptContext, int docId) {
scriptContext.runForDoc(docId);
return AbstractLongScriptFieldQuery.this.matches(scriptContext.values(), scriptContext.count());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,18 +71,7 @@ public boolean isCacheable(LeafReaderContext ctx) {
public Scorer scorer(LeafReaderContext ctx) {
S scriptContext = scriptContextFunction.apply(ctx);
DocIdSetIterator approximation = DocIdSetIterator.all(ctx.reader().maxDoc());
TwoPhaseIterator twoPhase = new TwoPhaseIterator(approximation) {
@Override
public boolean matches() {
return AbstractScriptFieldQuery.this.matches(scriptContext, approximation.docID());
}

@Override
public float matchCost() {
return MATCH_COST;
}
};
return new ConstantScoreScorer(this, score(), scoreMode, twoPhase);
return new ConstantScoreScorer(this, score(), scoreMode, createTwoPhaseIterator(scriptContext, approximation));
}

@Override
Expand All @@ -96,6 +85,20 @@ public Explanation explain(LeafReaderContext context, int doc) throws IOExceptio
};
}

protected TwoPhaseIterator createTwoPhaseIterator(S scriptContext, DocIdSetIterator approximation) {
return new TwoPhaseIterator(approximation) {
@Override
public boolean matches() {
return AbstractScriptFieldQuery.this.matches(scriptContext, approximation.docID());
}

@Override
public float matchCost() {
return MATCH_COST;
}
};
}

protected abstract boolean matches(S scriptContext, int docId);

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@

package org.elasticsearch.search.runtime;

import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.TwoPhaseIterator;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.automaton.ByteRunAutomaton;
import org.elasticsearch.script.Script;
Expand All @@ -17,7 +19,6 @@
import java.util.List;

public abstract class AbstractStringScriptFieldAutomatonQuery extends AbstractStringScriptFieldQuery {
private final BytesRefBuilder scratch = new BytesRefBuilder();
private final ByteRunAutomaton automaton;

public AbstractStringScriptFieldAutomatonQuery(
Expand All @@ -31,7 +32,23 @@ public AbstractStringScriptFieldAutomatonQuery(
}

@Override
protected final boolean matches(List<String> values) {
protected TwoPhaseIterator createTwoPhaseIterator(StringFieldScript scriptContext, DocIdSetIterator approximation) {
BytesRefBuilder scratch = new BytesRefBuilder();
return new TwoPhaseIterator(approximation) {
@Override
public boolean matches() {
scriptContext.runForDoc(approximation.docID());
return AbstractStringScriptFieldAutomatonQuery.this.matches(scriptContext.getValues(), scratch);
}

@Override
public float matchCost() {
return MATCH_COST;
}
};
}

protected final boolean matches(List<String> values, BytesRefBuilder scratch) {
for (String value : values) {
scratch.copyChars(value);
if (automaton.run(scratch.bytes(), 0, scratch.length())) {
Expand All @@ -41,6 +58,11 @@ protected final boolean matches(List<String> values) {
return false;
}

@Override
protected final boolean matches(List<String> values) {
throw new UnsupportedOperationException();
}

@Override
public final void visit(QueryVisitor visitor) {
if (visitor.acceptField(fieldName())) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,17 @@
package org.elasticsearch.search.runtime;

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.ThreadInterruptedException;
import org.apache.lucene.util.automaton.ByteRunAutomaton;
import org.elasticsearch.script.Script;

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.is;
Expand Down Expand Up @@ -68,18 +75,42 @@ protected StringScriptFieldFuzzyQuery mutate(StringScriptFieldFuzzyQuery orig) {
@Override
public void testMatches() {
StringScriptFieldFuzzyQuery query = StringScriptFieldFuzzyQuery.build(randomScript(), leafFactory, "test", "foo", 1, 0, false);
assertTrue(query.matches(List.of("foo")));
assertTrue(query.matches(List.of("foa")));
assertTrue(query.matches(List.of("foo", "bar")));
assertFalse(query.matches(List.of("bar")));
BytesRefBuilder scratch = new BytesRefBuilder();
assertTrue(query.matches(List.of("foo"), scratch));
assertTrue(query.matches(List.of("foa"), scratch));
assertTrue(query.matches(List.of("foo", "bar"), scratch));
assertFalse(query.matches(List.of("bar"), scratch));
query = StringScriptFieldFuzzyQuery.build(randomScript(), leafFactory, "test", "foo", 0, 0, false);
assertTrue(query.matches(List.of("foo")));
assertFalse(query.matches(List.of("foa")));
assertTrue(query.matches(List.of("foo"), scratch));
assertFalse(query.matches(List.of("foa"), scratch));
query = StringScriptFieldFuzzyQuery.build(randomScript(), leafFactory, "test", "foo", 2, 0, false);
assertTrue(query.matches(List.of("foo")));
assertTrue(query.matches(List.of("foa")));
assertTrue(query.matches(List.of("faa")));
assertFalse(query.matches(List.of("faaa")));
assertTrue(query.matches(List.of("foo"), scratch));
assertTrue(query.matches(List.of("foa"), scratch));
assertTrue(query.matches(List.of("faa"), scratch));
assertFalse(query.matches(List.of("faaa"), scratch));
}

public void testConcurrentMatches() {
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These concurrency test are kind of artificial now that they rely on the different matches method... we could rely on integration tests perhaps instead.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you built a Scorer or something that could work. That's kind of integration-y. You'd need to make a lucene index, but sometimes that's life.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep, I later realized that we already have coverage for all of this in our script field type tests. Only, those did not leverage concurrency as we never ended up running them against multiple segments. I have added an addDocument method to the base class that more aggressively flushes, this is ok as we are only ever adding a few documents. The random flush within RandomIndexWriter needs a minimum number of 10 docs which is never reached in these tests.
With this adjustment, I was able to reproduce the problem and ensure that it is now fixed.

StringScriptFieldFuzzyQuery query = StringScriptFieldFuzzyQuery.build(randomScript(), leafFactory, "test", "foo", 1, 0, false);
List<Future<?>> futures = new ArrayList<>();
ExecutorService executorService = Executors.newFixedThreadPool(3);
try {
futures.add(executorService.submit(() -> assertTrue(query.matches(List.of("foo"), new BytesRefBuilder()))));
futures.add(executorService.submit(() -> assertTrue(query.matches(List.of("foa"), new BytesRefBuilder()))));
futures.add(executorService.submit(() -> assertTrue(query.matches(List.of("foo", "bar"), new BytesRefBuilder()))));
futures.add(executorService.submit(() -> assertFalse(query.matches(List.of("bar"), new BytesRefBuilder()))));
for (Future<?> future : futures) {
try {
future.get();
} catch (ExecutionException e) {
fail(e);
} catch (InterruptedException e) {
throw new ThreadInterruptedException(e);
}
}
} finally {
terminate(executorService);
}
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,19 @@
package org.elasticsearch.search.runtime;

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.ThreadInterruptedException;
import org.apache.lucene.util.automaton.ByteRunAutomaton;
import org.apache.lucene.util.automaton.Operations;
import org.apache.lucene.util.automaton.RegExp;
import org.elasticsearch.script.Script;

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.is;
Expand Down Expand Up @@ -84,13 +91,14 @@ public void testMatches() {
0,
Operations.DEFAULT_DETERMINIZE_WORK_LIMIT
);
assertTrue(query.matches(List.of("astuffb")));
assertFalse(query.matches(List.of("astuffB")));
assertFalse(query.matches(List.of("fffff")));
assertFalse(query.matches(List.of("ab")));
assertFalse(query.matches(List.of("aasdf")));
assertFalse(query.matches(List.of("dsfb")));
assertTrue(query.matches(List.of("astuffb", "fffff")));
BytesRefBuilder scratch = new BytesRefBuilder();
assertTrue(query.matches(List.of("astuffb"), scratch));
assertFalse(query.matches(List.of("astuffB"), scratch));
assertFalse(query.matches(List.of("fffff"), scratch));
assertFalse(query.matches(List.of("ab"), scratch));
assertFalse(query.matches(List.of("aasdf"), scratch));
assertFalse(query.matches(List.of("dsfb"), scratch));
assertTrue(query.matches(List.of("astuffb", "fffff"), scratch));

StringScriptFieldRegexpQuery ciQuery = new StringScriptFieldRegexpQuery(
randomScript(),
Expand All @@ -106,6 +114,40 @@ public void testMatches() {

}

public void testConcurrentMatches() {
StringScriptFieldRegexpQuery query = new StringScriptFieldRegexpQuery(
randomScript(),
leafFactory,
"test",
"a.+b",
0,
0,
Operations.DEFAULT_DETERMINIZE_WORK_LIMIT
);
List<Future<?>> futures = new ArrayList<>();
ExecutorService executorService = Executors.newFixedThreadPool(3);
try {
futures.add(executorService.submit(() -> assertTrue(query.matches(List.of("astuffb"), new BytesRefBuilder()))));
futures.add(executorService.submit(() -> assertFalse(query.matches(List.of("astuffB"), new BytesRefBuilder()))));
futures.add(executorService.submit(() -> assertFalse(query.matches(List.of("fffff"), new BytesRefBuilder()))));
futures.add(executorService.submit(() -> assertFalse(query.matches(List.of("ab"), new BytesRefBuilder()))));
futures.add(executorService.submit(() -> assertFalse(query.matches(List.of("aasdf"), new BytesRefBuilder()))));
futures.add(executorService.submit(() -> assertFalse(query.matches(List.of("dsfb"), new BytesRefBuilder()))));
futures.add(executorService.submit(() -> assertTrue(query.matches(List.of("astuffb", "fffff"), new BytesRefBuilder()))));
for (Future<?> future : futures) {
try {
future.get();
} catch (ExecutionException e) {
fail(e);
} catch (InterruptedException e) {
throw new ThreadInterruptedException(e);
}
}
} finally {
terminate(executorService);
}
}

@Override
protected void assertToString(StringScriptFieldRegexpQuery query) {
assertThat(query.toString(query.fieldName()), equalTo("/" + query.pattern() + "/"));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,17 @@
package org.elasticsearch.search.runtime;

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.ThreadInterruptedException;
import org.apache.lucene.util.automaton.ByteRunAutomaton;
import org.elasticsearch.script.Script;

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

import static org.hamcrest.Matchers.equalTo;

Expand Down Expand Up @@ -52,21 +59,46 @@ protected StringScriptFieldWildcardQuery mutate(StringScriptFieldWildcardQuery o
@Override
public void testMatches() {
StringScriptFieldWildcardQuery query = new StringScriptFieldWildcardQuery(randomScript(), leafFactory, "test", "a*b", false);
assertTrue(query.matches(List.of("astuffb")));
assertFalse(query.matches(List.of("Astuffb")));
assertFalse(query.matches(List.of("fffff")));
assertFalse(query.matches(List.of("a")));
assertFalse(query.matches(List.of("b")));
assertFalse(query.matches(List.of("aasdf")));
assertFalse(query.matches(List.of("dsfb")));
assertTrue(query.matches(List.of("astuffb", "fffff")));
BytesRefBuilder scratch = new BytesRefBuilder();
assertTrue(query.matches(List.of("astuffb"), scratch));
assertFalse(query.matches(List.of("Astuffb"), scratch));
assertFalse(query.matches(List.of("fffff"), scratch));
assertFalse(query.matches(List.of("a"), scratch));
assertFalse(query.matches(List.of("b"), scratch));
assertFalse(query.matches(List.of("aasdf"), scratch));
assertFalse(query.matches(List.of("dsfb"), scratch));
assertTrue(query.matches(List.of("astuffb", "fffff"), scratch));

StringScriptFieldWildcardQuery ciQuery = new StringScriptFieldWildcardQuery(randomScript(), leafFactory, "test", "a*b", true);
assertTrue(ciQuery.matches(List.of("Astuffb")));
assertTrue(ciQuery.matches(List.of("astuffB", "fffff")));
assertTrue(ciQuery.matches(List.of("Astuffb"), scratch));
assertTrue(ciQuery.matches(List.of("astuffB", "fffff"), scratch));

}

public void testConcurrentMatches() {
StringScriptFieldWildcardQuery query = new StringScriptFieldWildcardQuery(randomScript(), leafFactory, "test", "a*b", false);
List<Future<?>> futures = new ArrayList<>();
ExecutorService executorService = Executors.newFixedThreadPool(3);
try {
futures.add(executorService.submit(() -> assertTrue(query.matches(List.of("astuffb"), new BytesRefBuilder()))));
futures.add(executorService.submit(() -> assertFalse(query.matches(List.of("Astuffb"), new BytesRefBuilder()))));
futures.add(executorService.submit(() -> assertFalse(query.matches(List.of("fffff"), new BytesRefBuilder()))));
futures.add(executorService.submit(() -> assertFalse(query.matches(List.of("a"), new BytesRefBuilder()))));
futures.add(executorService.submit(() -> assertFalse(query.matches(List.of("b"), new BytesRefBuilder()))));
for (Future<?> future : futures) {
try {
future.get();
} catch (ExecutionException e) {
fail(e);
} catch (InterruptedException e) {
throw new ThreadInterruptedException(e);
}
}
} finally {
terminate(executorService);
}
}

@Override
protected void assertToString(StringScriptFieldWildcardQuery query) {
assertThat(query.toString(query.fieldName()), equalTo(query.pattern()));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ abstract class AbstractGeoShapeScriptFieldQuery extends AbstractScriptFieldQuery
}

@Override
protected boolean matches(GeometryFieldScript scriptContext, int docId) {
protected final boolean matches(GeometryFieldScript scriptContext, int docId) {
scriptContext.runForDoc(docId);
return matches(scriptContext.geometry());
}
Expand Down