Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

LUCENE-9002: Query caching leads to absurdly slow queries #940

Merged
merged 8 commits into from Nov 11, 2019
3 changes: 3 additions & 0 deletions lucene/CHANGES.txt
Expand Up @@ -76,6 +76,9 @@ New Features

Improvements

* LUCENE-9002: Skip costly caching clause in LRUQueryCache if it makes the query
many times slower. (Guoqiang Jiang)

* LUCENE-9006: WordDelimiterGraphFilter's catenateAll token is now ordered before any token parts, like WDF did.
(David Smiley)

Expand Down
49 changes: 44 additions & 5 deletions lucene/core/src/java/org/apache/lucene/search/LRUQueryCache.java
Expand Up @@ -101,6 +101,7 @@ public class LRUQueryCache implements QueryCache, Accountable {
private final Set<Query> mostRecentlyUsedQueries;
private final Map<IndexReader.CacheKey, LeafCache> cache;
private final ReentrantLock lock;
private final float skipCacheFactor;

// these variables are volatile so that we do not need to sync reads
// but increments need to be performed under the lock
Expand All @@ -114,12 +115,20 @@ public class LRUQueryCache implements QueryCache, Accountable {
* Expert: Create a new instance that will cache at most <code>maxSize</code>
* queries with at most <code>maxRamBytesUsed</code> bytes of memory, only on
* leaves that satisfy {@code leavesToCache}.
*
* Also, clauses whose cost is {@code skipCacheFactor} times more than the cost of the top-level query
* will not be cached in order to not slow down queries too much.
*/
public LRUQueryCache(int maxSize, long maxRamBytesUsed,
Predicate<LeafReaderContext> leavesToCache) {
Predicate<LeafReaderContext> leavesToCache, float skipCacheFactor) {
this.maxSize = maxSize;
this.maxRamBytesUsed = maxRamBytesUsed;
this.leavesToCache = leavesToCache;
if (skipCacheFactor >= 1 == false) { // NaN >= 1 evaluates true
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
if (skipCacheFactor >= 1 == false) { // NaN >= 1 evaluates true
if (skipCacheFactor >= 1 == false) { // NaN >= 1 evaluates false

I got it backward, sorry about that.

throw new IllegalArgumentException("skipCacheFactor must be no less than 1, get " + skipCacheFactor);
}
this.skipCacheFactor = skipCacheFactor;
jgq2008303393 marked this conversation as resolved.
Show resolved Hide resolved

uniqueQueries = new LinkedHashMap<>(16, 0.75f, true);
mostRecentlyUsedQueries = uniqueQueries.keySet();
cache = new IdentityHashMap<>();
Expand All @@ -141,7 +150,7 @@ public LRUQueryCache(int maxSize, long maxRamBytesUsed,
* be cached in order to not hurt latency too much because of caching.
*/
public LRUQueryCache(int maxSize, long maxRamBytesUsed) {
this(maxSize, maxRamBytesUsed, new MinSegmentSizePredicate(10000, .03f));
this(maxSize, maxRamBytesUsed, new MinSegmentSizePredicate(10000, .03f), 250);
}

// pkg-private for testing
Expand Down Expand Up @@ -732,8 +741,38 @@ public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOExcepti

if (docIdSet == null) {
if (policy.shouldCache(in.getQuery())) {
docIdSet = cache(context);
putIfAbsent(in.getQuery(), docIdSet, cacheHelper);
final ScorerSupplier supplier = in.scorerSupplier(context);
if (supplier == null) {
putIfAbsent(in.getQuery(), DocIdSet.EMPTY, cacheHelper);
return null;
}

final long cost = supplier.cost();
return new ScorerSupplier() {
@Override
public Scorer get(long leadCost) throws IOException {
// skip cache operation which would slow query down too much
if (cost / skipCacheFactor > leadCost) {
return supplier.get(leadCost);
}

Scorer scorer = supplier.get(Long.MAX_VALUE);
DocIdSet docIdSet = cacheImpl(new DefaultBulkScorer(scorer), context.reader().maxDoc());
putIfAbsent(in.getQuery(), docIdSet, cacheHelper);
DocIdSetIterator disi = docIdSet.iterator();
if (disi == null) {
// docIdSet.iterator() is allowed to return null when empty but we want a non-null iterator here
disi = DocIdSetIterator.empty();
}

return new ConstantScoreScorer(CachingWrapperWeight.this, 0f, ScoreMode.COMPLETE_NO_SCORES, disi);
}

@Override
public long cost() {
return cost;
}
};
} else {
return in.scorerSupplier(context);
}
Expand All @@ -753,7 +792,7 @@ public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOExcepti
public Scorer get(long LeadCost) throws IOException {
return new ConstantScoreScorer(CachingWrapperWeight.this, 0f, ScoreMode.COMPLETE_NO_SCORES, disi);
}

@Override
public long cost() {
return disi.cost();
Expand Down