-
Notifications
You must be signed in to change notification settings - Fork 24.3k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
To load stored fields we currently use Lucene's FieldVisitor API directly. There are a number of nice shortcuts and some dodgy hacks that get better performance here, and they need to be reproduced everywhere the code uses this API. The API is also confusingly stateful and not especially easy to use. This commit adds a new StoredFieldLoader abstraction which exposes a per-leaf stored field loader as an iterator, and adapts ShardGetService and FetchPhase to use this new abstraction in place of field visitors. Further work to integrate source loaders will be done in follow-up commits.
- Loading branch information
1 parent
e891909
commit ce63ee5
Showing
11 changed files
with
400 additions
and
179 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
50 changes: 50 additions & 0 deletions
50
server/src/main/java/org/elasticsearch/index/fieldvisitor/LeafStoredFieldLoader.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
/* | ||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
* or more contributor license agreements. Licensed under the Elastic License | ||
* 2.0 and the Server Side Public License, v 1; you may not use this file except | ||
* in compliance with, at your election, the Elastic License 2.0 or the Server | ||
* Side Public License, v 1. | ||
*/ | ||
|
||
package org.elasticsearch.index.fieldvisitor; | ||
|
||
import org.elasticsearch.common.bytes.BytesReference; | ||
|
||
import java.io.IOException; | ||
import java.util.List; | ||
import java.util.Map; | ||
|
||
/** | ||
* Loads stored fields for a LeafReader | ||
* | ||
* Which stored fields to load will be configured by the loader's parent | ||
* {@link StoredFieldLoader} | ||
*/ | ||
public interface LeafStoredFieldLoader { | ||
|
||
/** | ||
* Advance the reader to a document. This should be idempotent. | ||
*/ | ||
void advanceTo(int doc) throws IOException; | ||
|
||
/** | ||
* @return the source for the current document | ||
*/ | ||
BytesReference source(); | ||
|
||
/** | ||
* @return the ID for the current document | ||
*/ | ||
String id(); | ||
|
||
/** | ||
* @return the routing path for the current document | ||
*/ | ||
String routing(); | ||
|
||
/** | ||
* @return stored fields for the current document | ||
*/ | ||
Map<String, List<Object>> storedFields(); | ||
|
||
} |
173 changes: 173 additions & 0 deletions
173
server/src/main/java/org/elasticsearch/index/fieldvisitor/StoredFieldLoader.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,173 @@ | ||
/* | ||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
* or more contributor license agreements. Licensed under the Elastic License | ||
* 2.0 and the Server Side Public License, v 1; you may not use this file except | ||
* in compliance with, at your election, the Elastic License 2.0 or the Server | ||
* Side Public License, v 1. | ||
*/ | ||
|
||
package org.elasticsearch.index.fieldvisitor; | ||
|
||
import org.apache.lucene.index.LeafReader; | ||
import org.apache.lucene.index.LeafReaderContext; | ||
import org.elasticsearch.common.CheckedBiConsumer; | ||
import org.elasticsearch.common.bytes.BytesReference; | ||
import org.elasticsearch.common.lucene.index.SequentialStoredFieldsLeafReader; | ||
|
||
import java.io.IOException; | ||
import java.util.Collections; | ||
import java.util.HashSet; | ||
import java.util.List; | ||
import java.util.Map; | ||
import java.util.Set; | ||
|
||
/** | ||
* Generates a {@link LeafStoredFieldLoader} for a given lucene segment to load stored fields. | ||
*/ | ||
public abstract class StoredFieldLoader { | ||
|
||
/** | ||
* Return a {@link LeafStoredFieldLoader} for the given segment and document set | ||
* | ||
* The loader will use an internal lucene merge reader if the document set is of | ||
* sufficient size and is contiguous. Callers may pass {@code null} if the set | ||
* is not known up front or if the merge reader optimisation will not apply. | ||
*/ | ||
public abstract LeafStoredFieldLoader getLoader(LeafReaderContext ctx, int[] docs); | ||
|
||
/** | ||
* @return a list of fields that will be loaded for each document | ||
*/ | ||
public abstract List<String> fieldsToLoad(); | ||
|
||
/** | ||
* Creates a new StoredFieldLoader | ||
* @param loadSource should this loader load the _source field | ||
* @param fields a set of additional fields the loader should load | ||
*/ | ||
public static StoredFieldLoader create(boolean loadSource, Set<String> fields) { | ||
List<String> fieldsToLoad = fieldsToLoad(loadSource, fields); | ||
return new StoredFieldLoader() { | ||
@Override | ||
public LeafStoredFieldLoader getLoader(LeafReaderContext ctx, int[] docs) { | ||
return new ReaderStoredFieldLoader(reader(ctx, docs), loadSource, fields); | ||
} | ||
|
||
@Override | ||
public List<String> fieldsToLoad() { | ||
return fieldsToLoad; | ||
} | ||
}; | ||
} | ||
|
||
/** | ||
* Creates a no-op StoredFieldLoader that will not load any fields from disk | ||
*/ | ||
public static StoredFieldLoader empty() { | ||
return new StoredFieldLoader() { | ||
@Override | ||
public LeafStoredFieldLoader getLoader(LeafReaderContext ctx, int[] docs) { | ||
return new EmptyStoredFieldLoader(); | ||
} | ||
|
||
@Override | ||
public List<String> fieldsToLoad() { | ||
return List.of(); | ||
} | ||
}; | ||
} | ||
|
||
private static CheckedBiConsumer<Integer, FieldsVisitor, IOException> reader(LeafReaderContext ctx, int[] docs) { | ||
LeafReader leafReader = ctx.reader(); | ||
if (docs == null) { | ||
return leafReader::document; | ||
} | ||
if (leafReader instanceof SequentialStoredFieldsLeafReader lf && docs.length > 10 && hasSequentialDocs(docs)) { | ||
return lf.getSequentialStoredFieldsReader()::visitDocument; | ||
} | ||
return leafReader::document; | ||
} | ||
|
||
private static List<String> fieldsToLoad(boolean loadSource, Set<String> fields) { | ||
Set<String> fieldsToLoad = new HashSet<>(); | ||
fieldsToLoad.add("_id"); | ||
fieldsToLoad.add("_routing"); | ||
if (loadSource) { | ||
fieldsToLoad.add("_source"); | ||
} | ||
fieldsToLoad.addAll(fields); | ||
return fieldsToLoad.stream().sorted().toList(); | ||
} | ||
|
||
private static boolean hasSequentialDocs(int[] docs) { | ||
return docs.length > 0 && docs[docs.length - 1] - docs[0] == docs.length - 1; | ||
} | ||
|
||
private static class EmptyStoredFieldLoader implements LeafStoredFieldLoader { | ||
|
||
@Override | ||
public void advanceTo(int doc) throws IOException {} | ||
|
||
@Override | ||
public BytesReference source() { | ||
return null; | ||
} | ||
|
||
@Override | ||
public String id() { | ||
return null; | ||
} | ||
|
||
@Override | ||
public String routing() { | ||
return null; | ||
} | ||
|
||
@Override | ||
public Map<String, List<Object>> storedFields() { | ||
return Collections.emptyMap(); | ||
} | ||
} | ||
|
||
private static class ReaderStoredFieldLoader implements LeafStoredFieldLoader { | ||
|
||
private final CheckedBiConsumer<Integer, FieldsVisitor, IOException> reader; | ||
private final CustomFieldsVisitor visitor; | ||
private int doc = -1; | ||
|
||
ReaderStoredFieldLoader(CheckedBiConsumer<Integer, FieldsVisitor, IOException> reader, boolean loadSource, Set<String> fields) { | ||
this.reader = reader; | ||
this.visitor = new CustomFieldsVisitor(fields, loadSource); | ||
} | ||
|
||
@Override | ||
public void advanceTo(int doc) throws IOException { | ||
if (doc != this.doc) { | ||
visitor.reset(); | ||
reader.accept(doc, visitor); | ||
this.doc = doc; | ||
} | ||
} | ||
|
||
@Override | ||
public BytesReference source() { | ||
return visitor.source(); | ||
} | ||
|
||
@Override | ||
public String id() { | ||
return visitor.id(); | ||
} | ||
|
||
@Override | ||
public String routing() { | ||
return visitor.routing(); | ||
} | ||
|
||
@Override | ||
public Map<String, List<Object>> storedFields() { | ||
return visitor.fields(); | ||
} | ||
} | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.