Skip to content

Commit

Permalink
Speed up NumberFieldMapper (#85688)
Browse files Browse the repository at this point in the history
No need to create an intermediary list here. Creating it and adding it
to the document tended to take more time than the parsing of the number itself.
  • Loading branch information
original-brownbear committed Jun 4, 2022
1 parent 2113631 commit da4577e
Show file tree
Hide file tree
Showing 11 changed files with 97 additions and 90 deletions.
5 changes: 5 additions & 0 deletions docs/changelog/85688.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 85688
summary: Speed up `NumberFieldMapper`
area: Mapping
type: enhancement
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@

package org.elasticsearch.index.mapper.extras;

import org.apache.lucene.document.Field;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
Expand Down Expand Up @@ -466,8 +465,7 @@ protected void parseCreateField(DocumentParserContext context) throws IOExceptio
}
long scaledValue = encode(doubleValue, scalingFactor);

List<Field> fields = NumberFieldMapper.NumberType.LONG.createFields(fieldType().name(), scaledValue, indexed, hasDocValues, stored);
context.doc().addAll(fields);
NumberFieldMapper.NumberType.LONG.addFields(context.doc(), fieldType().name(), scaledValue, indexed, hasDocValues, stored);

if (hasDocValues == false && (indexed || stored)) {
context.addToFieldNames(fieldType().name());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ protected void parseCreateField(DocumentParserContext context) throws IOExceptio
tokenCount = countPositions(analyzer, name(), value, enablePositionIncrements);
}

context.doc().addAll(NumberFieldMapper.NumberType.INTEGER.createFields(fieldType().name(), tokenCount, index, hasDocValues, store));
NumberFieldMapper.NumberType.INTEGER.addFields(context.doc(), fieldType().name(), tokenCount, index, hasDocValues, store);
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -292,16 +292,13 @@ public void testDuel() throws Exception {
// Disable query cache, because ControlQuery cannot be cached...
shardSearcher.setQueryCache(null);

Document document = new Document();
LuceneDocument document = new LuceneDocument();
for (Map.Entry<String, List<String>> entry : stringContent.entrySet()) {
String value = entry.getValue().stream().collect(Collectors.joining(" "));
document.add(new TextField(entry.getKey(), value, Field.Store.NO));
}
for (Integer intValue : intValues) {
List<Field> numberFields = NumberFieldMapper.NumberType.INTEGER.createFields("int_field", intValue, true, true, false);
for (Field numberField : numberFields) {
document.add(numberField);
}
NumberFieldMapper.NumberType.INTEGER.addFields(document, "int_field", intValue, true, true, false);
}
MemoryIndex memoryIndex = MemoryIndex.fromDocument(document, new WhitespaceAnalyzer());
duelRun(queryStore, memoryIndex, shardSearcher);
Expand Down Expand Up @@ -413,7 +410,7 @@ public void testDuel2() throws Exception {
// Disable query cache, because ControlQuery cannot be cached...
shardSearcher.setQueryCache(null);

Document document = new Document();
LuceneDocument document = new LuceneDocument();
for (String value : stringValues) {
document.add(new TextField("string_field", value, Field.Store.NO));
logger.info("Test with document: {}" + document);
Expand All @@ -422,16 +419,7 @@ public void testDuel2() throws Exception {
}

for (int[] range : ranges) {
List<Field> numberFields = NumberFieldMapper.NumberType.INTEGER.createFields(
"int_field",
between(range[0], range[1]),
true,
true,
false
);
for (Field numberField : numberFields) {
document.add(numberField);
}
NumberFieldMapper.NumberType.INTEGER.addFields(document, "int_field", between(range[0], range[1]), true, true, false);
logger.info("Test with document: {}" + document);
MemoryIndex memoryIndex = MemoryIndex.fromDocument(document, new WhitespaceAnalyzer());
duelRun(queryStore, memoryIndex, shardSearcher);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
import org.elasticsearch.index.mapper.ValueFetcher;
import org.elasticsearch.index.query.SearchExecutionContext;

import java.io.IOException;
import java.util.List;

public class SizeFieldMapper extends MetadataFieldMapper {
Expand Down Expand Up @@ -84,13 +83,13 @@ public boolean enabled() {
}

@Override
public void postParse(DocumentParserContext context) throws IOException {
public void postParse(DocumentParserContext context) {
// we post parse it so we get the size stored, possibly compressed (source will be preParse)
if (enabled.value() == false) {
return;
}
final int value = context.sourceToParse().source().length();
context.doc().addAll(NumberType.INTEGER.createFields(name(), value, true, true, true));
NumberType.INTEGER.addFields(context.doc(), name(), value, true, true, true);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
package org.elasticsearch.index.mapper;

import org.apache.lucene.document.DoublePoint;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FloatPoint;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.LongPoint;
Expand Down Expand Up @@ -59,7 +58,6 @@

import java.io.IOException;
import java.time.ZoneId;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
Expand Down Expand Up @@ -353,18 +351,17 @@ public Query rangeQuery(
}

@Override
public List<Field> createFields(String name, Number value, boolean indexed, boolean docValued, boolean stored) {
List<Field> fields = new ArrayList<>();
public void addFields(LuceneDocument document, String name, Number value, boolean indexed, boolean docValued, boolean stored) {
final float f = value.floatValue();
if (indexed) {
fields.add(new HalfFloatPoint(name, value.floatValue()));
document.add(new HalfFloatPoint(name, f));
}
if (docValued) {
fields.add(new SortedNumericDocValuesField(name, HalfFloatPoint.halfFloatToSortableShort(value.floatValue())));
document.add(new SortedNumericDocValuesField(name, HalfFloatPoint.halfFloatToSortableShort(f)));
}
if (stored) {
fields.add(new StoredField(name, value.floatValue()));
document.add(new StoredField(name, f));
}
return fields;
}

@Override
Expand Down Expand Up @@ -489,18 +486,17 @@ public Query rangeQuery(
}

@Override
public List<Field> createFields(String name, Number value, boolean indexed, boolean docValued, boolean stored) {
List<Field> fields = new ArrayList<>();
public void addFields(LuceneDocument document, String name, Number value, boolean indexed, boolean docValued, boolean stored) {
final float f = value.floatValue();
if (indexed) {
fields.add(new FloatPoint(name, value.floatValue()));
document.add(new FloatPoint(name, f));
}
if (docValued) {
fields.add(new SortedNumericDocValuesField(name, NumericUtils.floatToSortableInt(value.floatValue())));
document.add(new SortedNumericDocValuesField(name, NumericUtils.floatToSortableInt(f)));
}
if (stored) {
fields.add(new StoredField(name, value.floatValue()));
document.add(new StoredField(name, f));
}
return fields;
}

@Override
Expand Down Expand Up @@ -603,18 +599,17 @@ public Query rangeQuery(
}

@Override
public List<Field> createFields(String name, Number value, boolean indexed, boolean docValued, boolean stored) {
List<Field> fields = new ArrayList<>();
public void addFields(LuceneDocument document, String name, Number value, boolean indexed, boolean docValued, boolean stored) {
final double d = value.doubleValue();
if (indexed) {
fields.add(new DoublePoint(name, value.doubleValue()));
document.add(new DoublePoint(name, d));
}
if (docValued) {
fields.add(new SortedNumericDocValuesField(name, NumericUtils.doubleToSortableLong(value.doubleValue())));
document.add(new SortedNumericDocValuesField(name, NumericUtils.doubleToSortableLong(d)));
}
if (stored) {
fields.add(new StoredField(name, value.doubleValue()));
document.add(new StoredField(name, d));
}
return fields;
}

@Override
Expand Down Expand Up @@ -696,8 +691,8 @@ public Query rangeQuery(
}

@Override
public List<Field> createFields(String name, Number value, boolean indexed, boolean docValued, boolean stored) {
return INTEGER.createFields(name, value, indexed, docValued, stored);
public void addFields(LuceneDocument document, String name, Number value, boolean indexed, boolean docValued, boolean stored) {
INTEGER.addFields(document, name, value, indexed, docValued, stored);
}

@Override
Expand Down Expand Up @@ -769,8 +764,8 @@ public Query rangeQuery(
}

@Override
public List<Field> createFields(String name, Number value, boolean indexed, boolean docValued, boolean stored) {
return INTEGER.createFields(name, value, indexed, docValued, stored);
public void addFields(LuceneDocument document, String name, Number value, boolean indexed, boolean docValued, boolean stored) {
INTEGER.addFields(document, name, value, indexed, docValued, stored);
}

@Override
Expand Down Expand Up @@ -905,18 +900,17 @@ public Query rangeQuery(
}

@Override
public List<Field> createFields(String name, Number value, boolean indexed, boolean docValued, boolean stored) {
List<Field> fields = new ArrayList<>();
public void addFields(LuceneDocument document, String name, Number value, boolean indexed, boolean docValued, boolean stored) {
final int i = value.intValue();
if (indexed) {
fields.add(new IntPoint(name, value.intValue()));
document.add(new IntPoint(name, i));
}
if (docValued) {
fields.add(new SortedNumericDocValuesField(name, value.intValue()));
document.add(new SortedNumericDocValuesField(name, i));
}
if (stored) {
fields.add(new StoredField(name, value.intValue()));
document.add(new StoredField(name, i));
}
return fields;
}

@Override
Expand Down Expand Up @@ -1016,18 +1010,17 @@ public Query rangeQuery(
}

@Override
public List<Field> createFields(String name, Number value, boolean indexed, boolean docValued, boolean stored) {
List<Field> fields = new ArrayList<>();
public void addFields(LuceneDocument document, String name, Number value, boolean indexed, boolean docValued, boolean stored) {
final long l = value.longValue();
if (indexed) {
fields.add(new LongPoint(name, value.longValue()));
document.add(new LongPoint(name, l));
}
if (docValued) {
fields.add(new SortedNumericDocValuesField(name, value.longValue()));
document.add(new SortedNumericDocValuesField(name, l));
}
if (stored) {
fields.add(new StoredField(name, value.longValue()));
document.add(new StoredField(name, l));
}
return fields;
}

@Override
Expand Down Expand Up @@ -1089,7 +1082,25 @@ public abstract Query rangeQuery(

public abstract Number parsePoint(byte[] value);

public abstract List<Field> createFields(String name, Number value, boolean indexed, boolean docValued, boolean stored);
/**
* Maps the given {@code value} to one or more Lucene field values ands them to the given {@code document} under the given
* {@code name}.
*
* @param document document to add fields to
* @param name field name
* @param value value to map
* @param indexed whether or not the field is indexed
* @param docValued whether or not doc values should be added
* @param stored whether or not the field is stored
*/
public abstract void addFields(
LuceneDocument document,
String name,
Number value,
boolean indexed,
boolean docValued,
boolean stored
);

public FieldValues<Number> compile(String fieldName, Script script, ScriptCompiler compiler) {
// only implemented for long and double fields
Expand Down Expand Up @@ -1554,8 +1565,7 @@ private void indexValue(DocumentParserContext context, Number numericValue) {
if (dimension && numericValue != null) {
context.getDimensions().addLong(fieldType().name(), numericValue.longValue());
}
List<Field> fields = fieldType().type.createFields(fieldType().name(), numericValue, indexed, hasDocValues, stored);
context.doc().addAll(fields);
fieldType().type.addFields(context.doc(), fieldType().name(), numericValue, indexed, hasDocValues, stored);

if (hasDocValues == false && (stored || indexed)) {
context.addToFieldNames(fieldType().name());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,9 @@

package org.elasticsearch.index.fielddata.plain;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.store.Directory;
import org.apache.lucene.tests.util.TestUtil;
Expand All @@ -21,6 +19,7 @@
import org.elasticsearch.index.fielddata.ScriptDocValues.Doubles;
import org.elasticsearch.index.fielddata.ScriptDocValues.DoublesSupplier;
import org.elasticsearch.index.fielddata.SortedNumericDoubleValues;
import org.elasticsearch.index.mapper.LuceneDocument;
import org.elasticsearch.index.mapper.NumberFieldMapper;
import org.elasticsearch.script.field.DelegateDocValuesField;
import org.elasticsearch.test.ESTestCase;
Expand All @@ -33,10 +32,8 @@ public void testSingleValued() throws IOException {
Directory dir = newDirectory();
// we need the default codec to check for singletons
IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(null).setCodec(TestUtil.getDefaultCodec()));
Document doc = new Document();
for (IndexableField f : NumberFieldMapper.NumberType.HALF_FLOAT.createFields("half_float", 3f, false, true, false)) {
doc.add(f);
}
LuceneDocument doc = new LuceneDocument();
NumberFieldMapper.NumberType.HALF_FLOAT.addFields(doc, "half_float", 3f, false, true, false);
w.addDocument(doc);
final DirectoryReader dirReader = DirectoryReader.open(w);
LeafReader reader = getOnlyLeafReader(dirReader);
Expand All @@ -55,13 +52,9 @@ public void testSingleValued() throws IOException {
public void testMultiValued() throws IOException {
Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(null));
Document doc = new Document();
for (IndexableField f : NumberFieldMapper.NumberType.HALF_FLOAT.createFields("half_float", 3f, false, true, false)) {
doc.add(f);
}
for (IndexableField f : NumberFieldMapper.NumberType.HALF_FLOAT.createFields("half_float", 2f, false, true, false)) {
doc.add(f);
}
LuceneDocument doc = new LuceneDocument();
NumberFieldMapper.NumberType.HALF_FLOAT.addFields(doc, "half_float", 3f, false, true, false);
NumberFieldMapper.NumberType.HALF_FLOAT.addFields(doc, "half_float", 2f, false, true, false);
w.addDocument(doc);
final DirectoryReader dirReader = DirectoryReader.open(w);
LeafReader reader = getOnlyLeafReader(dirReader);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -532,7 +532,9 @@ public void doTestDocValueRangeQueries(NumberType type, Supplier<Number> valueSu
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
final int numDocs = TestUtil.nextInt(random(), 100, 500);
for (int i = 0; i < numDocs; ++i) {
w.addDocument(type.createFields("foo", valueSupplier.get(), true, true, false));
final LuceneDocument doc = new LuceneDocument();
type.addFields(doc, "foo", valueSupplier.get(), true, true, false);
w.addDocument(doc);
}
DirectoryReader reader = DirectoryReader.open(w);
IndexSearcher searcher = newSearcher(reader);
Expand Down Expand Up @@ -593,7 +595,9 @@ public void doTestIndexSortRangeQueries(NumberType type, Supplier<Number> valueS
IndexWriter w = new IndexWriter(dir, writerConfig);
final int numDocs = TestUtil.nextInt(random(), 100, 500);
for (int i = 0; i < numDocs; ++i) {
w.addDocument(type.createFields("field", valueSupplier.get(), true, true, false));
final LuceneDocument doc = new LuceneDocument();
type.addFields(doc, "field", valueSupplier.get(), true, true, false);
w.addDocument(doc);
}

// Ensure that the optimized index sort query gives the same results as a points query.
Expand Down

0 comments on commit da4577e

Please sign in to comment.