Skip to content

Commit

Permalink
Cut over from Field to StringField when applicable. (#94540)
Browse files Browse the repository at this point in the history
The most recent Lucene update made `StringField` more efficient than `Field`
when indexing simple keywords. This PR cuts over the remaining places where we
use `Field` to index keywords so that they use `StringField` instead.
  • Loading branch information
jpountz committed Mar 23, 2023
1 parent e12e83f commit 0c10cef
Show file tree
Hide file tree
Showing 31 changed files with 188 additions and 388 deletions.
5 changes: 5 additions & 0 deletions docs/changelog/94540.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 94540
summary: Cut over from Field to `StringField` when applicable
area: Mapping
type: enhancement
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,8 @@
package org.elasticsearch.join.mapper;

import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.document.StringField;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.index.analysis.NamedAnalyzer;
Expand Down Expand Up @@ -40,17 +39,6 @@
public final class ParentIdFieldMapper extends FieldMapper {
static final String CONTENT_TYPE = "parent";

static class Defaults {
static final FieldType FIELD_TYPE = new FieldType();

static {
FIELD_TYPE.setTokenized(false);
FIELD_TYPE.setOmitNorms(true);
FIELD_TYPE.setIndexOptions(IndexOptions.DOCS);
FIELD_TYPE.freeze();
}
}

public static final class ParentIdFieldType extends StringFieldType {

private final boolean eagerGlobalOrdinals;
Expand Down Expand Up @@ -116,7 +104,7 @@ protected void parseCreateField(DocumentParserContext context) {

public void indexValue(DocumentParserContext context, String refId) {
BytesRef binaryValue = new BytesRef(refId);
Field field = new Field(fieldType().name(), binaryValue, Defaults.FIELD_TYPE);
Field field = new StringField(fieldType().name(), binaryValue, Field.Store.NO);
context.doc().add(field);
context.doc().add(new SortedDocValuesField(fieldType().name(), binaryValue));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,8 @@
package org.elasticsearch.join.mapper;

import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.document.StringField;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.index.IndexSettings;
Expand Down Expand Up @@ -57,17 +56,6 @@ public final class ParentJoinFieldMapper extends FieldMapper {
public static final String NAME = "join";
public static final String CONTENT_TYPE = "join";

public static class Defaults {
public static final FieldType FIELD_TYPE = new FieldType();

static {
FIELD_TYPE.setTokenized(false);
FIELD_TYPE.setOmitNorms(true);
FIELD_TYPE.setIndexOptions(IndexOptions.DOCS);
FIELD_TYPE.freeze();
}
}

private static void checkIndexCompatibility(IndexSettings settings, String name) {
String indexName = settings.getIndex().getName();
if (settings.getIndexMetadata().isRoutingPartitionedIndex()) {
Expand Down Expand Up @@ -303,7 +291,7 @@ public void parse(DocumentParserContext context) throws IOException {
}

BytesRef binaryValue = new BytesRef(name);
Field field = new Field(fieldType().name(), binaryValue, Defaults.FIELD_TYPE);
Field field = new StringField(fieldType().name(), binaryValue, Field.Store.NO);
context.doc().add(field);
context.doc().add(new SortedDocValuesField(fieldType().name(), binaryValue));
context.path().remove();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,9 @@
package org.elasticsearch.percolator;

import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.PointValues;
Expand Down Expand Up @@ -442,21 +441,13 @@ static void createQueryBuilderField(
}
}

private static final FieldType INDEXED_KEYWORD = new FieldType();
static {
INDEXED_KEYWORD.setTokenized(false);
INDEXED_KEYWORD.setOmitNorms(true);
INDEXED_KEYWORD.setIndexOptions(IndexOptions.DOCS);
INDEXED_KEYWORD.freeze();
}

void processQuery(Query query, DocumentParserContext context) {
LuceneDocument doc = context.doc();
PercolatorFieldType pft = (PercolatorFieldType) this.fieldType();
QueryAnalyzer.Result result;
result = QueryAnalyzer.analyze(query);
if (result == QueryAnalyzer.Result.UNKNOWN) {
doc.add(new Field(pft.extractionResultField.name(), EXTRACTION_FAILED, INDEXED_KEYWORD));
doc.add(new StringField(pft.extractionResultField.name(), EXTRACTION_FAILED, Field.Store.NO));
return;
}
for (QueryAnalyzer.QueryExtraction extraction : result.extractions) {
Expand All @@ -465,7 +456,7 @@ void processQuery(Query query, DocumentParserContext context) {
builder.append(new BytesRef(extraction.field()));
builder.append(FIELD_VALUE_SEPARATOR);
builder.append(extraction.bytes());
doc.add(new Field(queryTermsField.name(), builder.toBytesRef(), INDEXED_KEYWORD));
doc.add(new StringField(queryTermsField.name(), builder.toBytesRef(), Field.Store.NO));
} else if (extraction.range != null) {
byte[] min = extraction.range.lowerPoint;
byte[] max = extraction.range.upperPoint;
Expand All @@ -474,14 +465,14 @@ void processQuery(Query query, DocumentParserContext context) {
}

if (result.matchAllDocs) {
doc.add(new Field(extractionResultField.name(), EXTRACTION_FAILED, INDEXED_KEYWORD));
doc.add(new StringField(extractionResultField.name(), EXTRACTION_FAILED, Field.Store.NO));
if (result.verified) {
doc.add(new Field(extractionResultField.name(), EXTRACTION_COMPLETE, INDEXED_KEYWORD));
doc.add(new StringField(extractionResultField.name(), EXTRACTION_COMPLETE, Field.Store.NO));
}
} else if (result.verified) {
doc.add(new Field(extractionResultField.name(), EXTRACTION_COMPLETE, INDEXED_KEYWORD));
doc.add(new StringField(extractionResultField.name(), EXTRACTION_COMPLETE, Field.Store.NO));
} else {
doc.add(new Field(extractionResultField.name(), EXTRACTION_PARTIAL, INDEXED_KEYWORD));
doc.add(new StringField(extractionResultField.name(), EXTRACTION_PARTIAL, Field.Store.NO));
}

context.addToFieldNames(fieldType().name());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,9 @@
package org.elasticsearch.index.mapper;

import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.SortedNumericDocValuesField;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
Expand Down Expand Up @@ -60,17 +59,6 @@ public class BooleanFieldMapper extends FieldMapper {

public static final String CONTENT_TYPE = "boolean";

public static class Defaults {
public static final FieldType FIELD_TYPE = new FieldType();

static {
FIELD_TYPE.setOmitNorms(true);
FIELD_TYPE.setIndexOptions(IndexOptions.DOCS);
FIELD_TYPE.setTokenized(false);
FIELD_TYPE.freeze();
}
}

public static class Values {
public static final BytesRef TRUE = new BytesRef("T");
public static final BytesRef FALSE = new BytesRef("F");
Expand Down Expand Up @@ -452,7 +440,7 @@ private void indexValue(DocumentParserContext context, Boolean value) {
return;
}
if (indexed) {
context.doc().add(new Field(fieldType().name(), value ? "T" : "F", Defaults.FIELD_TYPE));
context.doc().add(new StringField(fieldType().name(), value ? Values.TRUE : Values.FALSE, Field.Store.NO));
}
if (stored) {
context.doc().add(new StoredField(fieldType().name(), value ? "T" : "F"));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,7 @@
package org.elasticsearch.index.mapper;

import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.document.StringField;
import org.apache.lucene.search.Query;
import org.elasticsearch.Version;
import org.elasticsearch.common.Explicit;
Expand Down Expand Up @@ -43,15 +42,7 @@ public static class Defaults {
public static final String NAME = FieldNamesFieldMapper.NAME;

public static final Explicit<Boolean> ENABLED = Explicit.IMPLICIT_TRUE;
public static final FieldType FIELD_TYPE = new FieldType();

static {
FIELD_TYPE.setIndexOptions(IndexOptions.DOCS);
FIELD_TYPE.setTokenized(false);
FIELD_TYPE.setStored(false);
FIELD_TYPE.setOmitNorms(true);
FIELD_TYPE.freeze();
}

}

private static FieldNamesFieldMapper toType(FieldMapper in) {
Expand Down Expand Up @@ -183,7 +174,7 @@ public void addFieldNames(DocumentParserContext context, String field) {
return;
}
assert noDocValues(field, context) : "Field " + field + " should not have docvalues";
context.doc().add(new Field(NAME, field, Defaults.FIELD_TYPE));
context.doc().add(new StringField(NAME, field, Field.Store.NO));
}

private static boolean noDocValues(String field, DocumentParserContext context) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,7 @@
package org.elasticsearch.index.mapper;

import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.document.StringField;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermRangeQuery;
import org.elasticsearch.index.query.SearchExecutionContext;
Expand All @@ -28,16 +27,6 @@ public final class IgnoredFieldMapper extends MetadataFieldMapper {

public static class Defaults {
public static final String NAME = IgnoredFieldMapper.NAME;

public static final FieldType FIELD_TYPE = new FieldType();

static {
FIELD_TYPE.setIndexOptions(IndexOptions.DOCS);
FIELD_TYPE.setTokenized(false);
FIELD_TYPE.setStored(true);
FIELD_TYPE.setOmitNorms(true);
FIELD_TYPE.freeze();
}
}

public static final IgnoredFieldType FIELD_TYPE = new IgnoredFieldType();
Expand Down Expand Up @@ -79,7 +68,7 @@ private IgnoredFieldMapper() {
@Override
public void postParse(DocumentParserContext context) {
for (String field : context.getIgnoredFields()) {
context.doc().add(new Field(NAME, field, Defaults.FIELD_TYPE));
context.doc().add(new StringField(NAME, field, Field.Store.YES));
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,7 @@
package org.elasticsearch.index.mapper;

import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
Expand Down Expand Up @@ -41,20 +40,7 @@ public static Query filter(Version version, String path) {
}

public static Field field(Version version, String path) {
return new Field(name(version), path, Defaults.FIELD_TYPE);
}

public static class Defaults {

public static final FieldType FIELD_TYPE = new FieldType();

static {
FIELD_TYPE.setIndexOptions(IndexOptions.DOCS);
FIELD_TYPE.setTokenized(false);
FIELD_TYPE.setStored(false);
FIELD_TYPE.setOmitNorms(true);
FIELD_TYPE.freeze();
}
return new StringField(name(version), path, Field.Store.NO);
}

public static final TypeParser PARSER = new FixedTypeParser(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,6 @@

package org.elasticsearch.index.mapper;

import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
Expand Down Expand Up @@ -59,13 +57,6 @@ public class ProvidedIdFieldMapper extends IdFieldMapper {
+ "If you require sorting or aggregating on this field you should also include the id in the "
+ "body of your documents, and map this field as a keyword field that has [doc_values] enabled";

public static class Defaults {

public static final FieldType FIELD_TYPE = StringField.TYPE_STORED;
public static final FieldType NESTED_FIELD_TYPE = StringField.TYPE_NOT_STORED;

}

public static final ProvidedIdFieldMapper NO_FIELD_DATA = new ProvidedIdFieldMapper(() -> false);

static final class IdFieldType extends TermBasedFieldType {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,7 @@
package org.elasticsearch.index.mapper;

import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.document.StringField;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.query.SearchExecutionContext;
Expand All @@ -29,16 +28,6 @@ public FieldMapper.Builder getMergeBuilder() {
}

public static class Defaults {

public static final FieldType FIELD_TYPE = new FieldType();
static {
FIELD_TYPE.setIndexOptions(IndexOptions.DOCS);
FIELD_TYPE.setTokenized(false);
FIELD_TYPE.setStored(true);
FIELD_TYPE.setOmitNorms(true);
FIELD_TYPE.freeze();
}

public static final boolean REQUIRED = false;
}

Expand Down Expand Up @@ -121,7 +110,7 @@ public boolean required() {
public void preParse(DocumentParserContext context) {
String routing = context.sourceToParse().routing();
if (routing != null) {
context.doc().add(new Field(fieldType().name(), routing, Defaults.FIELD_TYPE));
context.doc().add(new StringField(fieldType().name(), routing, Field.Store.YES));
context.addToFieldNames(fieldType().name());
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
package org.elasticsearch.index.mapper;

import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.search.MatchAllDocsQuery;
Expand All @@ -34,7 +33,6 @@
* {@code _tsid} and {@code @timestamp}.
*/
public class TsidExtractingIdFieldMapper extends IdFieldMapper {
public static final FieldType FIELD_TYPE = StringField.TYPE_STORED;
/**
* Maximum length of the {@code _tsid} in the {@link #documentDescription}.
*/
Expand Down

0 comments on commit 0c10cef

Please sign in to comment.