Cut over from Field to StringField when applicable. (#94540)

The most recent Lucene update made `StringField` more efficient than `Field` when indexing simple keywords. This PR cuts over remaining places where we use `Field` to index keywords to `StringField` instead.
elastic · Mar 23, 2023 · 0c10cef · 0c10cef
1 parent e12e83f
commit 0c10cef
Show file tree

Hide file tree

Showing 31 changed files with 188 additions and 388 deletions.
diff --git a/docs/changelog/94540.yaml b/docs/changelog/94540.yaml
@@ -0,0 +1,5 @@
+pr: 94540
+summary: Cut over from Field to `StringField` when applicable
+area: Mapping
+type: enhancement
+issues: []
diff --git a/modules/parent-join/src/main/java/org/elasticsearch/join/mapper/ParentIdFieldMapper.java b/modules/parent-join/src/main/java/org/elasticsearch/join/mapper/ParentIdFieldMapper.java
@@ -9,9 +9,8 @@
 package org.elasticsearch.join.mapper;
 
 import org.apache.lucene.document.Field;
-import org.apache.lucene.document.FieldType;
 import org.apache.lucene.document.SortedDocValuesField;
-import org.apache.lucene.index.IndexOptions;
+import org.apache.lucene.document.StringField;
 import org.apache.lucene.util.BytesRef;
 import org.elasticsearch.common.lucene.Lucene;
 import org.elasticsearch.index.analysis.NamedAnalyzer;
@@ -40,17 +39,6 @@
 public final class ParentIdFieldMapper extends FieldMapper {
     static final String CONTENT_TYPE = "parent";
 
-    static class Defaults {
-        static final FieldType FIELD_TYPE = new FieldType();
-
-        static {
-            FIELD_TYPE.setTokenized(false);
-            FIELD_TYPE.setOmitNorms(true);
-            FIELD_TYPE.setIndexOptions(IndexOptions.DOCS);
-            FIELD_TYPE.freeze();
-        }
-    }
-
     public static final class ParentIdFieldType extends StringFieldType {
 
         private final boolean eagerGlobalOrdinals;
@@ -116,7 +104,7 @@ protected void parseCreateField(DocumentParserContext context) {
 
     public void indexValue(DocumentParserContext context, String refId) {
         BytesRef binaryValue = new BytesRef(refId);
-        Field field = new Field(fieldType().name(), binaryValue, Defaults.FIELD_TYPE);
+        Field field = new StringField(fieldType().name(), binaryValue, Field.Store.NO);
         context.doc().add(field);
         context.doc().add(new SortedDocValuesField(fieldType().name(), binaryValue));
     }

diff --git a/modules/parent-join/src/main/java/org/elasticsearch/join/mapper/ParentJoinFieldMapper.java b/modules/parent-join/src/main/java/org/elasticsearch/join/mapper/ParentJoinFieldMapper.java
@@ -9,9 +9,8 @@
 package org.elasticsearch.join.mapper;
 
 import org.apache.lucene.document.Field;
-import org.apache.lucene.document.FieldType;
 import org.apache.lucene.document.SortedDocValuesField;
-import org.apache.lucene.index.IndexOptions;
+import org.apache.lucene.document.StringField;
 import org.apache.lucene.util.BytesRef;
 import org.elasticsearch.common.lucene.Lucene;
 import org.elasticsearch.index.IndexSettings;
@@ -57,17 +56,6 @@ public final class ParentJoinFieldMapper extends FieldMapper {
     public static final String NAME = "join";
     public static final String CONTENT_TYPE = "join";
 
-    public static class Defaults {
-        public static final FieldType FIELD_TYPE = new FieldType();
-
-        static {
-            FIELD_TYPE.setTokenized(false);
-            FIELD_TYPE.setOmitNorms(true);
-            FIELD_TYPE.setIndexOptions(IndexOptions.DOCS);
-            FIELD_TYPE.freeze();
-        }
-    }
-
     private static void checkIndexCompatibility(IndexSettings settings, String name) {
         String indexName = settings.getIndex().getName();
         if (settings.getIndexMetadata().isRoutingPartitionedIndex()) {
@@ -303,7 +291,7 @@ public void parse(DocumentParserContext context) throws IOException {
         }
 
         BytesRef binaryValue = new BytesRef(name);
-        Field field = new Field(fieldType().name(), binaryValue, Defaults.FIELD_TYPE);
+        Field field = new StringField(fieldType().name(), binaryValue, Field.Store.NO);
         context.doc().add(field);
         context.doc().add(new SortedDocValuesField(fieldType().name(), binaryValue));
         context.path().remove();

diff --git a/modules/percolator/src/main/java/org/elasticsearch/percolator/PercolatorFieldMapper.java b/modules/percolator/src/main/java/org/elasticsearch/percolator/PercolatorFieldMapper.java
@@ -8,10 +8,9 @@
 package org.elasticsearch.percolator;
 
 import org.apache.lucene.document.Field;
-import org.apache.lucene.document.FieldType;
 import org.apache.lucene.document.NumericDocValuesField;
+import org.apache.lucene.document.StringField;
 import org.apache.lucene.index.FieldInfo;
-import org.apache.lucene.index.IndexOptions;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.index.PointValues;
@@ -442,21 +441,13 @@ static void createQueryBuilderField(
         }
     }
 
-    private static final FieldType INDEXED_KEYWORD = new FieldType();
-    static {
-        INDEXED_KEYWORD.setTokenized(false);
-        INDEXED_KEYWORD.setOmitNorms(true);
-        INDEXED_KEYWORD.setIndexOptions(IndexOptions.DOCS);
-        INDEXED_KEYWORD.freeze();
-    }
-
     void processQuery(Query query, DocumentParserContext context) {
         LuceneDocument doc = context.doc();
         PercolatorFieldType pft = (PercolatorFieldType) this.fieldType();
         QueryAnalyzer.Result result;
         result = QueryAnalyzer.analyze(query);
         if (result == QueryAnalyzer.Result.UNKNOWN) {
-            doc.add(new Field(pft.extractionResultField.name(), EXTRACTION_FAILED, INDEXED_KEYWORD));
+            doc.add(new StringField(pft.extractionResultField.name(), EXTRACTION_FAILED, Field.Store.NO));
             return;
         }
         for (QueryAnalyzer.QueryExtraction extraction : result.extractions) {
@@ -465,7 +456,7 @@ void processQuery(Query query, DocumentParserContext context) {
                 builder.append(new BytesRef(extraction.field()));
                 builder.append(FIELD_VALUE_SEPARATOR);
                 builder.append(extraction.bytes());
-                doc.add(new Field(queryTermsField.name(), builder.toBytesRef(), INDEXED_KEYWORD));
+                doc.add(new StringField(queryTermsField.name(), builder.toBytesRef(), Field.Store.NO));
             } else if (extraction.range != null) {
                 byte[] min = extraction.range.lowerPoint;
                 byte[] max = extraction.range.upperPoint;
@@ -474,14 +465,14 @@ void processQuery(Query query, DocumentParserContext context) {
         }
 
         if (result.matchAllDocs) {
-            doc.add(new Field(extractionResultField.name(), EXTRACTION_FAILED, INDEXED_KEYWORD));
+            doc.add(new StringField(extractionResultField.name(), EXTRACTION_FAILED, Field.Store.NO));
             if (result.verified) {
-                doc.add(new Field(extractionResultField.name(), EXTRACTION_COMPLETE, INDEXED_KEYWORD));
+                doc.add(new StringField(extractionResultField.name(), EXTRACTION_COMPLETE, Field.Store.NO));
             }
         } else if (result.verified) {
-            doc.add(new Field(extractionResultField.name(), EXTRACTION_COMPLETE, INDEXED_KEYWORD));
+            doc.add(new StringField(extractionResultField.name(), EXTRACTION_COMPLETE, Field.Store.NO));
         } else {
-            doc.add(new Field(extractionResultField.name(), EXTRACTION_PARTIAL, INDEXED_KEYWORD));
+            doc.add(new StringField(extractionResultField.name(), EXTRACTION_PARTIAL, Field.Store.NO));
         }
 
         context.addToFieldNames(fieldType().name());

diff --git a/server/src/main/java/org/elasticsearch/index/mapper/BooleanFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/BooleanFieldMapper.java
@@ -9,10 +9,9 @@
 package org.elasticsearch.index.mapper;
 
 import org.apache.lucene.document.Field;
-import org.apache.lucene.document.FieldType;
 import org.apache.lucene.document.SortedNumericDocValuesField;
 import org.apache.lucene.document.StoredField;
-import org.apache.lucene.index.IndexOptions;
+import org.apache.lucene.document.StringField;
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.search.BooleanClause;
 import org.apache.lucene.search.BooleanQuery;
@@ -60,17 +59,6 @@ public class BooleanFieldMapper extends FieldMapper {
 
     public static final String CONTENT_TYPE = "boolean";
 
-    public static class Defaults {
-        public static final FieldType FIELD_TYPE = new FieldType();
-
-        static {
-            FIELD_TYPE.setOmitNorms(true);
-            FIELD_TYPE.setIndexOptions(IndexOptions.DOCS);
-            FIELD_TYPE.setTokenized(false);
-            FIELD_TYPE.freeze();
-        }
-    }
-
     public static class Values {
         public static final BytesRef TRUE = new BytesRef("T");
         public static final BytesRef FALSE = new BytesRef("F");
@@ -452,7 +440,7 @@ private void indexValue(DocumentParserContext context, Boolean value) {
             return;
         }
         if (indexed) {
-            context.doc().add(new Field(fieldType().name(), value ? "T" : "F", Defaults.FIELD_TYPE));
+            context.doc().add(new StringField(fieldType().name(), value ? Values.TRUE : Values.FALSE, Field.Store.NO));
         }
         if (stored) {
             context.doc().add(new StoredField(fieldType().name(), value ? "T" : "F"));

diff --git a/server/src/main/java/org/elasticsearch/index/mapper/FieldNamesFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/FieldNamesFieldMapper.java
@@ -9,8 +9,7 @@
 package org.elasticsearch.index.mapper;
 
 import org.apache.lucene.document.Field;
-import org.apache.lucene.document.FieldType;
-import org.apache.lucene.index.IndexOptions;
+import org.apache.lucene.document.StringField;
 import org.apache.lucene.search.Query;
 import org.elasticsearch.Version;
 import org.elasticsearch.common.Explicit;
@@ -43,15 +42,7 @@ public static class Defaults {
         public static final String NAME = FieldNamesFieldMapper.NAME;
 
         public static final Explicit<Boolean> ENABLED = Explicit.IMPLICIT_TRUE;
-        public static final FieldType FIELD_TYPE = new FieldType();
-
-        static {
-            FIELD_TYPE.setIndexOptions(IndexOptions.DOCS);
-            FIELD_TYPE.setTokenized(false);
-            FIELD_TYPE.setStored(false);
-            FIELD_TYPE.setOmitNorms(true);
-            FIELD_TYPE.freeze();
-        }
+
     }
 
     private static FieldNamesFieldMapper toType(FieldMapper in) {
@@ -183,7 +174,7 @@ public void addFieldNames(DocumentParserContext context, String field) {
             return;
         }
         assert noDocValues(field, context) : "Field " + field + " should not have docvalues";
-        context.doc().add(new Field(NAME, field, Defaults.FIELD_TYPE));
+        context.doc().add(new StringField(NAME, field, Field.Store.NO));
     }
 
     private static boolean noDocValues(String field, DocumentParserContext context) {

diff --git a/server/src/main/java/org/elasticsearch/index/mapper/IgnoredFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/IgnoredFieldMapper.java
@@ -9,8 +9,7 @@
 package org.elasticsearch.index.mapper;
 
 import org.apache.lucene.document.Field;
-import org.apache.lucene.document.FieldType;
-import org.apache.lucene.index.IndexOptions;
+import org.apache.lucene.document.StringField;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.TermRangeQuery;
 import org.elasticsearch.index.query.SearchExecutionContext;
@@ -28,16 +27,6 @@ public final class IgnoredFieldMapper extends MetadataFieldMapper {
 
     public static class Defaults {
         public static final String NAME = IgnoredFieldMapper.NAME;
-
-        public static final FieldType FIELD_TYPE = new FieldType();
-
-        static {
-            FIELD_TYPE.setIndexOptions(IndexOptions.DOCS);
-            FIELD_TYPE.setTokenized(false);
-            FIELD_TYPE.setStored(true);
-            FIELD_TYPE.setOmitNorms(true);
-            FIELD_TYPE.freeze();
-        }
     }
 
     public static final IgnoredFieldType FIELD_TYPE = new IgnoredFieldType();
@@ -79,7 +68,7 @@ private IgnoredFieldMapper() {
     @Override
     public void postParse(DocumentParserContext context) {
         for (String field : context.getIgnoredFields()) {
-            context.doc().add(new Field(NAME, field, Defaults.FIELD_TYPE));
+            context.doc().add(new StringField(NAME, field, Field.Store.YES));
         }
     }
 

diff --git a/server/src/main/java/org/elasticsearch/index/mapper/NestedPathFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/NestedPathFieldMapper.java
@@ -9,8 +9,7 @@
 package org.elasticsearch.index.mapper;
 
 import org.apache.lucene.document.Field;
-import org.apache.lucene.document.FieldType;
-import org.apache.lucene.index.IndexOptions;
+import org.apache.lucene.document.StringField;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.TermQuery;
@@ -41,20 +40,7 @@ public static Query filter(Version version, String path) {
     }
 
     public static Field field(Version version, String path) {
-        return new Field(name(version), path, Defaults.FIELD_TYPE);
-    }
-
-    public static class Defaults {
-
-        public static final FieldType FIELD_TYPE = new FieldType();
-
-        static {
-            FIELD_TYPE.setIndexOptions(IndexOptions.DOCS);
-            FIELD_TYPE.setTokenized(false);
-            FIELD_TYPE.setStored(false);
-            FIELD_TYPE.setOmitNorms(true);
-            FIELD_TYPE.freeze();
-        }
+        return new StringField(name(version), path, Field.Store.NO);
     }
 
     public static final TypeParser PARSER = new FixedTypeParser(

diff --git a/server/src/main/java/org/elasticsearch/index/mapper/ProvidedIdFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/ProvidedIdFieldMapper.java
@@ -8,8 +8,6 @@
 
 package org.elasticsearch.index.mapper;
 
-import org.apache.lucene.document.FieldType;
-import org.apache.lucene.document.StringField;
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.search.MatchAllDocsQuery;
 import org.apache.lucene.search.Query;
@@ -59,13 +57,6 @@ public class ProvidedIdFieldMapper extends IdFieldMapper {
             + "If you require sorting or aggregating on this field you should also include the id in the "
             + "body of your documents, and map this field as a keyword field that has [doc_values] enabled";
 
-    public static class Defaults {
-
-        public static final FieldType FIELD_TYPE = StringField.TYPE_STORED;
-        public static final FieldType NESTED_FIELD_TYPE = StringField.TYPE_NOT_STORED;
-
-    }
-
     public static final ProvidedIdFieldMapper NO_FIELD_DATA = new ProvidedIdFieldMapper(() -> false);
 
     static final class IdFieldType extends TermBasedFieldType {

diff --git a/server/src/main/java/org/elasticsearch/index/mapper/RoutingFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/RoutingFieldMapper.java
@@ -9,8 +9,7 @@
 package org.elasticsearch.index.mapper;
 
 import org.apache.lucene.document.Field;
-import org.apache.lucene.document.FieldType;
-import org.apache.lucene.index.IndexOptions;
+import org.apache.lucene.document.StringField;
 import org.elasticsearch.common.lucene.Lucene;
 import org.elasticsearch.index.analysis.NamedAnalyzer;
 import org.elasticsearch.index.query.SearchExecutionContext;
@@ -29,16 +28,6 @@ public FieldMapper.Builder getMergeBuilder() {
     }
 
     public static class Defaults {
-
-        public static final FieldType FIELD_TYPE = new FieldType();
-        static {
-            FIELD_TYPE.setIndexOptions(IndexOptions.DOCS);
-            FIELD_TYPE.setTokenized(false);
-            FIELD_TYPE.setStored(true);
-            FIELD_TYPE.setOmitNorms(true);
-            FIELD_TYPE.freeze();
-        }
-
         public static final boolean REQUIRED = false;
     }
 
@@ -121,7 +110,7 @@ public boolean required() {
     public void preParse(DocumentParserContext context) {
         String routing = context.sourceToParse().routing();
         if (routing != null) {
-            context.doc().add(new Field(fieldType().name(), routing, Defaults.FIELD_TYPE));
+            context.doc().add(new StringField(fieldType().name(), routing, Field.Store.YES));
             context.addToFieldNames(fieldType().name());
         }
     }

diff --git a/server/src/main/java/org/elasticsearch/index/mapper/TsidExtractingIdFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/TsidExtractingIdFieldMapper.java
@@ -9,7 +9,6 @@
 package org.elasticsearch.index.mapper;
 
 import org.apache.lucene.document.Field;
-import org.apache.lucene.document.FieldType;
 import org.apache.lucene.document.StringField;
 import org.apache.lucene.index.IndexableField;
 import org.apache.lucene.search.MatchAllDocsQuery;
@@ -34,7 +33,6 @@
  * {@code _tsid} and {@code @timestamp}.
  */
 public class TsidExtractingIdFieldMapper extends IdFieldMapper {
-    public static final FieldType FIELD_TYPE = StringField.TYPE_STORED;
     /**
      * Maximum length of the {@code _tsid} in the {@link #documentDescription}.
      */