@@ -14,8 +14,6 @@
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
@@ -32,7 +30,6 @@
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOFunction;
import org.elasticsearch.common.CheckedIntFunction;
import org.elasticsearch.common.io.stream.ByteArrayStreamInput;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.text.UTF8DecodingReader;
import org.elasticsearch.common.unit.Fuzziness;
@@ -42,7 +39,6 @@
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.fielddata.FieldDataContext;
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.fielddata.SortedBinaryDocValues;
import org.elasticsearch.index.fielddata.SourceValueFetcherSortedBinaryIndexFieldData;
import org.elasticsearch.index.fielddata.StoredFieldSortedBinaryIndexFieldData;
import org.elasticsearch.index.fieldvisitor.StoredFieldLoader;
@@ -301,17 +297,12 @@ private IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOException>>

if (parent instanceof KeywordFieldMapper.KeywordFieldType keywordParent
&& keywordParent.ignoreAbove().valuesPotentiallyIgnored()) {
final String parentFallbackFieldName = keywordParent.syntheticSourceFallbackFieldName();
if (parent.isStored()) {
return combineFieldFetchers(
storedFieldFetcher(parentFieldName),
ignoredValuesDocValuesFieldFetcher(keywordParent.syntheticSourceFallbackFieldName())
);
return storedFieldFetcher(parentFieldName, parentFallbackFieldName);
} else if (parent.hasDocValues()) {
var ifd = searchExecutionContext.getForField(parent, MappedFieldType.FielddataOperation.SEARCH);
return combineFieldFetchers(
docValuesFieldFetcher(ifd),
ignoredValuesDocValuesFieldFetcher(keywordParent.syntheticSourceFallbackFieldName())
);
return combineFieldFetchers(docValuesFieldFetcher(ifd), storedFieldFetcher(parentFallbackFieldName));
}
}

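The branches above each produce a per-document fetcher: an IOFunction that binds to a leaf reader context and returns a CheckedIntFunction from doc id to the field's values. combineFieldFetchers itself is not shown in this diff, so the following is only a sketch of what combining two such fetchers could look like; every name in it is assumed rather than taken from the real implementation:

private static IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOException>> combine(
    IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOException>> first,
    IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOException>> second
) {
    return context -> {
        // bind both fetchers to the current segment once
        var firstPerDoc = first.apply(context);
        var secondPerDoc = second.apply(context);
        return docId -> {
            // concatenate the regular values with the values from the fallback field
            var values = new ArrayList<Object>(firstPerDoc.apply(docId));
            values.addAll(secondPerDoc.apply(docId));
            return values;
        };
    };
}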
@@ -334,16 +325,22 @@ private IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOException>>
final KeywordFieldMapper.KeywordFieldType keywordDelegate
) {
if (keywordDelegate.ignoreAbove().valuesPotentiallyIgnored()) {
String delegateFieldName = keywordDelegate.name();
// bc we don't know whether the delegate will ignore a value, we must also check the fallback field created by this
// match_only_text field
// because we don't know whether the delegate ignored any values during parsing, we must also check the current field
String fieldName = name();
String fallbackName = syntheticSourceFallbackFieldName();

// delegate field names
String delegateFieldName = keywordDelegate.name();
String delegateFieldFallbackName = keywordDelegate.syntheticSourceFallbackFieldName();

if (keywordDelegate.isStored()) {
return storedFieldFetcher(delegateFieldName, fallbackName);
return storedFieldFetcher(delegateFieldName, delegateFieldFallbackName, fieldName, fallbackName);
} else if (keywordDelegate.hasDocValues()) {
var ifd = searchExecutionContext.getForField(keywordDelegate, MappedFieldType.FielddataOperation.SEARCH);
return combineFieldFetchers(docValuesFieldFetcher(ifd), storedFieldFetcher(fallbackName));
return combineFieldFetchers(
docValuesFieldFetcher(ifd),
storedFieldFetcher(delegateFieldFallbackName, fieldName, fallbackName)
);
}
}

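Since every fallback involved here is a stored field, one storedFieldFetcher call over several names can replace combining separate fetchers: the stored-field loader reads all requested names for a document and the values are concatenated in request order. An illustrative helper for that last step, assuming the loaded values arrive keyed by field name (this helper does not exist in the codebase):

private static List<Object> flattenInRequestOrder(Map<String, List<Object>> storedByField, String... names) {
    var out = new ArrayList<Object>();
    for (String name : names) {
        // fields absent from this document simply contribute nothing
        out.addAll(storedByField.getOrDefault(name, List.of()));
    }
    return out;
}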
@@ -358,34 +355,25 @@ private IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOException>>
}
}

private IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOException>> docValuesFieldFetcher(IndexFieldData<?> ifd) {
return context -> {
SortedBinaryDocValues indexedValuesDocValues = ifd.load(context).getBytesValues();
return docId -> getValuesFromDocValues(indexedValuesDocValues, docId);
};
}

private IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOException>> ignoredValuesDocValuesFieldFetcher(
String fieldName
private static IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOException>> docValuesFieldFetcher(
IndexFieldData<?> ifd
) {
return context -> {
CustomBinaryDocValues ignoredValuesDocValues = new CustomBinaryDocValues(DocValues.getBinary(context.reader(), fieldName));
return docId -> getValuesFromDocValues(ignoredValuesDocValues, docId);
var sortedBinaryDocValues = ifd.load(context).getBytesValues();
return docId -> {
if (sortedBinaryDocValues.advanceExact(docId)) {
var values = new ArrayList<>(sortedBinaryDocValues.docValueCount());
for (int i = 0; i < sortedBinaryDocValues.docValueCount(); i++) {
values.add(sortedBinaryDocValues.nextValue().utf8ToString());
}
return values;
} else {
return List.of();
}
};
};
}

private List<Object> getValuesFromDocValues(SortedBinaryDocValues docValues, int docId) throws IOException {
if (docValues.advanceExact(docId)) {
var values = new ArrayList<>(docValues.docValueCount());
for (int i = 0; i < docValues.docValueCount(); i++) {
values.add(docValues.nextValue().utf8ToString());
}
return values;
} else {
return List.of();
}
}

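The rewritten docValuesFieldFetcher above inlines the standard SortedBinaryDocValues iteration: advanceExact positions the iterator on a document, docValueCount reports how many values that document holds, and nextValue returns each value in order. A sketch of driving such a fetcher across one segment; the reader and field-data setup are assumed, not taken from this diff:

var perLeaf = docValuesFieldFetcher(ifd);
for (LeafReaderContext leaf : reader.leaves()) {
    var perDoc = perLeaf.apply(leaf);
    for (int docId = 0; docId < leaf.reader().maxDoc(); docId++) {
        // an empty list comes back for documents without a value
        List<Object> values = perDoc.apply(docId);
    }
}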
private static IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOException>> storedFieldFetcher(String... names) {
var loader = StoredFieldLoader.create(false, Set.of(names));
return context -> {
@@ -791,46 +779,4 @@ protected void writeValue(Object value, XContentBuilder b) throws IOException {

return fieldLoader;
}

/**
* A wrapper around {@link BinaryDocValues} that exposes some quality of life functions. Note, these values are not sorted.
*/
private static class CustomBinaryDocValues extends SortedBinaryDocValues {

private final BinaryDocValues binaryDocValues;
private final ByteArrayStreamInput stream;

private int docValueCount = 0;

CustomBinaryDocValues(BinaryDocValues binaryDocValues) {
this.binaryDocValues = binaryDocValues;
this.stream = new ByteArrayStreamInput();
}

@Override
public BytesRef nextValue() throws IOException {
// this function already knows how to decode the underlying bytes array, so no need to explicitly call VInt()
return stream.readBytesRef();
}

@Override
public boolean advanceExact(int docId) throws IOException {
// if document has a value, read underlying bytes
if (binaryDocValues.advanceExact(docId)) {
BytesRef docValuesBytes = binaryDocValues.binaryValue();
stream.reset(docValuesBytes.bytes, docValuesBytes.offset, docValuesBytes.length);
docValueCount = stream.readVInt();
return true;
}

// otherwise there is nothing to do
docValueCount = 0;
return false;
}

@Override
public int docValueCount() {
return docValueCount;
}
}
}
@@ -465,7 +465,7 @@ synthetic_source match_only_text as multi-field with ignored keyword as parent:
id: "1"
refresh: true
body:
foo: [ "Apache Lucene powers Elasticsearch", "Apache", "Apache Lucene" ]
foo: [ "Apache Lucene powers Elasticsearch", "Apache" ]

- do:
search:
@@ -477,7 +477,7 @@

- match: { "hits.total.value": 1 }
- match:
hits.hits.0._source.foo: [ "Apache", "Apache Lucene powers Elasticsearch", "Apache Lucene" ]
hits.hits.0._source.foo: [ "Apache", "Apache Lucene powers Elasticsearch" ]

---
synthetic_source match_only_text as multi-field with stored keyword as parent:

This file was deleted.

@@ -40,8 +40,6 @@
import org.apache.lucene.util.automaton.CompiledAutomaton;
import org.apache.lucene.util.automaton.CompiledAutomaton.AUTOMATON_TYPE;
import org.apache.lucene.util.automaton.Operations;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.common.io.stream.BytesStreamOutput;
import org.elasticsearch.common.lucene.BytesRefs;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.lucene.search.AutomatonQueries;
@@ -91,7 +89,6 @@
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
@@ -1174,14 +1171,7 @@ private boolean indexValue(DocumentParserContext context, XContentString value)
var utfBytes = value.bytes();
var bytesRef = new BytesRef(utfBytes.bytes(), utfBytes.offset(), utfBytes.length());
final String fieldName = fieldType().syntheticSourceFallbackFieldName();

// store the value in a binary doc values field, create one if it doesn't exist
MultiValuedBinaryDocValuesField field = (MultiValuedBinaryDocValuesField) context.doc().getByKey(fieldName);
if (field == null) {
field = new MultiValuedBinaryDocValuesField(fieldName);
context.doc().addWithKey(fieldName, field);
}
field.add(bytesRef);
context.doc().add(new StoredField(fieldName, bytesRef));
}

return false;
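The replacement above drops the custom binary-doc-values encoding in favor of one StoredField per ignored value. Lucene keeps repeated stored fields with the same name and hands them back in insertion order, so duplicates now survive, which lines up with the test changes further down that remove the deduplication expectation. A minimal illustration of that property (the field name is only meant to mirror the fallback naming):

Document doc = new Document();
doc.add(new StoredField("foo._original", new BytesRef("Apache")));
doc.add(new StoredField("foo._original", new BytesRef("Apache")));       // duplicate is preserved
doc.add(new StoredField("foo._original", new BytesRef("Apache Lucene")));
// at read time the three values come back in the order they were added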
@@ -1344,56 +1334,15 @@ protected BytesRef preserve(BytesRef value) {
// extra copy of the field for supporting synthetic source. This layer will check that copy.
if (fieldType().ignoreAbove.valuesPotentiallyIgnored()) {
final String fieldName = fieldType().syntheticSourceFallbackFieldName();
layers.add(new BinaryDocValuesSyntheticFieldLoaderLayer(fieldName));
}

return new CompositeSyntheticFieldLoader(leafFieldName, fullFieldName, layers);
}

/**
* A custom implementation of {@link org.apache.lucene.index.BinaryDocValues} that uses a {@link Set} to maintain a collection of unique
* binary doc values for fields with multiple values per document.
*/
private static final class MultiValuedBinaryDocValuesField extends CustomDocValuesField {

private final Set<BytesRef> uniqueValues;
private int docValuesByteCount = 0;

MultiValuedBinaryDocValuesField(String name) {
super(name);
// linked hash set to maintain insertion order of elements
uniqueValues = new LinkedHashSet<>();
}

public void add(final BytesRef value) {
if (uniqueValues.add(value)) {
// might as well track these on the go as opposed to having to loop through all entries later
docValuesByteCount += value.length;
}
}

/**
* Encodes the collection of binary doc values as a single contiguous binary array, wrapped in {@link BytesRef}. This array takes
* the form of [doc value count][length of value 1][value 1][length of value 2][value 2]...
*/
@Override
public BytesRef binaryValue() {
int docValuesCount = uniqueValues.size();
// the + 1 is for the total doc values count, which is prefixed at the start of the array
int streamSize = docValuesByteCount + (docValuesCount + 1) * Integer.BYTES;

try (BytesStreamOutput out = new BytesStreamOutput(streamSize)) {
out.writeVInt(docValuesCount);
for (BytesRef value : uniqueValues) {
int valueLength = value.length;
out.writeVInt(valueLength);
out.writeBytes(value.bytes, value.offset, valueLength);
layers.add(new CompositeSyntheticFieldLoader.StoredFieldLayer(fieldName) {
@Override
protected void writeValue(Object value, XContentBuilder b) throws IOException {
BytesRef ref = (BytesRef) value;
b.utf8Value(ref.bytes, ref.offset, ref.length);
}
return out.bytes().toBytesRef();
} catch (IOException e) {
throw new ElasticsearchException("Failed to get binary value", e);
}
});
}

return new CompositeSyntheticFieldLoader(leafFieldName, fullFieldName, layers);
}
}
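For contrast, the deleted MultiValuedBinaryDocValuesField packed all values of a document into a single binary blob framed as [doc value count][length of value 1][value 1][length of value 2][value 2]..., and CustomBinaryDocValues undid that framing on the read side. A self-contained sketch of the same framing with plain java.io streams; the original used Elasticsearch's BytesStreamOutput/ByteArrayStreamInput with variable-length VInts rather than the fixed-width ints used here:

import java.io.*;
import java.util.*;

class LengthPrefixedFraming {

    // encode: [count][len1][value1][len2][value2]...
    static byte[] encode(List<byte[]> values) throws IOException {
        var bytes = new ByteArrayOutputStream();
        var out = new DataOutputStream(bytes);
        out.writeInt(values.size());
        for (byte[] v : values) {
            out.writeInt(v.length);
            out.write(v);
        }
        return bytes.toByteArray();
    }

    // decode the blob back into the individual values
    static List<byte[]> decode(byte[] blob) throws IOException {
        var in = new DataInputStream(new ByteArrayInputStream(blob));
        int count = in.readInt();
        var values = new ArrayList<byte[]>(count);
        for (int i = 0; i < count; i++) {
            var v = new byte[in.readInt()];
            in.readFully(v);
            values.add(v);
        }
        return values;
    }
}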
@@ -52,7 +52,6 @@ public void testSynthesizeArrayIgnoreAbove() throws Exception {
.endObject()
.endObject()
.endObject();

// Note values that would be ignored are added at the end of arrays,
// this makes testing easier as ignored values are always synthesized after regular values:
var arrayValues = new Object[][] {
@@ -61,16 +60,7 @@
new Object[] { "123", "1234", "12345" },
new Object[] { null, null, null, "blabla" },
new Object[] { "1", "2", "3", "blabla" } };

// values in the original array should be deduplicated
var expectedArrayValues = new Object[][] {
new Object[] { null, "a", "ab", "abc", "abcd", null, "abcde" },
new Object[] { "12345" },
new Object[] { "123", "1234", "12345" },
new Object[] { null, null, null, "blabla" },
new Object[] { "1", "2", "3", "blabla" } };

verifySyntheticArray(arrayValues, expectedArrayValues, mapping, "_id");
verifySyntheticArray(arrayValues, mapping, "_id", "field._original");
}

public void testSynthesizeObjectArray() throws Exception {