@@ -14,8 +14,6 @@
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
@@ -32,7 +30,6 @@
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOFunction;
import org.elasticsearch.common.CheckedIntFunction;
import org.elasticsearch.common.io.stream.ByteArrayStreamInput;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.text.UTF8DecodingReader;
import org.elasticsearch.common.unit.Fuzziness;
@@ -42,7 +39,6 @@
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.fielddata.FieldDataContext;
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.fielddata.SortedBinaryDocValues;
import org.elasticsearch.index.fielddata.SourceValueFetcherSortedBinaryIndexFieldData;
import org.elasticsearch.index.fielddata.StoredFieldSortedBinaryIndexFieldData;
import org.elasticsearch.index.fieldvisitor.StoredFieldLoader;
@@ -301,17 +297,12 @@ private IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOException>>

if (parent instanceof KeywordFieldMapper.KeywordFieldType keywordParent
&& keywordParent.ignoreAbove().valuesPotentiallyIgnored()) {
final String parentFallbackFieldName = keywordParent.syntheticSourceFallbackFieldName();
if (parent.isStored()) {
return combineFieldFetchers(
storedFieldFetcher(parentFieldName),
ignoredValuesDocValuesFieldFetcher(keywordParent.syntheticSourceFallbackFieldName())
);
return storedFieldFetcher(parentFieldName, parentFallbackFieldName);
} else if (parent.hasDocValues()) {
var ifd = searchExecutionContext.getForField(parent, MappedFieldType.FielddataOperation.SEARCH);
return combineFieldFetchers(
docValuesFieldFetcher(ifd),
ignoredValuesDocValuesFieldFetcher(keywordParent.syntheticSourceFallbackFieldName())
);
return combineFieldFetchers(docValuesFieldFetcher(ifd), storedFieldFetcher(parentFallbackFieldName));
}
}

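The branches above each produce a per-document fetcher: an IOFunction that binds to a leaf reader context and returns a CheckedIntFunction from doc id to the field's values. combineFieldFetchers itself is not shown in this diff, so the following is only a sketch of what combining two such fetchers could look like; every name in it is assumed rather than taken from the real implementation:

private static IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOException>> combine(
    IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOException>> first,
    IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOException>> second
) {
    return context -> {
        // bind both fetchers to the current segment once
        var firstPerDoc = first.apply(context);
        var secondPerDoc = second.apply(context);
        return docId -> {
            // concatenate the regular values with the values from the fallback field
            var values = new ArrayList<Object>(firstPerDoc.apply(docId));
            values.addAll(secondPerDoc.apply(docId));
            return values;
        };
    };
}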
@@ -334,16 +325,22 @@ private IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOException>>
final KeywordFieldMapper.KeywordFieldType keywordDelegate
) {
if (keywordDelegate.ignoreAbove().valuesPotentiallyIgnored()) {
String delegateFieldName = keywordDelegate.name();
// bc we don't know whether the delegate will ignore a value, we must also check the fallback field created by this
// match_only_text field
// because we don't know whether the delegate ignored any values during parsing, we must also check the current field
String fieldName = name();
String fallbackName = syntheticSourceFallbackFieldName();

// delegate field names
String delegateFieldName = keywordDelegate.name();
String delegateFieldFallbackName = keywordDelegate.syntheticSourceFallbackFieldName();

if (keywordDelegate.isStored()) {
return storedFieldFetcher(delegateFieldName, fallbackName);
return storedFieldFetcher(delegateFieldName, delegateFieldFallbackName, fieldName, fallbackName);
} else if (keywordDelegate.hasDocValues()) {
var ifd = searchExecutionContext.getForField(keywordDelegate, MappedFieldType.FielddataOperation.SEARCH);
return combineFieldFetchers(docValuesFieldFetcher(ifd), storedFieldFetcher(fallbackName));
return combineFieldFetchers(
docValuesFieldFetcher(ifd),
storedFieldFetcher(delegateFieldFallbackName, fieldName, fallbackName)
);
}
}

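Since every fallback involved here is a stored field, one storedFieldFetcher call over several names can replace combining separate fetchers: the stored-field loader reads all requested names for a document and the values are concatenated in request order. An illustrative helper for that last step, assuming the loaded values arrive keyed by field name (this helper does not exist in the codebase):

private static List<Object> flattenInRequestOrder(Map<String, List<Object>> storedByField, String... names) {
    var out = new ArrayList<Object>();
    for (String name : names) {
        // fields absent from this document simply contribute nothing
        out.addAll(storedByField.getOrDefault(name, List.of()));
    }
    return out;
}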
@@ -358,34 +355,25 @@ private IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOException>>
}
}

private IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOException>> docValuesFieldFetcher(IndexFieldData<?> ifd) {
return context -> {
SortedBinaryDocValues indexedValuesDocValues = ifd.load(context).getBytesValues();
return docId -> getValuesFromDocValues(indexedValuesDocValues, docId);
};
}

private IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOException>> ignoredValuesDocValuesFieldFetcher(
String fieldName
private static IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOException>> docValuesFieldFetcher(
IndexFieldData<?> ifd
) {
return context -> {
CustomBinaryDocValues ignoredValuesDocValues = new CustomBinaryDocValues(DocValues.getBinary(context.reader(), fieldName));
return docId -> getValuesFromDocValues(ignoredValuesDocValues, docId);
var sortedBinaryDocValues = ifd.load(context).getBytesValues();
return docId -> {
if (sortedBinaryDocValues.advanceExact(docId)) {
var values = new ArrayList<>(sortedBinaryDocValues.docValueCount());
for (int i = 0; i < sortedBinaryDocValues.docValueCount(); i++) {
values.add(sortedBinaryDocValues.nextValue().utf8ToString());
}
return values;
} else {
return List.of();
}
};
};
}

private List<Object> getValuesFromDocValues(SortedBinaryDocValues docValues, int docId) throws IOException {
if (docValues.advanceExact(docId)) {
var values = new ArrayList<>(docValues.docValueCount());
for (int i = 0; i < docValues.docValueCount(); i++) {
values.add(docValues.nextValue().utf8ToString());
}
return values;
} else {
return List.of();
}
}

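The rewritten docValuesFieldFetcher above inlines the standard SortedBinaryDocValues iteration: advanceExact positions the iterator on a document, docValueCount reports how many values that document holds, and nextValue returns each value in order. A sketch of driving such a fetcher across one segment; the reader and field-data setup are assumed, not taken from this diff:

var perLeaf = docValuesFieldFetcher(ifd);
for (LeafReaderContext leaf : reader.leaves()) {
    var perDoc = perLeaf.apply(leaf);
    for (int docId = 0; docId < leaf.reader().maxDoc(); docId++) {
        // an empty list comes back for documents without a value
        List<Object> values = perDoc.apply(docId);
    }
}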
private static IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOException>> storedFieldFetcher(String... names) {
var loader = StoredFieldLoader.create(false, Set.of(names));
return context -> {
@@ -791,46 +779,4 @@ protected void writeValue(Object value, XContentBuilder b) throws IOException {

return fieldLoader;
}

/**
* A wrapper around {@link BinaryDocValues} that exposes some quality of life functions. Note, these values are not sorted.
*/
private static class CustomBinaryDocValues extends SortedBinaryDocValues {

private final BinaryDocValues binaryDocValues;
private final ByteArrayStreamInput stream;

private int docValueCount = 0;

CustomBinaryDocValues(BinaryDocValues binaryDocValues) {
this.binaryDocValues = binaryDocValues;
this.stream = new ByteArrayStreamInput();
}

@Override
public BytesRef nextValue() throws IOException {
// this function already knows how to decode the underlying bytes array, so no need to explicitly call VInt()
return stream.readBytesRef();
}

@Override
public boolean advanceExact(int docId) throws IOException {
// if document has a value, read underlying bytes
if (binaryDocValues.advanceExact(docId)) {
BytesRef docValuesBytes = binaryDocValues.binaryValue();
stream.reset(docValuesBytes.bytes, docValuesBytes.offset, docValuesBytes.length);
docValueCount = stream.readVInt();
return true;
}

// otherwise there is nothing to do
docValueCount = 0;
return false;
}

@Override
public int docValueCount() {
return docValueCount;
}
}
}
@@ -465,7 +465,7 @@ synthetic_source match_only_text as multi-field with ignored keyword as parent:
id: "1"
refresh: true
body:
foo: [ "Apache Lucene powers Elasticsearch", "Apache", "Apache Lucene" ]
foo: [ "Apache Lucene powers Elasticsearch", "Apache" ]

- do:
search:
@@ -477,7 +477,7 @@

- match: { "hits.total.value": 1 }
- match:
hits.hits.0._source.foo: [ "Apache", "Apache Lucene powers Elasticsearch", "Apache Lucene" ]
hits.hits.0._source.foo: [ "Apache", "Apache Lucene powers Elasticsearch" ]

---
synthetic_source match_only_text as multi-field with stored keyword as parent:

This file was deleted.

@@ -40,8 +40,6 @@
import org.apache.lucene.util.automaton.CompiledAutomaton;
import org.apache.lucene.util.automaton.CompiledAutomaton.AUTOMATON_TYPE;
import org.apache.lucene.util.automaton.Operations;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.common.io.stream.BytesStreamOutput;
import org.elasticsearch.common.lucene.BytesRefs;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.lucene.search.AutomatonQueries;
@@ -91,7 +89,6 @@
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
@@ -1174,14 +1171,7 @@ private boolean indexValue(DocumentParserContext context, XContentString value)
var utfBytes = value.bytes();
var bytesRef = new BytesRef(utfBytes.bytes(), utfBytes.offset(), utfBytes.length());
final String fieldName = fieldType().syntheticSourceFallbackFieldName();

// store the value in a binary doc values field, create one if it doesn't exist
MultiValuedBinaryDocValuesField field = (MultiValuedBinaryDocValuesField) context.doc().getByKey(fieldName);
if (field == null) {
field = new MultiValuedBinaryDocValuesField(fieldName);
context.doc().addWithKey(fieldName, field);
}
field.add(bytesRef);
context.doc().add(new StoredField(fieldName, bytesRef));
}

return false;
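The replacement above drops the custom binary-doc-values encoding in favor of one StoredField per ignored value. Lucene keeps repeated stored fields with the same name and hands them back in insertion order, so duplicates now survive, which lines up with the test changes further down that remove the deduplication expectation. A minimal illustration of that property (the field name is only meant to mirror the fallback naming):

Document doc = new Document();
doc.add(new StoredField("foo._original", new BytesRef("Apache")));
doc.add(new StoredField("foo._original", new BytesRef("Apache")));       // duplicate is preserved
doc.add(new StoredField("foo._original", new BytesRef("Apache Lucene")));
// at read time the three values come back in the order they were added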
@@ -1344,56 +1334,15 @@ protected BytesRef preserve(BytesRef value) {
// extra copy of the field for supporting synthetic source. This layer will check that copy.
if (fieldType().ignoreAbove.valuesPotentiallyIgnored()) {
final String fieldName = fieldType().syntheticSourceFallbackFieldName();
layers.add(new BinaryDocValuesSyntheticFieldLoaderLayer(fieldName));
}

return new CompositeSyntheticFieldLoader(leafFieldName, fullFieldName, layers);
}

/**
* A custom implementation of {@link org.apache.lucene.index.BinaryDocValues} that uses a {@link Set} to maintain a collection of unique
* binary doc values for fields with multiple values per document.
*/
private static final class MultiValuedBinaryDocValuesField extends CustomDocValuesField {

private final Set<BytesRef> uniqueValues;
private int docValuesByteCount = 0;

MultiValuedBinaryDocValuesField(String name) {
super(name);
// linked hash set to maintain insertion order of elements
uniqueValues = new LinkedHashSet<>();
}

public void add(final BytesRef value) {
if (uniqueValues.add(value)) {
// might as well track these on the go as opposed to having to loop through all entries later
docValuesByteCount += value.length;
}
}

/**
* Encodes the collection of binary doc values as a single contiguous binary array, wrapped in {@link BytesRef}. This array takes
* the form of [doc value count][length of value 1][value 1][length of value 2][value 2]...
*/
@Override
public BytesRef binaryValue() {
int docValuesCount = uniqueValues.size();
// the + 1 is for the total doc values count, which is prefixed at the start of the array
int streamSize = docValuesByteCount + (docValuesCount + 1) * Integer.BYTES;

try (BytesStreamOutput out = new BytesStreamOutput(streamSize)) {
out.writeVInt(docValuesCount);
for (BytesRef value : uniqueValues) {
int valueLength = value.length;
out.writeVInt(valueLength);
out.writeBytes(value.bytes, value.offset, valueLength);
layers.add(new CompositeSyntheticFieldLoader.StoredFieldLayer(fieldName) {
@Override
protected void writeValue(Object value, XContentBuilder b) throws IOException {
BytesRef ref = (BytesRef) value;
b.utf8Value(ref.bytes, ref.offset, ref.length);
}
return out.bytes().toBytesRef();
} catch (IOException e) {
throw new ElasticsearchException("Failed to get binary value", e);
}
});
}

return new CompositeSyntheticFieldLoader(leafFieldName, fullFieldName, layers);
}
}
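For contrast, the deleted MultiValuedBinaryDocValuesField packed all values of a document into a single binary blob framed as [doc value count][length of value 1][value 1][length of value 2][value 2]..., and CustomBinaryDocValues undid that framing on the read side. A self-contained sketch of the same framing with plain java.io streams; the original used Elasticsearch's BytesStreamOutput/ByteArrayStreamInput with variable-length VInts rather than the fixed-width ints used here:

import java.io.*;
import java.util.*;

class LengthPrefixedFraming {

    // encode: [count][len1][value1][len2][value2]...
    static byte[] encode(List<byte[]> values) throws IOException {
        var bytes = new ByteArrayOutputStream();
        var out = new DataOutputStream(bytes);
        out.writeInt(values.size());
        for (byte[] v : values) {
            out.writeInt(v.length);
            out.write(v);
        }
        return bytes.toByteArray();
    }

    // decode the blob back into the individual values
    static List<byte[]> decode(byte[] blob) throws IOException {
        var in = new DataInputStream(new ByteArrayInputStream(blob));
        int count = in.readInt();
        var values = new ArrayList<byte[]>(count);
        for (int i = 0; i < count; i++) {
            var v = new byte[in.readInt()];
            in.readFully(v);
            values.add(v);
        }
        return values;
    }
}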
@@ -52,7 +52,6 @@ public void testSynthesizeArrayIgnoreAbove() throws Exception {
.endObject()
.endObject()
.endObject();

// Note values that would be ignored are added at the end of arrays,
// this makes testing easier as ignored values are always synthesized after regular values:
var arrayValues = new Object[][] {
@@ -61,16 +60,7 @@
new Object[] { "123", "1234", "12345" },
new Object[] { null, null, null, "blabla" },
new Object[] { "1", "2", "3", "blabla" } };

// values in the original array should be deduplicated
var expectedArrayValues = new Object[][] {
new Object[] { null, "a", "ab", "abc", "abcd", null, "abcde" },
new Object[] { "12345" },
new Object[] { "123", "1234", "12345" },
new Object[] { null, null, null, "blabla" },
new Object[] { "1", "2", "3", "blabla" } };

verifySyntheticArray(arrayValues, expectedArrayValues, mapping, "_id");
verifySyntheticArray(arrayValues, mapping, "_id", "field._original");
}

public void testSynthesizeObjectArray() throws Exception {