Skip to content

Commit

Permalink
Synthetic _source: support field in many cases (#89950)
Browse files Browse the repository at this point in the history
This adds support for the `field` scripting API in many but not all
cases. Before this change numbers, dates, and IPs supported the `field`
API when running with _source in synthetic mode because they always have
doc values. This change adds support for `match_only_text`, `store`d
`keyword` fields, and `store`d `text` fields. Two remaining field
configurations work with synthetic _source but do not work with `field`:
* A `text` field with a sub-`keyword` field that has `doc_values`
* A `text` field with a sub-`keyword` field that is `store`d

![image](https://user-images.githubusercontent.com/215970/189217841-4378ed42-e454-42c1-aaf0-6c2c041b29be.png)
  • Loading branch information
nik9000 committed Nov 10, 2022
1 parent f83a530 commit 74d0d19
Show file tree
Hide file tree
Showing 36 changed files with 623 additions and 205 deletions.
5 changes: 5 additions & 0 deletions docs/changelog/89950.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 89950
summary: "Synthetic _source: support `field` in many cases"
area: TSDB
type: enhancement
issues: []

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
import org.elasticsearch.index.fielddata.FieldDataContext;
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.fielddata.SourceValueFetcherSortedBinaryIndexFieldData;
import org.elasticsearch.index.fielddata.StoredFieldSortedBinaryIndexFieldData;
import org.elasticsearch.index.fieldvisitor.LeafStoredFieldLoader;
import org.elasticsearch.index.fieldvisitor.StoredFieldLoader;
import org.elasticsearch.index.mapper.DocumentParserContext;
Expand Down Expand Up @@ -121,7 +122,13 @@ private MatchOnlyTextFieldType buildFieldType(MapperBuilderContext context) {
NamedAnalyzer searchQuoteAnalyzer = analyzers.getSearchQuoteAnalyzer();
NamedAnalyzer indexAnalyzer = analyzers.getIndexAnalyzer();
TextSearchInfo tsi = new TextSearchInfo(Defaults.FIELD_TYPE, null, searchAnalyzer, searchQuoteAnalyzer);
MatchOnlyTextFieldType ft = new MatchOnlyTextFieldType(context.buildFullName(name), tsi, indexAnalyzer, meta.getValue());
MatchOnlyTextFieldType ft = new MatchOnlyTextFieldType(
context.buildFullName(name),
tsi,
indexAnalyzer,
context.isSourceSynthetic(),
meta.getValue()
);
return ft;
}

Expand All @@ -148,17 +155,24 @@ public static class MatchOnlyTextFieldType extends StringFieldType {
private final Analyzer indexAnalyzer;
private final TextFieldType textFieldType;

public MatchOnlyTextFieldType(String name, TextSearchInfo tsi, Analyzer indexAnalyzer, Map<String, String> meta) {
public MatchOnlyTextFieldType(
String name,
TextSearchInfo tsi,
Analyzer indexAnalyzer,
boolean isSyntheticSource,
Map<String, String> meta
) {
super(name, true, false, false, tsi, meta);
this.indexAnalyzer = Objects.requireNonNull(indexAnalyzer);
this.textFieldType = new TextFieldType(name);
this.textFieldType = new TextFieldType(name, isSyntheticSource);
}

/**
 * Convenience constructor that only takes the field name. It delegates to the
 * full constructor using {@link Lucene#STANDARD_ANALYZER} for the search,
 * search-quote and index analyzers, {@code false} for the synthetic-source
 * flag, and an empty metadata map.
 *
 * @param name the name of the field
 */
public MatchOnlyTextFieldType(String name) {
this(
name,
new TextSearchInfo(Defaults.FIELD_TYPE, null, Lucene.STANDARD_ANALYZER, Lucene.STANDARD_ANALYZER),
Lucene.STANDARD_ANALYZER,
false,
Collections.emptyMap()
);
}
Expand Down Expand Up @@ -305,17 +319,28 @@ public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions,

@Override
public IndexFieldData.Builder fielddataBuilder(FieldDataContext fieldDataContext) {
if (fieldDataContext.fielddataOperation() == FielddataOperation.SCRIPT) {
return new SourceValueFetcherSortedBinaryIndexFieldData.Builder(
name(),
if (fieldDataContext.fielddataOperation() != FielddataOperation.SCRIPT) {
throw new IllegalArgumentException(CONTENT_TYPE + " fields do not support sorting and aggregations");
}
if (textFieldType.isSyntheticSource()) {
return (cache, breaker) -> new StoredFieldSortedBinaryIndexFieldData(
storedFieldNameForSyntheticSource(),
CoreValuesSourceType.KEYWORD,
SourceValueFetcher.toString(fieldDataContext.sourcePathsLookup().apply(name())),
fieldDataContext.lookupSupplier().get().source(),
TextDocValuesField::new
);
) {
@Override
protected BytesRef storedToBytesRef(Object stored) {
return new BytesRef((String) stored);
}
};
}

throw new IllegalArgumentException(CONTENT_TYPE + " fields do not support sorting and aggregations");
return new SourceValueFetcherSortedBinaryIndexFieldData.Builder(
name(),
CoreValuesSourceType.KEYWORD,
SourceValueFetcher.toString(fieldDataContext.sourcePathsLookup().apply(name())),
fieldDataContext.lookupSupplier().get().source(),
TextDocValuesField::new
);
}

private String storedFieldNameForSyntheticSource() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,16 @@ public List<SyntheticSourceInvalidExample> invalidExample() throws IOException {
}
}

/**
 * Runs {@code assertScriptDocValues} against a plain {@code match_only_text}
 * mapping and expects the single value {@code "foo"} back.
 */
public void testDocValues() throws IOException {
    assertScriptDocValues(
        createMapperService(fieldMapping(mapping -> mapping.field("type", "match_only_text"))),
        "foo",
        equalTo(List.of("foo"))
    );
}

/**
 * Same check as the plain-mapping variant, but the {@code match_only_text}
 * mapping is built with {@code syntheticSourceFieldMapping}.
 */
public void testDocValuesLoadedFromSynthetic() throws IOException {
    assertScriptDocValues(
        createMapperService(syntheticSourceFieldMapping(mapping -> mapping.field("type", "match_only_text"))),
        "foo",
        equalTo(List.of("foo"))
    );
}

@Override
protected IngestScriptSupport ingestScriptSupport() {
throw new AssumptionViolatedException("not supported");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,13 @@ private AnnotatedTextFieldType buildFieldType(FieldType fieldType, MapperBuilder
wrapAnalyzer(analyzers.getSearchAnalyzer()),
wrapAnalyzer(analyzers.getSearchQuoteAnalyzer())
);
return new AnnotatedTextFieldType(context.buildFullName(name), store.getValue(), tsi, meta.getValue());
return new AnnotatedTextFieldType(
context.buildFullName(name),
store.getValue(),
tsi,
context.isSourceSynthetic(),
meta.getValue()
);
}

@Override
Expand Down Expand Up @@ -467,8 +473,14 @@ private void emitAnnotation(int firstSpannedTextPosInc, int annotationPosLen) th

public static final class AnnotatedTextFieldType extends TextFieldMapper.TextFieldType {

private AnnotatedTextFieldType(String name, boolean store, TextSearchInfo tsi, Map<String, String> meta) {
super(name, true, store, tsi, meta);
private AnnotatedTextFieldType(
String name,
boolean store,
TextSearchInfo tsi,
boolean isSyntheticSource,
Map<String, String> meta
) {
super(name, true, store, tsi, isSyntheticSource, meta);
}

public AnnotatedTextFieldType(String name, Map<String, String> meta) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/

package org.elasticsearch.index.fielddata;

import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.SortField;
import org.elasticsearch.common.util.BigArrays;
import org.elasticsearch.index.fieldvisitor.LeafStoredFieldLoader;
import org.elasticsearch.index.fieldvisitor.StoredFieldLoader;
import org.elasticsearch.script.field.DocValuesScriptFieldFactory;
import org.elasticsearch.script.field.ToScriptFieldFactory;
import org.elasticsearch.search.DocValueFormat;
import org.elasticsearch.search.MultiValueMode;
import org.elasticsearch.search.aggregations.support.ValuesSourceType;
import org.elasticsearch.search.sort.BucketedSort;
import org.elasticsearch.search.sort.SortOrder;

import java.util.Set;

/**
 * {@link IndexFieldData} whose per-segment values are read from a single stored
 * field instead of from doc values.
 * <p>
 * Only the scripting path is supported: {@link #sortField},
 * {@link #newBucketedSort} and {@link StoredFieldLeafFieldData#getBytesValues()}
 * all throw {@link IllegalArgumentException}.
 *
 * @param <T> the per-leaf value type produced by {@link #loadLeaf} and handed to
 *            the {@link ToScriptFieldFactory}
 */
public abstract class StoredFieldIndexFieldData<T> implements IndexFieldData<StoredFieldIndexFieldData<T>.StoredFieldLeafFieldData> {
    private final String fieldName;
    private final ValuesSourceType valuesSourceType;
    protected final ToScriptFieldFactory<T> toScriptFieldFactory;
    protected final StoredFieldLoader loader;

    /**
     * @param fieldName            name of the stored field to load; also returned by {@link #getFieldName()}
     * @param valuesSourceType     value returned by {@link #getValuesSourceType()}
     * @param toScriptFieldFactory turns the per-leaf value into a script field factory
     */
    protected StoredFieldIndexFieldData(String fieldName, ValuesSourceType valuesSourceType, ToScriptFieldFactory<T> toScriptFieldFactory) {
        this.fieldName = fieldName;
        this.valuesSourceType = valuesSourceType;
        this.toScriptFieldFactory = toScriptFieldFactory;
        // NOTE(review): the `false` flag presumably means "do not load _source" - confirm against StoredFieldLoader.create.
        this.loader = StoredFieldLoader.create(false, Set.of(fieldName));
    }

    @Override
    public String getFieldName() {
        return fieldName;
    }

    @Override
    public ValuesSourceType getValuesSourceType() {
        return valuesSourceType;
    }

    /**
     * Delegates to {@link #loadDirect(LeafReaderContext)}.
     */
    @Override
    public final StoredFieldLeafFieldData load(LeafReaderContext context) {
        return loadDirect(context);
    }

    /**
     * Creates the leaf field data for one segment, backed by a leaf-level stored field loader.
     */
    @Override
    public final StoredFieldLeafFieldData loadDirect(LeafReaderContext context) {
        // NOTE(review): the `null` second argument presumably means "no pre-known doc ids" - confirm against StoredFieldLoader.getLoader.
        return new StoredFieldLeafFieldData(loader.getLoader(context, null));
    }

    /**
     * Builds the per-leaf value that exposes what the given leaf loader reads.
     *
     * @param leafStoredFieldLoader the stored field loader for the current segment
     * @return the value passed on to the {@link ToScriptFieldFactory}
     */
    protected abstract T loadLeaf(LeafStoredFieldLoader leafStoredFieldLoader);

    /**
     * Sorting is not supported.
     *
     * @throws IllegalArgumentException always
     */
    @Override
    public SortField sortField(Object missingValue, MultiValueMode sortMode, XFieldComparatorSource.Nested nested, boolean reverse) {
        throw new IllegalArgumentException("not supported for stored field fallback");
    }

    /**
     * Bucketed sorting is not supported.
     *
     * @throws IllegalArgumentException always
     */
    @Override
    public BucketedSort newBucketedSort(
        BigArrays bigArrays,
        Object missingValue,
        MultiValueMode sortMode,
        XFieldComparatorSource.Nested nested,
        SortOrder sortOrder,
        DocValueFormat format,
        int bucketSize,
        BucketedSort.ExtraData extra
    ) {
        throw new IllegalArgumentException("not supported for stored field fallback");
    }

    /**
     * Leaf-level view that wraps a {@link LeafStoredFieldLoader} and exposes it only through the script field factory.
     */
    public class StoredFieldLeafFieldData implements LeafFieldData {
        // Named leafLoader so it does not shadow the enclosing class's StoredFieldLoader field `loader`.
        private final LeafStoredFieldLoader leafLoader;

        protected StoredFieldLeafFieldData(LeafStoredFieldLoader loader) {
            this.leafLoader = loader;
        }

        /**
         * Builds the script field factory from {@link #loadLeaf}.
         */
        @Override
        public DocValuesScriptFieldFactory getScriptFieldFactory(String name) {
            // NOTE(review): the `name` argument is ignored in favour of the stored field name - confirm this is intended.
            return toScriptFieldFactory.getScriptFieldFactory(loadLeaf(leafLoader), fieldName);
        }

        /**
         * Always reports {@code 0}.
         */
        @Override
        public long ramBytesUsed() {
            return 0;
        }

        /**
         * No-op.
         */
        @Override
        public void close() {}

        /**
         * Direct access to the bytes values is not supported.
         *
         * @throws IllegalArgumentException always
         */
        @Override
        public SortedBinaryDocValues getBytesValues() {
            // Message aligned with the other unsupported operations in this class; it used to say "source fallback".
            throw new IllegalArgumentException("not supported for stored field fallback");
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/

package org.elasticsearch.index.fielddata;

import org.apache.lucene.util.BytesRef;
import org.elasticsearch.index.fieldvisitor.LeafStoredFieldLoader;
import org.elasticsearch.script.field.ToScriptFieldFactory;
import org.elasticsearch.search.aggregations.support.ValuesSourceType;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

/**
 * Stored-field-backed field data that surfaces each document's values as
 * {@link SortedBinaryDocValues}. Subclasses decide how a raw stored value is
 * turned into a {@link BytesRef} via {@link #storedToBytesRef(Object)}.
 */
public abstract class StoredFieldSortedBinaryIndexFieldData extends StoredFieldIndexFieldData<SortedBinaryDocValues> {

    protected StoredFieldSortedBinaryIndexFieldData(
        String fieldName,
        ValuesSourceType valuesSourceType,
        ToScriptFieldFactory<SortedBinaryDocValues> toScriptFieldFactory
    ) {
        super(fieldName, valuesSourceType, toScriptFieldFactory);
    }

    @Override
    protected SourceValueFetcherSortedBinaryDocValues loadLeaf(LeafStoredFieldLoader leafStoredFieldLoader) {
        return new SourceValueFetcherSortedBinaryDocValues(leafStoredFieldLoader);
    }

    /**
     * Converts one value, as returned by the stored field loader, into its binary form.
     *
     * @param stored a single stored value for the field
     * @return the bytes exposed to consumers of the doc values
     */
    protected abstract BytesRef storedToBytesRef(Object stored);

    /**
     * Doc values that, for each document, read the stored values of the field,
     * convert them with {@link #storedToBytesRef(Object)} and hand them out in
     * sorted order.
     */
    class SourceValueFetcherSortedBinaryDocValues extends SortedBinaryDocValues {
        private final LeafStoredFieldLoader leafLoader;
        // Reused across documents; refilled whenever a document has values.
        private final List<BytesRef> buffer = new ArrayList<>();

        private int cursor;
        private int count;

        SourceValueFetcherSortedBinaryDocValues(LeafStoredFieldLoader loader) {
            this.leafLoader = loader;
        }

        @Override
        public boolean advanceExact(int doc) throws IOException {
            leafLoader.advanceTo(doc);
            List<Object> stored = leafLoader.storedFields().get(getFieldName());
            boolean hasValues = stored != null && stored.isEmpty() == false;
            if (hasValues) {
                buffer.clear();
                for (Object value : stored) {
                    buffer.add(storedToBytesRef(value));
                }
                Collections.sort(buffer);
            }
            cursor = 0;
            count = hasValues ? buffer.size() : 0;
            return hasValues;
        }

        @Override
        public int docValueCount() {
            return count;
        }

        @Override
        public BytesRef nextValue() throws IOException {
            assert cursor < count;
            return buffer.get(cursor++);
        }
    }
}

0 comments on commit 74d0d19

Please sign in to comment.