Skip to content

Commit

Permalink
Synthetic _source: support match_only_text (#89516)
Browse files Browse the repository at this point in the history
This adds support for synthetic `_source` to the `match_only_text` field
type. When synthetic `_source` is enabled `match_only_text` fields
create a hidden stored field to contain their text. This should have
similar or better search performance for this specific field type,
though it will have slightly worse indexing performance because
synthetic `_source` is still writing `_recovery_source`, which means
we're writing the bits for this field twice.
  • Loading branch information
nik9000 committed Sep 1, 2022
1 parent e3f34dc commit 44693b0
Show file tree
Hide file tree
Showing 6 changed files with 145 additions and 5 deletions.
5 changes: 5 additions & 0 deletions docs/changelog/89516.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 89516
summary: "Synthetic _source: support `match_only_text`"
area: "TSDB"
type: enhancement
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
Expand All @@ -34,11 +35,15 @@
import org.elasticsearch.index.fielddata.FieldDataContext;
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.fielddata.SourceValueFetcherSortedBinaryIndexFieldData;
import org.elasticsearch.index.fieldvisitor.LeafStoredFieldLoader;
import org.elasticsearch.index.fieldvisitor.StoredFieldLoader;
import org.elasticsearch.index.mapper.DocumentParserContext;
import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.index.mapper.MapperBuilderContext;
import org.elasticsearch.index.mapper.SourceLoader;
import org.elasticsearch.index.mapper.SourceValueFetcher;
import org.elasticsearch.index.mapper.StringFieldType;
import org.elasticsearch.index.mapper.StringStoredFieldFieldLoader;
import org.elasticsearch.index.mapper.TextFieldMapper;
import org.elasticsearch.index.mapper.TextFieldMapper.TextFieldType;
import org.elasticsearch.index.mapper.TextParams;
Expand All @@ -48,6 +53,7 @@
import org.elasticsearch.script.field.TextDocValuesField;
import org.elasticsearch.search.aggregations.support.CoreValuesSourceType;
import org.elasticsearch.search.lookup.SourceLookup;
import org.elasticsearch.xcontent.XContentBuilder;

import java.io.IOException;
import java.io.UncheckedIOException;
Expand All @@ -56,6 +62,7 @@
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.function.Function;

/**
Expand Down Expand Up @@ -171,6 +178,17 @@ private Function<LeafReaderContext, CheckedIntFunction<List<Object>, IOException
"Field [" + name() + "] of type [" + CONTENT_TYPE + "] cannot run positional queries since [_source] is disabled."
);
}
if (searchExecutionContext.isSourceSynthetic()) {
String name = storedFieldNameForSyntheticSource();
StoredFieldLoader loader = StoredFieldLoader.create(false, Set.of(name));
return context -> {
LeafStoredFieldLoader leafLoader = loader.getLoader(context, null);
return docId -> {
leafLoader.advanceTo(docId);
return leafLoader.storedFields().get(name);
};
};
}
SourceLookup sourceLookup = searchExecutionContext.lookup().source();
ValueFetcher valueFetcher = valueFetcher(searchExecutionContext, null);
return context -> {
Expand Down Expand Up @@ -292,6 +310,9 @@ public IndexFieldData.Builder fielddataBuilder(FieldDataContext fieldDataContext
throw new IllegalArgumentException(CONTENT_TYPE + " fields do not support sorting and aggregations");
}

private String storedFieldNameForSyntheticSource() {
return name() + "._original";
}
}

private final Version indexCreatedVersion;
Expand Down Expand Up @@ -339,6 +360,10 @@ protected void parseCreateField(DocumentParserContext context) throws IOExceptio
Field field = new Field(fieldType().name(), value, fieldType);
context.doc().add(field);
context.addToFieldNames(fieldType().name());

if (context.isSyntheticSource()) {
context.doc().add(new StoredField(fieldType().storedFieldNameForSyntheticSource(), value));
}
}

@Override
Expand All @@ -351,4 +376,18 @@ public MatchOnlyTextFieldType fieldType() {
return (MatchOnlyTextFieldType) super.fieldType();
}

@Override
public SourceLoader.SyntheticFieldLoader syntheticFieldLoader() {
if (copyTo.copyToFields().isEmpty() != true) {

This comment has been minimized.

Copy link
@dostonbokhodirov

dostonbokhodirov Sep 1, 2022

why isn't this written in the view like that?
if (!copyTo.copyToFields().isEmpty())
Or this considered to make the code readable?

throw new IllegalArgumentException(
"field [" + name() + "] of type [" + typeName() + "] doesn't support synthetic source because it declares copy_to"
);
}
return new StringStoredFieldFieldLoader(fieldType().storedFieldNameForSyntheticSource(), simpleName(), null) {
@Override
protected void write(XContentBuilder b, Object value) throws IOException {
b.value((String) value);
}
};
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,20 +9,28 @@
package org.elasticsearch.index.mapper.extras;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.IndexableFieldType;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TotalHits;
import org.apache.lucene.store.Directory;
import org.apache.lucene.tests.analysis.CannedTokenStream;
import org.apache.lucene.tests.analysis.Token;
import org.apache.lucene.tests.index.RandomIndexWriter;
import org.elasticsearch.common.Strings;
import org.elasticsearch.core.Tuple;
import org.elasticsearch.index.mapper.DocumentMapper;
import org.elasticsearch.index.mapper.KeywordFieldMapper;
import org.elasticsearch.index.mapper.LuceneDocument;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.MapperParsingException;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.mapper.MapperTestCase;
import org.elasticsearch.index.mapper.ParsedDocument;
import org.elasticsearch.index.query.MatchPhraseQueryBuilder;
import org.elasticsearch.index.query.SearchExecutionContext;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.xcontent.XContentBuilder;
Expand All @@ -34,6 +42,7 @@
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;

import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.equalTo;
Expand All @@ -51,10 +60,37 @@ protected Object getSampleValueForDocument() {
return "value";
}

public final void testExists() throws IOException {
MapperService mapperService = createMapperService(fieldMapping(b -> { minimalMapping(b); }));
assertExistsQuery(mapperService);
assertParseMinimalWarnings();
public void testExistsStandardSource() throws IOException {
assertExistsQuery(createMapperService(testMapping(false)));
}

public void testExistsSyntheticSource() throws IOException {
assertExistsQuery(createMapperService(testMapping(true)));
}

public void testPhraseQueryStandardSource() throws IOException {
assertPhraseQuery(createMapperService(testMapping(false)));
}

public void testPhraseQuerySyntheticSource() throws IOException {
assertPhraseQuery(createMapperService(testMapping(true)));
}

private void assertPhraseQuery(MapperService mapperService) throws IOException {
try (Directory directory = newDirectory()) {
RandomIndexWriter iw = new RandomIndexWriter(random(), directory);
LuceneDocument doc = mapperService.documentMapper().parse(source(b -> b.field("field", "the quick brown fox"))).rootDoc();
iw.addDocument(doc);
iw.close();
try (DirectoryReader reader = DirectoryReader.open(directory)) {
SearchExecutionContext context = createSearchExecutionContext(mapperService, newSearcher(reader));
MatchPhraseQueryBuilder queryBuilder = new MatchPhraseQueryBuilder("field", "brown fox");
TopDocs docs = context.searcher().search(queryBuilder.toQuery(context), 1);
assertThat(docs.totalHits.value, equalTo(1L));
assertThat(docs.totalHits.relation, equalTo(TotalHits.Relation.EQUAL_TO));
assertThat(docs.scoreDocs[0].doc, equalTo(0));
}
}
}

@Override
Expand All @@ -65,6 +101,13 @@ protected void registerParameters(ParameterChecker checker) throws IOException {
);
}

private XContentBuilder testMapping(boolean syntheticSource) throws IOException {
if (syntheticSource) {
return syntheticSourceMapping(b -> b.startObject("field").field("type", "match_only_text").endObject());
}
return fieldMapping(b -> b.field("type", "match_only_text"));
}

@Override
protected void minimalMapping(XContentBuilder b) throws IOException {
b.field("type", "match_only_text");
Expand Down Expand Up @@ -166,7 +209,36 @@ protected void randomFetchTestFieldConfig(XContentBuilder b) throws IOException

@Override
protected SyntheticSourceSupport syntheticSourceSupport() {
throw new AssumptionViolatedException("not supported");
return new MatchOnlyTextSyntheticSourceSupport();
}

static class MatchOnlyTextSyntheticSourceSupport implements SyntheticSourceSupport {
@Override
public SyntheticSourceExample example(int maxValues) {
if (randomBoolean()) {
Tuple<String, String> v = generateValue();
return new SyntheticSourceExample(v.v1(), v.v2(), this::mapping);
}
List<Tuple<String, String>> values = randomList(1, maxValues, this::generateValue);
List<String> in = values.stream().map(Tuple::v1).toList();
List<String> outList = values.stream().map(Tuple::v2).toList();
Object out = outList.size() == 1 ? outList.get(0) : outList;
return new SyntheticSourceExample(in, out, this::mapping);
}

private Tuple<String, String> generateValue() {
String v = randomList(1, 10, () -> randomAlphaOfLength(5)).stream().collect(Collectors.joining(" "));
return Tuple.tuple(v, v);
}

private void mapping(XContentBuilder b) throws IOException {
b.field("type", "match_only_text");
}

@Override
public List<SyntheticSourceInvalidExample> invalidExample() throws IOException {
return List.of();
}
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,9 @@ public Set<String> getFieldNames() {
public final void postProcess(Function<String, MappedFieldType> fieldTypeLookup) {
for (Map.Entry<String, List<Object>> entry : fields().entrySet()) {
MappedFieldType fieldType = fieldTypeLookup.apply(entry.getKey());
if (fieldType == null) {
continue; // TODO this is lame
}
List<Object> fieldValues = entry.getValue();
for (int i = 0; i < fieldValues.size(); i++) {
fieldValues.set(i, fieldType.valueForDisplay(fieldValues.get(i)));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -404,11 +404,22 @@ public boolean hasMappings() {
return this != EMPTY;
}

/**
* Will there be {@code _source}.
*/
public boolean isSourceEnabled() {
SourceFieldMapper sfm = mapping.getMetadataMapperByClass(SourceFieldMapper.class);
return sfm != null && sfm.enabled();
}

/**
* Does the source need to be rebuilt on the fly?
*/
public boolean isSourceSynthetic() {
SourceFieldMapper sfm = mapping.getMetadataMapperByClass(SourceFieldMapper.class);
return sfm != null && sfm.isSynthetic();
}

/**
* Build something to load source {@code _source}.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -392,10 +392,20 @@ public Set<String> sourcePath(String fullName) {
return mappingLookup.sourcePaths(fullName);
}

/**
* Will there be {@code _source}.
*/
public boolean isSourceEnabled() {
return mappingLookup.isSourceEnabled();
}

/**
* Does the source need to be rebuilt on the fly?
*/
public boolean isSourceSynthetic() {
return mappingLookup.isSourceSynthetic();
}

/**
* Build something to load source {@code _source}.
*/
Expand Down

0 comments on commit 44693b0

Please sign in to comment.