Make runtime fields highlightable (#65560)
This commit adds the ability to request highlights for runtime fields.

There are two changes included here:

* You can ask QueryShardContext for the index analyzer for a specific
  field, supplying a lambda to be executed if the field has no
  index-time analyzer. This allows us to return the standard keyword
  analyzer by default for fields that have not been configured in the
  mappings (see the sketch after this list).
* HighlightUtils.loadFieldValues() now correctly handles values fetched
  from a DocValueFetcher as well as from source, by setting the
  LeafReaderContext from the passed-in HitContext.
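
A minimal sketch of the new lookup (a hypothetical caller method, not
code from this commit). Falling back to Lucene.KEYWORD_ANALYZER mirrors
what the highlighters in this commit do for unmapped and runtime fields:

    import org.apache.lucene.analysis.Analyzer;
    import org.elasticsearch.common.lucene.Lucene;
    import org.elasticsearch.index.query.QueryShardContext;

    class AnalyzerLookupExample {
        // Ask the shard context for a per-field index analyzer. The lambda
        // runs for any field with no index-time analyzer (for example a
        // runtime field) and here returns the standard keyword analyzer.
        static Analyzer indexAnalyzer(QueryShardContext context) {
            return context.getIndexAnalyzer(f -> Lucene.KEYWORD_ANALYZER);
        }
    }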

The first change has a number of knock-on effects, notably that
MoreLikeThisQuery has to stop using its configured analyzer directly
in equals() and hashCode() checks, as the anonymous analyzer
returned from QueryShardContext#getIndexAnalyzer() uses object
identity for comparisons. The sketch below shows the pattern it
switches to.
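
A self-contained sketch of that pattern (NameKeyedQuery is a hypothetical
stand-in, not the actual MoreLikeThisQuery): equality is keyed on the
analyzer's registered name rather than on the Analyzer instance itself.

    import java.util.Objects;

    final class NameKeyedQuery {
        // Stable key for equality; the Analyzer object itself may be an
        // anonymous per-field wrapper that only supports identity equality.
        private final String analyzerName;

        NameKeyedQuery(String analyzerName) {
            this.analyzerName = analyzerName;
        }

        @Override
        public boolean equals(Object obj) {
            if (this == obj) {
                return true;
            }
            if (obj instanceof NameKeyedQuery == false) {
                return false;
            }
            return Objects.equals(analyzerName, ((NameKeyedQuery) obj).analyzerName);
        }

        @Override
        public int hashCode() {
            return Objects.hash(analyzerName);
        }
    }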
romseygeek committed Nov 30, 2020
1 parent de1e319 commit dcd4fad
Showing 16 changed files with 101 additions and 38 deletions.

@@ -62,7 +62,6 @@
 import org.elasticsearch.common.xcontent.XContentHelper;
 import org.elasticsearch.common.xcontent.XContentParser;
 import org.elasticsearch.common.xcontent.XContentType;
-import org.elasticsearch.index.analysis.FieldNameAnalyzer;
 import org.elasticsearch.index.fielddata.IndexFieldData;
 import org.elasticsearch.index.fielddata.IndexFieldDataCache;
 import org.elasticsearch.index.mapper.MappedFieldType;
@@ -466,18 +465,12 @@ protected Query doToQuery(QueryShardContext context) throws IOException {
             docs.add(context.parseDocument(new SourceToParse(context.index().getName(), "_temp_id", document, documentXContentType)));
         }

-        FieldNameAnalyzer fieldNameAnalyzer = context.getFieldNameIndexAnalyzer();
-        // We need this custom analyzer because FieldNameAnalyzer is strict and the percolator sometimes isn't when
+        // We need this custom analyzer because the default index analyzer is strict and the percolator sometimes isn't when
         // 'index.percolator.map_unmapped_fields_as_string' is enabled:
         Analyzer analyzer = new DelegatingAnalyzerWrapper(Analyzer.PER_FIELD_REUSE_STRATEGY) {
            @Override
            protected Analyzer getWrappedAnalyzer(String fieldName) {
-               Analyzer analyzer = fieldNameAnalyzer.analyzers().get(fieldName);
-               if (analyzer != null) {
-                   return analyzer;
-               } else {
-                   return context.getIndexAnalyzers().getDefaultIndexAnalyzer();
-               }
+               return context.getIndexAnalyzer(f -> context.getIndexAnalyzers().getDefaultIndexAnalyzer());
            }
         };
         final IndexSearcher docSearcher;

@@ -180,7 +180,8 @@ private static Analyzer getAnalyzer(AnalyzeAction.Request request, AnalysisRegistry
        MappedFieldType fieldType = indexService.mapperService().fieldType(request.field());
        if (fieldType != null) {
            if (fieldType instanceof StringFieldType) {
-               return indexService.mapperService().indexAnalyzer();
+               return indexService.mapperService().indexAnalyzer(fieldType.name(),
+                   f -> { throw new IllegalArgumentException("No analyzer configured for field " + fieldType.name()); });
            } else {
                throw new IllegalArgumentException("Can't process field [" + request.field() +
                    "], Analysis requests are only supported on tokenized fields");

@@ -35,6 +35,7 @@
 import org.apache.lucene.search.similarities.TFIDFSimilarity;
 import org.apache.lucene.util.BytesRef;
 import org.elasticsearch.common.Strings;
+import org.elasticsearch.index.analysis.NamedAnalyzer;

 import java.io.IOException;
 import java.io.Reader;
@@ -57,6 +58,7 @@ public class MoreLikeThisQuery extends Query {
     private Fields[] unlikeFields;
     private String[] moreLikeFields;
     private Analyzer analyzer;
+    private String analyzerName; // used for equals/hashcode
     private String minimumShouldMatch = DEFAULT_MINIMUM_SHOULD_MATCH;
     private int minTermFrequency = XMoreLikeThis.DEFAULT_MIN_TERM_FREQ;
     private int maxQueryTerms = XMoreLikeThis.DEFAULT_MAX_QUERY_TERMS;
@@ -73,10 +75,11 @@ public MoreLikeThisQuery() {

     }

-    public MoreLikeThisQuery(String likeText, String[] moreLikeFields, Analyzer analyzer) {
+    public MoreLikeThisQuery(String likeText, String[] moreLikeFields, NamedAnalyzer analyzer) {
         this.likeText = new String[]{likeText};
         this.moreLikeFields = moreLikeFields;
         this.analyzer = analyzer;
+        this.analyzerName = analyzer.name();
     }

     @Override
@@ -92,7 +95,7 @@ public boolean equals(Object obj) {
             return false;
         }
         MoreLikeThisQuery other = (MoreLikeThisQuery) obj;
-        if (!analyzer.equals(other.analyzer))
+        if (Objects.equals(analyzerName, other.analyzerName) == false)
             return false;
         if (boostTerms != other.boostTerms)
             return false;
@@ -271,8 +274,9 @@ public Analyzer getAnalyzer() {
         return analyzer;
     }

-    public void setAnalyzer(Analyzer analyzer) {
+    public void setAnalyzer(String analyzerName, Analyzer analyzer) {
         this.analyzer = analyzer;
+        this.analyzerName = analyzerName;
     }

     /**

@@ -27,14 +27,14 @@

 public final class FieldNameAnalyzer extends DelegatingAnalyzerWrapper {

-    private final Map<String, Analyzer> analyzers;
+    private final Map<String, NamedAnalyzer> analyzers;

-    public FieldNameAnalyzer(Map<String, Analyzer> analyzers) {
+    public FieldNameAnalyzer(Map<String, NamedAnalyzer> analyzers) {
         super(Analyzer.PER_FIELD_REUSE_STRATEGY);
         this.analyzers = CopyOnWriteHashMap.copyOf(analyzers);
     }

-    public Map<String, Analyzer> analyzers() {
+    public Map<String, NamedAnalyzer> analyzers() {
         return analyzers;
     }


@@ -130,6 +130,18 @@ public void checkAllowedInMode(AnalysisMode mode) {
         }
     }

+    public boolean containsBrokenAnalysis() {
+        if (analyzer instanceof AnalyzerComponentsProvider) {
+            final TokenFilterFactory[] tokenFilters = ((AnalyzerComponentsProvider) analyzer).getComponents().getTokenFilters();
+            for (TokenFilterFactory tokenFilterFactory : tokenFilters) {
+                if (tokenFilterFactory.breaksFastVectorHighlighter()) {
+                    return true;
+                }
+            }
+        }
+        return false;
+    }
+
     @Override
     public String toString() {
         return "analyzer name[" + name + "], analyzer [" + analyzer + "], analysisMode [" + analysisMode + "]";

@@ -445,8 +445,25 @@ public Analyzer indexAnalyzer() {
         return this.indexAnalyzer;
     }

+    /**
+     * Return the index-time analyzer associated with a particular field
+     * @param field the field name
+     * @param unindexedFieldAnalyzer a function to return an Analyzer for a field with no
+     *                               directly associated index-time analyzer
+     */
+    public NamedAnalyzer indexAnalyzer(String field, Function<String, NamedAnalyzer> unindexedFieldAnalyzer) {
+        if (this.mapper == null) {
+            return unindexedFieldAnalyzer.apply(field);
+        }
+        return this.mapper.mappers().indexAnalyzer(field, unindexedFieldAnalyzer);
+    }
+
     public boolean containsBrokenAnalysis(String field) {
-        return this.indexAnalyzer.containsBrokenAnalysis(field);
+        NamedAnalyzer a = indexAnalyzer(field, f -> null);
+        if (a == null) {
+            return false;
+        }
+        return a.containsBrokenAnalysis();
     }

     @Override
@@ -490,9 +507,6 @@ protected Analyzer getWrappedAnalyzer(String fieldName) {
             return mapper.mappers().indexAnalyzer();
         }

-        boolean containsBrokenAnalysis(String field) {
-            return mapper.mappers().indexAnalyzer().containsBrokenAnalysis(field);
-        }
     }

     public synchronized List<String> reloadSearchAnalyzers(AnalysisRegistry registry) throws IOException {

@@ -19,16 +19,17 @@

 package org.elasticsearch.index.mapper;

-import org.apache.lucene.analysis.Analyzer;
 import org.elasticsearch.index.IndexSettings;
 import org.elasticsearch.index.analysis.FieldNameAnalyzer;
+import org.elasticsearch.index.analysis.NamedAnalyzer;

 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.function.Function;
 import java.util.stream.Stream;

 public final class MappingLookup {
@@ -80,7 +81,7 @@ public MappingLookup(Collection<FieldMapper> mappers,
                          Collection<RuntimeFieldType> runtimeFieldTypes,
                          int metadataFieldCount) {
         Map<String, Mapper> fieldMappers = new HashMap<>();
-        Map<String, Analyzer> indexAnalyzers = new HashMap<>();
+        Map<String, NamedAnalyzer> indexAnalyzers = new HashMap<>();
         Map<String, ObjectMapper> objects = new HashMap<>();

         boolean hasNested = false;
@@ -143,6 +144,13 @@ public FieldNameAnalyzer indexAnalyzer() {
         return this.indexAnalyzer;
     }

+    public NamedAnalyzer indexAnalyzer(String field, Function<String, NamedAnalyzer> unmappedFieldAnalyzer) {
+        if (this.indexAnalyzer.analyzers().containsKey(field)) {
+            return this.indexAnalyzer.analyzers().get(field);
+        }
+        return unmappedFieldAnalyzer.apply(field);
+    }
+
     /**
      * Returns an iterable over all the registered field mappers (including alias mappers)
      */

@@ -971,9 +971,11 @@ protected Query doToQuery(QueryShardContext context) throws IOException {
         // set analyzer
         Analyzer analyzerObj = context.getIndexAnalyzers().get(analyzer);
         if (analyzerObj == null) {
-            analyzerObj = context.getIndexAnalyzer();
+            analyzerObj = context.getIndexAnalyzer(f -> {
+                throw new UnsupportedOperationException("No analyzer configured for field " + f);
+            });
         }
-        mltQuery.setAnalyzer(analyzerObj);
+        mltQuery.setAnalyzer(analyzer, analyzerObj);

         // set like text fields
         boolean useDefaultField = (fields == null);

@@ -20,6 +20,7 @@
 package org.elasticsearch.index.query;

 import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.DelegatingAnalyzerWrapper;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Query;
@@ -41,12 +42,11 @@
 import org.elasticsearch.index.Index;
 import org.elasticsearch.index.IndexSettings;
 import org.elasticsearch.index.IndexSortConfig;
-import org.elasticsearch.index.analysis.FieldNameAnalyzer;
 import org.elasticsearch.index.analysis.IndexAnalyzers;
+import org.elasticsearch.index.analysis.NamedAnalyzer;
 import org.elasticsearch.index.cache.bitset.BitsetFilterCache;
 import org.elasticsearch.index.fielddata.IndexFieldData;
 import org.elasticsearch.index.mapper.ContentPath;
-import org.elasticsearch.index.mapper.DocumentMapper;
 import org.elasticsearch.index.mapper.FieldMapper;
 import org.elasticsearch.index.mapper.MappedFieldType;
 import org.elasticsearch.index.mapper.Mapper;
@@ -76,6 +76,7 @@
 import java.util.Set;
 import java.util.function.BiConsumer;
 import java.util.function.BooleanSupplier;
+import java.util.function.Function;
 import java.util.function.LongSupplier;
 import java.util.function.Predicate;
 import java.util.function.Supplier;
@@ -256,11 +257,6 @@ public ParsedDocument parseDocument(SourceToParse source) throws MapperParsingException {
         return mapperService.documentMapper() == null ? null : mapperService.documentMapper().parse(source);
     }

-    public FieldNameAnalyzer getFieldNameIndexAnalyzer() {
-        DocumentMapper documentMapper = mapperService.documentMapper();
-        return documentMapper == null ? null : documentMapper.mappers().indexAnalyzer();
-    }
-
     public boolean hasNested() {
         return mapperService.hasNested();
     }
@@ -353,8 +349,17 @@ public IndexAnalyzers getIndexAnalyzers() {
         return mapperService.getIndexAnalyzers();
     }

-    public Analyzer getIndexAnalyzer() {
-        return mapperService.indexAnalyzer();
+    /**
+     * Return the index-time analyzer for the current index
+     * @param unindexedFieldAnalyzer a function that builds an analyzer for unindexed fields
+     */
+    public Analyzer getIndexAnalyzer(Function<String, NamedAnalyzer> unindexedFieldAnalyzer) {
+        return new DelegatingAnalyzerWrapper(Analyzer.PER_FIELD_REUSE_STRATEGY) {
+            @Override
+            protected Analyzer getWrappedAnalyzer(String fieldName) {
+                return mapperService.indexAnalyzer(fieldName, unindexedFieldAnalyzer);
+            }
+        };
     }

     public ValuesSourceRegistry getValuesSourceRegistry() {

@@ -221,7 +221,9 @@ private static Analyzer getAnalyzerAtField(IndexShard indexShard, String field,
         if (perFieldAnalyzer != null && perFieldAnalyzer.containsKey(field)) {
             return mapperService.getIndexAnalyzers().get(perFieldAnalyzer.get(field));
         } else {
-            return mapperService.indexAnalyzer();
+            return mapperService.indexAnalyzer(field, f -> {
+                throw new IllegalArgumentException("No analyzer configured for field " + f);
+            });
         }
     }


@@ -131,7 +131,9 @@ protected Aggregator createInternal(SearchContext searchContext, Aggregator parent,
             context.lookup().source(),
             context.bigArrays(),
             fieldType,
-            searchContext.getQueryShardContext().getIndexAnalyzer(),
+            searchContext.getQueryShardContext().getIndexAnalyzer(f -> {
+                throw new IllegalArgumentException("No analyzer configured for field " + f);
+            }),
             sourceFieldNames,
             filterDuplicateText
         );

@@ -58,6 +58,7 @@ public static List<Object> loadFieldValues(MappedFieldType fieldType,
             return Objects.requireNonNullElse(textsToHighlight, Collections.emptyList());
         }
         ValueFetcher fetcher = fieldType.valueFetcher(qsc, null);
+        fetcher.setNextReader(hitContext.readerContext());
         return fetcher.fetchValues(hitContext.sourceLookup());
     }


@@ -34,6 +34,7 @@
 import org.apache.lucene.search.highlight.TextFragment;
 import org.apache.lucene.util.BytesRefHash;
 import org.apache.lucene.util.CollectionUtil;
+import org.elasticsearch.common.lucene.Lucene;
 import org.elasticsearch.common.text.Text;
 import org.elasticsearch.index.IndexSettings;
 import org.elasticsearch.index.mapper.MappedFieldType;
@@ -99,7 +100,7 @@ public HighlightField highlight(FieldHighlightContext fieldContext) throws IOException {
         int numberOfFragments = field.fieldOptions().numberOfFragments() == 0 ? 1 : field.fieldOptions().numberOfFragments();
         ArrayList<TextFragment> fragsList = new ArrayList<>();
         List<Object> textsToHighlight;
-        Analyzer analyzer = context.getQueryShardContext().getFieldNameIndexAnalyzer();
+        Analyzer analyzer = context.getQueryShardContext().getIndexAnalyzer(f -> Lucene.KEYWORD_ANALYZER);
         final int maxAnalyzedOffset = context.getQueryShardContext().getIndexSettings().getHighlightMaxAnalyzedOffset();

         textsToHighlight

@@ -32,6 +32,7 @@
 import org.apache.lucene.util.CollectionUtil;
 import org.elasticsearch.common.CheckedSupplier;
 import org.elasticsearch.common.Strings;
+import org.elasticsearch.common.lucene.Lucene;
 import org.elasticsearch.common.text.Text;
 import org.elasticsearch.index.mapper.IdFieldMapper;
 import org.elasticsearch.index.mapper.MappedFieldType;
@@ -113,7 +114,7 @@ CustomUnifiedHighlighter buildHighlighter(FieldHighlightContext fieldContext) throws IOException {
             : HighlightUtils.Encoders.DEFAULT;
         int maxAnalyzedOffset = fieldContext.context.getQueryShardContext().getIndexSettings().getHighlightMaxAnalyzedOffset();
         int numberOfFragments = fieldContext.field.fieldOptions().numberOfFragments();
-        Analyzer analyzer = wrapAnalyzer(fieldContext.context.getQueryShardContext().getFieldNameIndexAnalyzer());
+        Analyzer analyzer = wrapAnalyzer(fieldContext.context.getQueryShardContext().getIndexAnalyzer(f -> Lucene.KEYWORD_ANALYZER));
         PassageFormatter passageFormatter = getPassageFormatter(fieldContext.hitContext, fieldContext.field, encoder);
         IndexSearcher searcher = fieldContext.context.searcher();
         OffsetSource offsetSource = getOffsetSource(fieldContext.fieldType);

@@ -55,6 +55,7 @@
 import org.elasticsearch.common.io.stream.StreamOutput;
 import org.elasticsearch.common.lease.Releasable;
 import org.elasticsearch.common.lease.Releasables;
+import org.elasticsearch.common.lucene.Lucene;
 import org.elasticsearch.common.lucene.index.ElasticsearchDirectoryReader;
 import org.elasticsearch.common.network.NetworkAddress;
 import org.elasticsearch.common.settings.Settings;
@@ -144,6 +145,7 @@
 import static org.elasticsearch.test.InternalAggregationTestCase.DEFAULT_MAX_BUCKETS;
 import static org.hamcrest.Matchers.equalTo;
 import static org.hamcrest.Matchers.instanceOf;
+import static org.mockito.Matchers.any;
 import static org.mockito.Matchers.anyObject;
 import static org.mockito.Matchers.anyString;
 import static org.mockito.Mockito.doAnswer;
@@ -278,7 +280,7 @@ public boolean shouldCache(Query query) {
         MapperService mapperService = mapperServiceMock();
         when(mapperService.getIndexSettings()).thenReturn(indexSettings);
         when(mapperService.hasNested()).thenReturn(false);
-        when(mapperService.indexAnalyzer()).thenReturn(new StandardAnalyzer()); // for significant text
+        when(mapperService.indexAnalyzer(anyString(), any())).thenReturn(Lucene.STANDARD_ANALYZER); // for significant text
         QueryShardContext queryShardContext =
             queryShardContextMock(contextIndexSearcher, mapperService, indexSettings, circuitBreakerService, bigArrays);
         when(searchContext.getQueryShardContext()).thenReturn(queryShardContext);

@@ -150,6 +150,21 @@ setup:
   - match: {hits.total.value: 1}
   - match: {hits.hits.0._source.voltage: 5.8}

+---
+"highlight term query":
+  - do:
+      search:
+        index: sensor
+        body:
+          query:
+            term:
+              day_of_week: Monday
+          highlight:
+            fields:
+              day_of_week: {}
+
+  - match: { hits.hits.0.highlight.day_of_week : [ "<em>Monday</em>" ] }
+
 ---
 "explain term query":
   - do:
