Skip to content

Commit

Permalink
Add text field support to archive indices (#86591)
Browse files Browse the repository at this point in the history
Adds support for "text" fields in archive indices, with the goal of adding simple filtering support on text fields when
querying archive indices.

There are some differences to regular text fields:

- no global statistics: queries on text fields return a constant score (similar to match_only_text).
- analyzer settings on the field can be updated
- if the configured analyzer is not available, the field falls back to the default analyzer
- no guarantees that analyzers are backwards compatible (BWC)
The above limitations also give us the flexibility to eventually swap out the implementation with a "runtime-text field"
variant, and hence only provide those capabilities that can be emulated via a runtime field.

Relates #81210
  • Loading branch information
ywelsch committed May 18, 2022
1 parent d6519b4 commit 5aebb8e
Show file tree
Hide file tree
Showing 22 changed files with 687 additions and 60 deletions.
Expand Up @@ -97,7 +97,8 @@ public Builder(String name, Version indexCreatedVersion, IndexAnalyzers indexAna
this.analyzers = new TextParams.Analyzers(
indexAnalyzers,
m -> ((MatchOnlyTextFieldMapper) m).indexAnalyzer,
m -> ((MatchOnlyTextFieldMapper) m).positionIncrementGap
m -> ((MatchOnlyTextFieldMapper) m).positionIncrementGap,
indexCreatedVersion
);
}

Expand Down
Expand Up @@ -35,6 +35,7 @@
import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.Operations;
import org.elasticsearch.Version;
import org.elasticsearch.common.collect.Iterators;
import org.elasticsearch.index.analysis.AnalyzerScope;
import org.elasticsearch.index.analysis.IndexAnalyzers;
Expand Down Expand Up @@ -92,7 +93,7 @@ public static class Defaults {
public static final int MAX_SHINGLE_SIZE = 3;
}

public static final TypeParser PARSER = new TypeParser((n, c) -> new Builder(n, c.getIndexAnalyzers()));
public static final TypeParser PARSER = new TypeParser((n, c) -> new Builder(n, c.indexVersionCreated(), c.getIndexAnalyzers()));

private static Builder builder(FieldMapper in) {
return ((SearchAsYouTypeFieldMapper) in).builder;
Expand Down Expand Up @@ -141,12 +142,16 @@ public static class Builder extends FieldMapper.Builder {

private final Parameter<Map<String, String>> meta = Parameter.metaParam();

public Builder(String name, IndexAnalyzers indexAnalyzers) {
private final Version indexCreatedVersion;

public Builder(String name, Version indexCreatedVersion, IndexAnalyzers indexAnalyzers) {
super(name);
this.indexCreatedVersion = indexCreatedVersion;
this.analyzers = new TextParams.Analyzers(
indexAnalyzers,
m -> builder(m).analyzers.getIndexAnalyzer(),
m -> builder(m).analyzers.positionIncrementGap.getValue()
m -> builder(m).analyzers.positionIncrementGap.getValue(),
indexCreatedVersion
);
}

Expand Down Expand Up @@ -710,7 +715,7 @@ protected String contentType() {
}

public FieldMapper.Builder getMergeBuilder() {
return new Builder(simpleName(), builder.analyzers.indexAnalyzers).init(this);
return new Builder(simpleName(), builder.indexCreatedVersion, builder.analyzers.indexAnalyzers).init(this);
}

public static String getShingleFieldName(String parentField, int shingleSize) {
Expand Down
Expand Up @@ -21,6 +21,7 @@
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.IndexOptions;
import org.elasticsearch.ElasticsearchParseException;
import org.elasticsearch.Version;
import org.elasticsearch.index.analysis.AnalyzerScope;
import org.elasticsearch.index.analysis.IndexAnalyzers;
import org.elasticsearch.index.analysis.NamedAnalyzer;
Expand Down Expand Up @@ -86,12 +87,16 @@ public static class Builder extends FieldMapper.Builder {

private final Parameter<Map<String, String>> meta = Parameter.metaParam();

public Builder(String name, IndexAnalyzers indexAnalyzers) {
private final Version indexCreatedVersion;

public Builder(String name, Version indexCreatedVersion, IndexAnalyzers indexAnalyzers) {
super(name);
this.indexCreatedVersion = indexCreatedVersion;
this.analyzers = new TextParams.Analyzers(
indexAnalyzers,
m -> builder(m).analyzers.getIndexAnalyzer(),
m -> builder(m).analyzers.positionIncrementGap.getValue()
m -> builder(m).analyzers.positionIncrementGap.getValue(),
indexCreatedVersion
);
}

Expand Down Expand Up @@ -145,7 +150,7 @@ public AnnotatedTextFieldMapper build(MapperBuilderContext context) {
}
}

public static TypeParser PARSER = new TypeParser((n, c) -> new Builder(n, c.getIndexAnalyzers()));
public static TypeParser PARSER = new TypeParser((n, c) -> new Builder(n, c.indexVersionCreated(), c.getIndexAnalyzers()));

/**
* Parses markdown-like syntax into plain text and AnnotationTokens with offsets for
Expand Down Expand Up @@ -527,6 +532,6 @@ protected String contentType() {

@Override
public FieldMapper.Builder getMergeBuilder() {
return new Builder(simpleName(), builder.analyzers.indexAnalyzers).init(this);
return new Builder(simpleName(), builder.indexCreatedVersion, builder.analyzers.indexAnalyzers).init(this);
}
}
Expand Up @@ -11,6 +11,7 @@
import org.apache.lucene.queries.intervals.Intervals;
import org.apache.lucene.queries.intervals.IntervalsSource;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.Version;
import org.elasticsearch.index.mapper.FieldTypeTestCase;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.MapperBuilderContext;
Expand All @@ -28,7 +29,7 @@ public void testIntervals() throws IOException {
}

public void testFetchSourceValue() throws IOException {
MappedFieldType fieldType = new AnnotatedTextFieldMapper.Builder("field", createDefaultIndexAnalyzers()).build(
MappedFieldType fieldType = new AnnotatedTextFieldMapper.Builder("field", Version.CURRENT, createDefaultIndexAnalyzers()).build(
MapperBuilderContext.ROOT
).fieldType();

Expand Down
Expand Up @@ -18,15 +18,12 @@
import org.elasticsearch.common.settings.IndexScopedSettings;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.LoggingDeprecationHandler;
import org.elasticsearch.core.Nullable;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AnalyzerScope;
import org.elasticsearch.index.analysis.IndexAnalyzers;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.mapper.DocumentMapper;
import org.elasticsearch.index.mapper.MapperRegistry;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.mapper.Mapping;
import org.elasticsearch.index.similarity.SimilarityService;
import org.elasticsearch.script.ScriptCompiler;
import org.elasticsearch.script.ScriptService;
Expand Down Expand Up @@ -92,7 +89,7 @@ public IndexMetadata verifyIndexMetadata(IndexMetadata indexMetadata, Version mi
// Next we have to run this otherwise if we try to create IndexSettings
// with broken settings it would fail in checkMappingsCompatibility
newMetadata = archiveBrokenIndexSettings(newMetadata);
createAndValidateMapping(newMetadata);
checkMappingsCompatibility(newMetadata);
return newMetadata;
}

Expand Down Expand Up @@ -129,10 +126,8 @@ private static void checkSupportedVersion(IndexMetadata indexMetadata, Version m
* Note that we don't expect users to encounter mapping incompatibilities, since our index compatibility
* policy guarantees we can read mappings from previous compatible index versions. A failure here would
* indicate a compatibility bug (which are unfortunately not that uncommon).
* @return the mapping
*/
@Nullable
public Mapping createAndValidateMapping(IndexMetadata indexMetadata) {
private void checkMappingsCompatibility(IndexMetadata indexMetadata) {
try {

// We cannot instantiate real analysis server or similarity service at this point because the node
Expand Down Expand Up @@ -199,8 +194,6 @@ public Set<Entry<String, NamedAnalyzer>> entrySet() {
scriptService
);
mapperService.merge(indexMetadata, MapperService.MergeReason.MAPPING_RECOVERY);
DocumentMapper documentMapper = mapperService.documentMapper();
return documentMapper == null ? null : documentMapper.mapping();
}
} catch (Exception ex) {
// Wrap the inner exception so we have the index name in the exception message
Expand Down
Expand Up @@ -8,6 +8,9 @@

package org.elasticsearch.index.mapper;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.message.ParameterizedMessage;
import org.apache.lucene.index.LeafReaderContext;
import org.elasticsearch.Version;
import org.elasticsearch.common.Explicit;
Expand Down Expand Up @@ -48,6 +51,8 @@
import java.util.function.Supplier;

public abstract class FieldMapper extends Mapper implements Cloneable {
private static final Logger logger = LogManager.getLogger(FieldMapper.class);

public static final Setting<Boolean> IGNORE_MALFORMED_SETTING = Setting.boolSetting(
"index.mapping.ignore_malformed",
false,
Expand Down Expand Up @@ -959,23 +964,48 @@ public static <T extends Enum<T>> Parameter<T> restrictedEnumParam(
* @param updateable whether the parameter can be changed by a mapping update
* @param initializer a function that reads the parameter value from an existing mapper
* @param defaultAnalyzer the default value, to be used if the parameter is undefined in a mapping
* @param indexCreatedVersion the version on which this index was created
*/
public static Parameter<NamedAnalyzer> analyzerParam(
String name,
boolean updateable,
Function<FieldMapper, NamedAnalyzer> initializer,
Supplier<NamedAnalyzer> defaultAnalyzer
Supplier<NamedAnalyzer> defaultAnalyzer,
Version indexCreatedVersion
) {
return new Parameter<>(name, updateable, defaultAnalyzer, (n, c, o) -> {
String analyzerName = o.toString();
NamedAnalyzer a = c.getIndexAnalyzers().get(analyzerName);
if (a == null) {
throw new IllegalArgumentException("analyzer [" + analyzerName + "] has not been configured in mappings");
if (indexCreatedVersion.isLegacyIndexVersion()) {
logger.warn(
new ParameterizedMessage("Could not find analyzer [{}] of legacy index, falling back to default", analyzerName)
);
a = defaultAnalyzer.get();
} else {
throw new IllegalArgumentException("analyzer [" + analyzerName + "] has not been configured in mappings");
}
}
return a;
}, initializer, (b, n, v) -> b.field(n, v.name()), NamedAnalyzer::name);
}

/**
* Defines a parameter that takes an analyzer name.
* <p>
* Convenience overload for callers that do not track the index creation version: it delegates to
* {@link #analyzerParam(String, boolean, Function, Supplier, Version)} passing {@link Version#CURRENT}.
* In that overload an analyzer name that cannot be resolved is replaced by the default analyzer only
* when the supplied version reports {@code isLegacyIndexVersion()}; otherwise an
* {@link IllegalArgumentException} is thrown.
* NOTE(review): presumably {@code Version.CURRENT} is never a legacy index version, so this overload
* always takes the throwing path for unknown analyzers - confirm against {@code Version}.
* @param name the parameter name
* @param updateable whether the parameter can be changed by a mapping update
* @param initializer a function that reads the parameter value from an existing mapper
* @param defaultAnalyzer the default value, to be used if the parameter is undefined in a mapping
*/
public static Parameter<NamedAnalyzer> analyzerParam(
String name,
boolean updateable,
Function<FieldMapper, NamedAnalyzer> initializer,
Supplier<NamedAnalyzer> defaultAnalyzer
) {
return analyzerParam(name, updateable, initializer, defaultAnalyzer, Version.CURRENT);
}

/**
* Declares a metadata parameter
*/
Expand Down

0 comments on commit 5aebb8e

Please sign in to comment.