Skip to content

Commit

Permalink
More validation for routing_path (#79520)
Browse files Browse the repository at this point in the history
This adds more validation for fields matching `routing_path`. In
particular, it disallows runtime fields, fields with a `script`, and
fails when you try to use `dynamic:false` to skip mapping them fields
that would match the `routing_path`. This makes sure that the we get
predictable time routing for the time series.
  • Loading branch information
nik9000 committed Oct 25, 2021
1 parent dd32271 commit cfa92ef
Show file tree
Hide file tree
Showing 13 changed files with 353 additions and 105 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -168,45 +168,3 @@ routing required:
mappings:
_routing:
required: true

---
bad routing_path:
- skip:
version: " - 7.99.99"
reason: introduced in 8.0.0

- do:
catch: /All fields that match routing_path must be keyword time_series_dimensions but \[@timestamp\] was \[date\]/
indices.create:
index: test_index
body:
settings:
index:
mode: time_series
routing_path: [metricset, k8s.pod.uid, "@timestamp"]
number_of_replicas: 0
number_of_shards: 2
mappings:
properties:
"@timestamp":
type: date
metricset:
type: keyword
time_series_dimension: true
k8s:
properties:
pod:
properties:
uid:
type: keyword
time_series_dimension: true
name:
type: keyword
ip:
type: ip
network:
properties:
tx:
type: long
rx:
type: long
Original file line number Diff line number Diff line change
Expand Up @@ -104,3 +104,149 @@ top level dim object:
latency:
type: double
time_series_metric: gauge

---
non keyword matches routing_path:
- skip:
version: " - 7.99.99"
reason: introduced in 8.0.0

- do:
catch: '/All fields that match routing_path must be keywords with \[time_series_dimension: true\] and without the \[script\] parameter. \[@timestamp\] was \[date\]./'
indices.create:
index: test_index
body:
settings:
index:
mode: time_series
routing_path: [metricset, k8s.pod.uid, "@timestamp"]
number_of_replicas: 0
number_of_shards: 2
mappings:
properties:
"@timestamp":
type: date
metricset:
type: keyword
time_series_dimension: true
k8s:
properties:
pod:
properties:
uid:
type: keyword
time_series_dimension: true
name:
type: keyword
ip:
type: ip
network:
properties:
tx:
type: long
rx:
type: long

---
runtime field matching routing path:
- skip:
version: " - 7.99.99"
reason: introduced in 8.0.0

- do:
indices.create:
index: test
body:
settings:
index:
mode: time_series
routing_path: [dim.*]
mappings:
properties:
"@timestamp":
type: date

- do:
bulk:
refresh: true
index: test
body:
- '{"index": {}}'
- '{"@timestamp": "2021-04-28T18:50:04.467Z", "dim": {"foo": "a"}}'

- do:
catch: /runtime fields may not match \[routing_path\] but \[dim.bar\] matched/
search:
index: test
body:
runtime_mappings:
dim.bar:
type: keyword
query:
match:
dim.foo: a

---
"dynamic: runtime matches routing_path":
- skip:
version: " - 7.99.99"
reason: introduced in 8.0.0

- do:
indices.create:
index: test
body:
settings:
index:
mode: time_series
routing_path: [dim.*]
mappings:
properties:
"@timestamp":
type: date
dim:
type: object
dynamic: runtime

- do:
bulk:
refresh: true
index: test
body:
- '{"index": {}}'
- '{"@timestamp": "2021-04-28T18:50:04.467Z", "dim": {"foo": "a"}}'
- match: {items.0.index.error.reason: "All fields that match routing_path must be keywords with [time_series_dimension: true] and without the [script] parameter. [dim.foo] was a runtime [keyword]."}

---
"dynamic: false matches routing_path":
- skip:
version: " - 7.99.99"
reason: introduced in 8.0.0

- do:
indices.create:
index: test
body:
settings:
index:
mode: time_series
routing_path: [dim.*]
mappings:
properties:
"@timestamp":
type: date
dim:
type: object
dynamic: false

- do:
bulk:
refresh: true
index: test
body:
- '{"index": {}}'
- '{"@timestamp": "2021-04-28T18:50:04.467Z", "dim": {"foo": "a"}}'
- '{"index": {}}'
- '{"@timestamp": "2021-04-28T18:50:04.467Z", "dim": {"foo": {"bar": "a"}}}'
- match: {items.0.index.error.reason: "All fields matching [routing_path] must be mapped but [dim.foo] was declared as [dynamic: false]"}
- match: {items.1.index.error.reason: "All fields matching [routing_path] must be mapped but [dim.foo] was declared as [dynamic: false]"}
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,18 @@ protected final LeafFactory leafFactory(SearchExecutionContext context) {
return leafFactory(context.lookup().forkAndTrackFieldReferences(name()));
}

@Override
public void validateMatchedRoutingPath() {
throw new IllegalArgumentException(
"All fields that match routing_path must be keywords with [time_series_dimension: true] "
+ "and without the [script] parameter. ["
+ name()
+ "] was a runtime ["
+ typeName()
+ "]."
);
}

// Placeholder Script for source-only fields
// TODO rework things so that we don't need this
protected static final Script DEFAULT_SCRIPT = new Script("");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
package org.elasticsearch.index.mapper;

import org.elasticsearch.common.compress.CompressedXContent;
import org.elasticsearch.common.regex.Regex;
import org.elasticsearch.index.IndexSettings;

import java.util.List;
Expand Down Expand Up @@ -90,8 +91,20 @@ public void validate(IndexSettings settings, boolean checkLimits) {
throw new IllegalArgumentException("cannot have nested fields when index sort is activated");
}
List<String> routingPaths = settings.getIndexMetadata().getRoutingPaths();
if (false == routingPaths.isEmpty()) {
mappingLookup.getMapping().getRoot().validateRoutingPath(routingPaths);
for (String path : routingPaths) {
for (String match : mappingLookup.getMatchingFieldNames(path)) {
mappingLookup.getFieldType(match).validateMatchedRoutingPath();
}
for (String objectName : mappingLookup.objectMappers().keySet()) {
if (Regex.simpleMatch(path, objectName)) {
throw new IllegalArgumentException(
"All fields that match routing_path must be keywords with [time_series_dimension: true] "
+ "and without the [script] parameter. ["
+ objectName
+ "] was [object]."
);
}
}
}
if (checkLimits) {
this.mappingLookup.checkLimits(settings);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,20 +15,21 @@
import org.elasticsearch.Version;
import org.elasticsearch.common.Explicit;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.regex.Regex;
import org.elasticsearch.common.time.DateFormatter;
import org.elasticsearch.common.xcontent.LoggingDeprecationHandler;
import org.elasticsearch.xcontent.NamedXContentRegistry;
import org.elasticsearch.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentHelper;
import org.elasticsearch.xcontent.XContentParser;
import org.elasticsearch.xcontent.XContentType;
import org.elasticsearch.core.Tuple;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.IndexAnalyzers;
import org.elasticsearch.index.fielddata.IndexFieldDataCache;
import org.elasticsearch.index.query.SearchExecutionContext;
import org.elasticsearch.indices.breaker.NoneCircuitBreakerService;
import org.elasticsearch.search.lookup.SearchLookup;
import org.elasticsearch.xcontent.NamedXContentRegistry;
import org.elasticsearch.xcontent.XContentBuilder;
import org.elasticsearch.xcontent.XContentParser;
import org.elasticsearch.xcontent.XContentType;

import java.io.IOException;
import java.util.ArrayList;
Expand Down Expand Up @@ -573,6 +574,7 @@ private static void parseObject(final DocumentParserContext context, ObjectMappe
if (dynamic == ObjectMapper.Dynamic.STRICT) {
throw new StrictDynamicMappingException(mapper.fullPath(), currentFieldName);
} else if (dynamic == ObjectMapper.Dynamic.FALSE) {
failIfMatchesRoutingPath(context, parentMapper, currentFieldName);
// not dynamic, read everything up to end object
context.parser().skipChildren();
} else {
Expand Down Expand Up @@ -708,11 +710,24 @@ private static void parseDynamicValue(final DocumentParserContext context, Objec
throw new StrictDynamicMappingException(parentMapper.fullPath(), currentFieldName);
}
if (dynamic == ObjectMapper.Dynamic.FALSE) {
failIfMatchesRoutingPath(context, parentMapper, currentFieldName);
return;
}
dynamic.getDynamicFieldsBuilder().createDynamicFieldFromValue(context, token, currentFieldName);
}

private static void failIfMatchesRoutingPath(DocumentParserContext context, ObjectMapper parentMapper, String currentFieldName) {
if (context.indexSettings().getIndexMetadata().getRoutingPaths().isEmpty()) {
return;
}
String path = parentMapper.fullPath().isEmpty() ? currentFieldName : parentMapper.fullPath() + "." + currentFieldName;
if (Regex.simpleMatch(context.indexSettings().getIndexMetadata().getRoutingPaths(), path)) {
throw new MapperParsingException(
"All fields matching [routing_path] must be mapped but [" + path + "] was declared as [dynamic: false]"
);
}
}

/**
* Creates instances of the fields that the current field should be copied to
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@
import org.apache.lucene.util.automaton.Operations;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.lucene.search.AutomatonQueries;
import org.elasticsearch.xcontent.XContentParser;
import org.elasticsearch.index.analysis.IndexAnalyzers;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.fielddata.IndexFieldData;
Expand All @@ -44,6 +43,7 @@
import org.elasticsearch.search.aggregations.support.CoreValuesSourceType;
import org.elasticsearch.search.lookup.FieldValues;
import org.elasticsearch.search.lookup.SearchLookup;
import org.elasticsearch.xcontent.XContentParser;

import java.io.IOException;
import java.io.UncheckedIOException;
Expand Down Expand Up @@ -436,6 +436,26 @@ public int ignoreAbove() {
public boolean isDimension() {
return isDimension;
}

@Override
public void validateMatchedRoutingPath() {
if (false == isDimension) {
throw new IllegalArgumentException(
"All fields that match routing_path must be keywords with [time_series_dimension: true] "
+ "and without the [script] parameter. ["
+ name()
+ "] was not [time_series_dimension: true]."
);
}
if (scriptValues != null) {
throw new IllegalArgumentException(
"All fields that match routing_path must be keywords with [time_series_dimension: true] "
+ "and without the [script] parameter. ["
+ name()
+ "] has a [script] parameter."
);
}
}
}

/** The maximum keyword length allowed for a dimension field */
Expand Down Expand Up @@ -583,15 +603,4 @@ protected String contentType() {
public FieldMapper.Builder getMergeBuilder() {
return new Builder(simpleName(), indexAnalyzers, scriptCompiler).dimension(dimension).init(this);
}

@Override
protected void validateMatchedRoutingPath() {
if (false == fieldType().isDimension()) {
throw new IllegalArgumentException(
"All fields that match routing_path must be keyword time_series_dimensions but ["
+ name()
+ "] was not a time_series_dimension"
);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import org.apache.lucene.queries.spans.SpanQuery;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.ElasticsearchParseException;
import org.elasticsearch.cluster.metadata.IndexMetadata;
import org.elasticsearch.core.Nullable;
import org.elasticsearch.common.geo.ShapeRelation;
import org.elasticsearch.common.time.DateMathParser;
Expand Down Expand Up @@ -477,4 +478,18 @@ public TermsEnum getTerms(boolean caseInsensitive, String string, SearchExecutio
throws IOException {
return null;
}

/**
* Validate that this field can be the target of {@link IndexMetadata#INDEX_ROUTING_PATH}.
*/
public void validateMatchedRoutingPath() {
throw new IllegalArgumentException(
"All fields that match routing_path must be keywords with [time_series_dimension: true] "
+ "and without the [script] parameter. ["
+ name()
+ "] was ["
+ typeName()
+ "]."
);
}
}

0 comments on commit cfa92ef

Please sign in to comment.