diff --git a/docs/changelog/135673.yaml b/docs/changelog/135673.yaml new file mode 100644 index 0000000000000..b4607e852a826 --- /dev/null +++ b/docs/changelog/135673.yaml @@ -0,0 +1,6 @@ +pr: 135673 +summary: Add index setting that disables the `index.dimensions` based routing and + `_tsid` creation strategy +area: TSDB +type: enhancement +issues: [] diff --git a/docs/reference/elasticsearch/index-settings/serverless.md b/docs/reference/elasticsearch/index-settings/serverless.md index 1175bcc84865f..4f5c876771241 100644 --- a/docs/reference/elasticsearch/index-settings/serverless.md +++ b/docs/reference/elasticsearch/index-settings/serverless.md @@ -13,13 +13,13 @@ This page lists the {{es}} index settings available in {{serverless-full}} proje ### General settings * [`index.codec`](./index-modules.md#index-codec) -* [`index.default_pipeline`](./index-modules.md#index-default-pipeline) +* [`index.default_pipeline`](./index-modules.md#index-default-pipeline) * [`index.dense_vector.hnsw_filter_heuristic`](./index-modules.md#index-dense-vector-hnsw-filter-heuristic) -* [`index.final_pipeline`](./index-modules.md#index-final-pipeline) +* [`index.final_pipeline`](./index-modules.md#index-final-pipeline) * [`index.hidden`](./index-modules.md#index-hidden) * [`index.mode`](./index-modules.md#index-mode-setting) -* [`index.query.default_field`](./index-modules.md#index-query-default-field) -* [`index.refresh_interval`](./index-modules.md#index-refresh-interval-setting) +* [`index.query.default_field`](./index-modules.md#index-query-default-field) +* [`index.refresh_interval`](./index-modules.md#index-refresh-interval-setting) ### Index sorting settings @@ -30,10 +30,10 @@ This page lists the {{es}} index settings available in {{serverless-full}} proje ### Index blocks settings -* [`index.blocks.read_only`](./index-block.md#index-blocks-read-only) -* [`index.blocks.read`](./index-block.md#index-blocks-read) -* [`index.blocks.write`](./index-block.md#index-blocks-write) -* 
[`index.blocks.metadata`](./index-block.md#index-blocks-metadata) +* [`index.blocks.read_only`](./index-block.md#index-blocks-read-only) +* [`index.blocks.read`](./index-block.md#index-blocks-read) +* [`index.blocks.write`](./index-block.md#index-blocks-write) +* [`index.blocks.metadata`](./index-block.md#index-blocks-metadata) ### Field and mapping related settings @@ -56,8 +56,9 @@ This page lists the {{es}} index settings available in {{serverless-full}} proje * [`index.look_ahead_time`](./time-series.md#index-look-ahead-time) * [`index.look_back_time`](./time-series.md#index-look-back-time) * [`index.routing_path`](./time-series.md#index-routing-path) +* [`index.dimensions_tsid_strategy_enabled`](./time-series.md#index-dimensions-tsid-strategy-enabled) ### Similarity and analyzers * [`index.similarity.*`](../mapping-reference/similarity.md) -* [`index.analysis.*`](../mapping-reference/analyzer.md) \ No newline at end of file +* [`index.analysis.*`](../mapping-reference/analyzer.md) diff --git a/docs/reference/elasticsearch/index-settings/time-series.md b/docs/reference/elasticsearch/index-settings/time-series.md index 336d7e7730b65..061889e731f9e 100644 --- a/docs/reference/elasticsearch/index-settings/time-series.md +++ b/docs/reference/elasticsearch/index-settings/time-series.md @@ -44,7 +44,39 @@ $$$index-look-back-time$$$ : (Static, [time units](/reference/elasticsearch/rest-apis/api-conventions.md#time-units)) Interval used to calculate the `index.time_series.start_time` for a TSDS’s first backing index when a tsdb data stream is created. Defaults to `2h` (2 hours). Accepts `1m` (one minute) to `7d` (seven days). Only indices with an `index.mode` of `time_series` support this setting. For more information, refer to [Look-back time](docs-content://manage-data/data-store/data-streams/time-series-data-stream-tsds.md#tsds-look-back-time). 
$$$index-routing-path$$$ `index.routing_path` {applies_to}`serverless: all` -: (Static, string or array of strings) Plain `keyword` fields used to route documents in a TSDS to index shards. Supports wildcards (`*`). Only indices with an `index.mode` of `time_series` support this setting. Defaults to an empty list, except for data streams then defaults to the list of [dimension fields](docs-content://manage-data/data-store/data-streams/time-series-data-stream-tsds.md#time-series-dimension) with a `time_series_dimension` value of `true` defined in your component and index templates. For more information, refer to [Dimension-based routing](docs-content://manage-data/data-store/data-streams/time-series-data-stream-tsds.md#dimension-based-routing). +: (Static, string or array of strings) Time series dimension fields used to route documents in a TSDS to index shards. +Supports wildcards (`*`). +Only indices with an `index.mode` of `time_series` support this setting. + +: Default value: +: Indices that are not part of a time series data stream have no default value and require the routing path to be defined explicitly. +If a time series data stream is used that is eligible for the `index.dimensions`-based routing (see [`index.dimensions_tsid_strategy_enabled`](#index-dimensions-tsid-strategy-enabled)), +the `index.routing_path` will be empty. +For time series data streams where the `index.dimensions`-based routing does not apply, +this defaults to the list of [dimension fields](docs-content://manage-data/data-store/data-streams/time-series-data-stream-tsds.md#time-series-dimension) with a `time_series_dimension` value of `true` as defined in your component and index templates. + +: Manually setting a value disables the `index.dimensions`-based routing strategy (see [`index.dimensions_tsid_strategy_enabled`](#index-dimensions-tsid-strategy-enabled)). 
+For more information, refer to [Dimension-based routing](docs-content://manage-data/data-store/data-streams/time-series-data-stream-tsds.md#dimension-based-routing). + + +$$$index-dimensions-tsid-strategy-enabled$$$ + +`index.dimensions_tsid_strategy_enabled` {applies_to}`stack: ga 9.2` {applies_to}`serverless: all` +: (Static, boolean) Controls whether the `_tsid` can be created using the `index.dimensions` index setting. +The `index.dimensions` setting is an internal setting that is automatically populated and updated for eligible time series data streams and is not user-configurable. +This strategy offers improved ingestion performance that avoids processing dimensions multiple times for the purposes of shard routing and creating the `_tsid`. +When used, `index.routing_path` will not be set and shard routing uses the full `_tsid`, +which can help to avoid shard hot-spotting. + +: If set to `false`, +or `index.routing_path` is configured manually, +or the index isn't eligible (see below), +shard routing will be based on the `index.routing_path` instead. + +: Defaults to `true`. + +: This optimized `_tsid` creation strategy is only available for data streams, and only if there are no dynamic templates that set `time_series_dimension: true`. +Trying to add such a dynamic template to existing backing indices after the fact will fail the update mapping request and you will need to roll over the data stream instead. 
$$$index-mapping-dimension-fields-limit$$$ diff --git a/modules/data-streams/src/internalClusterTest/java/org/elasticsearch/datastreams/TSDBIndexingIT.java b/modules/data-streams/src/internalClusterTest/java/org/elasticsearch/datastreams/TSDBIndexingIT.java index c64421bd3b8a5..c22f79b11a597 100644 --- a/modules/data-streams/src/internalClusterTest/java/org/elasticsearch/datastreams/TSDBIndexingIT.java +++ b/modules/data-streams/src/internalClusterTest/java/org/elasticsearch/datastreams/TSDBIndexingIT.java @@ -333,6 +333,7 @@ public void testTsdbTemplatesNoKeywordFieldType() throws Exception { Settings.builder() .put("index.mode", "time_series") .put("index.routing_path", randomBoolean() ? null : "metricset") + .put("index.dimensions_tsid_strategy_enabled", randomDouble() < 0.8) .build(), new CompressedXContent(mappingTemplate), null @@ -640,12 +641,16 @@ public void testReindexing() throws Exception { public void testAddDimensionToMapping() throws Exception { String dataStreamName = "my-ds"; var putTemplateRequest = new TransportPutComposableIndexTemplateAction.Request("id"); + boolean indexDimensionsTsidStrategyEnabled = randomBoolean(); putTemplateRequest.indexTemplate( ComposableIndexTemplate.builder() .indexPatterns(List.of(dataStreamName)) .template( new Template( - Settings.builder().put("index.mode", "time_series").build(), + Settings.builder() + .put("index.mode", "time_series") + .put("index.dimensions_tsid_strategy_enabled", indexDimensionsTsidStrategyEnabled) + .build(), new CompressedXContent(MAPPING_TEMPLATE), null ) @@ -662,8 +667,13 @@ public void testAddDimensionToMapping() throws Exception { "my-ds" ); assertAcked(client().execute(CreateDataStreamAction.INSTANCE, createDsRequest)); - assertThat(getSetting(dataStreamName, IndexMetadata.INDEX_DIMENSIONS), equalTo(List.of("metricset"))); - assertThat(getSetting(dataStreamName, IndexMetadata.INDEX_ROUTING_PATH), empty()); + if (indexDimensionsTsidStrategyEnabled) { + 
assertThat(getSetting(dataStreamName, IndexMetadata.INDEX_DIMENSIONS), equalTo(List.of("metricset"))); + assertThat(getSetting(dataStreamName, IndexMetadata.INDEX_ROUTING_PATH), empty()); + } else { + assertThat(getSetting(dataStreamName, IndexMetadata.INDEX_DIMENSIONS), empty()); + assertThat(getSetting(dataStreamName, IndexMetadata.INDEX_ROUTING_PATH), equalTo(List.of("metricset"))); + } // put mapping with k8s.pod.uid as another time series dimension var putMappingRequest = new PutMappingRequest(dataStreamName).source(""" @@ -677,8 +687,13 @@ public void testAddDimensionToMapping() throws Exception { } """, XContentType.JSON); assertAcked(client().execute(TransportPutMappingAction.TYPE, putMappingRequest).actionGet()); - assertThat(getSetting(dataStreamName, IndexMetadata.INDEX_DIMENSIONS), containsInAnyOrder("metricset", "k8s.pod.name")); - assertThat(getSetting(dataStreamName, IndexMetadata.INDEX_ROUTING_PATH), empty()); + if (indexDimensionsTsidStrategyEnabled) { + assertThat(getSetting(dataStreamName, IndexMetadata.INDEX_DIMENSIONS), containsInAnyOrder("metricset", "k8s.pod.name")); + assertThat(getSetting(dataStreamName, IndexMetadata.INDEX_ROUTING_PATH), empty()); + } else { + assertThat(getSetting(dataStreamName, IndexMetadata.INDEX_DIMENSIONS), empty()); + assertThat(getSetting(dataStreamName, IndexMetadata.INDEX_ROUTING_PATH), equalTo(List.of("metricset"))); + } // put dynamic template defining time series dimensions // we don't support index.dimensions in that case @@ -698,13 +713,19 @@ public void testAddDimensionToMapping() throws Exception { } """, XContentType.JSON); ActionFuture putMappingFuture = client().execute(TransportPutMappingAction.TYPE, putMappingRequest); - IllegalArgumentException exception = assertThrows(IllegalArgumentException.class, putMappingFuture::actionGet); - assertThat( - exception.getMessage(), - containsString("Cannot add dynamic templates that define dimension fields on an existing index with index.dimensions") - ); - 
assertThat(getSetting(dataStreamName, IndexMetadata.INDEX_DIMENSIONS), containsInAnyOrder("metricset", "k8s.pod.name")); - assertThat(getSetting(dataStreamName, IndexMetadata.INDEX_ROUTING_PATH), empty()); + if (indexDimensionsTsidStrategyEnabled) { + IllegalArgumentException exception = assertThrows(IllegalArgumentException.class, putMappingFuture::actionGet); + assertThat( + exception.getMessage(), + containsString("Cannot add dynamic templates that define dimension fields on an existing index with index.dimensions") + ); + assertThat(getSetting(dataStreamName, IndexMetadata.INDEX_DIMENSIONS), containsInAnyOrder("metricset", "k8s.pod.name")); + assertThat(getSetting(dataStreamName, IndexMetadata.INDEX_ROUTING_PATH), empty()); + } else { + assertAcked(putMappingFuture.actionGet()); + assertThat(getSetting(dataStreamName, IndexMetadata.INDEX_DIMENSIONS), empty()); + assertThat(getSetting(dataStreamName, IndexMetadata.INDEX_ROUTING_PATH), equalTo(List.of("metricset"))); + } indexWithPodNames(dataStreamName, Instant.now(), Map.of(), "dog", "cat"); } diff --git a/modules/data-streams/src/main/java/org/elasticsearch/datastreams/DataStreamIndexSettingsProvider.java b/modules/data-streams/src/main/java/org/elasticsearch/datastreams/DataStreamIndexSettingsProvider.java index 2da10211d54ed..2f3fc8e3cb66c 100644 --- a/modules/data-streams/src/main/java/org/elasticsearch/datastreams/DataStreamIndexSettingsProvider.java +++ b/modules/data-streams/src/main/java/org/elasticsearch/datastreams/DataStreamIndexSettingsProvider.java @@ -130,7 +130,9 @@ public void provideAdditionalSettings( dimensions ); if (dimensions.isEmpty() == false) { - if (matchesAllDimensions && indexVersion.onOrAfter(IndexVersions.TSID_CREATED_DURING_ROUTING)) { + if (matchesAllDimensions + && IndexMetadata.INDEX_DIMENSIONS_TSID_STRATEGY_ENABLED.get(indexTemplateAndCreateRequestSettings) + && indexVersion.onOrAfter(IndexVersions.TSID_CREATED_DURING_ROUTING)) { // Only set index.dimensions if the paths in 
the dimensions list match all potential dimension fields. // This is not the case e.g. if a dynamic template matches by match_mapping_type instead of path_match additionalSettings.putList(INDEX_DIMENSIONS.getKey(), dimensions); diff --git a/modules/data-streams/src/test/java/org/elasticsearch/datastreams/DataStreamIndexSettingsProviderTests.java b/modules/data-streams/src/test/java/org/elasticsearch/datastreams/DataStreamIndexSettingsProviderTests.java index cb636b69c28fe..0b0458905526d 100644 --- a/modules/data-streams/src/test/java/org/elasticsearch/datastreams/DataStreamIndexSettingsProviderTests.java +++ b/modules/data-streams/src/test/java/org/elasticsearch/datastreams/DataStreamIndexSettingsProviderTests.java @@ -49,7 +49,8 @@ public class DataStreamIndexSettingsProviderTests extends ESTestCase { private static final TimeValue DEFAULT_LOOK_AHEAD_TIME = TimeValue.timeValueMinutes(30); // default DataStreamIndexSettingsProvider provider; - private boolean indexDimensionsTsidOptimizationEnabled; + private boolean indexDimensionsTsidStrategyEnabledSetting; + private boolean expectedIndexDimensionsTsidOptimizationEnabled; private IndexVersion indexVersion; @Before @@ -60,7 +61,9 @@ public void setup() { indexVersion = randomBoolean() ? 
IndexVersionUtils.randomPreviousCompatibleVersion(random(), IndexVersions.TSID_CREATED_DURING_ROUTING) : IndexVersionUtils.randomVersionBetween(random(), IndexVersions.TSID_CREATED_DURING_ROUTING, IndexVersion.current()); - indexDimensionsTsidOptimizationEnabled = indexVersion.onOrAfter(IndexVersions.TSID_CREATED_DURING_ROUTING); + indexDimensionsTsidStrategyEnabledSetting = usually(); + expectedIndexDimensionsTsidOptimizationEnabled = indexDimensionsTsidStrategyEnabledSetting + && indexVersion.onOrAfter(IndexVersions.TSID_CREATED_DURING_ROUTING); } public void testGetAdditionalIndexSettings() throws Exception { @@ -114,12 +117,15 @@ public void testGetAdditionalIndexSettings() throws Exception { Settings result = additionalSettings.build(); // The index.time_series.end_time setting requires index.mode to be set to time_series adding it here so that we read this setting: // (in production the index.mode setting is usually provided in an index or component template) - result = builder().put(result).put("index.mode", "time_series").build(); - assertThat(result.size(), equalTo(4)); + result = builder().put(result) + .put("index.mode", "time_series") + .put("index.dimensions_tsid_strategy_enabled", indexDimensionsTsidStrategyEnabledSetting) + .build(); + assertThat(result.size(), equalTo(5)); assertThat(IndexSettings.MODE.get(result), equalTo(IndexMode.TIME_SERIES)); assertThat(IndexSettings.TIME_SERIES_START_TIME.get(result), equalTo(now.minusMillis(DEFAULT_LOOK_BACK_TIME.getMillis()))); assertThat(IndexSettings.TIME_SERIES_END_TIME.get(result), equalTo(now.plusMillis(DEFAULT_LOOK_AHEAD_TIME.getMillis()))); - if (indexDimensionsTsidOptimizationEnabled) { + if (expectedIndexDimensionsTsidOptimizationEnabled) { assertThat(IndexMetadata.INDEX_DIMENSIONS.get(result), containsInAnyOrder("field3", "field4", "field5", "field6")); assertThat(IndexMetadata.INDEX_ROUTING_PATH.get(result), empty()); } else { @@ -243,12 +249,15 @@ public void 
testGetAdditionalIndexSettingsMappingsMerging() throws Exception { Settings result = additionalSettings.build(); // The index.time_series.end_time setting requires index.mode to be set to time_series adding it here so that we read this setting: // (in production the index.mode setting is usually provided in an index or component template) - result = builder().put(result).put("index.mode", "time_series").build(); - assertThat(result.size(), equalTo(4)); + result = builder().put(result) + .put("index.mode", "time_series") + .put("index.dimensions_tsid_strategy_enabled", indexDimensionsTsidStrategyEnabledSetting) + .build(); + assertThat(result.size(), equalTo(5)); assertThat(IndexSettings.MODE.get(result), equalTo(IndexMode.TIME_SERIES)); assertThat(IndexSettings.TIME_SERIES_START_TIME.get(result), equalTo(now.minusMillis(DEFAULT_LOOK_BACK_TIME.getMillis()))); assertThat(IndexSettings.TIME_SERIES_END_TIME.get(result), equalTo(now.plusMillis(DEFAULT_LOOK_AHEAD_TIME.getMillis()))); - if (indexDimensionsTsidOptimizationEnabled) { + if (expectedIndexDimensionsTsidOptimizationEnabled) { assertThat(IndexMetadata.INDEX_DIMENSIONS.get(result), containsInAnyOrder("field1", "field3")); assertThat(IndexMetadata.INDEX_ROUTING_PATH.get(result), empty()); } else { @@ -719,7 +728,7 @@ public void testGenerateNonDimensionDynamicTemplate() throws Exception { assertThat(IndexSettings.MODE.get(result), equalTo(IndexMode.TIME_SERIES)); assertThat(IndexSettings.TIME_SERIES_START_TIME.get(result), equalTo(now.minusMillis(DEFAULT_LOOK_BACK_TIME.getMillis()))); assertThat(IndexSettings.TIME_SERIES_END_TIME.get(result), equalTo(now.plusMillis(DEFAULT_LOOK_AHEAD_TIME.getMillis()))); - if (indexDimensionsTsidOptimizationEnabled) { + if (expectedIndexDimensionsTsidOptimizationEnabled) { assertThat(IndexMetadata.INDEX_DIMENSIONS.get(result), containsInAnyOrder("host.id")); assertThat(IndexMetadata.INDEX_ROUTING_PATH.get(result), empty()); } else { @@ -807,7 +816,7 @@ public void 
testGenerateRoutingPathFromPassThroughObject() throws Exception { assertThat(IndexSettings.MODE.get(result), equalTo(IndexMode.TIME_SERIES)); assertThat(IndexSettings.TIME_SERIES_START_TIME.get(result), equalTo(now.minusMillis(DEFAULT_LOOK_BACK_TIME.getMillis()))); assertThat(IndexSettings.TIME_SERIES_END_TIME.get(result), equalTo(now.plusMillis(DEFAULT_LOOK_AHEAD_TIME.getMillis()))); - if (indexDimensionsTsidOptimizationEnabled) { + if (expectedIndexDimensionsTsidOptimizationEnabled) { assertThat(IndexMetadata.INDEX_DIMENSIONS.get(result), containsInAnyOrder("labels.*")); assertThat(IndexMetadata.INDEX_ROUTING_PATH.get(result), empty()); } else { @@ -971,7 +980,9 @@ public void testAddDynamicTemplate() throws Exception { private Settings generateTsdbSettings(String mapping, Instant now) throws IOException { ProjectMetadata projectMetadata = emptyProject(); String dataStreamName = "logs-app1"; - Settings settings = Settings.EMPTY; + Settings settings = Settings.builder() + .put("index.dimensions_tsid_strategy_enabled", indexDimensionsTsidStrategyEnabledSetting) + .build(); Settings.Builder additionalSettings = builder(); provider.provideAdditionalSettings( diff --git a/server/src/main/java/org/elasticsearch/cluster/metadata/IndexMetadata.java b/server/src/main/java/org/elasticsearch/cluster/metadata/IndexMetadata.java index 25edec3e855e4..3207f7b9f8b0b 100644 --- a/server/src/main/java/org/elasticsearch/cluster/metadata/IndexMetadata.java +++ b/server/src/main/java/org/elasticsearch/cluster/metadata/IndexMetadata.java @@ -534,7 +534,22 @@ public Iterator> settings() { "index.dimensions", Setting.Property.IndexScope, Property.Dynamic, - Property.PrivateIndex + Property.PrivateIndex, + Property.ServerlessPublic + ); + + /** + * Allows to disable the {@link #INDEX_DIMENSIONS}-based tsid creation strategy on a per-index basis. + * This can help to mitigate potential issues with that strategy. 
+ * For example, when using this strategy, + * it's not allowed to add a dynamic template that defines dimension fields to existing backing indices of a time series data stream. + */ + public static final Setting INDEX_DIMENSIONS_TSID_STRATEGY_ENABLED = Setting.boolSetting( + "index.dimensions_tsid_strategy_enabled", + true, + Setting.Property.IndexScope, + Property.Final, + Property.ServerlessPublic ); /** diff --git a/server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java b/server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java index a0a28e9322956..d4fef4e9bb489 100644 --- a/server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java +++ b/server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java @@ -224,6 +224,7 @@ public final class IndexScopedSettings extends AbstractScopedSettings { IndexSettings.MODE, IndexMetadata.INDEX_ROUTING_PATH, IndexMetadata.INDEX_DIMENSIONS, + IndexMetadata.INDEX_DIMENSIONS_TSID_STRATEGY_ENABLED, IndexSettings.TIME_SERIES_START_TIME, IndexSettings.TIME_SERIES_END_TIME, IndexSettings.SEQ_NO_INDEX_OPTIONS_SETTING,