From 95d852cb700cbf1a8dcacfd410e5623c7c682538 Mon Sep 17 00:00:00 2001 From: Marci W <333176+marciw@users.noreply.github.com> Date: Mon, 29 Sep 2025 20:46:35 -0400 Subject: [PATCH 01/14] Remaining edits --- .../data-streams/advanced-topics-tsds.md | 15 + .../data-store/data-streams/reindex-tsds.md | 265 ++++++------------ .../data-store/data-streams/set-up-tsds.md | 168 ++++++----- .../data-streams/time-bound-tsds.md | 76 +++++ .../time-series-data-stream-tsds.md | 213 ++++---------- ...sticsearch-reference-time-series-chart.svg | 1 - ...ticsearch-reference-time-series-chart2.svg | 46 +++ manage-data/toc.yml | 5 +- 8 files changed, 382 insertions(+), 407 deletions(-) create mode 100644 manage-data/data-store/data-streams/advanced-topics-tsds.md create mode 100644 manage-data/data-store/data-streams/time-bound-tsds.md delete mode 100644 manage-data/images/elasticsearch-reference-time-series-chart.svg create mode 100644 manage-data/images/elasticsearch-reference-time-series-chart2.svg diff --git a/manage-data/data-store/data-streams/advanced-topics-tsds.md b/manage-data/data-store/data-streams/advanced-topics-tsds.md new file mode 100644 index 0000000000..36ae51226b --- /dev/null +++ b/manage-data/data-store/data-streams/advanced-topics-tsds.md @@ -0,0 +1,15 @@ +--- +navigation_title: "Advanced topics" +applies_to: + stack: ga + serverless: ga +products: + - id: elasticsearch +--- + +# Advanced topics for working with time series data streams + +This section contains information about advanced concepts and operations for [time series data streams](/manage-data/data-store/data-streams/time-series-data-stream-tsds.md): + +- [](/manage-data/data-store/data-streams/time-bound-tsds.md) +- [](/manage-data/data-store/data-streams/reindex-tsds.md) \ No newline at end of file diff --git a/manage-data/data-store/data-streams/reindex-tsds.md b/manage-data/data-store/data-streams/reindex-tsds.md index 773eb1a40d..73b2865617 100644 --- 
a/manage-data/data-store/data-streams/reindex-tsds.md +++ b/manage-data/data-store/data-streams/reindex-tsds.md @@ -1,5 +1,5 @@ --- -navigation_title: Reindex a TSDS +navigation_title: "Reindex a TSDS" mapped_pages: - https://www.elastic.co/guide/en/elasticsearch/reference/current/tsds-reindex.html applies_to: @@ -9,208 +9,111 @@ products: - id: elasticsearch --- -# Reindex a TSDS [tsds-reindex] +# Reindex a time series data stream [tsds-reindex] -## Introduction [tsds-reindex-intro] +Reindexing allows you to copy documents from an old [time series data stream (TSDS)](/manage-data/data-store/data-streams/time-series-data-stream-tsds.md) to a new one. All data streams support reindexing, but time series data streams require special handling due to their time-bound backing indices and strict timestamp acceptance windows. -With reindexing, you can copy documents from an old [time-series data stream (TSDS)](../data-streams/time-series-data-stream-tsds.md) to a new one. Data streams support reindexing in general, with a few [restrictions](use-data-stream.md#reindex-with-a-data-stream). Still, time-series data streams introduce additional challenges due to tight control on the accepted timestamp range for each backing index they contain. Direct use of the reindex API would likely error out due to attempting to insert documents with timestamps that are outside the current acceptance window. +To reindex, follow the steps on this page. -To avoid these limitations, use the process that is outlined below: +:::{note} +This process only applies to time series data streams without a [downsampling](/manage-data/data-store/data-streams/downsampling-time-series-data-stream.md) configuration. To reindex a downsampled data stream, reindex the backing indices individually, then add them to a new, empty data stream. +::: -1. Create an index template for the destination data stream that will contain the re-indexed data. -2. Update the template to +## Overview - 1. 
Set `index.time_series.start_time` and `index.time_series.end_time` index settings to match the lowest and highest `@timestamp` values in the old data stream. - 2. Set the `index.number_of_shards` index setting to the sum of all primary shards of all backing indices of the old data stream. - 3. Set `index.number_of_replicas` to zero and unset the `index.lifecycle.name` index setting. +These high-level steps summarize the process of reindexing a time series data stream. Each step is detailed in later sections. -3. Run the reindex operation to completion. -4. Revert the overridden index settings in the destination index template. -5. Invoke the `rollover` api to create a new backing index that can receive new documents. +1. Create an index template for the destination data stream +2. Update the template with temporary settings for reindexing +3. Run the reindex operation +4. Revert the temporary index settings +5. Perform a manual rollover to create a new backing index for incoming data -::::{note} -This process only applies to time-series data streams without [downsampling](./downsampling-time-series-data-stream.md) configuration. Data streams with downsampling can only be re-indexed by re-indexing their backing indexes individually and adding them to an empty destination data stream. -:::: +The examples on this page use Dev Tools [Console](/explore-analyze/query-filter/tools/console.md) syntax. +## Create the destination index template [tsds-reindex-create-template] -In what follows, we elaborate on each step of the process with examples. 
- - -## Create a TSDS template to accept old documents [tsds-reindex-create-template] - -Consider a TSDS with the following template: +Create an index template for the new TSDS, using your preferred mappings and settings: ```console -POST /_component_template/source_template +PUT _index_template/new-tsds-template { + "index_patterns": ["new-tsds*"], + "priority": 100, + "data_stream": {}, "template": { "settings": { - "index": { - "number_of_replicas": 2, - "number_of_shards": 2, - "mode": "time_series", - "routing_path": [ "metricset" ] - } + "index.mode": "time_series", + "index.routing_path": ["dimension_field"] }, "mappings": { "properties": { - "@timestamp": { "type": "date" }, - "metricset": { + "@timestamp": { + "type": "date" + }, + "dimension_field": { "type": "keyword", "time_series_dimension": true }, - "k8s": { - "properties": { - "tx": { "type": "long" }, - "rx": { "type": "long" } - } + "metric_field": { + "type": "double", + "time_series_metric": "gauge" } } } } } - -POST /_index_template/1 -{ - "index_patterns": [ - "k8s*" - ], - "composed_of": [ - "source_template" - ], - "data_stream": {} -} -``` - -A possible output of `/k8s/_settings` looks like: - -```console-result -{ - ".ds-k8s-2023.09.01-000002": { - "settings": { - "index": { - "mode": "time_series", - "routing": { - "allocation": { - "include": { - "_tier_preference": "data_hot" - } - } - }, - "hidden": "true", - "number_of_shards": "2", - "time_series": { - "end_time": "2023-09-01T14:00:00.000Z", - "start_time": "2023-09-01T10:00:00.000Z" - }, - "provided_name": ".ds-k9s-2023.09.01-000002", - "creation_date": "1694439857608", - "number_of_replicas": "2", - "routing_path": [ - "metricset" - ], - ... 
- } - } - }, - ".ds-k8s-2023.09.01-000001": { - "settings": { - "index": { - "mode": "time_series", - "routing": { - "allocation": { - "include": { - "_tier_preference": "data_hot" - } - } - }, - "hidden": "true", - "number_of_shards": "2", - "time_series": { - "end_time": "2023-09-01T10:00:00.000Z", - "start_time": "2023-09-01T06:00:00.000Z" - }, - "provided_name": ".ds-k9s-2023.09.01-000001", - "creation_date": "1694439837126", - "number_of_replicas": "2", - "routing_path": [ - "metricset" - ], - ... - } - } - } -} ``` +## Update the template for reindexing -To reindex this TSDS, do not to re-use its index template in the destination data stream, to avoid impacting its functionality. Instead, clone the template of the source TSDS and apply the following modifications: +To support the reindexing process, you need to temporarily modify the template: -* Set `index.time_series.start_time` and `index.time_series.end_time` index settings explicitly. Their values should be based on the lowest and highest `@timestamp` values in the data stream to reindex. This way, the initial backing index can load all data that is contained in the source data stream. -* Set `index.number_of_shards` index setting to the sum of all primary shards of all backing indices of the source data stream. This helps maintain the same level of search parallelism, as each shard is processed in a separate thread (or more). -* Unset the `index.lifecycle.name` index setting, if any. This prevents ILM from modifying the destination data stream during reindexing. -* (Optional) Set `index.number_of_replicas` to zero. This helps speed up the reindex operation. Since the data gets copied, there is limited risk of data loss due to lack of replicas. - -Using the example above as source TSDS, the template for the destination TSDS would be: + 1. Set `index.time_series.start_time` and `index.time_series.end_time` index settings to match the lowest and highest `@timestamp` values in the old data stream. + 2. 
Set `index.number_of_shards` to the sum of all primary shards of all backing indices of the old data stream. + 3. Clear the `index.lifecycle.name` index setting (if any), to prevent ILM from modifying the destination data stream during reindexing. + 4. (Optional) Set `index.number_of_replicas` to zero, to speed up reindexing. Because the data gets copied in the reindexing process, you don't need replicas. ```console -POST /_component_template/destination_template +PUT _index_template/new-tsds-template { + "index_patterns": ["new-tsds*"], + "priority": 100, + "data_stream": {}, "template": { "settings": { - "index": { - "number_of_replicas": 0, - "number_of_shards": 4, - "mode": "time_series", - "routing_path": [ "metricset" ], - "time_series": { - "end_time": "2023-09-01T14:00:00.000Z", - "start_time": "2023-09-01T06:00:00.000Z" - } - } + "index.mode": "time_series", + "index.routing_path": ["host", "service"], + "index.time_series.start_time": "2023-01-01T00:00:00Z", <1> + "index.time_series.end_time": "2025-01-01T00:00:00Z", <2> + "index.number_of_shards": 6, <3> + "index.number_of_replicas": 0, <4> + "index.lifecycle.name": null <5> }, "mappings": { - "properties": { - "@timestamp": { "type": "date" }, - "metricset": { - "type": "keyword", - "time_series_dimension": true - }, - "k8s": { - "properties": { - "tx": { "type": "long" }, - "rx": { "type": "long" } - } - } - } + ... } } } - -POST /_index_template/2 -{ - "index_patterns": [ - "k9s*" - ], - "composed_of": [ - "destination_template" - ], - "data_stream": {} -} ``` +1. Lowest timestamp value in the old data stream +2. Highest timestamp value in the old data stream +3. Sum of the primary shards from all source backing indices +4. Speed up reindexing +5. 
Pause ILM -## Reindex [tsds-reindex-op] +## Create the destination data stream and reindex [tsds-reindex-op] -Invoke the reindex api, for instance: +Run the reindex operation using `op_type: create` to prevent overwrites: ```console POST /_reindex { "source": { - "index": "k8s" + "index": "old-tsds" }, "dest": { - "index": "k9s", + "index": "new-tsds", "op_type": "create" } } @@ -219,51 +122,45 @@ POST /_reindex ## Restore the destination index template [tsds-reindex-restore] -Once the reindexing operation completes, restore the index template for the destination TSDS as follows: +After reindexing completes, update the index template again to remove the temporary settings: * Remove the overrides for `index.time_series.start_time` and `index.time_series.end_time`. -* Restore the values of `index.number_of_shards`, `index.number_of_replicas` and `index.lifecycle.name` as applicable. - -Using the previous example, the destination template is modified as follows: +* Restore the values of `index.number_of_shards`, `index.number_of_replicas`, and `index.lifecycle.name` (as applicable). ```console -POST /_component_template/destination_template +PUT _index_template/new-tsds-template { + "index_patterns": ["new-tsds*"], + "priority": 100, + "data_stream": {}, "template": { - "settings": { - "index": { - "number_of_replicas": 2, - "number_of_shards": 2, - "mode": "time_series", - "routing_path": [ "metricset" ] - } - }, + "settings": { + "index.mode": "time_series", + "index.routing_path": ["host", "service"], + "index.number_of_replicas": 1, <1> + "index.lifecycle.name": "my-ilm-policy" <2> + }, "mappings": { - "properties": { - "@timestamp": { "type": "date" }, - "metricset": { - "type": "keyword", - "time_series_dimension": true - }, - "k8s": { - "properties": { - "tx": { "type": "long" }, - "rx": { "type": "long" } - } - } - } + ... } } } ``` -Next, Invoke the `rollover` api on the destination data stream without any conditions set. +1. Restore replicas +2. 
Re-enable ILM + +## Roll over for new data + +Create a new backing index with a manual rollover request: ```console -POST /k9s/_rollover/ +POST new-tsds/_rollover/ ``` -This creates a new backing index with the updated index settings. The destination data stream is now ready to accept new documents. +The destination data stream is now ready to accept new documents. -Note that the initial backing index can still accept documents within the range of timestamps derived from the source data stream. If this is not desired, mark it as [read-only](elasticsearch://reference/elasticsearch/index-settings/index-block.md#index-blocks-read-only) explicitly. +## Related resources +- [Time series data streams overview](/manage-data/data-store/data-streams/time-series-data-stream-tsds.md) +- [Reindex API](elasticsearch://reference/elasticsearch/docs-reindex) \ No newline at end of file diff --git a/manage-data/data-store/data-streams/set-up-tsds.md b/manage-data/data-store/data-streams/set-up-tsds.md index 1fa7aca55b..f21fd5321d 100644 --- a/manage-data/data-store/data-streams/set-up-tsds.md +++ b/manage-data/data-store/data-streams/set-up-tsds.md @@ -1,5 +1,5 @@ --- -navigation_title: Set up a TSDS +navigation_title: "Set up a TSDS" mapped_pages: - https://www.elastic.co/guide/en/elasticsearch/reference/current/set-up-tsds.html applies_to: @@ -11,30 +11,38 @@ products: # Set up a time series data stream [set-up-tsds] -To set up a [time series data stream (TSDS)](../data-streams/time-series-data-stream-tsds.md), complete these steps: +This page shows you how to set up a [time series data stream](/manage-data/data-store/data-streams/time-series-data-stream-tsds.md) (TSDS). -1. Check the [prerequisites](#tsds-prereqs). -2. [Create an index lifecycle policy](#tsds-ilm-policy). -3. [Create an index template](#create-tsds-index-template). -4. [Create the TSDS](#create-tsds). -5. [Secure the TSDS](#secure-tsds). 
+## Before you begin [tsds-prereqs] +- Before you create a time series data stream, review [](../data-streams.md) and [TSDS concepts](time-series-data-stream-tsds.md). You can also try the [quickstart](/manage-data/data-store/data-streams/quickstart-tsds.md) for a hands-on introduction. +- Make sure you have the following permissions: + - [Cluster privileges](elasticsearch://reference/elasticsearch/security-privileges.md#privileges-list-cluster) + - `manage_index_templates` for creating a template to base the TSDS on + - `manage_ilm` (Stack only) if you're using [index lifecycle management](#tsds-ilm-policy) + - [Index privileges](elasticsearch://reference/elasticsearch/security-privileges.md#privileges-list-indices) + - `create_doc` and `create_index` for creating or converting a TSDS + - `manage` to [roll over](#convert-existing-data-stream-to-tsds) a TSDS -## Prerequisites [tsds-prereqs] +## Set up a TSDS -* Before you create a TSDS, you should be familiar with [data streams](../data-streams.md) and [TSDS concepts](time-series-data-stream-tsds.md). -* To follow this tutorial, you must have the following permissions: - - * [Cluster privileges](elasticsearch://reference/elasticsearch/security-privileges.md#privileges-list-cluster): `manage_ilm` and `manage_index_templates`. - * [Index privileges](elasticsearch://reference/elasticsearch/security-privileges.md#privileges-list-indices): `create_doc` and `create_index` for any TSDS you create or convert. To roll over a TSDS, you must have the `manage` privilege. +:::::{stepper} +::::{step} Create an index lifecycle policy (optional) +:anchor: tsds-ilm-policy +```{applies_to} +stack: ga +serverless: unavailable +``` +In most cases, you can use a data stream lifecycle to manage your time series data stream. If you're using [data tiers](/manage-data/lifecycle/data-tiers.md) in {{stack}}, you can use index lifecycle management (ILM). 
+:::{dropdown} Create an ILM policy -## Create an index lifecycle policy [tsds-ilm-policy] +If you're using {{stack}}, {{ilm-init}} can help you manage a time series data stream's backing indices. {{ilm-init}} requires an index lifecycle policy. -While optional, we recommend using {{ilm-init}} to automate the management of your TSDS’s backing indices. {{ilm-init}} requires an index lifecycle policy. +For best results, specify a `max_age` criteria for the `rollover` action in the policy. This ensures the [`@timestamp` ranges](/manage-data/data-store/data-streams/time-bound-tsds.md) for the backing indices are consistent. For example, setting a `max_age` of `1d` for the `rollover` action ensures your backing indices consistently contain one day's worth of data. -We recommend you specify a `max_age` criteria for the `rollover` action in the policy. This ensures the [`@timestamp` ranges](time-series-data-stream-tsds.md#time-bound-indices) for the TSDS’s backing indices are consistent. For example, setting a `max_age` of `1d` for the `rollover` action ensures your backing indices consistently contain one day’s worth of data. +**Example:** ```console PUT _ilm/policy/my-weather-sensor-lifecycle-policy @@ -86,28 +94,26 @@ PUT _ilm/policy/my-weather-sensor-lifecycle-policy } } ``` +::: +:::: -## Create an index template [create-tsds-index-template] - -To setup a TSDS create an index template with the following details: - -* One or more index patterns that match the TSDS’s name. We recommend using our [data stream naming scheme](/reference/fleet/data-streams.md#data-streams-naming-scheme). -* Enable data streams. -* Specify a mapping that defines your dimensions and metrics: - - * One or more [dimension fields](time-series-data-stream-tsds.md#time-series-dimension) with a `time_series_dimension` value of `true`. 
Alternatively, one or more [pass-through](elasticsearch://reference/elasticsearch/mapping-reference/passthrough.md#passthrough-dimensions) fields configured as dimension containers, provided that they will contain at least one sub-field (mapped statically or dynamically). - * One or more [metric fields](time-series-data-stream-tsds.md#time-series-metric), marked using the `time_series_metric` mapping parameter. - * Optional: A `date` or `date_nanos` mapping for the `@timestamp` field. If you don’t specify a mapping, Elasticsearch maps `@timestamp` as a `date` field with default options. +::::{step} Create an index template +:anchor: create-tsds-index-template -* Define index settings: +The structure of a time series data stream is defined by an index template. Create an index template with the following required elements and settings: - * Set `index.mode` setting to `time_series`. - * Your lifecycle policy in the `index.lifecycle.name` index setting. - * Optional: Other index settings, such as [`index.number_of_replicas`](elasticsearch://reference/elasticsearch/index-settings/index-modules.md#dynamic-index-number-of-replicas), for your TSDS’s backing indices. +- **Index patterns:** One or more wildcard patterns matching the name of your TSDS, such as `weather-sensors-*`. For best results, use the [data stream naming scheme](/reference/fleet/data-streams.md#data-streams-naming-scheme). +- **Data stream object:** The template must include `"data_stream": {}`. +- **Time series mode:** Set `index.mode: time_series`. +- **Field mappings:** Define at least one `keyword` dimension field and typically one or more metric fields: + - To define a dimension, set `time_series_dimension` to `true`. Dimension fields identify the time series that each document belongs to. For more details, refer to [Dimensions](/manage-data/data-store/data-streams/time-series-data-stream-tsds.md#time-series-dimension). + - To define a metric, use the `time_series_metric` mapping parameter. 
A `gauge` metric can increase or decrease over time; a `counter` metric only increases. For more details, refer to [Metrics](/manage-data/data-store/data-streams/time-series-data-stream-tsds.md#time-series-metric). + - (Optional) Define a `date` or `date_nanos` mapping for the `@timestamp` field. If you don't specify a mapping, {{es}} maps `@timestamp` as a `date` field with default options. + * (Optional) Other index settings, such as [`index.number_of_replicas`](elasticsearch://reference/elasticsearch/index-settings/index-modules.md#dynamic-index-number-of-replicas), for the data stream's backing indices. +- A priority higher than `200`, to avoid [collisions](/manage-data/data-store/templates.md#avoid-index-pattern-collisions) with built-in templates. -* A priority higher than `200` to avoid collisions with built-in templates. See [Avoid index pattern collisions](../templates.md#avoid-index-pattern-collisions). -* Optional: Component templates containing your mappings and other index settings. +**Example index template PUT request:** ```console PUT _index_template/my-weather-sensor-index-template @@ -150,79 +156,105 @@ PUT _index_template/my-weather-sensor-index-template } ``` +:::{dropdown} Component templates (optional) -## Create the TSDS [create-tsds] +If you're using component templates with a time series data stream, check the following requirements: -[Indexing requests](use-data-stream.md#add-documents-to-a-data-stream) add documents to a TSDS. Documents in a TSDS must include: +- Each component template is valid on its own +- The `index.routing_path` setting and its referenced dimension fields are defined in the same component template +- The `time_series_dimension` attribute is enabled for fields referenced in `index.routing_path` +::: -* A `@timestamp` field -* One or more dimension fields. At least one dimension must match the `index.routing_path` index setting, if specified. 
If not specified explicitly, `index.routing_path` is set automatically to whichever mappings have `time_series_dimension` set to `true`. +:::: -To automatically create your TSDS, submit an indexing request that targets the TSDS’s name. This name must match one of your index template’s index patterns. +::::{step} Create the time series data stream and add data +:anchor: create-tsds -::::{important} -To test the following example, update the timestamps to within three hours of your current time. Data added to a TSDS must always fall within an [accepted time range](time-series-data-stream-tsds.md#tsds-accepted-time-range). -:::: +After creating the index template, you can create a time series data stream by [indexing a document](use-data-stream.md#add-documents-to-a-data-stream). The TSDS is created automatically when you index the first document, as long as the data stream name that the request targets matches one of the index template's index patterns. You can use a bulk API request or a POST request. +:::{important} +To test the following examples, update the timestamps to within three hours of your current time. Data added to a TSDS must fit the [accepted time range](/manage-data/data-store/data-streams/time-bound-tsds.md#tsds-accepted-time-range). 
+::: ```console -PUT metrics-weather_sensors-dev/_bulk +PUT metrics-weather-sensors/_bulk { "create":{ } } -{ "@timestamp": "2099-05-06T16:21:15.000Z", "sensor_id": "HAL-000001", "location": "plains", "temperature": 26.7,"humidity": 49.9 } +{ "@timestamp": "2099-05-06T16:21:15.000Z", "sensor_id": "SENSOR-001", "location": "warehouse-A", "temperature": 26.7,"humidity": 49.9 } { "create":{ } } -{ "@timestamp": "2099-05-06T16:25:42.000Z", "sensor_id": "SYKENET-000001", "location": "swamp", "temperature": 32.4, "humidity": 88.9 } +{ "@timestamp": "2099-05-06T16:25:42.000Z", "sensor_id": "SENSOR-002", "location": "warehouse-B", "temperature": 32.4, "humidity": 88.9 } +``` -POST metrics-weather_sensors-dev/_doc +```console +POST metrics-weather-sensors/_doc { "@timestamp": "2099-05-06T16:21:15.000Z", - "sensor_id": "SYKENET-000001", - "location": "swamp", + "sensor_id": "SENSOR-002", + "location": "warehouse-B", "temperature": 32.4, "humidity": 88.9 } ``` +:::: + +::::{step} Verify setup +To make sure your time series data stream is working, try some GET requests. -You can also manually create the TSDS using the [create data stream API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-create-data-stream). The TSDS’s name must still match one of your template’s index patterns. +View data stream details: ```console -PUT _data_stream/metrics-weather_sensors-dev +GET _data_stream/metrics-weather-sensors ``` +Check the document count in a time series data stream: -## Secure the TSDS [secure-tsds] +```console +GET metrics-weather-sensors/_count +``` -Use [index privileges](elasticsearch://reference/elasticsearch/security-privileges.md#privileges-list-indices) to control access to a TSDS. Granting privileges on a TSDS grants the same privileges on its backing indices. 
+Query the time series data: -For an example, refer to [Data stream privileges](../../../deploy-manage/users-roles/cluster-or-deployment-auth/granting-privileges-for-data-streams-aliases.md#data-stream-privileges). +```console +GET metrics-weather-sensors/_search +{ + "size": 5, + "sort": ["@timestamp"] +} +``` -## Convert an existing data stream to a TSDS [convert-existing-data-stream-to-tsds] +:::: -You can also use the above steps to convert an existing regular data stream to a TSDS. In this case, you’ll want to: -* Edit your existing index lifecycle policy, component templates, and index templates instead of creating new ones. -* Instead of creating the TSDS, manually roll over its write index. This ensures the current write index and any new backing indices have an [`index.mode` of `time_series`](time-series-data-stream-tsds.md#time-series-mode). +## Advanced setup - You can manually roll over the write index using the [rollover API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-rollover). +### Convert an existing data stream to a TSDS [convert-existing-data-stream-to-tsds] - ```console - POST metrics-weather_sensors-dev/_rollover - ``` +You can convert an existing regular data stream to a TSDS. Follow these steps: +1. Update your existing index template to include time series settings. Also update your index lifecycle policy (if any) and component templates (if any). +2. Use the [rollover API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-rollover) to manually roll over the existing data stream's write index, to apply the changes you made in step 1: +```console +POST metrics-weather-sensors/_rollover +``` -## A note about component templates and index.mode setting [set-up-component-templates] +:::{note} +After the rollover, new backing indices will have time series functionality. Existing backing indices are not affected by the rollover (because their `index.mode` cannot be changed). 
+::: -Configuring a TSDS via an index template that uses component templates is a bit more complicated. Typically with component templates mappings and settings get scattered across multiple component templates. If the `index.routing_path` is defined, the fields it references need to be defined in the same component template with the `time_series_dimension` attribute enabled. +### Secure a time series data stream [secure-tsds] -The reasons for this is that each component template needs to be valid on its own. When configuring the `index.mode` setting in an index template, the `index.routing_path` setting is configured automatically. It is derived from the field mappings with `time_series_dimension` attribute enabled. +To control access to a TSDS, use [index privileges](elasticsearch://reference/elasticsearch/security-privileges.md#privileges-list-indices). Privileges set on a TSDS also apply to the backing indices. +For an example, refer to [Data stream privileges](../../../deploy-manage/users-roles/cluster-or-deployment-auth/granting-privileges-for-data-streams-aliases.md#data-stream-privileges). -## What’s next? [set-up-tsds-whats-next] +% TODO Common patterns for time series data streams -Now that you’ve set up your TSDS, you can manage and use it like a regular data stream. For more information, refer to: +## Next steps [set-up-tsds-whats-next] -* [*Use a data stream*](use-data-stream.md) -* [Change mappings and settings for a data stream](modify-data-stream.md#data-streams-change-mappings-and-settings) -* [data stream APIs](https://www.elastic.co/docs/api/doc/elasticsearch/group/endpoint-data-stream) +Now that you've set up a time series data stream, you can manage and use it like a regular data stream. 
For more information, refer to: +* [Use a data stream](use-data-stream.md) for indexing and searching +* [Change data stream settings](modify-data-stream.md#data-streams-change-mappings-and-settings) as needed +* Query time series data using the {{esql}} [`TS` command](elasticsearch://reference/query-languages/esql/commands/ts.md) +* Use [data stream APIs](https://www.elastic.co/docs/api/doc/elasticsearch/group/endpoint-data-stream) \ No newline at end of file diff --git a/manage-data/data-store/data-streams/time-bound-tsds.md b/manage-data/data-store/data-streams/time-bound-tsds.md new file mode 100644 index 0000000000..61f6db1478 --- /dev/null +++ b/manage-data/data-store/data-streams/time-bound-tsds.md @@ -0,0 +1,76 @@ +--- +navigation_title: "Time-bound indices" +applies_to: + stack: ga + serverless: ga +products: + - id: elasticsearch +--- + +# Time-bound indices and dimension-based routing [time-bound-indices] + +Unlike regular data streams that only write to the most recent backing index, time series data streams (TSDS) use time-bound backing indices that accept documents based on their timestamp values. This page provides details and best practices to help you work with time-bound indices. + +## How time-bound indices work + +Each TSDS backing index has a time range for accepted `@timestamp` values, defined by two settings: + +- [`index.time_series.start_time`](elasticsearch://reference/elasticsearch/index-settings/time-series.md#index-time-series-start-time): The earliest accepted timestamp (inclusive) +- [`index.time_series.end_time`](elasticsearch://reference/elasticsearch/index-settings/time-series.md#index-time-series-end-time): The latest accepted timestamp (exclusive) + +When you add a document to a TSDS, {{es}} adds the document to the appropriate backing index based on its `@timestamp` value. This means a TSDS can write to multiple backing indices simultaneously, not just the most recent one. 
+ +:::{image} /manage-data/images/elasticsearch-reference-time-bound-indices.svg +:alt: time bound indices +::: + +If no backing index can accept a document's `@timestamp` value, {{es}} rejects the document. + +{{es}} automatically configures `index.time_series.start_time` and `index.time_series.end_time` settings as part of the index creation and rollover process. + +### Accepted time range for adding data [tsds-accepted-time-range] + +A TSDS is designed to ingest current metrics data. When the TSDS is first created, the initial backing index has the following settings: + +- An `index.time_series.start_time` value set to `now - index.look_back_time` +- An `index.time_series.end_time` value set to `now + index.look_ahead_time` + +Only data that falls within this range is indexed. + +To check the accepted time range for writing to a TSDS, use the [get data stream API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-get-data-stream): + +```console +GET _data_stream/my-tsds +``` + +::::{tip} +These {{ilm-init}} actions mark the source index as read-only or prevent writes for performance reasons: + - [Delete](elasticsearch://reference/elasticsearch/index-lifecycle-actions/ilm-delete.md) + - [Downsample](elasticsearch://reference/elasticsearch/index-lifecycle-actions/ilm-downsample.md) + - [Force merge](elasticsearch://reference/elasticsearch/index-lifecycle-actions/ilm-forcemerge.md) + - [Read only](elasticsearch://reference/elasticsearch/index-lifecycle-actions/ilm-readonly.md) + - [Searchable snapshot](elasticsearch://reference/elasticsearch/index-lifecycle-actions/ilm-searchable-snapshot.md) + - [Shrink](elasticsearch://reference/elasticsearch/index-lifecycle-actions/ilm-shrink.md) + + {{ilm-cap}} will **not** proceed with executing these actions until [`index.time_series.end_time`](elasticsearch://reference/elasticsearch/index-settings/time-series.md#index-time-series-end-time) has passed. 
+:::: + + +### Dimension-based routing [dimension-based-routing] + +In addition to time-based routing, time series data streams use dimension-based routing to determine which shard to route data to. Documents with the same dimensions are routed to the same shards. + +The [`index.routing_path`](elasticsearch://reference/elasticsearch/index-settings/time-series.md#index-routing-path) setting specifies the dimension fields to use for routing, for example: + +```console +"settings": { + "index.mode": "time_series", + "index.routing_path": ["host", "service"] +} +``` + +Documents with the same dimension values are routed to the same shard, improving compression and query performance for time series data. + +The `index.routing_path` setting supports wildcards (for example, `dim.*`) and can dynamically match new fields. + + diff --git a/manage-data/data-store/data-streams/time-series-data-stream-tsds.md b/manage-data/data-store/data-streams/time-series-data-stream-tsds.md index cd673edbf0..1bc3ac71ac 100644 --- a/manage-data/data-store/data-streams/time-series-data-stream-tsds.md +++ b/manage-data/data-store/data-streams/time-series-data-stream-tsds.md @@ -11,63 +11,68 @@ products: # Time series data streams [tsds] -A time series data stream (TSDS) models timestamped metrics data as one or more time series. +A time series data stream (TSDS) is a type of [data stream](/manage-data/data-store/data-streams.md) optimized for indexing metrics data. A TSDS helps you analyze a sequence of data points as a whole. -You can use a TSDS to store metrics data more efficiently. In our benchmarks, metrics data stored in a TSDS used 70% less disk space than a regular data stream. The exact impact will vary per data set. +A TSDS can also help you store metrics data more efficiently. In our benchmarks, metrics data stored in a TSDS used 70% less disk space than a regular data stream. The exact impact varies by data set. 
+Before setting up a time series data stream, make sure you're familiar with general [data stream](/manage-data/data-store/data-streams.md) concepts. -## When to use a TSDS [when-to-use-tsds] +## When to use a time series data stream [when-to-use-tsds] -Both a [regular data stream](../data-streams.md) and a TSDS can store timestamped metrics data. Only use a TSDS if you typically add metrics data to {{es}} in near real-time and `@timestamp` order. +Both a regular data stream and a time series data stream can store timestamped metrics data. -Use a time series data stream for metrics data only. For other timestamped data, such as logs or traces, use a [logs data stream](logs-data-stream.md) or regular data stream. +Choose a time series data stream if you typically add metrics data to {{es}} in near real-time and in `@timestamp` order. For other timestamped data, such as logs or traces, use a [logs data stream](logs-data-stream.md) or [regular data stream](/manage-data/data-store/data-streams.md). +### Differences from a regular data stream [differences-from-regular-data-stream] -## Differences from a regular data stream [differences-from-regular-data-stream] +A time series data stream works like a regular data stream, with some key differences: -A TSDS works like a regular data stream with some key differences: +* **Time series index mode:** The matching index template for a TSDS must include a `data_stream` object with `index.mode` set to `time_series`. This option enables most TSDS-related functionality. 
+* **Required fields:** In a TSDS, each document contains: + * A `@timestamp` field + * One or more [dimension fields](#time-series-dimension), set with `time_series_dimension: true` + * One or more [metric fields](#time-series-metric) (not strictly required, but typical for a TSDS) +* **Document IDs:** Time series documents use two IDs: + * An internal [`_tsid`](#tsid) metadata field, generated by {{es}} for each document in a TSDS and used for sorting and compression + * The document `_id`, a generated hash of the document's dimensions and `@timestamp` (custom `_id` values are not supported) +* **Backing indices:** A TSDS uses [time-bound indices](/manage-data/data-store/data-streams/time-bound-tsds.md) to store data from the same time period in the same backing index. +* **Dimension-based routing:** The matching index template for a TSDS must contain the `index.routing_path` index setting, which specifies dimensions for routing documents to shards. +* **Sorting:** A TSDS uses internal [index sorting](elasticsearch://reference/elasticsearch/index-settings/sorting.md) to order shard segments by `_tsid` and `@timestamp`, for better compression. Time series data streams do not use `index.sort.*` settings. +* **Synthetic source:** A TSDS uses [synthetic `_source`](elasticsearch://reference/elasticsearch/mapping-reference/mapping-source-field.md#synthetic-source), which has some [restrictions](elasticsearch://reference/elasticsearch/mapping-reference/mapping-source-field.md#synthetic-source-restrictions) and [modifications](elasticsearch://reference/elasticsearch/mapping-reference/mapping-source-field.md#synthetic-source-modifications). -* The matching index template for a TSDS requires a `data_stream` object with the [`index.mode: time_series`](#time-series-mode) option. This option enables most TSDS-related functionality. -* In addition to a `@timestamp`, each document in a TSDS must contain one or more [dimension fields](#time-series-dimension). 
The matching index template for a TSDS must contain mappings for at least one `keyword` dimension. - TSDS documents also typically contain one or more [metric fields](#time-series-metric). +## Query time series data -* {{es}} generates a hidden [`_tsid`](#tsid) metadata field for each document in a TSDS. -* A TSDS uses [time-bound backing indices](#time-bound-indices) to store data from the same time period in the same backing index. -* The matching index template for a TSDS must contain the `index.routing_path` index setting. A TSDS uses this setting to perform [dimension-based routing](#dimension-based-routing). -* A TSDS uses internal [index sorting](elasticsearch://reference/elasticsearch/index-settings/sorting.md) to order shard segments by `_tsid` and `@timestamp`. -* TSDS documents only support auto-generated document `_id` values. For TSDS documents, the document `_id` is a hash of the document’s dimensions and `@timestamp`. A TSDS doesn’t support custom document `_id` values. -* A TSDS uses [synthetic `_source`](elasticsearch://reference/elasticsearch/mapping-reference/mapping-source-field.md#synthetic-source), and as a result is subject to some [restrictions](elasticsearch://reference/elasticsearch/mapping-reference/mapping-source-field.md#synthetic-source-restrictions) and [modifications](elasticsearch://reference/elasticsearch/mapping-reference/mapping-source-field.md#synthetic-source-modifications) applied to the `_source` field. +You can use the {{esql}} [`TS` command](elasticsearch://reference/query-languages/esql/commands/ts.md) to query time series data streams. The `TS` command is optimized for time series data. It also enables the use of aggregation functions that efficiently process metrics per time series, before aggregating results. -::::{note} -A time series index can contain fields other than dimensions or metrics. -:::: - - - -## What is a time series? 
[time-series] +## Time series concepts [time-series] A time series is a sequence of observations for a specific entity. Together, these observations let you track changes to the entity over time. For example, a time series can track: -* CPU and disk usage for a computer -* The price of a stock -* Temperature and humidity readings from a weather sensor. +- CPU and disk usage for a computer +- The price of a stock +- Temperature and humidity readings from a weather sensor -:::{image} /manage-data/images/elasticsearch-reference-time-series-chart.svg +:::{image} /manage-data/images/elasticsearch-reference-time-series-chart2.svg :alt: time series chart :title: Time series of weather sensor readings plotted as a graph ::: -In a TSDS, each {{es}} document represents an observation, or data point, in a specific time series. Although a TSDS can contain multiple time series, a document can only belong to one time series. A time series can’t span multiple data streams. +In a TSDS, each {{es}} document represents an observation, or data point, in a specific time series. Although a TSDS can contain multiple time series, a document can belong to only one time series. A single time series can't span multiple data streams. + +### Time series fields +Compared to a regular data stream, a TSDS uses some additional fields specific to time series: dimension fields (required) and metric fields (optional but usually defined), plus an internal `_tsid` metadata field. -### Dimensions [time-series-dimension] +#### Dimensions [time-series-dimension] -Dimensions are field names and values that, in combination, identify a document’s time series. In most cases, a dimension describes some aspect of the entity you’re measuring. For example, documents related to the same weather sensor may always have the same `sensor_id` and `location` values. +Dimension fields often correspond to characteristics of the items you're measuring. 
For example, documents related to the same weather sensor might have the same `sensor_id` and `location` values. -A TSDS document is uniquely identified by its time series and timestamp, both of which are used to generate the document `_id`. So, two documents with the same dimensions and the same timestamp are considered to be duplicates. When you use the `_bulk` endpoint to add documents to a TSDS, a second document with the same timestamp and dimensions overwrites the first. When you use the `PUT //_create/<_id>` format to add an individual document and a document with the same `_id` already exists, an error is generated. +:::{tip} +{{es}} uses dimensions and timestamps to generate time series document `_id` values. Two documents with the same dimensions and timestamp are considered duplicates. +::: -You mark a field as a dimension using the boolean `time_series_dimension` mapping parameter. The following field types support the `time_series_dimension` parameter: +To mark a field as a dimension, set the Boolean `time_series_dimension` mapping parameter to `true`. The following field types support the `time_series_dimension` parameter: * [`keyword`](elasticsearch://reference/elasticsearch/mapping-reference/keyword.md#keyword-field-type) * [`ip`](elasticsearch://reference/elasticsearch/mapping-reference/ip.md) @@ -78,146 +83,48 @@ You mark a field as a dimension using the boolean `time_series_dimension` mappin * [`unsigned_long`](elasticsearch://reference/elasticsearch/mapping-reference/number.md) * [`boolean`](elasticsearch://reference/elasticsearch/mapping-reference/boolean.md) -For a flattened field, use the `time_series_dimensions` parameter to configure an array of fields as dimensions. For details refer to [`flattened`](elasticsearch://reference/elasticsearch/mapping-reference/flattened.md#flattened-params). 
- -Dimension definitions can be simplified through [pass-through](elasticsearch://reference/elasticsearch/mapping-reference/passthrough.md#passthrough-dimensions) fields. +:::{dropdown} Advanced field types +To work with a flattened field, use the `time_series_dimensions` parameter to configure an array of fields as dimensions. For details, refer to [`flattened`](elasticsearch://reference/elasticsearch/mapping-reference/flattened.md#flattened-params). +You can also simplify dimension definitions by using [pass-through](elasticsearch://reference/elasticsearch/mapping-reference/passthrough.md#passthrough-dimensions) fields. +::: -### Metrics [time-series-metric] +#### Metrics [time-series-metric] -Metrics are fields that contain numeric measurements, as well as aggregations and/or downsampling values based off of those measurements. While not required, documents in a TSDS typically contain one or more metric fields. +Metrics are numeric measurements that change over time. Although metrics are not required, documents in a TSDS typically contain one or more metric fields. -Metrics differ from dimensions in that while dimensions generally remain constant, metrics are expected to change over time, even if rarely or slowly. +:::{tip} +Metrics are expected to change (even if rarely or slowly), while dimensions generally remain constant. +::: -To mark a field as a metric, you must specify a metric type using the `time_series_metric` mapping parameter. The following field types support the `time_series_metric` parameter: +To mark a field as a metric, use the `time_series_metric` mapping parameter. This parameter ensures data is stored in an optimal way for time series analysis. 
The following field types support the `time_series_metric` parameter: * [`aggregate_metric_double`](elasticsearch://reference/elasticsearch/mapping-reference/aggregate-metric-double.md) * All [numeric field types](elasticsearch://reference/elasticsearch/mapping-reference/number.md) -Accepted metric types vary based on the field type: +The valid values for `time_series_metric` are `counter` and `gauge`: -:::::{dropdown} Valid values for time_series_metric `counter` -: A cumulative metric that only monotonically increases or resets to `0` (zero). For example, a count of errors or completed tasks. - - A counter field has additional semantic meaning, because it represents a cumulative counter. This works well with the `rate` aggregation, since a rate can be derived from a cumulative monotonically increasing counter. However a number of aggregations (for example `sum`) compute results that don’t make sense for a counter field, because of its cumulative nature. - - Only numeric and `aggregate_metric_double` fields support the `counter` metric type. - - -::::{note} -Due to the cumulative nature of counter fields, the following aggregations are supported and expected to provide meaningful results with the `counter` field: `rate`, `histogram`, `range`, `min`, `max`, `top_metrics` and `variable_width_histogram`. In order to prevent issues with existing integrations and custom dashboards, we also allow the following aggregations, even if the result might be meaningless on counters: `avg`, `box plot`, `cardinality`, `extended stats`, `median absolute deviation`, `percentile ranks`, `percentiles`, `stats`, `sum` and `value count`. -:::: - +: A cumulative metric that only monotonically increases or resets to `0` (zero). For example, a count of errors or completed tasks. `gauge` -: A metric that represents a single numeric that can arbitrarily increase or decrease. For example, a temperature or available disk space. 
+: A metric that represents a single numeric value that can arbitrarily increase or decrease. For example, a temperature or available disk space. - Only numeric and `aggregate_metric_double` fields support the `gauge` metric type. - - -`null` (Default) -: Not a time series metric. ::::: +#### `_tsid` metadata field [tsid] +The `_tsid` is an automatically generated object containing the document’s dimensions. It's intended for internal {{es}} use, so in most cases you won't need to work with it. -## Time series mode [time-series-mode] - -The matching index template for a TSDS must contain a `data_stream` object with the `index_mode: time_series` option. This option ensures the TSDS creates backing indices with an [`index.mode`](elasticsearch://reference/elasticsearch/index-settings/time-series.md#index-mode) setting of `time_series`. This setting enables most TSDS-related functionality in the backing indices. - -If you convert an existing data stream to a TSDS, only backing indices created after the conversion have an `index.mode` of `time_series`. You can’t change the `index.mode` of an existing backing index. - - -### `_tsid` metadata field [tsid] - -When you add a document to a TSDS, {{es}} automatically generates a `_tsid` metadata field for the document. The `_tsid` is an object containing the document’s dimensions. Documents in the same TSDS with the same `_tsid` are part of the same time series. - -The `_tsid` field is not queryable or updatable. You also can’t retrieve a document’s `_tsid` using a [get document](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-get) request. However, you can use the `_tsid` field in aggregations and retrieve the `_tsid` value in searches using the [`fields` parameter](elasticsearch://reference/elasticsearch/rest-apis/retrieve-selected-fields.md#search-fields-param). - -::::{warning} -The format of the `_tsid` field shouldn’t be relied upon. It may change from version to version. 
-:::: - - - -### Time-bound indices [time-bound-indices] - -In a TSDS, each backing index, including the most recent backing index, has a range of accepted `@timestamp` values. This range is defined by the [`index.time_series.start_time`](elasticsearch://reference/elasticsearch/index-settings/time-series.md#index-time-series-start-time) and [`index.time_series.end_time`](elasticsearch://reference/elasticsearch/index-settings/time-series.md#index-time-series-end-time) index settings. - -When you add a document to a TSDS, {{es}} adds the document to the appropriate backing index based on its `@timestamp` value. As a result, a TSDS can add documents to any TSDS backing index that can receive writes. This applies even if the index isn’t the most recent backing index. - -:::{image} /manage-data/images/elasticsearch-reference-time-bound-indices.svg -:alt: time bound indices -::: - -::::{tip} -Some {{ilm-init}} actions mark the source index as read-only, or expect the index to not be actively written anymore in order to provide good performance. These actions are: - [Delete](elasticsearch://reference/elasticsearch/index-lifecycle-actions/ilm-delete.md) - [Downsample](elasticsearch://reference/elasticsearch/index-lifecycle-actions/ilm-downsample.md) - [Force merge](elasticsearch://reference/elasticsearch/index-lifecycle-actions/ilm-forcemerge.md) - [Read only](elasticsearch://reference/elasticsearch/index-lifecycle-actions/ilm-readonly.md) - [Searchable snapshot](elasticsearch://reference/elasticsearch/index-lifecycle-actions/ilm-searchable-snapshot.md) - [Shrink](elasticsearch://reference/elasticsearch/index-lifecycle-actions/ilm-shrink.md) {{ilm-cap}} will **not** proceed with executing these actions until the upper time-bound for accepting writes, represented by the [`index.time_series.end_time`](elasticsearch://reference/elasticsearch/index-settings/time-series.md#index-time-series-end-time) index setting, has lapsed. 
-:::: - - -If no backing index can accept a document’s `@timestamp` value, {{es}} rejects the document. - -{{es}} automatically configures `index.time_series.start_time` and `index.time_series.end_time` settings as part of the index creation and rollover process. - - -### Look-ahead time [tsds-look-ahead-time] - -Use the [`index.look_ahead_time`](elasticsearch://reference/elasticsearch/index-settings/time-series.md#index-look-ahead-time) index setting to configure how far into the future you can add documents to an index. When you create a new write index for a TSDS, {{es}} calculates the index’s `index.time_series.end_time` value as: - -`now + index.look_ahead_time` - -At the time series poll interval (controlled via `time_series.poll_interval` setting), {{es}} checks if the write index has met the rollover criteria in its index lifecycle policy. If not, {{es}} refreshes the `now` value and updates the write index’s `index.time_series.end_time` to: - -`now + index.look_ahead_time + time_series.poll_interval` - -This process continues until the write index rolls over. When the index rolls over, {{es}} sets a final `index.time_series.end_time` value for the index. This value borders the `index.time_series.start_time` for the new write index. This ensures the `@timestamp` ranges for neighboring backing indices always border but never overlap. - - -### Look-back time [tsds-look-back-time] - -Use the [`index.look_back_time`](elasticsearch://reference/elasticsearch/index-settings/time-series.md#index-look-back-time) index setting to configure how far in the past you can add documents to an index. When you create a data stream for a TSDS, {{es}} calculates the index’s `index.time_series.start_time` value as: - -`now - index.look_back_time` - -This setting is only used when a data stream gets created and controls the `index.time_series.start_time` index setting of the first backing index. 
Configuring this index setting can be useful to accept documents with `@timestamp` field values that are older than 2 hours (the `index.look_back_time` default). - - -### Accepted time range for adding data [tsds-accepted-time-range] - -A TSDS is designed to ingest current metrics data. When the TSDS is first created the initial backing index has: - -* an `index.time_series.start_time` value set to `now - index.look_back_time` -* an `index.time_series.end_time` value set to `now + index.look_ahead_time` - -Only data that falls inside that range can be indexed. - -You can use the [get data stream API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-get-data-stream) to check the accepted time range for writing to any TSDS. - - -### Dimension-based routing [dimension-based-routing] - -Within each TSDS backing index, {{es}} uses the [`index.routing_path`](elasticsearch://reference/elasticsearch/index-settings/time-series.md#index-routing-path) index setting to route documents with the same dimensions to the same shards. - -When you create the matching index template for a TSDS, you must specify one or more dimensions in the `index.routing_path` setting. Each document in a TSDS must contain one or more dimensions that match the `index.routing_path` setting. - -The `index.routing_path` setting accepts wildcard patterns (for example `dim.*`) and can dynamically match new fields. However, {{es}} will reject any mapping updates that add scripted, runtime, or non-dimension fields that match the `index.routing_path` value. - -[Pass-through](elasticsearch://reference/elasticsearch/mapping-reference/passthrough.md#passthrough-dimensions) fields may be configured as dimension containers. In this case, their sub-fields get included to the routing path automatically. - -TSDS documents don’t support a custom `_routing` value. Similarly, you can’t require a `_routing` value in mappings for a TSDS. 
- - -### Index sorting [tsds-index-sorting] - -{{es}} uses [compression algorithms](elasticsearch://reference/elasticsearch/index-settings/index-modules.md#index-codec) to compress repeated values. This compression works best when repeated values are stored near each other — in the same index, on the same shard, and side-by-side in the same shard segment. - -Most time series data contains repeated values. Dimensions are repeated across documents in the same time series. The metric values of a time series may also change slowly over time. - -Internally, each TSDS backing index uses [index sorting](elasticsearch://reference/elasticsearch/index-settings/sorting.md) to order its shard segments by `_tsid` and `@timestamp`. This makes it more likely that these repeated values are stored near each other for better compression. A TSDS doesn’t support any [`index.sort.*`](elasticsearch://reference/elasticsearch/index-settings/sorting.md) index settings. +- You **can't** query or update the internal `_tsid` field. +- You **can** use the `_tsid` in aggregations. +- To retrieve the value of `_tsid`, use the fields parameter in a search. The `_tsid` is not included in get document responses. +- The format of the `_tsid` field is subject to change. -## What’s next? [tsds-whats-next] +## Next steps [tsds-whats-next] -Now that you know the basics, you’re ready to [create a TSDS](../data-streams/set-up-tsds.md) or [convert an existing data stream to a TSDS](../data-streams/set-up-tsds.md#convert-existing-data-stream-to-tsds). 
+* Try the [quickstart](/manage-data/data-store/data-streams/quickstart-tsds.md) for a hands-on introduction +* [Set up a time series data stream](/manage-data/data-store/data-streams/set-up-tsds.md) +* Learn about [downsampling](/manage-data/data-store/data-streams/downsampling-time-series-data-stream.md) to reduce storage footprint \ No newline at end of file diff --git a/manage-data/images/elasticsearch-reference-time-series-chart.svg b/manage-data/images/elasticsearch-reference-time-series-chart.svg deleted file mode 100644 index ce09bde046..0000000000 --- a/manage-data/images/elasticsearch-reference-time-series-chart.svg +++ /dev/null @@ -1 +0,0 @@ - \ No newline at end of file diff --git a/manage-data/images/elasticsearch-reference-time-series-chart2.svg b/manage-data/images/elasticsearch-reference-time-series-chart2.svg new file mode 100644 index 0000000000..3d87b1c83e --- /dev/null +++ b/manage-data/images/elasticsearch-reference-time-series-chart2.svg @@ -0,0 +1,46 @@ + + + + + + + + + + + + + 40 + 38 + 36 + 34 + + + + + 7:00:00 + 7:02:00 + 7:04:00 + 7:06:00 + + + + + @timestamp + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/manage-data/toc.yml b/manage-data/toc.yml index b88c4737b2..f6604aad98 100644 --- a/manage-data/toc.yml +++ b/manage-data/toc.yml @@ -20,7 +20,10 @@ toc: - file: data-store/data-streams/downsampling-concepts.md - file: data-store/data-streams/run-downsampling.md - file: data-store/data-streams/query-downsampled-data.md - - file: data-store/data-streams/reindex-tsds.md + - file: data-store/data-streams/advanced-topics-tsds.md + children: + - file: data-store/data-streams/time-bound-tsds.md + - file: data-store/data-streams/reindex-tsds.md - file: data-store/data-streams/logs-data-stream.md - file: data-store/data-streams/failure-store.md children: From b68069d8e89fd91a7d228344cc1c4185de629d03 Mon Sep 17 00:00:00 2001 From: Marci W <333176+marciw@users.noreply.github.com> Date: Mon, 29 Sep 2025 21:03:02 -0400 
Subject: [PATCH 02/14] temp anchors --- .../data-streams/time-series-data-stream-tsds.md | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/manage-data/data-store/data-streams/time-series-data-stream-tsds.md b/manage-data/data-store/data-streams/time-series-data-stream-tsds.md index 1bc3ac71ac..e07038e48a 100644 --- a/manage-data/data-store/data-streams/time-series-data-stream-tsds.md +++ b/manage-data/data-store/data-streams/time-series-data-stream-tsds.md @@ -127,4 +127,11 @@ The `_tsid` is an automatically generated object containing the document’s dim * Try the [quickstart](/manage-data/data-store/data-streams/quickstart-tsds.md) for a hands-on introduction * [Set up a time series data stream](/manage-data/data-store/data-streams/set-up-tsds.md) -* Learn about [downsampling](/manage-data/data-store/data-streams/downsampling-time-series-data-stream.md) to reduce storage footprint \ No newline at end of file +* Learn about [downsampling](/manage-data/data-store/data-streams/downsampling-time-series-data-stream.md) to reduce storage footprint + +% suppress anchor warnings until chicken-and-egg resolved +##### [time-series-mode] +##### [tsds-look-ahead-time] +##### [tsds-look-back-time] +##### [time-bound-indices] +##### [dimension-based-routing] \ No newline at end of file From 9bbda53b2a2c9114a96456535fa2a5d26a247c12 Mon Sep 17 00:00:00 2001 From: Marci W <333176+marciw@users.noreply.github.com> Date: Mon, 29 Sep 2025 21:06:43 -0400 Subject: [PATCH 03/14] temp anchor --- .../data-store/data-streams/time-series-data-stream-tsds.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/manage-data/data-store/data-streams/time-series-data-stream-tsds.md b/manage-data/data-store/data-streams/time-series-data-stream-tsds.md index e07038e48a..9d49ca0ba8 100644 --- a/manage-data/data-store/data-streams/time-series-data-stream-tsds.md +++ b/manage-data/data-store/data-streams/time-series-data-stream-tsds.md @@ -134,4 +134,5 @@ The 
`_tsid` is an automatically generated object containing the document’s dim ##### [tsds-look-ahead-time] ##### [tsds-look-back-time] ##### [time-bound-indices] -##### [dimension-based-routing] \ No newline at end of file +##### [dimension-based-routing] +##### [tsds-accepted-time-range] \ No newline at end of file From d4fe9bc9dc43e6b08b634d10a064d11213b9cc98 Mon Sep 17 00:00:00 2001 From: Felix Barnsteiner Date: Tue, 30 Sep 2025 17:21:00 +0200 Subject: [PATCH 04/14] Document `index.dimensions`-based routing --- .../data-store/data-streams/time-bound-tsds.md | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/manage-data/data-store/data-streams/time-bound-tsds.md b/manage-data/data-store/data-streams/time-bound-tsds.md index 61f6db1478..5f24f9b535 100644 --- a/manage-data/data-store/data-streams/time-bound-tsds.md +++ b/manage-data/data-store/data-streams/time-bound-tsds.md @@ -58,7 +58,17 @@ These {{ilm-init}} actions mark the source index as read-only or prevent writes ### Dimension-based routing [dimension-based-routing] -In addition to time-based routing, time series data streams use dimension-based routing to determine which shard to route data to. Documents with the same dimensions are routed to the same shards. +In addition to time-based routing, time series data streams use dimension-based routing to determine which shard to route data to. Documents with the same dimensions are routed to the same shards using one of two strategies: + +1. Based on the internally managed `index.dimensions` index setting (preferred). Available as of stack version 9.2. +2. Based on the [`index.routing_path`](elasticsearch://reference/elasticsearch/index-settings/time-series.md#index-routing-path) index setting (as a fallback). + +The `index.dimensions`-based strategy offers better ingest performance. +It uses a list of dimension paths that's automatically kept up to date and is not user-configurable. 
+This strategy is not available for time series data streams with dynamic templates that set `time_series_dimension: true`. + +It can be disabled by setting [`index.index_dimensions_tsid_strategy_enabled`](elasticsearch://reference/elasticsearch/index-settings/time-series.md#index-dimensions-tsid-strategy-enabled) to `false`, +or by manually setting `index.routing_path`. The [`index.routing_path`](elasticsearch://reference/elasticsearch/index-settings/time-series.md#index-routing-path) setting specifies the dimension fields to use for routing, for example: From 666dd350c5553c613aa822648f35bd41fa267f76 Mon Sep 17 00:00:00 2001 From: Marci W <333176+marciw@users.noreply.github.com> Date: Sat, 4 Oct 2025 08:43:49 -0400 Subject: [PATCH 05/14] fix link --- manage-data/data-store/data-streams/reindex-tsds.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/manage-data/data-store/data-streams/reindex-tsds.md b/manage-data/data-store/data-streams/reindex-tsds.md index 73b2865617..cc69f6a92c 100644 --- a/manage-data/data-store/data-streams/reindex-tsds.md +++ b/manage-data/data-store/data-streams/reindex-tsds.md @@ -163,4 +163,4 @@ The destination data stream is now ready to accept new documents. 
## Related resources - [Time series data streams overview](/manage-data/data-store/data-streams/time-series-data-stream-tsds.md) -- [Reindex API](elasticsearch://reference/elasticsearch/docs-reindex) \ No newline at end of file +- [Reindex API](elasticsearch://reference/elasticsearch/docs-reindex.md) \ No newline at end of file From b7a43d261b33aed7ce80bb992f7ae4d7cfe505c9 Mon Sep 17 00:00:00 2001 From: Marci W <333176+marciw@users.noreply.github.com> Date: Sat, 4 Oct 2025 08:55:46 -0400 Subject: [PATCH 06/14] fix link for real --- manage-data/data-store/data-streams/reindex-tsds.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/manage-data/data-store/data-streams/reindex-tsds.md b/manage-data/data-store/data-streams/reindex-tsds.md index cc69f6a92c..8cb17aefdc 100644 --- a/manage-data/data-store/data-streams/reindex-tsds.md +++ b/manage-data/data-store/data-streams/reindex-tsds.md @@ -163,4 +163,4 @@ The destination data stream is now ready to accept new documents. ## Related resources - [Time series data streams overview](/manage-data/data-store/data-streams/time-series-data-stream-tsds.md) -- [Reindex API](elasticsearch://reference/elasticsearch/docs-reindex.md) \ No newline at end of file +- [Reindex API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-reindex) \ No newline at end of file From 7141fff19db00a824e1bfd28f44f48c3d540956d Mon Sep 17 00:00:00 2001 From: Marci W <333176+marciw@users.noreply.github.com> Date: Sat, 4 Oct 2025 10:23:03 -0400 Subject: [PATCH 07/14] reorder as suggested in review --- .../data-store/data-streams/reindex-tsds.md | 2 +- .../time-series-data-stream-tsds.md | 52 +++++++++---------- 2 files changed, 26 insertions(+), 28 deletions(-) diff --git a/manage-data/data-store/data-streams/reindex-tsds.md b/manage-data/data-store/data-streams/reindex-tsds.md index 8cb17aefdc..ba1f215662 100644 --- a/manage-data/data-store/data-streams/reindex-tsds.md +++ 
b/manage-data/data-store/data-streams/reindex-tsds.md @@ -104,7 +104,7 @@ PUT _index_template/new-tsds-template ### Create the destination data stream and reindex [tsds-reindex-op] -Run the reindex operation using `op_type: create` to prevent overwrites: +Run the reindex operation: ```console POST /_reindex diff --git a/manage-data/data-store/data-streams/time-series-data-stream-tsds.md b/manage-data/data-store/data-streams/time-series-data-stream-tsds.md index 9d49ca0ba8..a84edc1d73 100644 --- a/manage-data/data-store/data-streams/time-series-data-stream-tsds.md +++ b/manage-data/data-store/data-streams/time-series-data-stream-tsds.md @@ -19,33 +19,13 @@ Before setting up a time series data stream, make sure you're familiar with gene ## When to use a time series data stream [when-to-use-tsds] -Both a regular data stream and a time series data stream can store timestamped metrics data. +_Metrics_ consist of data point–timestamp pairs, identified by [dimension fields]() that can be used in aggregation queries. Both a regular data stream and a time series data stream can store metrics data. Choose a time series data stream if you typically add metrics data to {{es}} in near real-time and in `@timestamp` order. For other timestamped data, such as logs or traces, use a [logs data stream](logs-data-stream.md) or [regular data stream](/manage-data/data-store/data-streams.md). -### Differences from a regular data stream [differences-from-regular-data-stream] - -A time series data stream works like a regular data stream, with some key differences: - -* **Time series index mode:** The matching index template for a TSDS must include a `data_stream` object with `index.mode` set to `time_series`. This option enables most TSDS-related functionality. 
-* **Required fields:** In a TSDS, each document contains: - * A `@timestamp` field - * One or more [dimension fields](#time-series-dimension), set with `time_series_dimension: true` - * One or more [metric fields](#time-series-metric) (not strictly required, but typical for a TSDS) -* **Document IDs:** Time series documents use two IDs: - * An internal [`_tsid`](#tsid) metadata field, generated by {{es}} for each document in a TSDS and used for sorting and compression - * The document `_id`, a generated hash of the document's dimensions and `@timestamp` (custom `_id` values are not supported) -* **Backing indices:** A TSDS uses [time-bound indices](/manage-data/data-store/data-streams/time-bound-tsds.md) to store data from the same time period in the same backing index. -* **Dimension-based routing:** The matching index template for a TSDS must contain the `index.routing_path` index setting, which specifies dimensions for routing documents to shards. -* **Sorting:** A TSDS uses internal [index sorting](elasticsearch://reference/elasticsearch/index-settings/sorting.md) to order shard segments by `_tsid` and `@timestamp`, for better compression. Time series data streams do not use `index.sort.*` settings. -* **Synthetic source:** A TSDS uses [synthetic `_source`](elasticsearch://reference/elasticsearch/mapping-reference/mapping-source-field.md#synthetic-source), which has some [restrictions](elasticsearch://reference/elasticsearch/mapping-reference/mapping-source-field.md#synthetic-source-restrictions) and [modifications](elasticsearch://reference/elasticsearch/mapping-reference/mapping-source-field.md#synthetic-source-modifications). +To make sure a TSDS is right for your use case, review the list of [differences from a regular data stream](#differences-from-regular-data-stream) on this page. - -## Query time series data - -You can use the {{esql}} [`TS` command](elasticsearch://reference/query-languages/esql/commands/ts.md) to query time series data streams. 
The `TS` command is optimized for time series data. It also enables the use of aggregation functions that efficiently process metrics per time series, before aggregating results. - -## Time series concepts [time-series] +## Time series overview [time-series] A time series is a sequence of observations for a specific entity. Together, these observations let you track changes to the entity over time. For example, a time series can track: @@ -91,7 +71,7 @@ You can also simplify dimension definitions by using [pass-through](elasticsearc #### Metrics [time-series-metric] -Metrics are numeric measurements that change over time. Although metrics are not required, documents in a TSDS typically contain one or more metric fields. +Metrics are numeric measurements that change over time. Documents in a TSDS contain one or more metric fields. :::{tip} Metrics are expected to change (even if rarely or slowly), while dimensions generally remain constant. @@ -110,9 +90,6 @@ The valid values for `time_series_metric` are `counter` and `gauge`: `gauge` : A metric that represents a single numeric that can arbitrarily increase or decrease. For example, a temperature or available disk space. - -::::: - #### `_tsid` metadata field [tsid] The `_tsid` is an automatically generated object containing the document’s dimensions. It's intended for internal {{es}} use, so in most cases you won't need to work with it. @@ -122,6 +99,27 @@ The `_tsid` is an automatically generated object containing the document’s dim - To retrieve the value of `_tsid`, use the fields parameter in a search. The `_tsid` is not included in get document responses. - The format of the `_tsid` field is subject to change. 
+### Differences from a regular data stream [differences-from-regular-data-stream] + +A time series data stream works like a regular data stream, with some key differences: + +* **Time series index mode:** The matching index template for a TSDS must include a `data_stream` object with `index.mode` set to `time_series`. This option enables most TSDS-related functionality. +* **Required fields:** In a TSDS, each document contains: + * A `@timestamp` field + * One or more [dimension fields](#time-series-dimension), set with `time_series_dimension: true` + * One or more [metric fields](#time-series-metric) (not strictly required, but typical for a TSDS) +* **Document IDs:** Time series documents use two IDs: + * An internal [`_tsid`](#tsid) metadata field, generated by {{es}} for each document in a TSDS and used for sorting and compression + * The document `_id`, a generated hash of the document's dimensions and `@timestamp` (custom `_id` values are not supported) +* **Backing indices:** A TSDS uses [time-bound indices](/manage-data/data-store/data-streams/time-bound-tsds.md) to store data from the same time period in the same backing index. +* **Dimension-based routing:** The matching index template for a TSDS must contain the `index.routing_path` index setting, which specifies dimensions for routing documents to shards. +* **Sorting:** A TSDS uses internal [index sorting](elasticsearch://reference/elasticsearch/index-settings/sorting.md) to order shard segments by `_tsid` and `@timestamp`, for better compression. Time series data streams do not use `index.sort.*` settings. 
+* **Synthetic source:** A TSDS uses [synthetic `_source`](elasticsearch://reference/elasticsearch/mapping-reference/mapping-source-field.md#synthetic-source), which has some [restrictions](elasticsearch://reference/elasticsearch/mapping-reference/mapping-source-field.md#synthetic-source-restrictions) and [modifications](elasticsearch://reference/elasticsearch/mapping-reference/mapping-source-field.md#synthetic-source-modifications). + +## Query time series data + +You can use the {{esql}} [`TS` command](elasticsearch://reference/query-languages/esql/commands/ts.md) to query time series data streams. The `TS` command is optimized for time series data. It also enables the use of aggregation functions that efficiently process metrics per time series, before aggregating results. + ## Next steps [tsds-whats-next] From 947e8d52a8df35a4f6401645406dbaabe9ef53db Mon Sep 17 00:00:00 2001 From: Marci W <333176+marciw@users.noreply.github.com> Date: Sun, 5 Oct 2025 18:02:55 -0400 Subject: [PATCH 08/14] Address comments and suggestions from reviewers --- .../data-store/data-streams/set-up-tsds.md | 89 +++---------------- .../data-streams/time-bound-tsds.md | 4 +- .../time-series-data-stream-tsds.md | 38 ++++---- 3 files changed, 27 insertions(+), 104 deletions(-) diff --git a/manage-data/data-store/data-streams/set-up-tsds.md b/manage-data/data-store/data-streams/set-up-tsds.md index f21fd5321d..7abf1ae833 100644 --- a/manage-data/data-store/data-streams/set-up-tsds.md +++ b/manage-data/data-store/data-streams/set-up-tsds.md @@ -19,7 +19,6 @@ This page shows you how to set up a [time series data stream](/manage-data/data- - Make sure you have the following permissions: - [Cluster privileges](elasticsearch://reference/elasticsearch/security-privileges.md#privileges-list-cluster) - `manage_index_templates` for creating a template to base the TSDS on - - `manage_ilm` (Stack only) if you're using [index lifecycle management](#tsds-ilm-policy) - [Index 
privileges](elasticsearch://reference/elasticsearch/security-privileges.md#privileges-list-indices) - `create_doc` and `create_index` for creating or converting a TSDS - `manage` to [roll over](#convert-existing-data-stream-to-tsds) a TSDS @@ -27,77 +26,6 @@ This page shows you how to set up a [time series data stream](/manage-data/data- ## Set up a TSDS :::::{stepper} -::::{step} Create an index lifecycle policy (optional) -:anchor: tsds-ilm-policy - -```{applies_to} -stack: ga -serverless: unavailable -``` -In most cases, you can use a data stream lifecycle to manage your time series data stream. If you're using [data tiers](/manage-data/lifecycle/data-tiers.md) in {{stack}}, you can use index lifecycle management (ILM). - -:::{dropdown} Create an ILM policy - -If you're using {{stack}}, {{ilm-init}} can help you manage a time series data stream's backing indices. {{ilm-init}} requires an index lifecycle policy. - -For best results, specify a `max_age` criteria for the `rollover` action in the policy. This ensures the [`@timestamp` ranges](/manage-data/data-store/data-streams/time-bound-tsds.md) for the backing indices are consistent. For example, setting a `max_age` of `1d` for the `rollover` action ensures your backing indices consistently contain one day's worth of data. 
- -**Example:** - -```console -PUT _ilm/policy/my-weather-sensor-lifecycle-policy -{ - "policy": { - "phases": { - "hot": { - "actions": { - "rollover": { - "max_age": "1d", - "max_primary_shard_size": "50gb" - } - } - }, - "warm": { - "min_age": "30d", - "actions": { - "shrink": { - "number_of_shards": 1 - }, - "forcemerge": { - "max_num_segments": 1 - } - } - }, - "cold": { - "min_age": "60d", - "actions": { - "searchable_snapshot": { - "snapshot_repository": "found-snapshots" - } - } - }, - "frozen": { - "min_age": "90d", - "actions": { - "searchable_snapshot": { - "snapshot_repository": "found-snapshots" - } - } - }, - "delete": { - "min_age": "735d", - "actions": { - "delete": {} - } - } - } - } -} -``` -::: - -:::: - ::::{step} Create an index template :anchor: create-tsds-index-template @@ -106,9 +34,10 @@ The structure of a time series data stream is defined by an index template. Crea - **Index patterns:** One or more wildcard patterns matching the name of your TSDS, such as `weather-sensors-*`. For best results, use the [data stream naming scheme](/reference/fleet/data-streams.md#data-streams-naming-scheme). - **Data stream object:** The template must include `"data_stream": {}`. - **Time series mode:** Set `index.mode: time_series`. -- **Field mappings:** Define at least one `keyword` dimension field and typically one or more metric fields: - - To define a dimension, set `time_series_dimension` to `true`. Dimension fields like `counter` only increase over time. For more details, refer to [Dimensions](/manage-data/data-store/data-streams/time-series-data-stream-tsds.md#time-series-dimension). - - To define a metric, use the `time_series_metric` mapping parameter. Metric fields like `gauge` can increase or decrease over time. For more details, refer to [Metrics](/manage-data/data-store/data-streams/time-series-data-stream-tsds.md#time-series-metric).
+- **Field mappings:** Define at least one dimension field and typically one or more metric fields: + - To define a dimension, set `time_series_dimension` to `true`. For more details, refer to [Dimensions](/manage-data/data-store/data-streams/time-series-data-stream-tsds.md#time-series-dimension). + - To define dimensions dynamically, you can use a pass-through object. For details, refer to [Defining sub-fields as time series dimensions](elasticsearch://reference/elasticsearch/mapping-reference/passthrough.md#passthrough-dimensions). + - To define a metric, use the `time_series_metric` mapping parameter. For more details, refer to [Metrics](/manage-data/data-store/data-streams/time-series-data-stream-tsds.md#time-series-metric). - (Optional) Define a `date` or `date_nanos` mapping for the `@timestamp` field. If you don't specify a mapping, {{es}} maps `@timestamp` as a `date` field with default options. * (Optional) Other index settings, such as [`index.number_of_replicas`](elasticsearch://reference/elasticsearch/index-settings/index-modules.md#dynamic-index-number-of-replicas), for the data stream's backing indices. - A priority higher than `200`, to avoid [collisions](/manage-data/data-store/templates.md#avoid-index-pattern-collisions) with built-in templates. @@ -173,7 +102,7 @@ If you're using component templates with a time series data stream, check the fo After creating the index template, you can create a time series data stream by [indexing a document](use-data-stream.md#add-documents-to-a-data-stream). The TSDS is created automatically when you index the first document, as long as the index name matches the index template pattern. You can use a bulk API request or a POST request. :::{important} -To test the following `_bulk` example, update the timestamps to within three hours of your current time. Data added to a TSDS must fit the [accepted time range](/manage-data/data-store/data-streams/time-bound-tsds.md#tsds-accepted-time-range). 
+To test the following `_bulk` example, update the timestamps to within two hours of your current time. Data added to a TSDS must fit the [accepted time range](/manage-data/data-store/data-streams/time-bound-tsds.md#tsds-accepted-time-range). ::: ```console @@ -231,7 +160,7 @@ GET metrics-prod/_search You can convert an existing regular data stream to a TSDS. Follow these steps: -1. Update your existing index template to include time series settings. Also update your index lifecycle policy (if any) and component templates (if any). +1. Update your existing index template and component templates (if any) to include time series settings. 2. Use the [rollover API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-rollover) to manually roll over the existing data stream's write index, to apply the changes you made in step 1: ```console @@ -246,9 +175,11 @@ After the rollover, new backing indices will have time series functionality. Exi To control access to a TSDS, use [index privileges](elasticsearch://reference/elasticsearch/security-privileges.md#privileges-list-indices). Privileges set on a TSDS also apply to the backing indices. -For an example, refer to [Data stream privileges](../../../deploy-manage/users-roles/cluster-or-deployment-auth/granting-privileges-for-data-streams-aliases.md#data-stream-privileges). +For an example, refer to [Data stream privileges](/deploy-manage/users-roles/cluster-or-deployment-auth/granting-privileges-for-data-streams-aliases.md#data-stream-privileges). + +### Set up lifecycle management -% TODO Common patterns for time series data streams +In most cases, you can use a [data stream lifecycle](/manage-data/lifecycle/data-stream.md) to manage your time series data stream. If you're using [data tiers](/manage-data/lifecycle/data-tiers.md) in {{stack}}, you can use [index lifecycle management](/manage-data/lifecycle/index-lifecycle-management.md). 
## Next steps [set-up-tsds-whats-next] diff --git a/manage-data/data-store/data-streams/time-bound-tsds.md b/manage-data/data-store/data-streams/time-bound-tsds.md index 5f24f9b535..5f8a2d76ed 100644 --- a/manage-data/data-store/data-streams/time-bound-tsds.md +++ b/manage-data/data-store/data-streams/time-bound-tsds.md @@ -44,13 +44,13 @@ GET _data_stream/my-tsds ``` ::::{tip} -These {{ilm-init}} actions mark the source index as read-only or prevent writes for performance reasons: +The following actions affect the writable time range of a TSDS, either because they make a backing index read-only or remove it: - [Delete](elasticsearch://reference/elasticsearch/index-lifecycle-actions/ilm-delete.md) - [Downsample](elasticsearch://reference/elasticsearch/index-lifecycle-actions/ilm-downsample.md) - [Force merge](elasticsearch://reference/elasticsearch/index-lifecycle-actions/ilm-forcemerge.md) - [Read only](elasticsearch://reference/elasticsearch/index-lifecycle-actions/ilm-readonly.md) - [Searchable snapshot](elasticsearch://reference/elasticsearch/index-lifecycle-actions/ilm-searchable-snapshot.md) - - [Shrink](elasticsearch://reference/elasticsearch/index-lifecycle-actions/ilm-shrink.md) + - [Shrink](elasticsearch://reference/elasticsearch/index-lifecycle-actions/ilm-shrink.md), which might then revert the read-only status at the end of the action {{ilm-cap}} will **not** proceed with executing these actions until [`index.time_series.end_time`](elasticsearch://reference/elasticsearch/index-settings/time-series.md#index-time-series-end-time) has passed. 
:::: diff --git a/manage-data/data-store/data-streams/time-series-data-stream-tsds.md b/manage-data/data-store/data-streams/time-series-data-stream-tsds.md index a84edc1d73..2366977f4f 100644 --- a/manage-data/data-store/data-streams/time-series-data-stream-tsds.md +++ b/manage-data/data-store/data-streams/time-series-data-stream-tsds.md @@ -19,7 +19,7 @@ Before setting up a time series data stream, make sure you're familiar with gene ## When to use a time series data stream [when-to-use-tsds] -_Metrics_ consist of data point–timestamp pairs, identified by [dimension fields]() that can be used in aggregation queries. Both a regular data stream and a time series data stream can store metrics data. +_Metrics_ consist of data point–timestamp pairs, identified by [dimension fields](#time-series-dimension) that can be used in aggregation queries. Both a regular data stream and a time series data stream can store metrics data. Choose a time series data stream if you typically add metrics data to {{es}} in near real-time and in `@timestamp` order. For other timestamped data, such as logs or traces, use a [logs data stream](logs-data-stream.md) or [regular data stream](/manage-data/data-store/data-streams.md). @@ -38,11 +38,9 @@ A time series is a sequence of observations for a specific entity. Together, the :title: Time series of weather sensor readings plotted as a graph ::: -In a TSDS, each {{es}} document represents an observation, or data point, in a specific time series. Although a TSDS can contain multiple time series, a document can belong to only one time series. A single time series can't span multiple data streams. - ### Time series fields -Compared to a regular data stream, a TSDS uses some additional fields specific to time series: dimension fields (required) and metric fields (optional but usually defined), plus an internal `_tsid` metadata field. 
+Compared to a regular data stream, a TSDS uses some additional fields specific to time series: dimension fields and metric fields, plus an internal `_tsid` metadata field. #### Dimensions [time-series-dimension] @@ -63,15 +61,13 @@ To mark a field as a dimension, set the Boolean `time_series_dimension` mapping * [`unsigned_long`](elasticsearch://reference/elasticsearch/mapping-reference/number.md) * [`boolean`](elasticsearch://reference/elasticsearch/mapping-reference/boolean.md) -:::{dropdown} Advanced field types To work with a flattened field, use the `time_series_dimensions` parameter to configure an array of fields as dimensions. For details, refer to [`flattened`](elasticsearch://reference/elasticsearch/mapping-reference/flattened.md#flattened-params). You can also simplify dimension definitions by using [pass-through](elasticsearch://reference/elasticsearch/mapping-reference/passthrough.md#passthrough-dimensions) fields. -::: #### Metrics [time-series-metric] -Metrics are numeric measurements that change over time. Documents in a TSDS contain one or more metric fields. +Metrics are numeric measurements that change over time. Documents in a TSDS typically contain one or more metric fields. :::{tip} Metrics are expected to change (even if rarely or slowly), while dimensions generally remain constant. @@ -79,46 +75,42 @@ Metrics are expected to change (even if rarely or slowly), while dimensions gene To mark a field as a metric, use the `time_series_metric` mapping parameter. This parameter ensures data is stored in an optimal way for time series analysis. 
The following field types support the `time_series_metric` parameter: -* [`aggregate_metric_double`](elasticsearch://reference/elasticsearch/mapping-reference/aggregate-metric-double.md) * All [numeric field types](elasticsearch://reference/elasticsearch/mapping-reference/number.md) +* [`aggregate_metric_double`](elasticsearch://reference/elasticsearch/mapping-reference/aggregate-metric-double.md), for internal use during downsampling (rarely user-populated) The valid values for `time_series_metric` are `counter` and `gauge`: `counter` -: A cumulative metric that only monotonically increases or resets to `0` (zero). For example, a count of errors or completed tasks. +: A cumulative metric that only monotonically increases or resets to `0` (zero). For example, a count of errors or completed tasks that resets when a serving process restarts. `gauge` : A metric that represents a single numeric that can arbitrarily increase or decrease. For example, a temperature or available disk space. #### `_tsid` metadata field [tsid] -The `_tsid` is an automatically generated object containing the document’s dimensions. It's intended for internal {{es}} use, so in most cases you won't need to work with it. - -- You **can't** query or update the internal `_tsid` field. -- You **can** use the `_tsid` in aggregations. -- To retrieve the value of `_tsid`, use the fields parameter in a search. The `_tsid` is not included in get document responses. -- The format of the `_tsid` field is subject to change. +The `_tsid` is an automatically generated object containing the document’s dimensions. It's intended for internal {{es}} use, so in most cases you won't need to work with it. The format of the `_tsid` field is subject to change. 
### Differences from a regular data stream [differences-from-regular-data-stream] A time series data stream works like a regular data stream, with some key differences: * **Time series index mode:** The matching index template for a TSDS must include a `data_stream` object with `index.mode` set to `time_series`. This option enables most TSDS-related functionality. -* **Required fields:** In a TSDS, each document contains: +* **Fields:** In a TSDS, each document contains: * A `@timestamp` field * One or more [dimension fields](#time-series-dimension), set with `time_series_dimension: true` - * One or more [metric fields](#time-series-metric) (not strictly required, but typical for a TSDS) -* **Document IDs:** Time series documents use two IDs: - * An internal [`_tsid`](#tsid) metadata field, generated by {{es}} for each document in a TSDS and used for sorting and compression - * The document `_id`, a generated hash of the document's dimensions and `@timestamp` (custom `_id` values are not supported) + * One or more [metric fields](#time-series-metric) + * An auto-generated document `_id` (custom `_id` values are not supported) * **Backing indices:** A TSDS uses [time-bound indices](/manage-data/data-store/data-streams/time-bound-tsds.md) to store data from the same time period in the same backing index. -* **Dimension-based routing:** The matching index template for a TSDS must contain the `index.routing_path` index setting, which specifies dimensions for routing documents to shards. +* **Dimension-based routing:** The routing logic uses dimension fields to map data to shards, improving storage efficiency and query performance. * **Sorting:** A TSDS uses internal [index sorting](elasticsearch://reference/elasticsearch/index-settings/sorting.md) to order shard segments by `_tsid` and `@timestamp`, for better compression. Time series data streams do not use `index.sort.*` settings. 
-* **Synthetic source:** A TSDS uses [synthetic `_source`](elasticsearch://reference/elasticsearch/mapping-reference/mapping-source-field.md#synthetic-source), which has some [restrictions](elasticsearch://reference/elasticsearch/mapping-reference/mapping-source-field.md#synthetic-source-restrictions) and [modifications](elasticsearch://reference/elasticsearch/mapping-reference/mapping-source-field.md#synthetic-source-modifications). ## Query time series data +```{applies_to} +stack: preview +serverless: preview +``` -You can use the {{esql}} [`TS` command](elasticsearch://reference/query-languages/esql/commands/ts.md) to query time series data streams. The `TS` command is optimized for time series data. It also enables the use of aggregation functions that efficiently process metrics per time series, before aggregating results. +You can use the {{esql}} [`TS` command](elasticsearch://reference/query-languages/esql/commands/ts.md) (in technical preview) to query time series data streams. The `TS` command is optimized for time series data. It also enables the use of aggregation functions that efficiently process metrics per time series, before aggregating results. ## Next steps [tsds-whats-next] From 854239f5996c40f52d7cf01e7319834dae6a6042 Mon Sep 17 00:00:00 2001 From: Marci W <333176+marciw@users.noreply.github.com> Date: Sun, 5 Oct 2025 18:03:42 -0400 Subject: [PATCH 09/14] Clarify accepted time range vs. 
writable time range --- manage-data/data-store/data-streams/time-bound-tsds.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/manage-data/data-store/data-streams/time-bound-tsds.md b/manage-data/data-store/data-streams/time-bound-tsds.md index 5f8a2d76ed..8caff4c4c6 100644 --- a/manage-data/data-store/data-streams/time-bound-tsds.md +++ b/manage-data/data-store/data-streams/time-bound-tsds.md @@ -44,7 +44,7 @@ GET _data_stream/my-tsds ``` ::::{tip} -The following actions affect the writable time range of a TSDS, either because they make a backing index read-only or remove it: +Writes within the accepted time range might still be rejected. The following actions can affect the writable time range, either because they make a backing index read-only or because they remove it: - [Delete](elasticsearch://reference/elasticsearch/index-lifecycle-actions/ilm-delete.md) - [Downsample](elasticsearch://reference/elasticsearch/index-lifecycle-actions/ilm-downsample.md) - [Force merge](elasticsearch://reference/elasticsearch/index-lifecycle-actions/ilm-forcemerge.md) From fb68872ace3ccda612e7801eb16fb1346b3f0567 Mon Sep 17 00:00:00 2001 From: Marci W <333176+marciw@users.noreply.github.com> Date: Sun, 5 Oct 2025 18:20:55 -0400 Subject: [PATCH 10/14] Various cleanup --- .../downsampling-time-series-data-stream.md | 1 - .../data-streams/query-downsampled-data.md | 2 +- .../data-streams/run-downsampling.md | 21 +++---------------- 3 files changed, 4 insertions(+), 20 deletions(-) diff --git a/manage-data/data-store/data-streams/downsampling-time-series-data-stream.md b/manage-data/data-store/data-streams/downsampling-time-series-data-stream.md index 6a12f625c5..461447ad13 100644 --- a/manage-data/data-store/data-streams/downsampling-time-series-data-stream.md +++ b/manage-data/data-store/data-streams/downsampling-time-series-data-stream.md @@ -8,7 +8,6 @@ applies_to: products: - id: elasticsearch --- -% TODO flesh out after the rest of the section has been restructured #
Downsampling a time series data stream [downsampling] diff --git a/manage-data/data-store/data-streams/query-downsampled-data.md b/manage-data/data-store/data-streams/query-downsampled-data.md index 106b2dc482..70e7af67ce 100644 --- a/manage-data/data-store/data-streams/query-downsampled-data.md +++ b/manage-data/data-store/data-streams/query-downsampled-data.md @@ -2,7 +2,7 @@ applies_to: stack: ga serverless: ga -navigation_title: "Query downsampled data" +navigation_title: "Querying" products: - id: elasticsearch --- diff --git a/manage-data/data-store/data-streams/run-downsampling.md b/manage-data/data-store/data-streams/run-downsampling.md index ddf07341c7..939c3ee910 100644 --- a/manage-data/data-store/data-streams/run-downsampling.md +++ b/manage-data/data-store/data-streams/run-downsampling.md @@ -2,7 +2,7 @@ applies_to: stack: ga serverless: ga -navigation_title: "Downsample data" +navigation_title: "Configuration" mapped_pages: - https://www.elastic.co/guide/en/elasticsearch/reference/current/downsampling-manual.html - https://www.elastic.co/guide/en/elasticsearch/reference/current/downsampling-ilm.html @@ -10,11 +10,11 @@ products: - id: elasticsearch --- -# Downsample time series data [running-downsampling] +# Configuring a time series data stream for downsampling [running-downsampling] To downsample a time series data stream (TSDS), you can use index lifecycle management (ILM) or a data stream lifecycle. (You can also use the [downsample API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-downsample) with an individual time series index, but most users don't need to use the API.) -Before you begin, review the [](downsampling-concepts.md). +Before you begin, review [](downsampling-concepts.md). :::{important} Downsampling requires **read-only** data. @@ -108,19 +108,4 @@ Set `fixed_interval` to your preferred level of granularity. 
The original time s * [](time-series-data-stream-tsds.md) * [](set-up-tsds.md) -% :::{tab-item} Downsample API -% ## Downsampling with the API - -% Make a [downsample API] request: - -% ```console -% POST /my-time-series-index/_downsample/my-downsampled-time-series-index -% { -% "fixed_interval": "1d" -% } -% ``` - -% Set `fixed_interval` to your preferred level of granularity. The original time series data will be aggregated at this interval. - -% ::: From 0b0de908429d4edd3a0d1cdb94dcb73416e1693b Mon Sep 17 00:00:00 2001 From: Marci W <333176+marciw@users.noreply.github.com> Date: Sun, 5 Oct 2025 18:36:09 -0400 Subject: [PATCH 11/14] Edit Felix's addition --- .../data-store/data-streams/time-bound-tsds.md | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/manage-data/data-store/data-streams/time-bound-tsds.md b/manage-data/data-store/data-streams/time-bound-tsds.md index 8caff4c4c6..16406e3fe8 100644 --- a/manage-data/data-store/data-streams/time-bound-tsds.md +++ b/manage-data/data-store/data-streams/time-bound-tsds.md @@ -58,19 +58,18 @@ Writes within the accepted time range might still be rejected. The following act ### Dimension-based routing [dimension-based-routing] -In addition to time-based routing, time series data streams use dimension-based routing to determine which shard to route data to. Documents with the same dimensions are routed to the same shards using one of two strategies: +In addition to time-based routing, time series data streams use dimension-based routing to determine which shard to route data to. Documents with the same dimensions are routed to the same shards, using one of two strategies: -1. Based on the internally managed `index.dimensions` index setting (preferred). Available as of stack version 9.2. -2. Based on the [`index.routing_path`](elasticsearch://reference/elasticsearch/index-settings/time-series.md#index-routing-path) index setting (as a fallback). 
+**Index dimensions** {applies_to}`stack: ga 9.2` {applies_to}`serverless: all` +: Routing based on the internally managed `index.dimensions` setting. -The `index.dimensions`-based strategy offers a better ingest performance. -It uses a list of dimension paths that's automatically kept up-to-date and is not user-configurable. -This strategy is not available for time series data streams with dynamic templates that set `time_series_dimension: true`. +**Routing path** +: Routing based on the [`index.routing_path`](elasticsearch://reference/elasticsearch/index-settings/time-series.md#index-routing-path) setting (as a fallback). -It can be disabled by setting [`index.index_dimensions_tsid_strategy_enabled`](elasticsearch://reference/elasticsearch/index-settings/time-series.md#index-dimensions-tsid-strategy-enabled) to `false`, -or by manually setting `index.routing_path`. +The `index.dimensions`-based strategy offers better ingest performance. It uses a list of dimension paths that is automatically updated (and is not user-configurable). This strategy is not available for time series data streams with dynamic templates that set `time_series_dimension: true`. 
-The [`index.routing_path`](elasticsearch://reference/elasticsearch/index-settings/time-series.md#index-routing-path) setting specifies the dimension fields to use for routing, for example: +To disable routing based on `index.dimensions`, set [`index.index_dimensions_tsid_strategy_enabled`](elasticsearch://reference/elasticsearch/index-settings/time-series.md#index-dimensions-tsid-strategy-enabled) to `false`, +or manually set the [`index.routing_path`](elasticsearch://reference/elasticsearch/index-settings/time-series.md#index-routing-path) to the dimensions you want to use: ```console "settings": { From cdfa09508692f440c051b11734aa130acb22bf95 Mon Sep 17 00:00:00 2001 From: Marci W <333176+marciw@users.noreply.github.com> Date: Tue, 7 Oct 2025 16:52:52 -0400 Subject: [PATCH 12/14] Apply suggestion from review Co-authored-by: Yannis Roussos --- .../data-store/data-streams/time-series-data-stream-tsds.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/manage-data/data-store/data-streams/time-series-data-stream-tsds.md b/manage-data/data-store/data-streams/time-series-data-stream-tsds.md index 2366977f4f..670a7b9dda 100644 --- a/manage-data/data-store/data-streams/time-series-data-stream-tsds.md +++ b/manage-data/data-store/data-streams/time-series-data-stream-tsds.md @@ -19,7 +19,7 @@ Before setting up a time series data stream, make sure you're familiar with gene ## When to use a time series data stream [when-to-use-tsds] -_Metrics_ consist of data point–timestamp pairs, identified by [dimension fields](#time-series-dimension) that can be used in aggregation queries. Both a regular data stream and a time series data stream can store metrics data. +_Metrics_ consist of data point–timestamp pairs, identified by [dimension fields](#time-series-dimension), that can be used in aggregation queries. Both a regular data stream and a time series data stream can store metrics data. 
Choose a time series data stream if you typically add metrics data to {{es}} in near real-time and in `@timestamp` order. For other timestamped data, such as logs or traces, use a [logs data stream](logs-data-stream.md) or [regular data stream](/manage-data/data-store/data-streams.md). From a49050d5453a36e8c738512fc32be50e20c804c4 Mon Sep 17 00:00:00 2001 From: Marci W <333176+marciw@users.noreply.github.com> Date: Tue, 7 Oct 2025 16:53:37 -0400 Subject: [PATCH 13/14] Apply suggestion from review Co-authored-by: Yannis Roussos --- .../data-store/data-streams/time-series-data-stream-tsds.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/manage-data/data-store/data-streams/time-series-data-stream-tsds.md b/manage-data/data-store/data-streams/time-series-data-stream-tsds.md index 670a7b9dda..caade01957 100644 --- a/manage-data/data-store/data-streams/time-series-data-stream-tsds.md +++ b/manage-data/data-store/data-streams/time-series-data-stream-tsds.md @@ -21,7 +21,7 @@ Before setting up a time series data stream, make sure you're familiar with gene _Metrics_ consist of data point–timestamp pairs, identified by [dimension fields](#time-series-dimension), that can be used in aggregation queries. Both a regular data stream and a time series data stream can store metrics data. -Choose a time series data stream if you typically add metrics data to {{es}} in near real-time and in `@timestamp` order. For other timestamped data, such as logs or traces, use a [logs data stream](logs-data-stream.md) or [regular data stream](/manage-data/data-store/data-streams.md). +Choose a time series data stream if you typically add metrics data to {{es}} in near real-time and in `@timestamp` order. For other timestamped data, such as logs or traces, use a [logs data stream](logs-data-stream.md) or a [regular data stream](/manage-data/data-store/data-streams.md). 
To make sure a TSDS is right for your use case, review the list of [differences from a regular data stream](#differences-from-regular-data-stream) on this page. From 3fc8503dc80a611223adf857180e8144b3dd6c45 Mon Sep 17 00:00:00 2001 From: Felix Barnsteiner Date: Wed, 8 Oct 2025 15:57:02 +0200 Subject: [PATCH 14/14] Add OTLP docs (#3360) Adds docs for the new OTLP endpoint added via https://github.com/elastic/elasticsearch/pull/133057 Closes https://github.com/elastic/docs-content/issues/3363 --------- Co-authored-by: Fabrizio Ferri-Benedetti Co-authored-by: Kostas Krikellas <131142368+kkrik-es@users.noreply.github.com> --- .../data-streams/advanced-topics-tsds.md | 3 +- .../data-streams/quickstart-tsds.md | 8 +- .../data-store/data-streams/set-up-tsds.md | 10 +- .../time-series-data-stream-tsds.md | 3 +- .../data-streams/tsds-ingest-otlp.md | 103 ++++++++++++++++++ manage-data/toc.yml | 7 +- 6 files changed, 126 insertions(+), 8 deletions(-) create mode 100644 manage-data/data-store/data-streams/tsds-ingest-otlp.md diff --git a/manage-data/data-store/data-streams/advanced-topics-tsds.md b/manage-data/data-store/data-streams/advanced-topics-tsds.md index 36ae51226b..6d08a3eb32 100644 --- a/manage-data/data-store/data-streams/advanced-topics-tsds.md +++ b/manage-data/data-store/data-streams/advanced-topics-tsds.md @@ -12,4 +12,5 @@ products: This section contains information about advanced concepts and operations for [time series data streams](/manage-data/data-store/data-streams/time-series-data-stream-tsds.md): - [](/manage-data/data-store/data-streams/time-bound-tsds.md) -- [](/manage-data/data-store/data-streams/reindex-tsds.md) \ No newline at end of file +- [](/manage-data/data-store/data-streams/reindex-tsds.md) +- [](/manage-data/data-store/data-streams/tsds-ingest-otlp.md) diff --git a/manage-data/data-store/data-streams/quickstart-tsds.md b/manage-data/data-store/data-streams/quickstart-tsds.md index 086d49605f..09ddd18a63 100644 --- 
a/manage-data/data-store/data-streams/quickstart-tsds.md +++ b/manage-data/data-store/data-streams/quickstart-tsds.md @@ -13,6 +13,12 @@ Use this quickstart to set up a time series data stream (TSDS), ingest a few doc A _time series_ is a sequence of data points collected at regular time intervals. For example, you might track CPU usage or stock price over time. This quickstart uses simplified weather sensor readings to show how a TSDS helps you analyze metrics data over time. +::::{note} +If you're looking to ingest OpenTelemetry metrics, +follow the [OpenTelemetry quickstarts](/solutions/observability/get-started/opentelemetry/quickstart/index.md). +This allows you to start sending data into a TSDS without having to worry about manually setting up data streams or configuring mappings. +:::: + ## Prerequisites * Access to [{{dev-tools-app}} Console](/explore-analyze/query-filter/tools/console.md) in {{kib}}, or another way to make {{es}} API requests @@ -320,4 +326,4 @@ For more information about the APIs used in this quickstart, review the {{es}} A * [Bulk API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-bulk) * [Index template API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-put-index-template) -* [Search API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-search) \ No newline at end of file +* [Search API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-search) diff --git a/manage-data/data-store/data-streams/set-up-tsds.md b/manage-data/data-store/data-streams/set-up-tsds.md index 7abf1ae833..a97de12ec7 100644 --- a/manage-data/data-store/data-streams/set-up-tsds.md +++ b/manage-data/data-store/data-streams/set-up-tsds.md @@ -11,7 +11,13 @@ products: # Set up a time series data stream [set-up-tsds] -This page shows you how to set up a [time series data stream](/manage-data/data-store/data-streams/time-series-data-stream-tsds.md) (TSDS). 
+This page shows you how to manually set up a [time series data stream](/manage-data/data-store/data-streams/time-series-data-stream-tsds.md) (TSDS). + +::::{note} +If you're looking to ingest OpenTelemetry metrics, +follow the [OpenTelemetry quickstarts](/solutions/observability/get-started/opentelemetry/quickstart/index.md). +This allows you to start sending data into a TSDS without having to worry about manually setting up data streams or configuring mappings. +:::: ## Before you begin [tsds-prereqs] @@ -188,4 +194,4 @@ Now that you've set up a time series data stream, you can manage and use it like * [Use a data stream](use-data-stream.md) for indexing and searching * [Change data stream settings](modify-data-stream.md#data-streams-change-mappings-and-settings) as needed * Query time series data using the {{esql}} [`TS` command](elasticsearch://reference/query-languages/esql/commands/ts.md) -* Use [data stream APIs](https://www.elastic.co/docs/api/doc/elasticsearch/group/endpoint-data-stream) \ No newline at end of file +* Use [data stream APIs](https://www.elastic.co/docs/api/doc/elasticsearch/group/endpoint-data-stream) diff --git a/manage-data/data-store/data-streams/time-series-data-stream-tsds.md b/manage-data/data-store/data-streams/time-series-data-stream-tsds.md index caade01957..dd08eff11c 100644 --- a/manage-data/data-store/data-streams/time-series-data-stream-tsds.md +++ b/manage-data/data-store/data-streams/time-series-data-stream-tsds.md @@ -117,6 +117,7 @@ You can use the {{esql}} [`TS` command](elasticsearch://reference/query-language * Try the [quickstart](/manage-data/data-store/data-streams/quickstart-tsds.md) for a hands-on introduction * [Set up a time series data stream](/manage-data/data-store/data-streams/set-up-tsds.md) +* [Ingest data using the OpenTelemetry Protocol (OTLP)](/manage-data/data-store/data-streams/tsds-ingest-otlp.md) * Learn about [downsampling](/manage-data/data-store/data-streams/downsampling-time-series-data-stream.md) 
to reduce storage footprint % suppress anchor warnings until chicken-and-egg resolved @@ -125,4 +126,4 @@ You can use the {{esql}} [`TS` command](elasticsearch://reference/query-language ##### [tsds-look-back-time] ##### [time-bound-indices] ##### [dimension-based-routing] -##### [tsds-accepted-time-range] \ No newline at end of file +##### [tsds-accepted-time-range] diff --git a/manage-data/data-store/data-streams/tsds-ingest-otlp.md b/manage-data/data-store/data-streams/tsds-ingest-otlp.md new file mode 100644 index 0000000000..6f7a497451 --- /dev/null +++ b/manage-data/data-store/data-streams/tsds-ingest-otlp.md @@ -0,0 +1,103 @@ +--- +navigation_title: "OTLP/HTTP endpoint" +applies_to: + stack: preview 9.2 + deployment: + self: +products: + - id: elasticsearch +--- + +# OTLP/HTTP endpoint + +:::{important} +The recommended way to send OTLP data for most use cases is through an OpenTelemetry Collector in [Gateway mode](elastic-agent://reference/edot-collector/config/default-config-standalone.md#gateway-mode) or, if you're on {{ecloud}}, directly to the [{{motlp}}](opentelemetry:/reference/motlp.md). Refer to [Best practices](#best-practices) for more details. +::: + +In addition to the ingestion of metrics data through the bulk API, +{{es}} offers an alternative way to ingest data through the [OpenTelemetry Protocol (OTLP)](https://opentelemetry.io/docs/specs/otlp). + +The endpoint is available under `/_otlp/v1/metrics`. + +Ingesting metrics data using the OTLP endpoint has the following advantages: + +* Improved ingestion performance, especially if the data contains many resource attributes. +* Simplified index mapping: + there's no need to manually create data streams, index templates, or define dimensions and metrics. + Metrics are dynamically mapped using the metadata included in the OTLP requests. 
+ +:::{note} +{{es}} only supports [OTLP/HTTP](https://opentelemetry.io/docs/specs/otlp/#otlphttp), +not [OTLP/gRPC](https://opentelemetry.io/docs/specs/otlp/#otlpgrpc). +::: + +## How to send data to the OTLP endpoint + +To send data from an OpenTelemetry Collector to the {{es}} OTLP endpoint, +use the [`OTLP/HTTP` exporter](https://github.com/open-telemetry/opentelemetry-collector/tree/main/exporter/otlphttpexporter). +This is an example configuration: + +```yaml +extensions: + basicauth/elasticsearch: + client_auth: + username: <user> + password: <password> +exporters: + otlphttp/elasticsearch-metrics: + endpoint: <es_endpoint>/_otlp + sending_queue: + enabled: true + sizer: bytes + queue_size: 50_000_000 # 50MB uncompressed + block_on_overflow: true + batch: + flush_timeout: 1s + min_size: 1_000_000 # 1MB uncompressed + max_size: 4_000_000 # 4MB uncompressed + auth: + authenticator: basicauth/elasticsearch +service: + extensions: [basicauth/elasticsearch] + pipelines: + metrics: + exporters: [otlphttp/elasticsearch-metrics] + receivers: ... +``` + +The supported options for `compression` are `gzip` (default value of the `OTLP/HTTP` exporter) and `none`. + +% TODO: we might actually also support snappy and zstd; test and update accordingly + +To track metrics in your custom application, +use the [OpenTelemetry language SDK](https://opentelemetry.io/docs/getting-started/dev/) of your choice. + +:::{note} +Only `encoding: proto` is supported, which the `OTLP/HTTP` exporter uses by default. +::: + +## Best practices + +Don't send metrics from applications directly to the {{es}} OTLP endpoint, especially if there are many individual applications that periodically send a small number of metrics. Instead, send data to an OpenTelemetry Collector first. This helps with handling many connections, and with creating bigger batches to improve ingestion performance. 
+ +On {{ecloud}}, use the [{{motlp}}](opentelemetry:/reference/motlp.md). For self-managed use cases, use the [Elastic Distribution of OpenTelemetry Collector](elastic-agent:/reference/edot-collector/index.md). + +For more details on the recommended way to set up OpenTelemetry-based data ingestion, refer to the [EDOT reference architecture](opentelemetry:/reference/architecture/index.md). + +## Send data to different data streams + +By default, metrics are ingested into the `metrics-generic.otel-default` data stream. You can influence the target data stream by setting specific attributes on your data: + +- `data_stream.dataset` or `data_stream.namespace` in attributes, with the following order of precedence: data point attribute -> scope attribute -> resource attribute +- Otherwise, if the scope name contains `/receiver/`, `data_stream.dataset` is set to the receiver name. +- Otherwise, `data_stream.dataset` falls back to `generic` and `data_stream.namespace` falls back to `default`. + +The target data stream name is constructed as `metrics-${data_stream.dataset}.otel-${data_stream.namespace}`. + +## Limitations + +* Only the OTLP metrics endpoint (`/_otlp/v1/metrics`) is supported. + To ingest logs, traces, and profiles, use a distribution of the OpenTelemetry Collector that includes the [{{es}} exporter](opentelemetry:/reference/edot-collector/components/elasticsearchexporter.md), + such as the [Elastic Distribution of OpenTelemetry (EDOT) Collector](opentelemetry:/reference/edot-collector/index.md). +* Histograms are only supported in delta temporality. Set the temporality preference to delta in your SDKs, or use the [`cumulativetodelta` processor](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/processor/cumulativetodeltaprocessor) to prevent cumulative histograms from being dropped. +* Exemplars are not supported. 
diff --git a/manage-data/toc.yml b/manage-data/toc.yml index f6604aad98..fe7be01008 100644 --- a/manage-data/toc.yml +++ b/manage-data/toc.yml @@ -21,9 +21,10 @@ toc: - file: data-store/data-streams/run-downsampling.md - file: data-store/data-streams/query-downsampled-data.md - file: data-store/data-streams/advanced-topics-tsds.md - children: - - file: data-store/data-streams/time-bound-tsds.md + children: + - file: data-store/data-streams/time-bound-tsds.md - file: data-store/data-streams/reindex-tsds.md + - file: data-store/data-streams/tsds-ingest-otlp.md - file: data-store/data-streams/logs-data-stream.md - file: data-store/data-streams/failure-store.md children: @@ -167,4 +168,4 @@ toc: - file: migrate/migrate-from-a-self-managed-cluster-with-a-self-signed-certificate-using-remote-reindex.md - file: migrate/migrate-internal-indices.md - file: migrate/migrate-data-between-elasticsearch-clusters-with-minimal-downtime.md - - file: use-case-use-elasticsearch-to-manage-time-series-data.md \ No newline at end of file + - file: use-case-use-elasticsearch-to-manage-time-series-data.md