diff --git a/docs/sources/tempo/configuration/_index.md b/docs/sources/tempo/configuration/_index.md index 8c75b326f6a..a31f6957450 100644 --- a/docs/sources/tempo/configuration/_index.md +++ b/docs/sources/tempo/configuration/_index.md @@ -30,6 +30,12 @@ The Tempo configuration options include: - [Local storage recommendations](#local-storage-recommendations) - [Storage block configuration example](#storage-block-configuration-example) - [Memberlist](#memberlist) + - [Configuration blocks](#configuration-blocks) + - [Block config](#block-config) + - [Filter policy config](#filter-policy-config) + - [KVStore config](#kvstore-config) + - [Search config](#search-config) + - [WAL config](#wal-config) - [Overrides](#overrides) - [Ingestion limits](#ingestion-limits) - [Standard overrides](#standard-overrides) @@ -284,12 +290,36 @@ metrics_generator: # Ring configuration ring: + kvstore: + [store: | default = memberlist] + [prefix: | default = "collectors/"] - kvstore: + # Period at which to heartbeat the instance + # 0 disables heartbeat altogether + [heartbeat_period: | default = 5s] - # The metrics-generator uses the ring to balance work across instances. The ring is stored - # in a key-vault store. - [store: | default = memberlist] + # The heartbeat timeout, after which, the instance is skipped. + # 0 disables timeout. + [heartbeat_timeout: | default = 1m] + + # Our Instance ID to register as in the ring. + [instance_id: | default = os.Hostname()] + + # Name of the network interface to read address from. 
+ [instance_interface_names: | default = ["eth0", "en0"] ] + + # Our advertised IP address in the ring, (useful if the local ip =/= the external ip) + # Will default to the configured `instance_id` ip address, + # if unset, will fall back to ip reported by `instance_interface_names` + # (Affected by `enable_inet6`) + [instance_addr: | default = auto(instance_id, instance_interface_names)] + + # Our advertised port in the ring + # Defaults to the configured GRPC listening port + [instance_port: | default = auto(listen_port)] + + # Enables the registering of ipv6 addresses in the ring. + [enable_inet6: | default = false] # Processor-specific configuration processor: @@ -321,6 +351,12 @@ metrics_generator: # `wait` value for this processor. [enable_messaging_system_latency_histogram: | default = false] + # Attributes that will be used to create a peer edge + # Attributes are searched in the order they are provided + # See: https://pkg.go.dev/go.opentelemetry.io/otel/semconv/v1.18.0 + # Example: ["peer.service", "db.name", "db.system", "host.name"] + [peer_attributes: | default = ["peer.service", "db.name", "db.system"] ] + # Attribute Key to multiply span metrics [span_multiplier_key: | default = ""] @@ -330,7 +366,7 @@ metrics_generator: span_metrics: # Buckets for the latency histogram in seconds. - [histogram_buckets: | default = 0.002, 0.004, 0.008, 0.016, 0.032, 0.064, 0.128, 0.256, 0.512, 1.02, 2.05, 4.10] + [histogram_buckets: | default = 0.002, 0.004, 0.008, 0.016, 0.032, 0.064, 0.128, 0.256, 0.512, 1.024, 2.048, 4.096, 8.192, 16.384] # Configure intrinsic dimensions to add to the metrics. Intrinsic dimensions are taken # directly from the respective resource and span properties. @@ -352,19 +388,66 @@ metrics_generator: # the metrics if present. 
[dimensions: ] - # Custom labeling of dimensions is possible via a list of maps consisting of - # "name" , "source_labels" , "join" - # "name" appears in the metrics, "source_labels" are the actual - # attributes that will make up the value of the label and "join" is the - # separator if multiple source_labels are provided - [dimension_mappings: ] + # Custom labeling mapping + dimension_mappings: + # The new label name + - [name: ] + # The actual attributes that will make the value of the new label + [source_labels: ] + # The separator used to join multiple `source_labels` + [join: ] + # Enable traces_target_info metrics - [enable_target_info: ] - # Drop specific labels from traces_target_info metrics - [target_info_excluded_dimensions: ] + [enable_target_info: | default = false] + # Attribute Key to multiply span metrics [span_multiplier_key: | default = ""] + # List of policies that will be applied to spans for inclusion or exclusion. + [filter_policies: | default = []] + + # Drop specific labels from `traces_target_info` metrics + [target_info_excluded_dimensions: ] + + local_blocks: + + # Block configuration + block: + + # Search configuration + search: + + # How often to run the flush loop to cut idle traces and blocks + [flush_check_period: | default = 10s] + + # A trace is considered complete after this period of inactivity (no new spans received) + [trace_idle_period: | default = 10s] + + # Maximum duration which the head block can be appended to, before cutting it. + [max_block_duration: | default = 1m] + + # Maximum size of the head block, before cutting it + [max_block_bytes: | default = 500000000] + + # Duration to keep blocks in the ingester after they have been flushed + [complete_block_timeout: | default = 1h] + + # Maximum amount of live traces + # If this value is exceeded, traces will be dropped with reason: `live_traces_exceeded` + # A value of 0 disables this limit. + [max_live_traces: ] + + # Whether server spans should be filtered in or not. 
+ # If enabled, only parent spans or spans with the SpanKind of `server` will be retained + [filter_server_spans: | default = true] + + # Number of blocks that are allowed to be processed concurrently + [concurrent_blocks: | default = 10] + + # A tuning factor that controls whether the trace-level timestamp columns are used in a metrics query. + # If a block overlaps the time window by less than this ratio, then we skip the columns. + # A value of 1.0 will always load the columns, and 0.0 will never load any. + [time_overlap_cutoff: | default = 0.2] # Registry configuration registry: @@ -388,6 +471,14 @@ metrics_generator: # The maximum length of label values. Label values exceeding this limit will be truncated. [max_label_value_length: | default = 2048] + # Configuration block for the Write Ahead Log (WAL) + traces_storage: + + # Path to store the WAL files. + # Must be set. + # Example: "/var/tempo/generator/traces" + [path: | default = ""] + # Storage and remote write configuration storage: @@ -395,7 +486,8 @@ metrics_generator: path: # Configuration for the Prometheus Agent WAL - wal: + # https://github.com/prometheus/prometheus/blob/v2.51.2/tsdb/agent/db.go#L62-L84 + wal: # How long to wait when flushing samples on shutdown [remote_write_flush_deadline: | default = 1m] @@ -412,6 +504,12 @@ metrics_generator: # considered in metrics generation. # This is to filter out spans that are outdated. [metrics_ingestion_time_range_slack: | default = 30s] + + # Timeout for metric requests + [query_timeout: | default = 30s ] + + # Overrides the key used to register the metrics-generator in the ring. + [override_ring_key: | default = "metrics-generator"] ``` ## Query-frontend @@ -582,7 +680,7 @@ querier: # Lookback period to include ingesters that were part of the shuffle sharded subring. 
[shuffle_sharding_ingesters_lookback_period: | default = 1hr] - # The query frontend sents sharded requests to ingesters and querier (/api/traces/) + # The query frontend sends sharded requests to ingesters and querier (/api/traces/) # By default, all healthy ingesters are queried for the trace id. # When true the querier will hash the trace id in the same way that distributors do and then # only query those ingesters who own the trace id hash as determined by the ring. @@ -599,7 +697,7 @@ querier: # A list of external endpoints that the querier will use to offload backend search requests. They must # take and return the same value as /api/search endpoint on the querier. This is intended to be - # used with serverless technologies for massive parrallelization of the search path. + # used with serverless technologies for massive parallelization of the search path. # The default value of "" disables this feature. [external_endpoints: | default = ] @@ -628,7 +726,7 @@ querier: google_cloud_run: # A list of external endpoints that the querier will use to offload backend search requests. They must # take and return the same value as /api/search endpoint on the querier. This is intended to be - # used with serverless technologies for massive parrallelization of the search path. + # used with serverless technologies for massive parallelization of the search path. # The default value of "" disables this feature. [external_endpoints: | default = ] @@ -657,13 +755,9 @@ compactor: [disabled: ] ring: - - kvstore: - - # in a high volume environment multiple compactors need to work together to keep up with incoming blocks. - # this tells the compactors to use a ring stored in memberlist to coordinate. - # Example: "store: memberlist" - [store: ] + kvstore: + [store: | default = memberlist] + [prefix: | default = "collectors/" ] compaction: @@ -960,7 +1054,7 @@ storage: # Optional. 
Default is 0 (disabled) # Example: "hedge_requests_at: 500ms" # If set to a non-zero value a second request will be issued at the provided duration. Recommended to - # be set to p99 of Axure Blog Storage requests to reduce long tail latency. This setting is most impactful when + # be set to p99 of Azure Blob Storage requests to reduce long tail latency. This setting is most impactful when # used with queriers and has minimal to no impact on other pieces. [hedge_requests_at: ] @@ -1017,7 +1111,7 @@ storage: # Cache type to use. Should be one of "redis", "memcached" # Example: "cache: memcached" - # Deprecated. See [cache](#cache) section below. + # Deprecated. See [cache](#cache) section. [cache: ] # Minimum compaction level of block to qualify for bloom filter caching. Default is 0 (disabled), meaning @@ -1031,129 +1125,39 @@ storage: [cache_max_block_age: ] # Configuration parameters that impact trace search - search: - - # Target number of bytes per GET request while scanning blocks. Default is 1MB. Reducing - # this value could positively impact trace search performance at the cost of more requests - # to object storage. - # Example: "chunk_size_bytes: 5_000_000" - [chunk_size_bytes: ] - - # Number of traces to prefetch while scanning blocks. Default is 1000. Increasing this value - # can improve trace search performance at the cost of memory. - # Example: "prefetch_trace_count: 10000" - [prefetch_trace_count: ] - - # Size of read buffers used when performing search on a vparquet block. This value times the read_buffer_count - # is the total amount of bytes used for buffering when performing search on a parquet block. - # Default: 1048576 - [read_buffer_size_bytes: ] - - # Number of read buffers used when performing search on a vparquet block. This value times the read_buffer_size_bytes - # is the total amount of bytes used for buffering when performing search on a parquet block. 
- # Default: 32 - [read_buffer_count: ] - - # Granular cache control settings for parquet metadata objects - # Deprecated. See [cache](#cache) section below. - cache_control: - - # Specifies if footer should be cached - [footer: | default = false] - - # Specifies if column index should be cached - [column_index: | default = false] - - # Specifies if offset index should be cached - [offset_index: | default = false] - + search: # Background cache configuration. Requires having a cache configured. - # Deprecated. See [cache](#cache) section below. + # Deprecated. See [cache](#cache) section. background_cache: # Memcached caching configuration block - # Deprecated. See [cache](#cache) section below. + # Deprecated. See [cache](#cache) section. memcached: # Redis configuration block # EXPERIMENTAL - # Deprecated. See [cache](#cache) section below. + # Deprecated. See [cache](#cache) section. redis: # the worker pool is used primarily when finding traces by id, but is also used by other pool: - # total number of workers pulling jobs from the queue (default: 400) - [max_workers: ] - - # length of job queue. imporatant for querier as it queues a job for every block it has to search - # (default: 20000) - [queue_depth: ] - - # Configuration block for the Write Ahead Log (WAL) - wal: + # total number of workers pulling jobs from the queue + [max_workers: | default = 30] - # where to store the head blocks while they are being appended to - # Example: "wal: /var/tempo/wal" - [path: ] + # length of job queue. important for querier as it queues a job for every block it has to search + [queue_depth: | default = 10000 ] - # wal encoding/compression. - # options: none, gzip, lz4-64k, lz4-256k, lz4-1M, lz4, snappy, zstd, s2 - [v2_encoding: | default = snappy] - - # Defines the search data encoding/compression protocol. 
- # Options: none, gzip, lz4-64k, lz4-256k, lz4-1M, lz4, snappy, zstd, s2 - [search_encoding: | default = none] - - # When a span is written to the WAL it adjusts the start and end times of the block it is written to. - # This block start and end time range is then used when choosing blocks for search. - # This is also used for querying traces by ID when the start and end parameters are specified. To prevent spans too far - # in the past or future from impacting the block start and end times we use this configuration option. - # This option only allows spans that occur within the configured duration to adjust the block start and - # end times. - # This can result in trace not being found if the trace falls outside the slack configuration value as the - # start and end times of the block will not be updated in this case. - [ingestion_time_range_slack: | default = 2m] + # configuration block for the Write Ahead Log (WAL) + wal: + [path: | default = "/var/tempo/wal"] + [v2_encoding: | default = snappy] + [search_encoding: | default = none] + [ingestion_time_range_slack: | default = 2m] # block configuration - block: - # block format version. options: v2, vParquet2, vParquet3, vParquet4 - [version: | default = vParquet4] - - # bloom filter false positive rate. lower values create larger filters but fewer false positives - [bloom_filter_false_positive: | default = 0.01] - - # maximum size of each bloom filter shard - [bloom_filter_shard_size_bytes: | default = 100KiB] - - # number of bytes per index record - [v2_index_downsample_bytes: | default = 1MiB] - - # block encoding/compression. options: none, gzip, lz4-64k, lz4-256k, lz4-1M, lz4, snappy, zstd, s2 - [v2_encoding: | default = zstd] - - # search data encoding/compression. same options as block encoding. - [search_encoding: | default = snappy] - - # number of bytes per search page - [search_page_size_bytes: | default = 1MiB] - - # an estimate of the number of bytes per row group when cutting Parquet blocks. 
lower values will - # create larger footers but will be harder to shard when searching. It is difficult to calculate - # this field directly and it may vary based on workload. This is roughly a lower bound. - [parquet_row_group_size_bytes: | default = 100MB] - - # Configures attributes to be stored in dedicated columns within the parquet file, rather than in the - # generic attribute key-value list. This allows for more efficient searching of these attributes. - # Up to 10 span attributes and 10 resource attributes can be configured as dedicated columns. - # Requires vParquet3 - parquet_dedicated_columns: - [ - name: , # name of the attribute - type: , # type of the attribute. options: string - scope: # scope of the attribute. options: resource, span - ] + block: ``` ## Memberlist @@ -1227,9 +1231,9 @@ memberlist: # Timeout for leaving memberlist cluster. [leave_timeout: | default = 5s] - # IP address to listen on for gossip messages. Multiple addresses may be - # specified. Defaults to 0.0.0.0 - [bind_addr: | default = ] + # IP address to listen on for gossip messages. + # Multiple addresses may be specified. + [bind_addr: | default = ["0.0.0.0"] ] # Port to listen on for gossip messages. [bind_port: | default = 7946] @@ -1242,6 +1246,226 @@ memberlist: ``` +## Configuration blocks + +Defines re-used configuration blocks. + +### Block config + +```yaml +# block format version. options: v2, vParquet2, vParquet3, vParquet4 +[version: | default = vParquet4] + +# bloom filter false positive rate. lower values create larger filters but fewer false positives +[bloom_filter_false_positive: | default = 0.01] + +# maximum size of each bloom filter shard +[bloom_filter_shard_size_bytes: | default = 100KiB] + +# number of bytes per index record +[v2_index_downsample_bytes: | default = 1MiB] + +# block encoding/compression. options: none, gzip, lz4-64k, lz4-256k, lz4-1M, lz4, snappy, zstd, s2 +[v2_encoding: | default = zstd] + +# search data encoding/compression. 
same options as block encoding. +[search_encoding: | default = snappy] + +# number of bytes per search page +[search_page_size_bytes: | default = 1MiB] + +# an estimate of the number of bytes per row group when cutting Parquet blocks. lower values will +# create larger footers but will be harder to shard when searching. It is difficult to calculate +# this field directly and it may vary based on workload. This is roughly a lower bound. +[parquet_row_group_size_bytes: | default = 100MB] + +# Configures attributes to be stored in dedicated columns within the parquet file, rather than in the +# generic attribute key-value list. This allows for more efficient searching of these attributes. +# Up to 10 span attributes and 10 resource attributes can be configured as dedicated columns. +# Requires vParquet3 +parquet_dedicated_columns: + + # name of the attribute + - [name: ] + + # type of the attribute. options: string + [type: ] + + # scope of the attribute. + # options: resource, span + [scope: ] +``` + +### Filter policy config + +Span filter config block + +#### Filter policy +```yaml +# Include filters (positive matching) +[include: ] + +# Exclude filters (negative matching) +[exclude: ] +``` + +#### Policy match +```yaml +# How to match the value of attributes +# Options: "strict", "regex" +[match_type: ] + +# List of attributes to match +attributes: + + # Attribute key + - [key: ] + + # Attribute value + [value: ] +``` + +#### Examples + +```yaml +exclude: + match_type: "regex" + attributes: + - key: "resource.service.name" + value: "unknown_service:myservice" +``` + +```yaml +include: + match_type: "strict" + attributes: + - key: "foo.bar" + value: "baz" +``` + +### KVStore config + +The kvstore configuration block + +```yaml +# Set backing store to use +[store: | default = "consul"] + +# What prefix to use for keys +[prefix: | default = "ring."] + +# Store specific configs +consul: + [host: | default = "localhost:8500"] + [acl_token: | default = "" ] + 
[http_client_timeout: | default = 20s] + [consistent_reads: | default = false] + [watch_rate_limit: | default = 1.0] + [watch_burst_size: | default = 1] + [cas_retry_delay: | default = 1s] + +etcd: + [endpoints: | default = [] ] + [dial_timeout: | default = 10s] + [max_retries: | default = 10 ] + [tls_enabled: | default = false] + + # TLS config + [tls_cert_path: | default = ""] + [tls_key_path: | default = ""] + [tls_ca_path: | default = ""] + [tls_server_name: | default = ""] + [tls_insecure_skip_verify: | default = false] + [tls_cipher_suites: | default = ""] + [tls_min_version: | default = ""] + + [username: | default = ""] + [password: | default = ""] + +multi: + [primary: | default = ""] + [secondary: | default = ""] + [mirror_enabled: | default = false] + [mirror_timeout: | default = 2s] +``` + +### Search config + +```yaml +# Target number of bytes per GET request while scanning blocks. Default is 1MB. Reducing +# this value could positively impact trace search performance at the cost of more requests +# to object storage. +[chunk_size_bytes: | default = 1000000] + +# Number of traces to prefetch while scanning blocks. Default is 1000. Increasing this value +# can improve trace search performance at the cost of memory. +[prefetch_trace_count: | default = 1000] + +# Number of read buffers used when performing search on a vparquet block. This value times the read_buffer_size_bytes +# is the total amount of bytes used for buffering when performing search on a parquet block. +[read_buffer_count: | default = 32] + +# Size of read buffers used when performing search on a vparquet block. This value times the read_buffer_count +# is the total amount of bytes used for buffering when performing search on a parquet block. +[read_buffer_size_bytes: | default = 1048576] + +# Granular cache control settings for parquet metadata objects +# Deprecated. See [Cache](#cache) section. 
+cache_control: + + # Specifies if footer should be cached + [footer: | default = false] + + # Specifies if column index should be cached + [column_index: | default = false] + + # Specifies if offset index should be cached + [offset_index: | default = false] +``` + +### WAL config + +The storage WAL configuration block. + +```yaml +# Where to store the wal files while they are being appended to. +# Must be set. +# Example: "/var/tempo/wal" +[path: | default = ""] + +# Where to store the completed wal files +# If not set (""), will join the `path` with "completed" to generate the effective path +# Example: "/var/tempo/wal/completed" +[completedfilepath: | default = join(.path, "/completed")] + +# Where to store the intermediate blocks while they are being appended to. +# Will always join the `path` with "blocks" to generate the effective path +# Example: "/var/tempo/wal/blocks" (ignored) +[blocksfilepath: | = join(.path, "/blocks")] + +# WAL encoding/compression. +# options: none, gzip, lz4-64k, lz4-256k, lz4-1M, lz4, snappy, zstd, s2 +[v2_encoding: | default = "zstd" ] + +# Defines the search data encoding/compression protocol. +# Options: none, gzip, lz4-64k, lz4-256k, lz4-1M, lz4, snappy, zstd, s2 +[search_encoding: | default = "snappy"] + +# When a span is written to the WAL it adjusts the start and end times of the block it is written to. +# This block start and end time range is then used when choosing blocks for search. +# This is also used for querying traces by ID when the start and end parameters are specified. To prevent spans too far +# in the past or future from impacting the block start and end times we use this configuration option. +# This option only allows spans that occur within the configured duration to adjust the block start and +# end times. +# This can result in trace not being found if the trace falls outside the slack configuration value as the +# start and end times of the block will not be updated in this case. 
+[ingestion_time_range_slack: | default = unset] + +# WAL file format version +# Options: v2, vParquet, vParquet2, vParquet3 +[version: | default = "vParquet3"] +``` + ## Overrides Tempo provides an overrides module for users to set global or per-tenant override settings. @@ -1469,7 +1693,7 @@ overrides: [per_tenant_override_config: | default = ""] # How frequent tenant-specific overrides are read from the configuration file. - [per_tenant_override_period: | default = 10s] + [per_tenant_override_period: | default = 10s] # User-configurable overrides configuration user_configurable_overrides: