Skip to content

Commit

Permalink
Merge pull request #398 from grafana/remove-chunks-storage-query-sharding
Browse files Browse the repository at this point in the history

Removed chunks storage query sharding config support
  • Loading branch information
pracucci committed Sep 20, 2021
2 parents c030037 + 67d8399 commit 41a44af
Show file tree
Hide file tree
Showing 4 changed files with 12 additions and 50 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@
* [CHANGE] Changed default `job_names` for query-frontend, query-scheduler and querier to match custom deployments too. #376
* [CHANGE] Increase the rules per group and rule groups limits on different tiers. #396
* [CHANGE] Removed `max_samples_per_query` limit, since it only works with chunks and only when using `-distributor.shard-by-all-labels=false`. #397
* [CHANGE] Removed chunks storage query sharding config support. The following config options have been removed: #398
* `_config` > `queryFrontend` > `shard_factor`
* `_config` > `queryFrontend` > `sharded_queries_enabled`
* `_config` > `queryFrontend` > `query_split_factor`
* [ENHANCEMENT] Add overrides config to compactor. This allows setting retention configs per user. #386
* [ENHANCEMENT] cortex-mixin: Make `cluster_namespace_deployment:kube_pod_container_resource_requests_{cpu_cores,memory_bytes}:sum` backwards compatible with `kube-state-metrics` v2.0.0. #317
* [ENHANCEMENT] Cortex-mixin: Include `cortex-gw-internal` naming variation in default `gateway` job names. #328
Expand Down
19 changes: 2 additions & 17 deletions cortex/config.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -35,28 +35,13 @@
test_exporter_start_time: error 'must specify test exporter start time',
test_exporter_user_id: error 'must specify test exporter used id',

// The expectation is that if sharding is enabled, we can force more (smaller)
// queries on the queriers. However this can't be extended too far because most queries
// concern recent (ingester) data, which isn't sharded. Therefore, we must strike a balance
// which allows us to process more sharded queries in parallel when requested, but not overload
// queriers during normal queries.
querier: {
replicas: if $._config.queryFrontend.sharded_queries_enabled then 12 else 6,
concurrency: if $._config.queryFrontend.sharded_queries_enabled then 16 else 8,
replicas: 6,
concurrency: 8,
},

queryFrontend: {
replicas: 2,
shard_factor: 16, // v10 schema shard factor
sharded_queries_enabled: false,
// Queries can technically be sharded an arbitrary number of times. Thus query_split_factor is used
// as a coefficient to multiply the frontend tenant queues by. The idea is that this
// yields a bit of headroom so tenant queues aren't underprovisioned. Therefore the split factor
// should represent the highest reasonable split factor for a query. If too low, a long query
// (i.e. 30d) with a high split factor (i.e. 5) would result in
// (day_splits * shard_factor * split_factor) or 30 * 16 * 5 = 2400 sharded queries, which may be
// more than the max queue size and thus would always error.
query_split_factor:: 3,
},

jaeger_agent_host: null,
Expand Down
8 changes: 2 additions & 6 deletions cortex/querier.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -47,12 +47,8 @@
$.jaeger_mixin +
$.util.readinessProbe +
container.withEnvMap($.querier_env_map) +
if $._config.queryFrontend.sharded_queries_enabled then
$.util.resourcesRequests('3', '12Gi') +
$.util.resourcesLimits(null, '24Gi')
else
$.util.resourcesRequests('1', '12Gi') +
$.util.resourcesLimits(null, '24Gi'),
$.util.resourcesRequests('1', '12Gi') +
$.util.resourcesLimits(null, '24Gi'),

local deployment = $.apps.v1.deployment,

Expand Down
31 changes: 4 additions & 27 deletions cortex/query-frontend.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -36,46 +36,23 @@
// Limit queries to 500 days; allow this to be overridden per-user.
'store.max-query-length': '12000h', // 500 Days
'runtime-config.file': '/etc/cortex/overrides.yaml',
} + (
if $._config.queryFrontend.sharded_queries_enabled then
{
'querier.parallelise-shardable-queries': 'true',

// in process tenant queues on frontends. We divide by the number of frontends; 2 in this case in order to apply the global limit in aggregate.
// basically base * shard_factor * query_split_factor / num_frontends
'querier.max-outstanding-requests-per-tenant': std.floor(200 * $._config.queryFrontend.shard_factor * $._config.queryFrontend.query_split_factor / $._config.queryFrontend.replicas),

'querier.query-ingesters-within': $._config.queryConfig['querier.query-ingesters-within'],
} + $._config.storageConfig
else {}
),
},

query_frontend_container::
container.new('query-frontend', $._images.query_frontend) +
container.withPorts($.util.defaultPorts) +
container.withArgsMixin($.util.mapToFlags($.query_frontend_args)) +
$.jaeger_mixin +
$.util.readinessProbe +
if $._config.queryFrontend.sharded_queries_enabled then
$.util.resourcesRequests('2', '2Gi') +
$.util.resourcesLimits(null, '6Gi') +
container.withEnvMap({
JAEGER_REPORTER_MAX_QUEUE_SIZE: '5000',
})
else
$.util.resourcesRequests('2', '600Mi') +
$.util.resourcesLimits(null, '1200Mi'),
$.util.resourcesRequests('2', '600Mi') +
$.util.resourcesLimits(null, '1200Mi'),

local deployment = $.apps.v1.deployment,

newQueryFrontendDeployment(name, container)::
deployment.new(name, $._config.queryFrontend.replicas, [container]) +
$.util.configVolumeMount($._config.overrides_configmap, '/etc/cortex') +
$.util.antiAffinity +
// inject storage schema in order to know what/how to shard
if $._config.queryFrontend.sharded_queries_enabled then
$.storage_config_mixin
else {},
$.util.antiAffinity,

query_frontend_deployment: self.newQueryFrontendDeployment('query-frontend', $.query_frontend_container),

Expand Down

0 comments on commit 41a44af

Please sign in to comment.