Skip to content

Commit

Permalink
Merge pull request #398 from grafana/remove-chunks-storage-query-sharding
Browse files Browse the repository at this point in the history

Removed chunks storage query sharding config support
  • Loading branch information
pracucci committed Sep 20, 2021
2 parents c030037 + 67d8399 commit 41a44af
Show file tree
Hide file tree
Showing 4 changed files with 12 additions and 50 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@
* [CHANGE] Changed default `job_names` for query-frontend, query-scheduler and querier to match custom deployments too. #376
* [CHANGE] Increase the rules per group and rule groups limits on different tiers. #396
* [CHANGE] Removed `max_samples_per_query` limit, since it only works with chunks and only when using `-distributor.shard-by-all-labels=false`. #397
* [CHANGE] Removed chunks storage query sharding config support. The following config options have been removed: #398
* `_config` > `queryFrontend` > `shard_factor`
* `_config` > `queryFrontend` > `sharded_queries_enabled`
* `_config` > `queryFrontend` > `query_split_factor`
* [ENHANCEMENT] Add overrides config to compactor. This allows setting retention configs per user. #386
* [ENHANCEMENT] cortex-mixin: Make `cluster_namespace_deployment:kube_pod_container_resource_requests_{cpu_cores,memory_bytes}:sum` backwards compatible with `kube-state-metrics` v2.0.0. #317
* [ENHANCEMENT] Cortex-mixin: Include `cortex-gw-internal` naming variation in default `gateway` job names. #328
Expand Down
19 changes: 2 additions & 17 deletions cortex/config.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -35,28 +35,13 @@
test_exporter_start_time: error 'must specify test exporter start time',
test_exporter_user_id: error 'must specify test exporter used id',

// The expectation is that if sharding is enabled, we can force more (smaller)
// queries on the queriers. However this can't be extended too far because most queries
// concern recent (ingester) data, which isn't sharded. Therefore, we must strike a balance
// which allows us to process more sharded queries in parallel when requested, but not overload
// queriers during normal queries.
querier: {
replicas: if $._config.queryFrontend.sharded_queries_enabled then 12 else 6,
concurrency: if $._config.queryFrontend.sharded_queries_enabled then 16 else 8,
replicas: 6,
concurrency: 8,
},

queryFrontend: {
replicas: 2,
shard_factor: 16, // v10 schema shard factor
sharded_queries_enabled: false,
// Queries can technically be sharded an arbitrary number of times. Thus query_split_factor is used
// as a coefficient to multiply the frontend tenant queues by. The idea is that this
// yields a bit of headroom so tenant queues aren't underprovisioned. Therefore the split factor
// should represent the highest reasonable split factor for a query. If too low, a long query
// (i.e. 30d) with a high split factor (i.e. 5) would result in
// (day_splits * shard_factor * split_factor) or 30 * 16 * 5 = 2400 sharded queries, which may be
// more than the max queue size and thus would always error.
query_split_factor:: 3,
},

jaeger_agent_host: null,
Expand Down
8 changes: 2 additions & 6 deletions cortex/querier.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -47,12 +47,8 @@
$.jaeger_mixin +
$.util.readinessProbe +
container.withEnvMap($.querier_env_map) +
if $._config.queryFrontend.sharded_queries_enabled then
$.util.resourcesRequests('3', '12Gi') +
$.util.resourcesLimits(null, '24Gi')
else
$.util.resourcesRequests('1', '12Gi') +
$.util.resourcesLimits(null, '24Gi'),
$.util.resourcesRequests('1', '12Gi') +
$.util.resourcesLimits(null, '24Gi'),

local deployment = $.apps.v1.deployment,

Expand Down
31 changes: 4 additions & 27 deletions cortex/query-frontend.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -36,46 +36,23 @@
// Limit queries to 500 days; allow this to be overridden per-user.
'store.max-query-length': '12000h', // 500 Days
'runtime-config.file': '/etc/cortex/overrides.yaml',
} + (
if $._config.queryFrontend.sharded_queries_enabled then
{
'querier.parallelise-shardable-queries': 'true',

// in process tenant queues on frontends. We divide by the number of frontends; 2 in this case in order to apply the global limit in aggregate.
// basically base * shard_factor * query_split_factor / num_frontends
'querier.max-outstanding-requests-per-tenant': std.floor(200 * $._config.queryFrontend.shard_factor * $._config.queryFrontend.query_split_factor / $._config.queryFrontend.replicas),

'querier.query-ingesters-within': $._config.queryConfig['querier.query-ingesters-within'],
} + $._config.storageConfig
else {}
),
},

query_frontend_container::
container.new('query-frontend', $._images.query_frontend) +
container.withPorts($.util.defaultPorts) +
container.withArgsMixin($.util.mapToFlags($.query_frontend_args)) +
$.jaeger_mixin +
$.util.readinessProbe +
if $._config.queryFrontend.sharded_queries_enabled then
$.util.resourcesRequests('2', '2Gi') +
$.util.resourcesLimits(null, '6Gi') +
container.withEnvMap({
JAEGER_REPORTER_MAX_QUEUE_SIZE: '5000',
})
else
$.util.resourcesRequests('2', '600Mi') +
$.util.resourcesLimits(null, '1200Mi'),
$.util.resourcesRequests('2', '600Mi') +
$.util.resourcesLimits(null, '1200Mi'),

local deployment = $.apps.v1.deployment,

newQueryFrontendDeployment(name, container)::
deployment.new(name, $._config.queryFrontend.replicas, [container]) +
$.util.configVolumeMount($._config.overrides_configmap, '/etc/cortex') +
$.util.antiAffinity +
// inject storage schema in order to know what/how to shard
if $._config.queryFrontend.sharded_queries_enabled then
$.storage_config_mixin
else {},
$.util.antiAffinity,

query_frontend_deployment: self.newQueryFrontendDeployment('query-frontend', $.query_frontend_container),

Expand Down

0 comments on commit 41a44af

Please sign in to comment.