Skip to content

Commit

Permalink
Make results cache TTL configurable and settable per tenant (#4385)
Browse files Browse the repository at this point in the history
* Make results cache ttl configurable.

Signed-off-by: Peter Štibraný <pstibrany@gmail.com>

* Update CHANGELOG.md

Signed-off-by: Peter Štibraný <pstibrany@gmail.com>

* Move new limits close to frontend limits.

Signed-off-by: Peter Štibraný <pstibrany@gmail.com>

* Enhance description on OOO flag.

Signed-off-by: Peter Štibraný <pstibrany@gmail.com>

* Fix tests.

Signed-off-by: Peter Štibraný <pstibrany@gmail.com>

---------

Signed-off-by: Peter Štibraný <pstibrany@gmail.com>
  • Loading branch information
pstibrany committed Mar 9, 2023
1 parent 5fb813d commit 8c6cb52
Show file tree
Hide file tree
Showing 10 changed files with 121 additions and 44 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
* [ENHANCEMENT] Query-frontend and ruler: add experimental, more performant protobuf internal query result response format enabled with `-ruler.query-frontend.query-result-response-format=protobuf`. #4331
* [ENHANCEMENT] Ruler: increased tolerance for missed iterations on alerts, reducing the chances of flapping firing alerts during ruler restarts. #4432
* [ENHANCEMENT] Querier and store-gateway: optimized `.*` and `.+` regular expression label matchers. #4432
* [ENHANCEMENT] Query-frontend: results cache TTL is now configurable by using `-query-frontend.results-cache-ttl` and `-query-frontend.results-cache-ttl-for-out-of-order-time-window` options. These values can also be specified per tenant. Default values are unchanged (7 days and 10 minutes respectively). #4385
* [BUGFIX] Querier: Streaming remote read will now continue to return multiple chunks per frame after the first frame. #4423

### Mixin
Expand Down
24 changes: 23 additions & 1 deletion cmd/mimir/config-descriptor.json
Original file line number Diff line number Diff line change
Expand Up @@ -3078,7 +3078,7 @@
"kind": "field",
"name": "out_of_order_time_window",
"required": false,
"desc": "Non-zero value enables out-of-order support for most recent samples that are within the time window in relation to the TSDB's maximum time, i.e., within [db.maxTime-timeWindow, db.maxTime]). The ingester will need more memory as a factor of rate of out-of-order samples being ingested and the number of series that are getting out-of-order samples. A lower TTL of 10 minutes will be set for the query cache entries that overlap with this window.",
"desc": "Non-zero value enables out-of-order support for most recent samples that are within the time window in relation to the TSDB's maximum time, i.e., within [db.maxTime-timeWindow, db.maxTime]). The ingester will need more memory as a factor of rate of out-of-order samples being ingested and the number of series that are getting out-of-order samples. If query falls into this window, cached results will use value from -query-frontend.results-cache-ttl-for-out-of-order-time-window option to specify TTL for resulting cache entry.",
"fieldValue": null,
"fieldDefaultValue": 0,
"fieldFlag": "ingester.out-of-order-time-window",
Expand Down Expand Up @@ -3239,6 +3239,28 @@
"fieldFlag": "query-frontend.max-total-query-length",
"fieldType": "duration"
},
{
"kind": "field",
"name": "results_cache_ttl",
"required": false,
"desc": "Time to live duration for cached query results. If query falls into out-of-order time window, -query-frontend.results-cache-ttl-for-out-of-order-time-window is used instead.",
"fieldValue": null,
"fieldDefaultValue": 604800000000000,
"fieldFlag": "query-frontend.results-cache-ttl",
"fieldType": "duration",
"fieldCategory": "experimental"
},
{
"kind": "field",
"name": "results_cache_ttl_for_out_of_order_time_window",
"required": false,
"desc": "Time to live duration for cached query results if query falls into out-of-order time window. This is lower than -query-frontend.results-cache-ttl so that incoming out-of-order samples are returned in the query results sooner.",
"fieldValue": null,
"fieldDefaultValue": 600000000000,
"fieldFlag": "query-frontend.results-cache-ttl-for-out-of-order-time-window",
"fieldType": "duration",
"fieldCategory": "experimental"
},
{
"kind": "field",
"name": "cardinality_analysis_enabled",
Expand Down
6 changes: 5 additions & 1 deletion cmd/mimir/help-all.txt.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -1184,7 +1184,7 @@ Usage of ./cmd/mimir/mimir:
-ingester.native-histograms-ingestion-enabled
[experimental] Enable ingestion of native histogram samples. If false, native histogram samples are ignored without an error.
-ingester.out-of-order-time-window duration
[experimental] Non-zero value enables out-of-order support for most recent samples that are within the time window in relation to the TSDB's maximum time, i.e., within [db.maxTime-timeWindow, db.maxTime]). The ingester will need more memory as a factor of rate of out-of-order samples being ingested and the number of series that are getting out-of-order samples. A lower TTL of 10 minutes will be set for the query cache entries that overlap with this window.
[experimental] Non-zero value enables out-of-order support for most recent samples that are within the time window in relation to the TSDB's maximum time, i.e., within [db.maxTime-timeWindow, db.maxTime]). The ingester will need more memory as a factor of rate of out-of-order samples being ingested and the number of series that are getting out-of-order samples. If query falls into this window, cached results will use value from -query-frontend.results-cache-ttl-for-out-of-order-time-window option to specify TTL for resulting cache entry.
-ingester.rate-update-period duration
Period with which to update the per-tenant ingestion rates. (default 15s)
-ingester.ring.consul.acl-token string
Expand Down Expand Up @@ -1607,6 +1607,10 @@ Usage of ./cmd/mimir/mimir:
The amount of shards to use when doing parallelisation via query sharding by tenant. 0 to disable query sharding for tenant. Query sharding implementation will adjust the number of query shards based on compactor shards. This allows querier to not search the blocks which cannot possibly have the series for given query shard. (default 16)
-query-frontend.query-stats-enabled
False to disable query statistics tracking. When enabled, a message with some statistics is logged for every query. (default true)
-query-frontend.results-cache-ttl duration
[experimental] Time to live duration for cached query results. If query falls into out-of-order time window, -query-frontend.results-cache-ttl-for-out-of-order-time-window is used instead. (default 1w)
-query-frontend.results-cache-ttl-for-out-of-order-time-window duration
[experimental] Time to live duration for cached query results if query falls into out-of-order time window. This is lower than -query-frontend.results-cache-ttl so that incoming out-of-order samples are returned in the query results sooner. (default 10m)
-query-frontend.results-cache.backend string
Backend for query-frontend results cache, if not empty. Supported values: memcached, redis.
-query-frontend.results-cache.compression string
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ The following features are currently experimental:
- Protobuf internal query result payload format
- `-query-frontend.query-result-response-format=protobuf`
- `-ruler.query-frontend.query-result-response-format=protobuf`
- Per-tenant results cache TTL (`-query-frontend.results-cache-ttl`, `-query-frontend.results-cache-ttl-for-out-of-order-time-window`)

## Deprecated features

Expand Down
21 changes: 18 additions & 3 deletions docs/sources/mimir/references/configuration-parameters/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -2596,9 +2596,10 @@ The `limits` block configures default and per-tenant limits imposed by component
# samples that are within the time window in relation to the TSDB's maximum
# time, i.e., within [db.maxTime-timeWindow, db.maxTime]). The ingester will
# need more memory as a factor of rate of out-of-order samples being ingested
# and the number of series that are getting out-of-order samples. A lower TTL of
# 10 minutes will be set for the query cache entries that overlap with this
# window.
# and the number of series that are getting out-of-order samples. If query falls
# into this window, cached results will use value from
# -query-frontend.results-cache-ttl-for-out-of-order-time-window option to
# specify TTL for resulting cache entry.
# CLI flag: -ingester.out-of-order-time-window
[out_of_order_time_window: <duration> | default = 0s]

Expand Down Expand Up @@ -2701,6 +2702,20 @@ The `limits` block configures default and per-tenant limits imposed by component
# CLI flag: -query-frontend.max-total-query-length
[max_total_query_length: <duration> | default = 0s]

# (experimental) Time to live duration for cached query results. If query falls
# into out-of-order time window,
# -query-frontend.results-cache-ttl-for-out-of-order-time-window is used
# instead.
# CLI flag: -query-frontend.results-cache-ttl
[results_cache_ttl: <duration> | default = 1w]

# (experimental) Time to live duration for cached query results if query falls
# into out-of-order time window. This is lower than
# -query-frontend.results-cache-ttl so that incoming out-of-order samples are
# returned in the query results sooner.
# CLI flag: -query-frontend.results-cache-ttl-for-out-of-order-time-window
[results_cache_ttl_for_out_of_order_time_window: <duration> | default = 10m]

# Enables endpoints used for cardinality analysis.
# CLI flag: -querier.cardinality-analysis-enabled
[cardinality_analysis_enabled: <boolean> | default = false]
Expand Down
7 changes: 7 additions & 0 deletions pkg/frontend/querymiddleware/limits.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,13 @@ type Limits interface {

// NativeHistogramsIngestionEnabled returns whether to ingest native histograms in the ingester
NativeHistogramsIngestionEnabled(userID string) bool

// ResultsCacheTTL returns the TTL for cached results for a query that does not fall into the
// out-of-order window, or if out-of-order ingestion is disabled.
ResultsCacheTTL(userID string) time.Duration

// ResultsCacheTTLForOutOfOrderTimeWindow returns the TTL for cached results for a query that falls into the out-of-order ingestion window.
ResultsCacheTTLForOutOfOrderTimeWindow(userID string) time.Duration
}

type limitsMiddleware struct {
Expand Down
10 changes: 10 additions & 0 deletions pkg/frontend/querymiddleware/limits_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,8 @@ type mockLimits struct {
outOfOrderTimeWindow time.Duration
creationGracePeriod time.Duration
nativeHistogramsIngestionEnabled bool
resultsCacheTTL time.Duration
resultsCacheOutOfOrderWindowTTL time.Duration
}

func (m mockLimits) MaxQueryLookback(string) time.Duration {
Expand Down Expand Up @@ -343,6 +345,14 @@ func (m mockLimits) OutOfOrderTimeWindow(userID string) time.Duration {
return m.outOfOrderTimeWindow
}

// ResultsCacheTTL returns the fixed results-cache TTL configured on this mock.
func (m mockLimits) ResultsCacheTTL(string) time.Duration {
	return m.resultsCacheTTL
}

// ResultsCacheTTLForOutOfOrderTimeWindow returns the fixed TTL configured on
// this mock for cache entries that overlap the out-of-order ingestion window.
func (m mockLimits) ResultsCacheTTLForOutOfOrderTimeWindow(string) time.Duration {
	return m.resultsCacheOutOfOrderWindowTTL
}

// CreationGracePeriod returns the fixed creation grace period configured on this mock.
func (m mockLimits) CreationGracePeriod(string) time.Duration {
	return m.creationGracePeriod
}
Expand Down
11 changes: 3 additions & 8 deletions pkg/frontend/querymiddleware/split_and_cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,6 @@ import (
)

const (
// Cache entries for 7 days. We're not disabling TTL because the backend client currently doesn't support it.
resultsCacheTTL = 7 * 24 * time.Hour
// resultsCacheLowerTTL is the smaller TTL used in specific cases. For example OOO queries.
resultsCacheLowerTTL = 10 * time.Minute
notCachableReasonUnalignedTimeRange = "unaligned-time-range"
notCachableReasonTooNew = "too-new"
notCachableReasonModifiersNotCachable = "has-modifiers"
Expand Down Expand Up @@ -382,11 +378,10 @@ func (s *splitAndCacheMiddleware) fetchCacheExtents(ctx context.Context, keys []

// storeCacheExtents stores the extents for given key in the cache.
func (s *splitAndCacheMiddleware) storeCacheExtents(key string, tenantIDs []string, extents []Extent) {
ttl := resultsCacheTTL
ttl := validation.SmallestPositiveNonZeroDurationPerTenant(tenantIDs, s.limits.ResultsCacheTTL)
lowerTTLWithinTimePeriod := validation.MaxDurationPerTenant(tenantIDs, s.limits.OutOfOrderTimeWindow)
if lowerTTLWithinTimePeriod > 0 && len(extents) > 0 &&
extents[len(extents)-1].End >= time.Now().Add(-lowerTTLWithinTimePeriod).UnixMilli() {
ttl = resultsCacheLowerTTL
if lowerTTLWithinTimePeriod > 0 && len(extents) > 0 && extents[len(extents)-1].End >= time.Now().Add(-lowerTTLWithinTimePeriod).UnixMilli() {
ttl = validation.SmallestPositiveNonZeroDurationPerTenant(tenantIDs, s.limits.ResultsCacheTTLForOutOfOrderTimeWindow)
}

buf, err := proto.Marshal(&CachedResponse{
Expand Down
23 changes: 14 additions & 9 deletions pkg/frontend/querymiddleware/split_and_cache_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@ import (
"github.com/grafana/mimir/pkg/util"
)

// TTL values used by the results-cache tests: the default entry TTL and the
// shorter TTL applied to entries overlapping the out-of-order time window.
const (
	resultsCacheTTL      = 24 * time.Hour
	resultsCacheLowerTTL = 10 * time.Minute
)

func TestSplitAndCacheMiddleware_SplitByInterval(t *testing.T) {
var (
dayOneStartTime = parseTimeRFC3339(t, "2021-10-14T00:00:00Z")
Expand Down Expand Up @@ -233,7 +236,7 @@ func TestSplitAndCacheMiddleware_ResultsCache(t *testing.T) {
true,
24*time.Hour,
false,
mockLimits{maxCacheFreshness: 10 * time.Minute},
mockLimits{maxCacheFreshness: 10 * time.Minute, resultsCacheTTL: resultsCacheTTL, resultsCacheOutOfOrderWindowTTL: resultsCacheLowerTTL},
newTestPrometheusCodec(),
cacheBackend,
ConstSplitter(day),
Expand Down Expand Up @@ -444,7 +447,7 @@ func TestSplitAndCacheMiddleware_ResultsCache_EnabledCachingOfStepUnalignedReque
true,
24*time.Hour,
true, // caching of step-unaligned requests is enabled in this test.
mockLimits{maxCacheFreshness: 10 * time.Minute},
mockLimits{maxCacheFreshness: 10 * time.Minute, resultsCacheTTL: resultsCacheTTL, resultsCacheOutOfOrderWindowTTL: resultsCacheLowerTTL},
newTestPrometheusCodec(),
cacheBackend,
ConstSplitter(day),
Expand Down Expand Up @@ -600,7 +603,7 @@ func TestSplitAndCacheMiddleware_ResultsCache_ShouldNotCacheRequestEarlierThanMa
true,
24*time.Hour,
false,
mockLimits{maxCacheFreshness: maxCacheFreshness},
mockLimits{maxCacheFreshness: maxCacheFreshness, resultsCacheTTL: resultsCacheTTL, resultsCacheOutOfOrderWindowTTL: resultsCacheLowerTTL},
newTestPrometheusCodec(),
cacheBackend,
cacheSplitter,
Expand Down Expand Up @@ -1048,7 +1051,7 @@ func TestSplitAndCacheMiddleware_ResultsCache_ExtentsEdgeCases(t *testing.T) {
true,
24*time.Hour,
false,
mockLimits{},
mockLimits{resultsCacheTTL: resultsCacheTTL, resultsCacheOutOfOrderWindowTTL: resultsCacheLowerTTL},
newTestPrometheusCodec(),
cacheBackend,
cacheSplitter,
Expand Down Expand Up @@ -1093,7 +1096,7 @@ func TestSplitAndCacheMiddleware_StoreAndFetchCacheExtents(t *testing.T) {
true,
24*time.Hour,
false,
mockLimits{},
mockLimits{resultsCacheTTL: resultsCacheTTL, resultsCacheOutOfOrderWindowTTL: resultsCacheLowerTTL},
newTestPrometheusCodec(),
cacheBackend,
ConstSplitter(day),
Expand All @@ -1112,8 +1115,8 @@ func TestSplitAndCacheMiddleware_StoreAndFetchCacheExtents(t *testing.T) {
})

t.Run("fetchCacheExtents() should return a slice with the same number of input keys and some extends filled up on partial cache hit", func(t *testing.T) {
mw.storeCacheExtents("key-1", nil, []Extent{mkExtent(10, 20)})
mw.storeCacheExtents("key-3", nil, []Extent{mkExtent(20, 30), mkExtent(40, 50)})
mw.storeCacheExtents("key-1", []string{"tenant"}, []Extent{mkExtent(10, 20)})
mw.storeCacheExtents("key-3", []string{"tenant"}, []Extent{mkExtent(20, 30), mkExtent(40, 50)})

actual := mw.fetchCacheExtents(ctx, []string{"key-1", "key-2", "key-3"})
expected := [][]Extent{{mkExtent(10, 20)}, nil, {mkExtent(20, 30), mkExtent(40, 50)}}
Expand All @@ -1126,7 +1129,7 @@ func TestSplitAndCacheMiddleware_StoreAndFetchCacheExtents(t *testing.T) {
require.NoError(t, err)
cacheBackend.StoreAsync(map[string][]byte{cacheHashKey("key-1"): buf}, 0)

mw.storeCacheExtents("key-3", nil, []Extent{mkExtent(20, 30), mkExtent(40, 50)})
mw.storeCacheExtents("key-3", []string{"tenant"}, []Extent{mkExtent(20, 30), mkExtent(40, 50)})

actual := mw.fetchCacheExtents(ctx, []string{"key-1", "key-2", "key-3"})
expected := [][]Extent{nil, nil, {mkExtent(20, 30), mkExtent(40, 50)}}
Expand Down Expand Up @@ -1697,7 +1700,9 @@ func TestSplitAndCacheMiddlewareLowerTTL(t *testing.T) {
mcache := cache.NewMockCache()
m := splitAndCacheMiddleware{
limits: mockLimits{
outOfOrderTimeWindow: time.Hour,
outOfOrderTimeWindow: time.Hour,
resultsCacheTTL: resultsCacheTTL,
resultsCacheOutOfOrderWindowTTL: resultsCacheLowerTTL,
},
cache: mcache,
}
Expand Down
Loading

0 comments on commit 8c6cb52

Please sign in to comment.