From 668d5090b5669f1905d971d7cfd9e3d7f7954403 Mon Sep 17 00:00:00 2001
From: Charles Korn <charles.korn@grafana.com>
Date: Tue, 14 Nov 2023 11:04:17 +1100
Subject: [PATCH 1/6] Enable streaming chunks from store-gateways to queriers
 by default.

# Conflicts:
#	cmd/mimir/help-all.txt.tmpl
#	docs/sources/mimir/configure/about-versioning.md
#	pkg/querier/querier.go
---
 cmd/mimir/config-descriptor.json                            | 4 ++--
 cmd/mimir/help-all.txt.tmpl                                 | 4 ++--
 docs/sources/mimir/configure/about-versioning.md            | 4 +++-
 .../mimir/configure/configuration-parameters/index.md       | 6 +++---
 pkg/querier/querier.go                                      | 6 +++---
 5 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/cmd/mimir/config-descriptor.json b/cmd/mimir/config-descriptor.json
index fa2d5ee4b0e..c56ebc18253 100644
--- a/cmd/mimir/config-descriptor.json
+++ b/cmd/mimir/config-descriptor.json
@@ -1849,7 +1849,7 @@
           "required": false,
           "desc": "Request store-gateways stream chunks. Store-gateways will only respond with a stream of chunks if the target store-gateway supports this, and this preference will be ignored by store-gateways that do not support this.",
           "fieldValue": null,
-          "fieldDefaultValue": false,
+          "fieldDefaultValue": true,
           "fieldFlag": "querier.prefer-streaming-chunks-from-store-gateways",
           "fieldType": "boolean",
           "fieldCategory": "experimental"
@@ -1874,7 +1874,7 @@
           "fieldDefaultValue": 256,
           "fieldFlag": "querier.streaming-chunks-per-store-gateway-buffer-size",
           "fieldType": "int",
-          "fieldCategory": "experimental"
+          "fieldCategory": "advanced"
         },
         {
           "kind": "field",
diff --git a/cmd/mimir/help-all.txt.tmpl b/cmd/mimir/help-all.txt.tmpl
index deddd2ed984..46c52bd3df1 100644
--- a/cmd/mimir/help-all.txt.tmpl
+++ b/cmd/mimir/help-all.txt.tmpl
@@ -1760,7 +1760,7 @@ Usage of ./cmd/mimir/mimir:
   -querier.minimize-ingester-requests-hedging-delay duration
     	Delay before initiating requests to further ingesters when request minimization is enabled and the initially selected set of ingesters have not all responded. Ignored if -querier.minimize-ingester-requests is not enabled. (default 3s)
   -querier.prefer-streaming-chunks-from-store-gateways
-    	[experimental] Request store-gateways stream chunks. Store-gateways will only respond with a stream of chunks if the target store-gateway supports this, and this preference will be ignored by store-gateways that do not support this.
+    	[experimental] Request store-gateways stream chunks. Store-gateways will only respond with a stream of chunks if the target store-gateway supports this, and this preference will be ignored by store-gateways that do not support this. (default true)
   -querier.promql-engine string
     	[experimental] PromQL engine to use, either 'standard' or 'streaming' (default "standard")
   -querier.promql-experimental-functions-enabled
@@ -1838,7 +1838,7 @@ Usage of ./cmd/mimir/mimir:
   -querier.streaming-chunks-per-ingester-buffer-size uint
     	Number of series to buffer per ingester when streaming chunks from ingesters. (default 256)
   -querier.streaming-chunks-per-store-gateway-buffer-size uint
-    	[experimental] Number of series to buffer per store-gateway when streaming chunks from store-gateways. (default 256)
+    	Number of series to buffer per store-gateway when streaming chunks from store-gateways. (default 256)
   -querier.timeout duration
     	The timeout for a query. This config option should be set on query-frontend too when query sharding is enabled. This also applies to queries evaluated by the ruler (internally or remotely). (default 2m0s)
   -query-frontend.active-series-write-timeout duration
diff --git a/docs/sources/mimir/configure/about-versioning.md b/docs/sources/mimir/configure/about-versioning.md
index 409e1e14ac8..82357a17f09 100644
--- a/docs/sources/mimir/configure/about-versioning.md
+++ b/docs/sources/mimir/configure/about-versioning.md
@@ -124,7 +124,7 @@ The following features are currently experimental:
     - `-ingester.client.circuit-breaker.cooldown-period`
 - Querier
   - Use of Redis cache backend (`-blocks-storage.bucket-store.metadata-cache.backend=redis`)
-  - Streaming chunks from store-gateway to querier (`-querier.prefer-streaming-chunks-from-store-gateways`, `-querier.streaming-chunks-per-store-gateway-buffer-size`)
+  - Streaming chunks from store-gateway to querier (`-querier.prefer-streaming-chunks-from-store-gateways`)
   - Limiting queries based on the estimated number of chunks that will be used (`-querier.max-estimated-fetched-chunks-per-query-multiplier`)
   - Max concurrency for tenant federated queries (`-tenant-federation.max-concurrent`)
   - Maximum response size for active series queries (`-querier.active-series-results-max-size-bytes`)
@@ -200,3 +200,5 @@ The following features or configuration parameters are currently deprecated and
   - `-ingester.client.report-grpc-codes-in-instrumentation-label-enabled`
 - Mimirtool
   - the flag `--rule-files`
+- Querier
+  - the flag `-querier.prefer-streaming-chunks-from-store-gateways`
diff --git a/docs/sources/mimir/configure/configuration-parameters/index.md b/docs/sources/mimir/configure/configuration-parameters/index.md
index 03c2e14cb60..ab395ed3113 100644
--- a/docs/sources/mimir/configure/configuration-parameters/index.md
+++ b/docs/sources/mimir/configure/configuration-parameters/index.md
@@ -1310,15 +1310,15 @@ store_gateway_client:
 # respond with a stream of chunks if the target store-gateway supports this, and
 # this preference will be ignored by store-gateways that do not support this.
 # CLI flag: -querier.prefer-streaming-chunks-from-store-gateways
-[prefer_streaming_chunks_from_store_gateways: <boolean> | default = false]
+[prefer_streaming_chunks_from_store_gateways: <boolean> | default = true]
 
 # (advanced) Number of series to buffer per ingester when streaming chunks from
 # ingesters.
 # CLI flag: -querier.streaming-chunks-per-ingester-buffer-size
 [streaming_chunks_per_ingester_series_buffer_size: <int> | default = 256]
 
-# (experimental) Number of series to buffer per store-gateway when streaming
-# chunks from store-gateways.
+# (advanced) Number of series to buffer per store-gateway when streaming chunks
+# from store-gateways.
 # CLI flag: -querier.streaming-chunks-per-store-gateway-buffer-size
 [streaming_chunks_per_store_gateway_series_buffer_size: <int> | default = 256]
 
diff --git a/pkg/querier/querier.go b/pkg/querier/querier.go
index 762296b6e18..faa600b34b2 100644
--- a/pkg/querier/querier.go
+++ b/pkg/querier/querier.go
@@ -50,10 +50,10 @@ type Config struct {
 
 	ShuffleShardingIngestersEnabled bool `yaml:"shuffle_sharding_ingesters_enabled" category:"advanced"`
 
-	PreferStreamingChunksFromStoreGateways         bool          `yaml:"prefer_streaming_chunks_from_store_gateways" category:"experimental"`
+	PreferStreamingChunksFromStoreGateways         bool          `yaml:"prefer_streaming_chunks_from_store_gateways" category:"experimental"` // Enabled by default as of Mimir 2.13, remove altogether in 2.14.
 	PreferAvailabilityZone                         string        `yaml:"prefer_availability_zone" category:"experimental" doc:"hidden"`
 	StreamingChunksPerIngesterSeriesBufferSize     uint64        `yaml:"streaming_chunks_per_ingester_series_buffer_size" category:"advanced"`
-	StreamingChunksPerStoreGatewaySeriesBufferSize uint64        `yaml:"streaming_chunks_per_store_gateway_series_buffer_size" category:"experimental"`
+	StreamingChunksPerStoreGatewaySeriesBufferSize uint64        `yaml:"streaming_chunks_per_store_gateway_series_buffer_size" category:"advanced"`
 	MinimizeIngesterRequests                       bool          `yaml:"minimize_ingester_requests" category:"advanced"`
 	MinimiseIngesterRequestsHedgingDelay           time.Duration `yaml:"minimize_ingester_requests_hedging_delay" category:"advanced"`
 
@@ -77,7 +77,7 @@ func (cfg *Config) RegisterFlags(f *flag.FlagSet) {
 	f.DurationVar(&cfg.MaxQueryIntoFuture, "querier.max-query-into-future", 10*time.Minute, "Maximum duration into the future you can query. 0 to disable.")
 	f.DurationVar(&cfg.QueryStoreAfter, queryStoreAfterFlag, 12*time.Hour, "The time after which a metric should be queried from storage and not just ingesters. 0 means all queries are sent to store. If this option is enabled, the time range of the query sent to the store-gateway will be manipulated to ensure the query end is not more recent than 'now - query-store-after'.")
 	f.BoolVar(&cfg.ShuffleShardingIngestersEnabled, "querier.shuffle-sharding-ingesters-enabled", true, fmt.Sprintf("Fetch in-memory series from the minimum set of required ingesters, selecting only ingesters which may have received series since -%s. If this setting is false or -%s is '0', queriers always query all ingesters (ingesters shuffle sharding on read path is disabled).", validation.QueryIngestersWithinFlag, validation.QueryIngestersWithinFlag))
-	f.BoolVar(&cfg.PreferStreamingChunksFromStoreGateways, "querier.prefer-streaming-chunks-from-store-gateways", false, "Request store-gateways stream chunks. Store-gateways will only respond with a stream of chunks if the target store-gateway supports this, and this preference will be ignored by store-gateways that do not support this.")
+	f.BoolVar(&cfg.PreferStreamingChunksFromStoreGateways, "querier.prefer-streaming-chunks-from-store-gateways", true, "Request store-gateways stream chunks. Store-gateways will only respond with a stream of chunks if the target store-gateway supports this, and this preference will be ignored by store-gateways that do not support this.")
 	f.StringVar(&cfg.PreferAvailabilityZone, "querier.prefer-availability-zone", "", "Preferred availability zone to query ingesters from when using the ingest storage.")
 
 	const minimiseIngesterRequestsFlagName = "querier.minimize-ingester-requests"

From 20278ab10e5a9f7619d55e66af310d50f33c3f71 Mon Sep 17 00:00:00 2001
From: Charles Korn <charles.korn@grafana.com>
Date: Tue, 14 Nov 2023 11:07:08 +1100
Subject: [PATCH 2/6] Add changelog entry.

# Conflicts:
#	CHANGELOG.md
---
 CHANGELOG.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 180b4934dfc..ca370c81083 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,6 +11,7 @@
   * Query results caching should be more stable as all equivalent queries receive the same cache key, but there may be cache churn on first deploy with the updated format
   * Query blocking can no longer be circumvented with an equivalent query in a different format; see [Configure queries to block](https://grafana.com/docs/mimir/latest/configure/configure-blocked-queries/)
 * [CHANGE] Query-frontend: stop using `-validation.create-grace-period` to clamp how far into the future a query can span.
+* [CHANGE] Store-gateway / querier: enable streaming chunks from store-gateways to queriers by default. #6646
 * [FEATURE] Continuous-test: now runable as a module with `mimir -target=continuous-test`. #7747
 * [FEATURE] Store-gateway: Allow specific tenants to be enabled or disabled via `-store-gateway.enabled-tenants` or `-store-gateway.disabled-tenants` CLI flags or their corresponding YAML settings. #7653
 * [FEATURE] New `-<prefix>.s3.bucket-lookup-type` flag configures lookup style type, used to access bucket in s3 compatible providers. #7684

From 648f09cdd75a2422b090029f8792f7c29deb254a Mon Sep 17 00:00:00 2001
From: Charles Korn <charles.korn@grafana.com>
Date: Tue, 14 Nov 2023 14:17:45 +1100
Subject: [PATCH 3/6] Fix failing
 TestQuerierWithBlocksStorageOnMissingBlocksFromStorage test with
 store-gateway chunks streaming enabled.

---
 integration/querier_test.go | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/integration/querier_test.go b/integration/querier_test.go
index a1cd9c57d34..73d96a9c112 100644
--- a/integration/querier_test.go
+++ b/integration/querier_test.go
@@ -15,7 +15,6 @@ import (
 	"github.com/grafana/e2e"
 	e2ecache "github.com/grafana/e2e/cache"
 	e2edb "github.com/grafana/e2e/db"
-	promv1 "github.com/prometheus/client_golang/api/prometheus/v1"
 	"github.com/prometheus/common/model"
 	"github.com/prometheus/prometheus/model/labels"
 	"github.com/prometheus/prometheus/prompb"
@@ -881,8 +880,7 @@ func TestQuerierWithBlocksStorageOnMissingBlocksFromStorage(t *testing.T) {
 	// missing from the storage.
 	_, err = c.Query(series1Name, series1Timestamp)
 	require.Error(t, err)
-	assert.Contains(t, err.Error(), "500")
-	assert.Contains(t, err.(*promv1.Error).Detail, "failed to fetch some blocks")
+	assert.Contains(t, err.Error(), "get range reader: The specified key does not exist")
 
 	// We expect this to still be queryable as it was not in the cleared storage
 	_, err = c.Query(series2Name, series2Timestamp)

From 81b55d1ddabf3055979435f9bc506a4d9d0768f1 Mon Sep 17 00:00:00 2001
From: Charles Korn <charles.korn@grafana.com>
Date: Wed, 15 Nov 2023 10:34:32 +1100
Subject: [PATCH 4/6] Partially fix cache-related assertions in
 TestQuerierWithBlocksStorageRunningInSingleBinaryMode

---
 integration/querier_test.go | 26 ++++++++++++++++----------
 1 file changed, 16 insertions(+), 10 deletions(-)

diff --git a/integration/querier_test.go b/integration/querier_test.go
index 73d96a9c112..4e25d5ee0f2 100644
--- a/integration/querier_test.go
+++ b/integration/querier_test.go
@@ -445,12 +445,14 @@ func TestQuerierWithBlocksStorageRunningInSingleBinaryMode(t *testing.T) {
 			require.NoError(t, cluster.WaitSumMetrics(e2e.GreaterOrEqual(float64(shippedBlocks*seriesReplicationFactor)), "cortex_bucket_store_blocks_loaded"))
 
 			var expectedCacheRequests int
+			var expectedCacheHits int
+			var expectedMemcachedOps int
 
 			// Query back the series (1 only in the storage, 1 only in the ingesters, 1 on both).
 			// For every series in a block we expect
 			// - 1 cache request for expanded postings
 			// - 1 cache request for postings for each matcher (provided matchers are simple MatchEQ matchers)
-			// - 1 cache request for each series
+			// - 2 cache requests for each series: one while sending label values and another while streaming chunks. The second request should be a cache hit.
 			//
 			// If the series does not exist in the block, then we return early after checking expanded postings and
 			// subsequently the index on disk.
@@ -458,25 +460,28 @@ func TestQuerierWithBlocksStorageRunningInSingleBinaryMode(t *testing.T) {
 			require.NoError(t, err)
 			require.Equal(t, model.ValVector, result.Type())
 			assert.Equal(t, expectedVector1, result.(model.Vector))
-			expectedCacheRequests += seriesReplicationFactor * 3 // expanded postings, postings, series
+			expectedCacheRequests += seriesReplicationFactor * 4        // expanded postings, postings, series (retrieving labels and then while streaming chunks)
+			expectedCacheHits += seriesReplicationFactor                // one for each series (for chunks)
+			expectedMemcachedOps += seriesReplicationFactor * (3*2 + 1) // Same reasoning as for expectedCacheRequests, but this also includes a set for each get that is not a hit
 
 			result, err = c.Query(series2Name, series2Timestamp)
 			require.NoError(t, err)
 			require.Equal(t, model.ValVector, result.Type())
 			assert.Equal(t, expectedVector2, result.(model.Vector))
-			expectedCacheRequests += seriesReplicationFactor*3 + seriesReplicationFactor // expanded postings, postings, series for 1 time range; only expaned postings for another
+			expectedCacheRequests += seriesReplicationFactor*4 + seriesReplicationFactor // expanded postings, postings, series for 1 time range; only expanded postings for another
+			expectedCacheHits += seriesReplicationFactor                                 // one for each series
+			expectedMemcachedOps += seriesReplicationFactor * (4 + 3 + 1)                // Same reasoning as for expectedCacheRequests, but this also includes a set for each get that is not a hit
 
 			result, err = c.Query(series3Name, series3Timestamp)
 			require.NoError(t, err)
 			require.Equal(t, model.ValVector, result.Type())
 			assert.Equal(t, expectedVector3, result.(model.Vector))
 			expectedCacheRequests += seriesReplicationFactor * 2 // expanded postings for 2 time ranges
-
-			expectedMemcachedOps := 2 * expectedCacheRequests // Same reasoning as for expectedCacheRequests, but this also includes a set for each get
+			expectedMemcachedOps += seriesReplicationFactor * 2
 
 			// Check the in-memory index cache metrics (in the store-gateway).
 			require.NoError(t, cluster.WaitSumMetrics(e2e.Equals(float64(expectedCacheRequests)), "thanos_store_index_cache_requests_total"))
-			require.NoError(t, cluster.WaitSumMetrics(e2e.Equals(0), "thanos_store_index_cache_hits_total")) // no cache hit cause the cache was empty
+			require.NoError(t, cluster.WaitSumMetrics(e2e.Equals(float64(expectedCacheHits)), "thanos_store_index_cache_hits_total")) // no cache hit cause the cache was empty
 			if testCfg.indexCacheBackend == tsdb.IndexCacheBackendInMemory {
 				require.NoError(t, cluster.WaitSumMetrics(e2e.Equals(float64((2*2+2+3)*seriesReplicationFactor)), "thanos_store_index_cache_items"))             // 2 series both for postings and series cache, 2 expanded postings on one block, 3 on another one
 				require.NoError(t, cluster.WaitSumMetrics(e2e.Equals(float64((2*2+2+3)*seriesReplicationFactor)), "thanos_store_index_cache_items_added_total")) // 2 series both for postings and series cache, 2 expanded postings on one block, 3 on another one
@@ -486,17 +491,18 @@ func TestQuerierWithBlocksStorageRunningInSingleBinaryMode(t *testing.T) {
 
 			// Query back again the 1st series from storage. This time it should use the index cache.
 			// It should get a hit on expanded postings; this means that it will not request individual postings for matchers.
-			// It should get a hit on series.
+			// It should get two hits on series (once for the labels, and again for the chunks).
 			// We expect 3 cache requests and 3 cache hits.
 			result, err = c.Query(series1Name, series1Timestamp)
 			require.NoError(t, err)
 			require.Equal(t, model.ValVector, result.Type())
 			assert.Equal(t, expectedVector1, result.(model.Vector))
-			expectedCacheRequests += seriesReplicationFactor * 2 // expanded postings and series
-			expectedMemcachedOps += seriesReplicationFactor * 2  // there is no set after the gets this time
+			expectedCacheRequests += seriesReplicationFactor * 3 // expanded postings and series
+			expectedCacheHits += seriesReplicationFactor * 3
+			expectedMemcachedOps += seriesReplicationFactor * 2 // there is no set after the gets this time
 
 			require.NoError(t, cluster.WaitSumMetrics(e2e.Equals(float64(expectedCacheRequests)), "thanos_store_index_cache_requests_total"))
-			require.NoError(t, cluster.WaitSumMetrics(e2e.Equals(float64(2*seriesReplicationFactor)), "thanos_store_index_cache_hits_total")) // this time has used the index cache
+			require.NoError(t, cluster.WaitSumMetrics(e2e.Equals(float64(expectedCacheHits)), "thanos_store_index_cache_hits_total")) // this time has used the index cache
 
 			if testCfg.indexCacheBackend == tsdb.IndexCacheBackendInMemory {
 				require.NoError(t, cluster.WaitSumMetrics(e2e.Equals(float64((2*2+2+3)*seriesReplicationFactor)), "thanos_store_index_cache_items"))             // as before

From 0cfc31f0949ba981e9b8135382d461c53b75dc35 Mon Sep 17 00:00:00 2001
From: Charles Korn <charles.korn@grafana.com>
Date: Thu, 16 Nov 2023 15:07:40 +1100
Subject: [PATCH 5/6] Address PR feedback, and add more details to failing
 assertions

# Conflicts:
#	integration/querier_test.go
---
 integration/querier_test.go | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/integration/querier_test.go b/integration/querier_test.go
index 4e25d5ee0f2..f0e5d7d038d 100644
--- a/integration/querier_test.go
+++ b/integration/querier_test.go
@@ -460,17 +460,17 @@ func TestQuerierWithBlocksStorageRunningInSingleBinaryMode(t *testing.T) {
 			require.NoError(t, err)
 			require.Equal(t, model.ValVector, result.Type())
 			assert.Equal(t, expectedVector1, result.(model.Vector))
-			expectedCacheRequests += seriesReplicationFactor * 4        // expanded postings, postings, series (retrieving labels and then while streaming chunks)
-			expectedCacheHits += seriesReplicationFactor                // one for each series (for chunks)
-			expectedMemcachedOps += seriesReplicationFactor * (3*2 + 1) // Same reasoning as for expectedCacheRequests, but this also includes a set for each get that is not a hit
+			expectedCacheRequests += seriesReplicationFactor * 4                                  // expanded postings, postings, series (retrieving labels and then while streaming chunks)
+			expectedCacheHits += seriesReplicationFactor                                          // one for each series (while streaming chunks)
+			expectedMemcachedOps += (seriesReplicationFactor * 4) + (seriesReplicationFactor * 3) // Same reasoning as for expectedCacheRequests, but this also includes a set for each get that is not a hit
 
 			result, err = c.Query(series2Name, series2Timestamp)
 			require.NoError(t, err)
 			require.Equal(t, model.ValVector, result.Type())
 			assert.Equal(t, expectedVector2, result.(model.Vector))
-			expectedCacheRequests += seriesReplicationFactor*4 + seriesReplicationFactor // expanded postings, postings, series for 1 time range; only expanded postings for another
-			expectedCacheHits += seriesReplicationFactor                                 // one for each series
-			expectedMemcachedOps += seriesReplicationFactor * (4 + 3 + 1)                // Same reasoning as for expectedCacheRequests, but this also includes a set for each get that is not a hit
+			expectedCacheRequests += seriesReplicationFactor*4 + seriesReplicationFactor          // expanded postings, postings, series for 1 time range; only expanded postings for another
+			expectedCacheHits += seriesReplicationFactor                                          // one for each series (while streaming chunks)
+			expectedMemcachedOps += (seriesReplicationFactor * 4) + (seriesReplicationFactor * 3) // Same reasoning as for expectedCacheRequests, but this also includes a set for each get that is not a hit
 
 			result, err = c.Query(series3Name, series3Timestamp)
 			require.NoError(t, err)
@@ -480,18 +480,18 @@ func TestQuerierWithBlocksStorageRunningInSingleBinaryMode(t *testing.T) {
 			expectedMemcachedOps += seriesReplicationFactor * 2
 
 			// Check the in-memory index cache metrics (in the store-gateway).
-			require.NoError(t, cluster.WaitSumMetrics(e2e.Equals(float64(expectedCacheRequests)), "thanos_store_index_cache_requests_total"))
-			require.NoError(t, cluster.WaitSumMetrics(e2e.Equals(float64(expectedCacheHits)), "thanos_store_index_cache_hits_total")) // no cache hit cause the cache was empty
+			require.NoError(t, cluster.WaitSumMetrics(e2e.Equals(float64(expectedCacheRequests)), "thanos_store_index_cache_requests_total"), "expected %v requests", expectedCacheRequests)
+			require.NoError(t, cluster.WaitSumMetrics(e2e.Equals(float64(expectedCacheHits)), "thanos_store_index_cache_hits_total"), "expected %v hits", expectedCacheHits)
 			if testCfg.indexCacheBackend == tsdb.IndexCacheBackendInMemory {
 				require.NoError(t, cluster.WaitSumMetrics(e2e.Equals(float64((2*2+2+3)*seriesReplicationFactor)), "thanos_store_index_cache_items"))             // 2 series both for postings and series cache, 2 expanded postings on one block, 3 on another one
 				require.NoError(t, cluster.WaitSumMetrics(e2e.Equals(float64((2*2+2+3)*seriesReplicationFactor)), "thanos_store_index_cache_items_added_total")) // 2 series both for postings and series cache, 2 expanded postings on one block, 3 on another one
 			} else if testCfg.indexCacheBackend == tsdb.IndexCacheBackendMemcached {
-				require.NoError(t, cluster.WaitSumMetrics(e2e.Equals(float64(expectedMemcachedOps)), "thanos_cache_operations_total"))
+				require.NoError(t, cluster.WaitSumMetrics(e2e.Equals(float64(expectedMemcachedOps)), "thanos_cache_operations_total"), "expected %v operations", expectedMemcachedOps)
 			}
 
 			// Query back again the 1st series from storage. This time it should use the index cache.
 			// It should get a hit on expanded postings; this means that it will not request individual postings for matchers.
-			// It should get two hits on series (once for the labels, and again for the chunks).
+			// It should get two hits on series (once for the labels, and again for the labels when streaming chunks).
 			// We expect 3 cache requests and 3 cache hits.
 			result, err = c.Query(series1Name, series1Timestamp)
 			require.NoError(t, err)
@@ -501,14 +501,14 @@ func TestQuerierWithBlocksStorageRunningInSingleBinaryMode(t *testing.T) {
 			expectedCacheHits += seriesReplicationFactor * 3
 			expectedMemcachedOps += seriesReplicationFactor * 2 // there is no set after the gets this time
 
-			require.NoError(t, cluster.WaitSumMetrics(e2e.Equals(float64(expectedCacheRequests)), "thanos_store_index_cache_requests_total"))
-			require.NoError(t, cluster.WaitSumMetrics(e2e.Equals(float64(expectedCacheHits)), "thanos_store_index_cache_hits_total")) // this time has used the index cache
+			require.NoError(t, cluster.WaitSumMetrics(e2e.Equals(float64(expectedCacheRequests)), "thanos_store_index_cache_requests_total"), "expected %v requests", expectedCacheRequests)
+			require.NoError(t, cluster.WaitSumMetrics(e2e.Equals(float64(expectedCacheHits)), "thanos_store_index_cache_hits_total"), "expected %v hits", expectedCacheHits) // this time has used the index cache
 
 			if testCfg.indexCacheBackend == tsdb.IndexCacheBackendInMemory {
 				require.NoError(t, cluster.WaitSumMetrics(e2e.Equals(float64((2*2+2+3)*seriesReplicationFactor)), "thanos_store_index_cache_items"))             // as before
 				require.NoError(t, cluster.WaitSumMetrics(e2e.Equals(float64((2*2+2+3)*seriesReplicationFactor)), "thanos_store_index_cache_items_added_total")) // as before
 			} else if testCfg.indexCacheBackend == tsdb.IndexCacheBackendMemcached {
-				require.NoError(t, cluster.WaitSumMetrics(e2e.Equals(float64(expectedMemcachedOps)), "thanos_cache_operations_total"))
+				require.NoError(t, cluster.WaitSumMetrics(e2e.Equals(float64(expectedMemcachedOps)), "thanos_cache_operations_total"), "expected %v operations", expectedMemcachedOps)
 			}
 
 			// Query metadata.

From 91646edaa32fc0fdbc36ae91ea7018a1ff54ed51 Mon Sep 17 00:00:00 2001
From: Charles Korn <charles.korn@grafana.com>
Date: Tue, 28 May 2024 21:33:10 +1000
Subject: [PATCH 6/6] Update tests to reflect behaviour of chunks streaming
 with https://github.com/grafana/mimir/pull/8039 in place.

---
 integration/querier_test.go | 24 +++++++++++-------------
 1 file changed, 11 insertions(+), 13 deletions(-)

diff --git a/integration/querier_test.go b/integration/querier_test.go
index f0e5d7d038d..18554647509 100644
--- a/integration/querier_test.go
+++ b/integration/querier_test.go
@@ -452,7 +452,7 @@ func TestQuerierWithBlocksStorageRunningInSingleBinaryMode(t *testing.T) {
 			// For every series in a block we expect
 			// - 1 cache request for expanded postings
 			// - 1 cache request for postings for each matcher (provided matchers are simple MatchEQ matchers)
-			// - 2 cache requests for each series: one while sending label values and another while streaming chunks. The second request should be a cache hit.
+			// - 1 cache request for each series
 			//
 			// If the series does not exist in the block, then we return early after checking expanded postings and
 			// subsequently the index on disk.
@@ -460,24 +460,22 @@ func TestQuerierWithBlocksStorageRunningInSingleBinaryMode(t *testing.T) {
 			require.NoError(t, err)
 			require.Equal(t, model.ValVector, result.Type())
 			assert.Equal(t, expectedVector1, result.(model.Vector))
-			expectedCacheRequests += seriesReplicationFactor * 4                                  // expanded postings, postings, series (retrieving labels and then while streaming chunks)
-			expectedCacheHits += seriesReplicationFactor                                          // one for each series (while streaming chunks)
-			expectedMemcachedOps += (seriesReplicationFactor * 4) + (seriesReplicationFactor * 3) // Same reasoning as for expectedCacheRequests, but this also includes a set for each get that is not a hit
+			expectedCacheRequests += seriesReplicationFactor * 3                                  // expanded postings, postings, series
+			expectedMemcachedOps += (seriesReplicationFactor * 3) + (seriesReplicationFactor * 3) // Same reasoning as for expectedCacheRequests, but this also includes a set for each get that is not a hit
 
 			result, err = c.Query(series2Name, series2Timestamp)
 			require.NoError(t, err)
 			require.Equal(t, model.ValVector, result.Type())
 			assert.Equal(t, expectedVector2, result.(model.Vector))
-			expectedCacheRequests += seriesReplicationFactor*4 + seriesReplicationFactor          // expanded postings, postings, series for 1 time range; only expanded postings for another
-			expectedCacheHits += seriesReplicationFactor                                          // one for each series (while streaming chunks)
-			expectedMemcachedOps += (seriesReplicationFactor * 4) + (seriesReplicationFactor * 3) // Same reasoning as for expectedCacheRequests, but this also includes a set for each get that is not a hit
+			expectedCacheRequests += seriesReplicationFactor*3 + seriesReplicationFactor      // expanded postings, postings, series for 1 time range; only expanded postings for another
+			expectedMemcachedOps += 2 * (seriesReplicationFactor*3 + seriesReplicationFactor) // Same reasoning as for expectedCacheRequests, but this also includes a set for each get that is not a hit
 
 			result, err = c.Query(series3Name, series3Timestamp)
 			require.NoError(t, err)
 			require.Equal(t, model.ValVector, result.Type())
 			assert.Equal(t, expectedVector3, result.(model.Vector))
-			expectedCacheRequests += seriesReplicationFactor * 2 // expanded postings for 2 time ranges
-			expectedMemcachedOps += seriesReplicationFactor * 2
+			expectedCacheRequests += seriesReplicationFactor * 2                          // expanded postings for 2 time ranges
+			expectedMemcachedOps += seriesReplicationFactor*2 + seriesReplicationFactor*2 // Same reasoning as for expectedCacheRequests, but this also includes a set for each get that is not a hit
 
 			// Check the in-memory index cache metrics (in the store-gateway).
 			require.NoError(t, cluster.WaitSumMetrics(e2e.Equals(float64(expectedCacheRequests)), "thanos_store_index_cache_requests_total"), "expected %v requests", expectedCacheRequests)
@@ -491,14 +489,14 @@ func TestQuerierWithBlocksStorageRunningInSingleBinaryMode(t *testing.T) {
 
 			// Query back again the 1st series from storage. This time it should use the index cache.
 			// It should get a hit on expanded postings; this means that it will not request individual postings for matchers.
-			// It should get two hits on series (once for the labels, and again for the labels when streaming chunks).
-			// We expect 3 cache requests and 3 cache hits.
+			// It should get a hit on series.
+			// We expect 2 cache requests and 2 cache hits.
 			result, err = c.Query(series1Name, series1Timestamp)
 			require.NoError(t, err)
 			require.Equal(t, model.ValVector, result.Type())
 			assert.Equal(t, expectedVector1, result.(model.Vector))
-			expectedCacheRequests += seriesReplicationFactor * 3 // expanded postings and series
-			expectedCacheHits += seriesReplicationFactor * 3
+			expectedCacheRequests += seriesReplicationFactor * 2 // expanded postings and series
+			expectedCacheHits += seriesReplicationFactor * 2
 			expectedMemcachedOps += seriesReplicationFactor * 2 // there is no set after the gets this time
 
 			require.NoError(t, cluster.WaitSumMetrics(e2e.Equals(float64(expectedCacheRequests)), "thanos_store_index_cache_requests_total"), "expected %v requests", expectedCacheRequests)