Don't load series multiple times when streaming chunks from store-gateways and only one batch is needed #8039

Merged · 45 commits · May 23, 2024
Changes from 33 commits

Commits
897d3b6
Fix outdated comment
charleskorn Mar 19, 2024
254b1db
Add initial support for resetting `loadingSeriesChunkRefsSetIterator`
charleskorn Mar 19, 2024
4f67084
Add important TODO
charleskorn Mar 19, 2024
3fa6e3f
add reset funcs in preparation but don't use them yet
zenador Mar 19, 2024
05e6274
reuse the iterator, this builds but fails tests with EOF
zenador Mar 19, 2024
a15a25e
try changing strategy up front
zenador Mar 19, 2024
17c8360
Correctly reset merging and deduplicating iterators
charleskorn Mar 27, 2024
2473ca1
Avoid counting series and chunks twice in limiting iterator
charleskorn Mar 27, 2024
4f4d20b
Don't allow releasing series chunk refs set that will be reused
charleskorn Apr 3, 2024
1542ebe
Restore previous test setup
charleskorn May 3, 2024
09ddd2e
Fix typo in variable name
charleskorn May 3, 2024
fb1322b
Restore more previous test setup
charleskorn May 3, 2024
e1eadbd
Restore use of noChunkRefs where it should be used, and remove outdat…
charleskorn May 3, 2024
676349d
Fix linting warning.
charleskorn May 3, 2024
c3fb275
Introduce seriesIteratorStrategy flag for chunks streaming
charleskorn May 3, 2024
8b81435
Refactor test to exercise case where chunks streaming is both enabled…
charleskorn May 3, 2024
4471f33
Return reused series set to pool
charleskorn May 13, 2024
afebea2
Fix typo
charleskorn May 13, 2024
5915c19
Only load chunk refs when they're needed.
charleskorn May 13, 2024
f2dccc7
Rename variable to make intent clearer
charleskorn May 13, 2024
103615e
Add changelog entry
charleskorn May 13, 2024
73def47
Merge branch 'main' into charleskorn/store-gateway-double-work-fix
charleskorn May 13, 2024
9f40abc
Rename `withoutNoChunkRefs` to `withChunkRefs`
charleskorn May 14, 2024
3c8e50f
Remove redundant `if`
charleskorn May 14, 2024
4f07071
Add iterator type that wraps an iterator factory to manage the transi…
charleskorn May 15, 2024
cc40e1c
Remove `Reset` methods
charleskorn May 15, 2024
61290ed
Remove `makeReleasable`
charleskorn May 15, 2024
5f0342c
Unpick more unnecessary changes
charleskorn May 15, 2024
b24982d
Unpick whitespace change
charleskorn May 15, 2024
814d627
Extract method
charleskorn May 15, 2024
77ac30b
Refactor existing code to use `chunksStreamingCachingSeriesChunkRefsS…
charleskorn May 15, 2024
43115a9
Fix issue where streaming chunks does not work if multiple batches of…
charleskorn May 15, 2024
3c5eefe
Rename methods to better reflect their purpose
charleskorn May 15, 2024
93b6d9f
Move `streamingSeriesIterators` to `series_refs_streaming.go`
charleskorn May 20, 2024
c92b9a3
Remove unnecessary comments
charleskorn May 20, 2024
6ca5bc9
Remove unnecessary test helper function
charleskorn May 20, 2024
17bb03c
Introduce `seriesChunkRefsIteratorWrapper` interface instead of passi…
charleskorn May 20, 2024
81b1975
Rename type
charleskorn May 20, 2024
60f3084
Make a copy of the postings for each phase.
charleskorn May 20, 2024
c0fb6e9
Clarify comment
charleskorn May 20, 2024
e3d16b1
Use concrete type
charleskorn May 20, 2024
57c4b6b
Merge branch 'main' into charleskorn/store-gateway-double-work-fix
charleskorn May 20, 2024
e711d49
Don't make a copy of the postings unnecessarily.
charleskorn May 20, 2024
b2f7f15
Fix linting issues
charleskorn May 20, 2024
1af591f
Remove unused method
charleskorn May 20, 2024
Files changed
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -35,6 +35,7 @@
* [ENHANCEMENT] Store-gateway: add `-blocks-storage.bucket-store.max-concurrent-queue-timeout`. When set, queries at the store-gateway's query gate will not wait longer than that to execute. If a query reaches the wait timeout, then the querier will retry the blocks on a different store-gateway. If all store-gateways are unavailable, then the query will fail with `err-mimir-store-consistency-check-failed`. #7777
* [ENHANCEMENT] Ingester: Optimize querying with regexp matchers. #8106
* [ENHANCEMENT] Distributor: Introduce `-distributor.max-request-pool-buffer-size` to allow configuring the maximum size of the request pool buffers. #8082
* [ENHANCEMENT] Store-gateway: improve performance when streaming chunks to queriers is enabled (`-querier.prefer-streaming-chunks-from-store-gateways=true`) and the query selects fewer than `-blocks-storage.bucket-store.batch-series-size` series (defaults to 5000 series). #8039
* [BUGFIX] Rules: improve error handling when querier is local to the ruler. #7567
* [BUGFIX] Querier, store-gateway: Protect against panics raised during snappy encoding. #7520
* [BUGFIX] Ingester: Prevent timely compaction of empty blocks. #7624
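The mechanism behind this changelog entry, in outline: when chunks streaming is enabled, the store-gateway serves a request in two phases — it first sends all matching series labels, then streams the chunks for those same series. Previously, both phases resolved postings and loaded series chunk refs from scratch, doing the expensive work twice. This PR instead caches the per-block iterators created during the labels phase and, when all series fit in a single batch, replays that cached batch in the chunks phase rather than re-reading it. Below is a minimal sketch of the caching idea using simplified, hypothetical types — not the actual Mimir implementation (the real one is `chunksStreamingCachingSeriesChunkRefsSetIterator`, wired up in the `pkg/storegateway/bucket.go` diff that follows):

```go
package main

import "fmt"

type seriesSet []string // hypothetical stand-in for a batch of series chunk refs

// cachingIterator sketches the caching idea: if the labels phase produced a
// single batch, keep it and replay it in the chunks phase; otherwise fall back
// to re-creating the underlying iterator and reading the refs a second time.
type cachingIterator struct {
	factory func() []seriesSet // re-creates the expensive underlying iterator
	cached  []seriesSet        // non-nil only if the labels phase yielded one batch
	current []seriesSet
	pos     int
}

func (it *cachingIterator) runLabelsPhase() []seriesSet {
	sets := it.factory()
	if len(sets) == 1 {
		it.cached = sets // cheap to retain: just one batch
	}
	return sets
}

func (it *cachingIterator) prepareForChunksStreamingPhase() {
	if it.cached != nil {
		it.current = it.cached // replay: no second read of the series refs
	} else {
		it.current = it.factory() // many batches: retaining them all would defeat batching
	}
	it.pos = 0
}

func (it *cachingIterator) next() (seriesSet, bool) {
	if it.pos >= len(it.current) {
		return nil, false
	}
	s := it.current[it.pos]
	it.pos++
	return s, true
}

func main() {
	factoryCalls := 0
	it := &cachingIterator{factory: func() []seriesSet {
		factoryCalls++
		return []seriesSet{{"series_a", "series_b"}} // a single batch
	}}

	it.runLabelsPhase()
	it.prepareForChunksStreamingPhase()
	for s, ok := it.next(); ok; s, ok = it.next() {
		fmt.Println(s)
	}
	fmt.Println("factory calls:", factoryCalls) // 1 — the refs were not loaded twice
}
```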
126 changes: 70 additions & 56 deletions pkg/storegateway/bucket.go
@@ -606,10 +606,8 @@ func (s *BucketStore) Series(req *storepb.SeriesRequest, srv storegatewaypb.Stor
defer done()

var (
// If we are streaming the series labels and chunks separately, we don't need to fetch the postings
// twice. So we use these slices to re-use them. Each reuse[i] corresponds to a single block.
reuse []*reusedPostingsAndMatchers
resHints = &hintspb.SeriesResponseHints{}
streamingIterators *streamingSeriesIterators
resHints = &hintspb.SeriesResponseHints{}
)
for _, b := range blocks {
resHints.AddQueriedBlock(b.meta.ULID)
@@ -633,7 +631,7 @@
seriesLimiter = s.seriesLimiterFactory(s.metrics.queriesDropped.WithLabelValues("series"))
)

seriesSet, reuse, err = s.streamingSeriesForBlocks(ctx, req, blocks, indexReaders, shardSelector, matchers, chunksLimiter, seriesLimiter, stats)
seriesSet, streamingIterators, err = s.createIteratorForChunksStreamingLabelsPhase(ctx, req, blocks, indexReaders, shardSelector, matchers, chunksLimiter, seriesLimiter, stats)
if err != nil {
return err
}
@@ -661,15 +659,11 @@

start := time.Now()
if req.StreamingChunksBatchSize > 0 {
var seriesChunkIt iterator[seriesChunksSet]
seriesChunkIt, err = s.streamingChunksSetForBlocks(ctx, req, blocks, indexReaders, readers, shardSelector, matchers, chunksLimiter, seriesLimiter, stats, reuse)
if err != nil {
return err
}
seriesChunkIt := s.createIteratorForChunksStreamingChunksPhase(ctx, readers, stats, chunksLimiter, seriesLimiter, streamingIterators)
err = s.sendStreamingChunks(req, srv, seriesChunkIt, stats, streamingSeriesCount)
} else {
var seriesSet storepb.SeriesSet
seriesSet, err = s.nonStreamingSeriesSetForBlocks(ctx, req, blocks, indexReaders, readers, shardSelector, matchers, chunksLimiter, seriesLimiter, stats)
seriesSet, err = s.createIteratorForNonChunksStreamingRequest(ctx, req, blocks, indexReaders, readers, shardSelector, matchers, chunksLimiter, seriesLimiter, stats)
if err != nil {
return err
}
@@ -1027,8 +1021,8 @@ func chunksSize(chks []storepb.AggrChunk) (size int) {
return size
}

// nonStreamingSeriesSetForBlocks is used when the streaming feature is not enabled.
func (s *BucketStore) nonStreamingSeriesSetForBlocks(
// createIteratorForNonChunksStreamingRequest is used when the streaming feature is not enabled.
func (s *BucketStore) createIteratorForNonChunksStreamingRequest(
ctx context.Context,
req *storepb.SeriesRequest,
blocks []*bucketBlock,
@@ -1044,7 +1038,7 @@ func (s *BucketStore) nonStreamingSeriesSetForBlocks(
if req.SkipChunks {
strategy = noChunkRefs
}
it, err := s.getSeriesIteratorFromBlocks(ctx, req, blocks, indexReaders, shardSelector, matchers, chunksLimiter, seriesLimiter, stats, nil, strategy)
it, err := s.getSeriesIteratorFromBlocks(ctx, req, blocks, indexReaders, shardSelector, matchers, chunksLimiter, seriesLimiter, stats, strategy, nil)
if err != nil {
return nil, err
}
@@ -1059,10 +1053,10 @@
return set, nil
}

// streamingSeriesForBlocks is used when the streaming feature is enabled.
// createIteratorForChunksStreamingLabelsPhase is used when the streaming feature is enabled.
// It returns a series set that only contains the series labels without any chunks information.
// The returned postings (series ref) and matches should be re-used when getting chunks to save on computation.
func (s *BucketStore) streamingSeriesForBlocks(
// The streamingSeriesIterators should be re-used when getting chunks to save on computation.
func (s *BucketStore) createIteratorForChunksStreamingLabelsPhase(
ctx context.Context,
req *storepb.SeriesRequest,
blocks []*bucketBlock,
@@ -1072,43 +1066,64 @@
chunksLimiter ChunksLimiter, // Rate limiter for loading chunks.
seriesLimiter SeriesLimiter, // Rate limiter for loading series.
stats *safeQueryStats,
) (storepb.SeriesSet, []*reusedPostingsAndMatchers, error) {
var (
reuse = make([]*reusedPostingsAndMatchers, len(blocks))
strategy = noChunkRefs | overlapMintMaxt
)
for i := range reuse {
reuse[i] = &reusedPostingsAndMatchers{}
}
it, err := s.getSeriesIteratorFromBlocks(ctx, req, blocks, indexReaders, shardSelector, matchers, chunksLimiter, seriesLimiter, stats, reuse, strategy)
) (storepb.SeriesSet, *streamingSeriesIterators, error) {
streamingIterators := newStreamingSeriesIterators()
it, err := s.getSeriesIteratorFromBlocks(ctx, req, blocks, indexReaders, shardSelector, matchers, chunksLimiter, seriesLimiter, stats, overlapMintMaxt, streamingIterators.iteratorWrapper)
if err != nil {
return nil, nil, err
}
return newSeriesSetWithoutChunks(ctx, it, stats), reuse, nil

return newSeriesSetWithoutChunks(ctx, it, stats), streamingIterators, nil
}

type streamingSeriesIterators struct {
iterators []*chunksStreamingCachingSeriesChunkRefsSetIterator
mtx *sync.RWMutex
}

func newStreamingSeriesIterators() *streamingSeriesIterators {
return &streamingSeriesIterators{
mtx: &sync.RWMutex{},
}
}

func (i *streamingSeriesIterators) iteratorWrapper(strategy seriesIteratorStrategy, postingsSetsIterator *postingsSetsIterator, factory iteratorFactory) iterator[seriesChunkRefsSet] {
it := newChunksStreamingCachingSeriesChunkRefsSetIterator(strategy, postingsSetsIterator, factory)

i.mtx.Lock()
i.iterators = append(i.iterators, it)
i.mtx.Unlock()

return it
}

func (i *streamingSeriesIterators) prepareForChunksStreamingPhase() []iterator[seriesChunkRefsSet] {
prepared := make([]iterator[seriesChunkRefsSet], 0, len(i.iterators))

for _, it := range i.iterators {
it.PrepareForChunksStreamingPhase()
prepared = append(prepared, it)
}

return prepared
}

// streamingChunksSetForBlocks is used when streaming feature is enabled.
// It returns an iterator to go over the chunks for the series returned in the streamingSeriesForBlocks call.
// It is recommended to pass the reusePostings and reusePendingMatches returned by the streamingSeriesForBlocks call.
func (s *BucketStore) streamingChunksSetForBlocks(
// createIteratorForChunksStreamingChunksPhase is used when streaming feature is enabled.
// It returns an iterator to go over the chunks for the series returned in the createIteratorForChunksStreamingLabelsPhase call.
// It is required to pass the iterators returned by the createIteratorForChunksStreamingLabelsPhase call for reuse.
func (s *BucketStore) createIteratorForChunksStreamingChunksPhase(
ctx context.Context,
req *storepb.SeriesRequest,
blocks []*bucketBlock,
indexReaders map[ulid.ULID]*bucketIndexReader,
chunkReaders *bucketChunkReaders,
shardSelector *sharding.ShardSelector,
matchers []*labels.Matcher,
chunksLimiter ChunksLimiter, // Rate limiter for loading chunks.
seriesLimiter SeriesLimiter, // Rate limiter for loading series.
stats *safeQueryStats,
reuse []*reusedPostingsAndMatchers, // Should come from streamingSeriesForBlocks.
) (iterator[seriesChunksSet], error) {
it, err := s.getSeriesIteratorFromBlocks(ctx, req, blocks, indexReaders, shardSelector, matchers, chunksLimiter, seriesLimiter, stats, reuse, defaultStrategy)
if err != nil {
return nil, err
}
chunksLimiter ChunksLimiter,
seriesLimiter SeriesLimiter,
iterators *streamingSeriesIterators,
) iterator[seriesChunksSet] {
preparedIterators := iterators.prepareForChunksStreamingPhase()
it := s.getSeriesIteratorFromPerBlockIterators(preparedIterators, chunksLimiter, seriesLimiter)
scsi := newChunksPreloadingIterator(ctx, s.logger, s.userID, *chunkReaders, it, s.maxSeriesPerBatch, stats)
return scsi, nil

return scsi
}

func (s *BucketStore) getSeriesIteratorFromBlocks(
@@ -1121,8 +1136,8 @@ func (s *BucketStore) getSeriesIteratorFromBlocks(
chunksLimiter ChunksLimiter, // Rate limiter for loading chunks.
seriesLimiter SeriesLimiter, // Rate limiter for loading series.
stats *safeQueryStats,
reuse []*reusedPostingsAndMatchers, // Used if not empty. If not empty, len(reuse) must be len(blocks).
strategy seriesIteratorStrategy,
wrapper func(strategy seriesIteratorStrategy, postingsSetsIterator *postingsSetsIterator, factory iteratorFactory) iterator[seriesChunkRefsSet],
) (iterator[seriesChunkRefsSet], error) {
var (
mtx = sync.Mutex{}
@@ -1131,9 +1146,8 @@
begin = time.Now()
blocksQueriedByBlockMeta = make(map[blockQueriedMeta]int)
)
for i, b := range blocks {
for _, b := range blocks {
b := b
i := i

// Keep track of queried blocks.
indexr := indexReaders[b.meta.ULID]
@@ -1144,10 +1158,6 @@
if shardSelector != nil {
blockSeriesHashCache = s.seriesHashCache.GetBlockCache(b.meta.ULID.String())
}
var r *reusedPostingsAndMatchers
if len(reuse) > 0 {
r = reuse[i]
}
g.Go(func() error {
part, err := openBlockSeriesChunkRefsSetsIterator(
ctx,
@@ -1162,8 +1172,8 @@
strategy,
req.MinTime, req.MaxTime,
stats,
r,
s.logger,
wrapper,
)
if err != nil {
return errors.Wrapf(err, "fetch series for block %s", b.meta.ULID)
@@ -1192,13 +1202,17 @@
stats.streamingSeriesExpandPostingsDuration += time.Since(begin)
})

mergedIterator := mergedSeriesChunkRefsSetIterators(s.maxSeriesPerBatch, batches...)
return s.getSeriesIteratorFromPerBlockIterators(batches, chunksLimiter, seriesLimiter), nil
}

func (s *BucketStore) getSeriesIteratorFromPerBlockIterators(perBlockIterators []iterator[seriesChunkRefsSet], chunksLimiter ChunksLimiter, seriesLimiter SeriesLimiter) iterator[seriesChunkRefsSet] {
mergedIterator := mergedSeriesChunkRefsSetIterators(s.maxSeriesPerBatch, perBlockIterators...)

// Apply limits after the merging, so that if the same series is part of multiple blocks it just gets
// counted once towards the limit.
mergedIterator = newLimitingSeriesChunkRefsSetIterator(mergedIterator, chunksLimiter, seriesLimiter)

return mergedIterator, nil
return mergedIterator
}

func (s *BucketStore) recordSeriesCallResult(safeStats *safeQueryStats) {
@@ -1467,8 +1481,8 @@ func blockLabelNames(ctx context.Context, indexr *bucketIndexReader, matchers []
noChunkRefs,
minTime, maxTime,
stats,
nil,
logger,
nil,
)
if err != nil {
return nil, errors.Wrap(err, "fetch series")
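One deliberate detail in the refactored `getSeriesIteratorFromPerBlockIterators` above: limits are applied after merging the per-block iterators, so a series that appears in several blocks counts only once toward the series limit. A toy illustration of why that ordering matters, with simplified stand-in functions rather than the actual Mimir limiter API:

```go
package main

import "fmt"

// mergeDedup stands in for mergedSeriesChunkRefsSetIterators: it combines
// per-block series lists, deduplicating series present in more than one block.
func mergeDedup(blocks ...[]string) []string {
	seen := map[string]bool{}
	var out []string
	for _, b := range blocks {
		for _, s := range b {
			if !seen[s] {
				seen[s] = true
				out = append(out, s)
			}
		}
	}
	return out
}

// limitSeries stands in for the limiting iterator: it rejects result sets
// larger than the configured maximum.
func limitSeries(series []string, max int) error {
	if len(series) > max {
		return fmt.Errorf("would fetch %d series, limit is %d", len(series), max)
	}
	return nil
}

func main() {
	block1 := []string{"a", "b"}
	block2 := []string{"b", "c"} // "b" appears in both blocks

	// Limit applied after merging: 3 unique series, within a limit of 3.
	merged := mergeDedup(block1, block2)
	fmt.Println(limitSeries(merged, 3)) // <nil>

	// Limiting each block before merging would count "b" twice (2+2=4 > 3)
	// and reject a query whose true result is within the limit.
}
```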
2 changes: 1 addition & 1 deletion pkg/storegateway/bucket_chunk_reader_test.go
@@ -44,8 +44,8 @@ func TestBucketChunkReader_refetchChunks(t *testing.T) {
block.meta.MinTime,
block.meta.MaxTime,
newSafeQueryStats(),
nil,
log.NewNopLogger(),
nil,
)
require.NoError(t, err)
