feat(caching): Support caching /series and /labels query results #11539

Merged: 27 commits, Jan 4, 2024

Commits (27)
54b42a2 feature(cache): Support caching for metadata query results (kavirajk, Dec 18, 2023)
6139807 idk (kavirajk, Dec 19, 2023)
fe12193 fix TestSeriesCache (ashwanthgoli, Dec 19, 2023)
ec99784 Complete series cache tests (kavirajk, Dec 20, 2023)
7158994 have separate loki config to run with memcached (kavirajk, Dec 20, 2023)
e4d1302 Add cache hit metrics and configs to test (kavirajk, Dec 20, 2023)
4cb7145 fix memcached config (kavirajk, Dec 20, 2023)
55a7c8a stats.proto change to support series cache (kavirajk, Dec 20, 2023)
64175cf tidy up the tests (ashwanthgoli, Dec 20, 2023)
d716461 update failing tests to include the new stats (ashwanthgoli, Dec 21, 2023)
01f4792 apply default configs to series cache (ashwanthgoli, Dec 26, 2023)
13ff271 add test for GenerateCacheKey (ashwanthgoli, Dec 26, 2023)
634b43b make doc (ashwanthgoli, Dec 26, 2023)
c654c82 Merge branch 'main' into kavirajk/metadata-caching (ashwanthgoli, Dec 26, 2023)
ed3db1a preserve cache prefix (ashwanthgoli, Dec 26, 2023)
836b9ee fixup! preserve cache prefix (ashwanthgoli, Dec 26, 2023)
dfe174f s/querier.cache-series-results/frontend.cache-series-results (ashwanthgoli, Dec 27, 2023)
ee9667f retain headers when merging series response (ashwanthgoli, Dec 28, 2023)
2cc04cf add label results cache (ashwanthgoli, Dec 29, 2023)
bb2280d make format && make doc (ashwanthgoli, Dec 29, 2023)
40f69e3 add CHANGELOG (ashwanthgoli, Dec 29, 2023)
db966bd Merge branch 'main' into kavirajk/metadata-caching (ashwanthgoli, Jan 2, 2024)
ccd4a55 introduce split_metadata_queries_by_interval (ashwanthgoli, Jan 2, 2024)
32f4d20 make doc (ashwanthgoli, Jan 2, 2024)
92a7e43 Make flags prefix consistent with rest of result cache flags (kavirajk, Jan 2, 2024)
8706162 nit (ashwanthgoli, Jan 2, 2024)
40d64ef PR remarks (kavirajk, Jan 4, 2024)
CHANGELOG.md (1 addition, 0 deletions)
@@ -42,6 +42,7 @@
* [10956](https://github.com/grafana/loki/pull/10956) **jeschkies** do not wrap requests but send pure Protobuf from frontend v2 via scheduler to querier when `-frontend.encoding=protobuf`.
* [10417](https://github.com/grafana/loki/pull/10417) **jeschkies** shard `quantile_over_time` range queries using probabilistic data structures.
* [11284](https://github.com/grafana/loki/pull/11284) **ashwanthgoli** Config: Adds `frontend.max-query-capacity` to tune per-tenant query capacity.
* [11539](https://github.com/grafana/loki/pull/11539) **kaviraj,ashwanthgoli** Support caching /series and /labels query results
* [11545](https://github.com/grafana/loki/pull/11545) **dannykopping** Force correct memcached timeout when fetching chunks.

##### Fixes
cmd/loki/loki-local-with-memcached.yaml (87 additions, 0 deletions)
@@ -0,0 +1,87 @@
auth_enabled: false

server:
  http_listen_port: 3100
  grpc_listen_port: 9096

common:
  instance_addr: 127.0.0.1
  path_prefix: /tmp/loki
  storage:
    filesystem:
      chunks_directory: /tmp/loki/chunks
      rules_directory: /tmp/loki/rules
  replication_factor: 1
  ring:
    kvstore:
      store: inmemory

query_range:
  align_queries_with_step: true
  cache_index_stats_results: true
  cache_results: true
  cache_volume_results: true
  cache_series_results: true
  series_results_cache:
    cache:
      default_validity: 12h
      memcached_client:
        consistent_hash: true
        addresses: "dns+localhost:11211"
        max_idle_conns: 16
        timeout: 500ms
        update_interval: 1m
  index_stats_results_cache:
    cache:
      default_validity: 12h
      memcached_client:
        consistent_hash: true
        addresses: "dns+localhost:11211"
        max_idle_conns: 16
        timeout: 500ms
        update_interval: 1m
  max_retries: 5
  results_cache:
    cache:
      default_validity: 12h
      memcached_client:
        consistent_hash: true
        addresses: "dns+localhost:11211"
        max_idle_conns: 16
        timeout: 500ms
        update_interval: 1m
  volume_results_cache:
    cache:
      default_validity: 12h
      memcached_client:
        consistent_hash: true
        addresses: "dns+localhost:11211"
        max_idle_conns: 16
        timeout: 500ms
        update_interval: 1m

schema_config:
  configs:
    - from: 2020-10-24
      store: tsdb
      object_store: filesystem
      schema: v12
      index:
        prefix: index_
        period: 24h

ruler:
  alertmanager_url: http://localhost:9093

# By default, Loki will send anonymous, but uniquely-identifiable usage and configuration
# analytics to Grafana Labs. These statistics are sent to https://stats.grafana.org/
#
# Statistics help us better understand how Loki is used, and they show us performance
# levels for most users. This helps us prioritize features and documentation.
# For more information on what's sent, look at
# https://github.com/grafana/loki/blob/main/pkg/analytics/stats.go
# Refer to the buildReport method to see what goes into a report.
#
# If you would like to disable reporting, uncomment the following lines:
#analytics:
# reporting_enabled: false
docs/sources/configure/_index.md (42 additions, 0 deletions)
@@ -880,6 +880,40 @@ volume_results_cache:
  # compression. Supported values are: 'snappy' and ''.
  # CLI flag: -frontend.volume-results-cache.compression
  [compression: <string> | default = ""]

# Cache series query results.
# CLI flag: -querier.cache-series-results
[cache_series_results: <boolean> | default = false]

# If series_results_cache is not configured and cache_series_results is true,
# the config for the results cache is used.
series_results_cache:
Review thread on `series_results_cache`:

Contributor: I don't see much value in having separate cache configs for this, label results, and the normal result cache. IMHO we should err on the side of simplicity until we find a very compelling case for needing separate caches for all three. Our config is already monstrously complex; we should do what we can to not make it more so.

Contributor: Agreed. One downside is that we would lose granularity in stats and metric collection, since those are tied to the cache instance: labels, series, and results caching would all be clubbed together.

Contributor: How valuable are the metrics? Could we use a different label based on which "subcomponent" is observing metrics?

Collaborator (author): Good point. I thought about this when introducing it. We now have so many repeated configs: for the normal results cache (metric range queries), index stats, volume, labels, series, and for metric instant queries in the future, all with exactly the same settings. One downside is that it's hard to clean this up without breaking changes. The other option is to introduce a shared config with a "subcomponent" label, but only for labels and series so existing configs don't break; then we lose consistency with how the other results caches are configured.

Contributor: I would be fine with leaving this as is for now but combining them all in v3.0 with breaking changes. Thoughts?

Collaborator (author): Good idea. Adding it to the epic 👍

Contributor: Another thing I am worried about is blowing up the size of the stats proto. Does encoding ignore fields that are not set? If that's the case, we don't have to worry about this. Only one of these would be set for a given request, so I guess we could collapse them into a single field with a new cache-type field:

Cache result = 3 [
  (gogoproto.nullable) = false,
  (gogoproto.jsontag) = "result"
];
Cache statsResult = 4 [
  (gogoproto.nullable) = false,
  (gogoproto.jsontag) = "statsResult"
];
Cache volumeResult = 5 [
  (gogoproto.nullable) = false,
  (gogoproto.jsontag) = "volumeResult"
];
Cache seriesResult = 6 [
  (gogoproto.nullable) = false,
  (gogoproto.jsontag) = "seriesResult"
];
Cache labelResult = 7 [
  (gogoproto.nullable) = false,
  (gogoproto.jsontag) = "labelResult"
];
}
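
For illustration, the collapse suggested here could look like the following Go sketch: a single stats entry keyed by cache type instead of one proto field per type. The names are hypothetical, not what the PR ultimately shipped.

```go
package main

import "fmt"

// Cache mirrors a subset of the per-cache counters in stats.proto.
type Cache struct {
	EntriesRequested int32
	EntriesFound     int32
	EntriesStored    int32
}

// CacheStatsByType keeps one entry per cache type that was actually
// exercised; types that were never touched simply do not appear, so the
// serialized stats stay small.
type CacheStatsByType map[string]Cache

func main() {
	s := CacheStatsByType{
		"series-result": {EntriesRequested: 2, EntriesFound: 1, EntriesStored: 1},
	}
	fmt.Printf("%+v\n", s) // map[series-result:{EntriesRequested:2 EntriesFound:1 EntriesStored:1}]
}
```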

  # The cache block configures the cache backend.
  # The CLI flags prefix for this block configuration is:
  # frontend.series-results-cache
  [cache: <cache_config>]

  # Use compression in cache. The default is an empty value '', which disables
  # compression. Supported values are: 'snappy' and ''.
  # CLI flag: -frontend.series-results-cache.compression
  [compression: <string> | default = ""]

# Cache label query results.
# CLI flag: -querier.cache-label-results
[cache_label_results: <boolean> | default = false]

# If label_results_cache is not configured and cache_label_results is true, the
# config for the results cache is used.
label_results_cache:
  # The cache block configures the cache backend.
  # The CLI flags prefix for this block configuration is:
  # frontend.label-results-cache
  [cache: <cache_config>]

  # Use compression in cache. The default is an empty value '', which disables
  # compression. Supported values are: 'snappy' and ''.
  # CLI flag: -frontend.label-results-cache.compression
  [compression: <string> | default = ""]
```
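
The fallback described in the block above (a dedicated series or label results cache reusing the general results-cache config when not explicitly configured) is straightforward wiring. A minimal, hypothetical Go sketch; the struct and method names are illustrative, not Loki's actual types:

```go
package main

import "fmt"

// CacheConfig stands in for a cache backend configuration block.
type CacheConfig struct{ Addresses string }

// IsSet reports whether the block was explicitly configured.
func (c CacheConfig) IsSet() bool { return c.Addresses != "" }

type Config struct {
	CacheSeriesResults bool
	ResultsCache       CacheConfig
	SeriesResultsCache CacheConfig
}

// applyDefaults implements the documented fallback: if series caching is
// enabled but series_results_cache is not configured, reuse the results
// cache config.
func (cfg *Config) applyDefaults() {
	if cfg.CacheSeriesResults && !cfg.SeriesResultsCache.IsSet() {
		cfg.SeriesResultsCache = cfg.ResultsCache
	}
}

func main() {
	cfg := Config{
		CacheSeriesResults: true,
		ResultsCache:       CacheConfig{Addresses: "dns+localhost:11211"},
	}
	cfg.applyDefaults()
	fmt.Println(cfg.SeriesResultsCache.Addresses) // dns+localhost:11211
}
```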

### ruler
@@ -2844,6 +2878,12 @@ The `limits_config` block configures global and per-tenant limits in Loki.
# CLI flag: -querier.split-queries-by-interval
[split_queries_by_interval: <duration> | default = 1h]

# Split metadata queries by a time interval and execute in parallel. The value 0
# disables splitting metadata queries by time. This also determines how cache
# keys are chosen when label/series result caching is enabled.
# CLI flag: -querier.split-metadata-queries-by-interval
[split_metadata_queries_by_interval: <duration> | default = 1d]
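
Because the split interval also determines how cache keys are chosen, sub-queries that land in the same window share one cache entry. Below is a hedged sketch of interval-bucketed key derivation; the key format is illustrative (the PR's actual logic lives in `GenerateCacheKey`, for which it adds a test):

```go
package main

import (
	"fmt"
	"time"
)

// metadataCacheKey buckets a query's start time by the split interval, so
// every sub-query falling inside the same window produces the same key.
func metadataCacheKey(tenant, matchers string, start time.Time, split time.Duration) string {
	bucket := start.UnixNano() / int64(split)
	return fmt.Sprintf("series:%s:%s:%d", tenant, matchers, bucket)
}

func main() {
	day := 24 * time.Hour
	t1 := time.Date(2024, 1, 4, 3, 0, 0, 0, time.UTC)
	t2 := time.Date(2024, 1, 4, 21, 0, 0, 0, time.UTC)
	// Both timestamps fall in the same 1d window, so the keys collide on
	// purpose and the second query can be served from cache.
	fmt.Println(metadataCacheKey("foo", `{app="nginx"}`, t1, day))
	fmt.Println(metadataCacheKey("foo", `{app="nginx"}`, t2, day))
}
```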

# Limit queries that can be sharded. Queries within the time range of now and
# now minus this sharding lookback are not sharded. The default value of 0s
# disables the lookback, causing sharding of all queries at all times.
Expand Down Expand Up @@ -4283,6 +4323,8 @@ The cache block configures the cache backend. The supported CLI flags `<prefix>`
- `bloom-gateway-client.cache`
- `frontend`
- `frontend.index-stats-results-cache`
- `frontend.label-results-cache`
- `frontend.series-results-cache`
- `frontend.volume-results-cache`
- `store.chunks-cache`
- `store.index-cache-read`
pkg/logql/metrics.go (10 additions, 1 deletion)
@@ -222,6 +222,10 @@ func RecordLabelQueryMetrics(
"query", query,
"query_hash", util.HashedQuery(query),
"total_entries", stats.Summary.TotalEntriesReturned,
"cache_label_results_req", stats.Caches.LabelResult.EntriesRequested,
"cache_label_results_hit", stats.Caches.LabelResult.EntriesFound,
"cache_label_results_stored", stats.Caches.LabelResult.EntriesStored,
"cache_label_results_download_time", stats.Caches.LabelResult.CacheDownloadTime(),
)

execLatency.WithLabelValues(status, queryType, "").Observe(stats.Summary.ExecTime)
@@ -272,7 +276,12 @@ func RecordSeriesQueryMetrics(ctx context.Context, log log.Logger, start, end ti
"status", status,
"match", PrintMatches(match),
"query_hash", util.HashedQuery(PrintMatches(match)),
"total_entries", stats.Summary.TotalEntriesReturned)
"total_entries", stats.Summary.TotalEntriesReturned,
"cache_series_results_req", stats.Caches.SeriesResult.EntriesRequested,
"cache_series_results_hit", stats.Caches.SeriesResult.EntriesFound,
"cache_series_results_stored", stats.Caches.SeriesResult.EntriesStored,
"cache_series_results_download_time", stats.Caches.SeriesResult.CacheDownloadTime(),
)

if shard != nil {
logValues = append(logValues,
pkg/logql/metrics_test.go (18 additions, 2 deletions)
@@ -106,10 +106,18 @@ func TestLogLabelsQuery(t *testing.T) {
TotalBytesProcessed: 100000,
TotalEntriesReturned: 12,
},
Caches: stats.Caches{
LabelResult: stats.Cache{
EntriesRequested: 2,
EntriesFound: 1,
EntriesStored: 1,
DownloadTime: 80,
},
},
})
require.Regexp(t,
fmt.Sprintf(
"level=info org_id=foo traceID=%s sampled=true latency=slow query_type=labels splits=0 start=.* end=.* start_delta=1h0m0.* end_delta=.* length=1h0m0s duration=25.25s status=200 label=foo query= query_hash=2166136261 total_entries=12\n",
"level=info org_id=foo traceID=%s sampled=true latency=slow query_type=labels splits=0 start=.* end=.* start_delta=1h0m0.* end_delta=.* length=1h0m0s duration=25.25s status=200 label=foo query= query_hash=2166136261 total_entries=12 cache_label_results_req=2 cache_label_results_hit=1 cache_label_results_stored=1 cache_label_results_download_time=80ns\n",
sp.Context().(jaeger.SpanContext).SpanID().String(),
),
buf.String())
@@ -132,10 +140,18 @@ func TestLogSeriesQuery(t *testing.T) {
TotalBytesProcessed: 100000,
TotalEntriesReturned: 10,
},
Caches: stats.Caches{
SeriesResult: stats.Cache{
EntriesRequested: 2,
EntriesFound: 1,
EntriesStored: 1,
DownloadTime: 80,
},
},
})
require.Regexp(t,
fmt.Sprintf(
"level=info org_id=foo traceID=%s sampled=true latency=slow query_type=series splits=0 start=.* end=.* start_delta=1h0m0.* end_delta=.* length=1h0m0s duration=25.25s status=200 match=\"{container_name=.*\"}:{app=.*}\" query_hash=23523089 total_entries=10\n",
"level=info org_id=foo traceID=%s sampled=true latency=slow query_type=series splits=0 start=.* end=.* start_delta=1h0m0.* end_delta=.* length=1h0m0s duration=25.25s status=200 match=\"{container_name=.*\"}:{app=.*}\" query_hash=23523089 total_entries=10 cache_series_results_req=2 cache_series_results_hit=1 cache_series_results_stored=1 cache_series_results_download_time=80ns\n",
sp.Context().(jaeger.SpanContext).SpanID().String(),
),
buf.String())
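
A note on the `..._download_time=80ns` values these regexps expect: the stats counter is recorded in nanoseconds, and assuming `CacheDownloadTime()` simply wraps the counter in a Go `time.Duration`, the `80ns` rendering follows directly from `Duration.String`:

```go
package main

import (
	"fmt"
	"time"
)

func main() {
	var downloadTime int64 = 80 // nanoseconds, as stored in stats.Cache.DownloadTime
	fmt.Println(time.Duration(downloadTime)) // 80ns
}
```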
pkg/logqlmodel/stats/context.go (22 additions, 0 deletions)
@@ -61,6 +61,8 @@
StatsResultCache = "stats-result"
VolumeResultCache = "volume-result"
WriteDedupeCache = "write-dedupe"
SeriesResultCache = "series-result"
LabelResultCache = "label-result"
BloomFilterCache = "bloom-filter"
BloomBlocksCache = "bloom-blocks"
)
@@ -100,6 +102,8 @@ func (c *Context) Caches() Caches {
Result: c.caches.Result,
StatsResult: c.caches.StatsResult,
VolumeResult: c.caches.VolumeResult,
SeriesResult: c.caches.SeriesResult,
LabelResult: c.caches.LabelResult,
}
}

@@ -215,6 +219,8 @@ func (c *Caches) Merge(m Caches) {
c.Result.Merge(m.Result)
c.StatsResult.Merge(m.StatsResult)
c.VolumeResult.Merge(m.VolumeResult)
c.SeriesResult.Merge(m.SeriesResult)
c.LabelResult.Merge(m.LabelResult)
}

func (c *Cache) Merge(m Cache) {
@@ -444,6 +450,10 @@ func (c *Context) getCacheStatsByType(t CacheType) *Cache {
stats = &c.caches.StatsResult
case VolumeResultCache:
stats = &c.caches.VolumeResult
case SeriesResultCache:
stats = &c.caches.SeriesResult
case LabelResultCache:
stats = &c.caches.LabelResult
default:
return nil
}
@@ -526,6 +536,18 @@ func (c Caches) Log(log log.Logger) {
"Cache.VolumeResult.EntriesStored", c.VolumeResult.EntriesStored,
"Cache.VolumeResult.BytesSent", humanize.Bytes(uint64(c.VolumeResult.BytesSent)),
"Cache.VolumeResult.BytesReceived", humanize.Bytes(uint64(c.VolumeResult.BytesReceived)),
"Cache.SeriesResult.Requests", c.SeriesResult.Requests,
"Cache.SeriesResult.EntriesRequested", c.SeriesResult.EntriesRequested,
"Cache.SeriesResult.EntriesFound", c.SeriesResult.EntriesFound,
"Cache.SeriesResult.EntriesStored", c.SeriesResult.EntriesStored,
"Cache.SeriesResult.BytesSent", humanize.Bytes(uint64(c.SeriesResult.BytesSent)),
"Cache.SeriesResult.BytesReceived", humanize.Bytes(uint64(c.SeriesResult.BytesReceived)),
"Cache.LabelResult.Requests", c.LabelResult.Requests,
"Cache.LabelResult.EntriesRequested", c.LabelResult.EntriesRequested,
"Cache.LabelResult.EntriesFound", c.LabelResult.EntriesFound,
"Cache.LabelResult.EntriesStored", c.LabelResult.EntriesStored,
"Cache.LabelResult.BytesSent", humanize.Bytes(uint64(c.LabelResult.BytesSent)),
"Cache.LabelResult.BytesReceived", humanize.Bytes(uint64(c.LabelResult.BytesReceived)),
"Cache.Result.DownloadTime", c.Result.CacheDownloadTime(),
"Cache.Result.Requests", c.Result.Requests,
"Cache.Result.EntriesRequested", c.Result.EntriesRequested,
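
To see how these per-cache stats combine across split sub-queries, here is a minimal sketch of the merge semantics. Field names follow the diff; that `Merge` sums the counters is an assumption for illustration, not Loki's exact code:

```go
package main

import "fmt"

type Cache struct {
	EntriesRequested int32
	EntriesFound     int32
	EntriesStored    int32
}

// Merge accumulates counters from another sub-query's cache stats.
func (c *Cache) Merge(m Cache) {
	c.EntriesRequested += m.EntriesRequested
	c.EntriesFound += m.EntriesFound
	c.EntriesStored += m.EntriesStored
}

type Caches struct {
	SeriesResult Cache
	LabelResult  Cache
}

func (c *Caches) Merge(m Caches) {
	c.SeriesResult.Merge(m.SeriesResult)
	c.LabelResult.Merge(m.LabelResult)
}

func main() {
	a := Caches{SeriesResult: Cache{EntriesRequested: 2, EntriesFound: 1}}
	b := Caches{SeriesResult: Cache{EntriesRequested: 3, EntriesFound: 3, EntriesStored: 1}}
	a.Merge(b)
	fmt.Printf("%+v\n", a.SeriesResult) // {EntriesRequested:5 EntriesFound:4 EntriesStored:1}
}
```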