From bb7307bb57a000d63930ebd762964c1bff26ff08 Mon Sep 17 00:00:00 2001 From: Scot Wells Date: Thu, 26 Mar 2026 16:12:38 -0500 Subject: [PATCH] Fix multi-window SLO error ratios for low-traffic endpoints Replace clamp_min + bool multiplication guard with direct division and per-window 'and total > 0' guard. The previous approach produced false 100% error ratios during traffic transitions because: 1. clamp_min(total, 0.001) computed 1-(0/0.001)=1 when total was 0 2. The bool guard used the 5m total which could briefly be >0 during traffic pulses, letting the false 1 through to the longer windows The new approach uses each window's own total for the guard and returns no data (instead of 1) when there's no traffic in that specific window. This matches the pattern already working correctly for the 5m rules. The latency SLO rules (the request_total:rate5m recordings and the 30m/1h/6h/3d error ratios) also switch their total from apiserver_request_total to apiserver_request_duration_seconds_count, so the denominator comes from the same histogram as the apiserver_request_duration_seconds_bucket numerator. The availability rules keep apiserver_request_total. Co-Authored-By: Claude Opus 4.6 (1M context) --- .../alerts/generated/activity-recordings.yaml | 204 ++++++++++++------ .../rules/activity-slo-recordings.libsonnet | 204 ++++++++++++------ 2 files changed, 280 insertions(+), 128 deletions(-) diff --git a/config/components/observability/alerts/generated/activity-recordings.yaml b/config/components/observability/alerts/generated/activity-recordings.yaml index e9a6adb7..d2a42d7f 100644 --- a/config/components/observability/alerts/generated/activity-recordings.yaml +++ b/config/components/observability/alerts/generated/activity-recordings.yaml @@ -126,7 +126,7 @@ }[5m])) "record": "activity:slo_metadata:request_good:rate5m" - "expr": | - sum(rate(apiserver_request_total{ + sum(rate(apiserver_request_duration_seconds_count{ job="activity-apiserver", resource="activitypolicies", verb=~"GET|LIST|PATCH" @@ -150,13 +150,17 @@ le="1" }[30m])) / - clamp_min(sum(rate(apiserver_request_total{ + sum(rate(apiserver_request_duration_seconds_count{ job="activity-apiserver", resource="activitypolicies", verb=~"GET|LIST|PATCH" - }[30m])), 0.001) + }[30m])) ) - * on() (activity:slo_metadata:request_total:rate5m > bool 0) + and 
sum(rate(apiserver_request_duration_seconds_count{ + job="activity-apiserver", + resource="activitypolicies", + verb=~"GET|LIST|PATCH" + }[30m])) > 0 "record": "activity:slo_metadata:error_ratio:rate30m" - "expr": | 1 - ( @@ -167,13 +171,17 @@ le="1" }[1h])) / - clamp_min(sum(rate(apiserver_request_total{ + sum(rate(apiserver_request_duration_seconds_count{ job="activity-apiserver", resource="activitypolicies", verb=~"GET|LIST|PATCH" - }[1h])), 0.001) + }[1h])) ) - * on() (activity:slo_metadata:request_total:rate5m > bool 0) + and sum(rate(apiserver_request_duration_seconds_count{ + job="activity-apiserver", + resource="activitypolicies", + verb=~"GET|LIST|PATCH" + }[1h])) > 0 "record": "activity:slo_metadata:error_ratio:rate1h" - "expr": | 1 - ( @@ -184,13 +192,17 @@ le="1" }[6h])) / - clamp_min(sum(rate(apiserver_request_total{ + sum(rate(apiserver_request_duration_seconds_count{ job="activity-apiserver", resource="activitypolicies", verb=~"GET|LIST|PATCH" - }[6h])), 0.001) + }[6h])) ) - * on() (activity:slo_metadata:request_total:rate5m > bool 0) + and sum(rate(apiserver_request_duration_seconds_count{ + job="activity-apiserver", + resource="activitypolicies", + verb=~"GET|LIST|PATCH" + }[6h])) > 0 "record": "activity:slo_metadata:error_ratio:rate6h" - "expr": | 1 - ( @@ -201,13 +213,17 @@ le="1" }[3d])) / - clamp_min(sum(rate(apiserver_request_total{ + sum(rate(apiserver_request_duration_seconds_count{ job="activity-apiserver", resource="activitypolicies", verb=~"GET|LIST|PATCH" - }[3d])), 0.001) + }[3d])) ) - * on() (activity:slo_metadata:request_total:rate5m > bool 0) + and sum(rate(apiserver_request_duration_seconds_count{ + job="activity-apiserver", + resource="activitypolicies", + verb=~"GET|LIST|PATCH" + }[3d])) > 0 "record": "activity:slo_metadata:error_ratio:rate3d" - "expr": | sum(rate(apiserver_request_duration_seconds_bucket{ @@ -218,7 +234,7 @@ }[5m])) "record": "activity:slo_audit_query:request_good:rate5m" - "expr": | - 
sum(rate(apiserver_request_total{ + sum(rate(apiserver_request_duration_seconds_count{ job="activity-apiserver", resource="auditlogqueries", verb="POST" @@ -242,13 +258,17 @@ le="5" }[30m])) / - clamp_min(sum(rate(apiserver_request_total{ + sum(rate(apiserver_request_duration_seconds_count{ job="activity-apiserver", resource="auditlogqueries", verb="POST" - }[30m])), 0.001) + }[30m])) ) - * on() (activity:slo_audit_query:request_total:rate5m > bool 0) + and sum(rate(apiserver_request_duration_seconds_count{ + job="activity-apiserver", + resource="auditlogqueries", + verb="POST" + }[30m])) > 0 "record": "activity:slo_audit_query:error_ratio:rate30m" - "expr": | 1 - ( @@ -259,13 +279,17 @@ le="5" }[1h])) / - clamp_min(sum(rate(apiserver_request_total{ + sum(rate(apiserver_request_duration_seconds_count{ job="activity-apiserver", resource="auditlogqueries", verb="POST" - }[1h])), 0.001) + }[1h])) ) - * on() (activity:slo_audit_query:request_total:rate5m > bool 0) + and sum(rate(apiserver_request_duration_seconds_count{ + job="activity-apiserver", + resource="auditlogqueries", + verb="POST" + }[1h])) > 0 "record": "activity:slo_audit_query:error_ratio:rate1h" - "expr": | 1 - ( @@ -276,13 +300,17 @@ le="5" }[6h])) / - clamp_min(sum(rate(apiserver_request_total{ + sum(rate(apiserver_request_duration_seconds_count{ job="activity-apiserver", resource="auditlogqueries", verb="POST" - }[6h])), 0.001) + }[6h])) ) - * on() (activity:slo_audit_query:request_total:rate5m > bool 0) + and sum(rate(apiserver_request_duration_seconds_count{ + job="activity-apiserver", + resource="auditlogqueries", + verb="POST" + }[6h])) > 0 "record": "activity:slo_audit_query:error_ratio:rate6h" - "expr": | 1 - ( @@ -293,13 +321,17 @@ le="5" }[3d])) / - clamp_min(sum(rate(apiserver_request_total{ + sum(rate(apiserver_request_duration_seconds_count{ job="activity-apiserver", resource="auditlogqueries", verb="POST" - }[3d])), 0.001) + }[3d])) ) - * on() (activity:slo_audit_query:request_total:rate5m 
> bool 0) + and sum(rate(apiserver_request_duration_seconds_count{ + job="activity-apiserver", + resource="auditlogqueries", + verb="POST" + }[3d])) > 0 "record": "activity:slo_audit_query:error_ratio:rate3d" - "expr": | sum(rate(apiserver_request_duration_seconds_bucket{ @@ -310,7 +342,7 @@ }[5m])) "record": "activity:slo_activity_query:request_good:rate5m" - "expr": | - sum(rate(apiserver_request_total{ + sum(rate(apiserver_request_duration_seconds_count{ job="activity-apiserver", resource=~"activityqueries|activityfacetqueries", verb="POST" @@ -334,13 +366,17 @@ le="5" }[30m])) / - clamp_min(sum(rate(apiserver_request_total{ + sum(rate(apiserver_request_duration_seconds_count{ job="activity-apiserver", resource=~"activityqueries|activityfacetqueries", verb="POST" - }[30m])), 0.001) + }[30m])) ) - * on() (activity:slo_activity_query:request_total:rate5m > bool 0) + and sum(rate(apiserver_request_duration_seconds_count{ + job="activity-apiserver", + resource=~"activityqueries|activityfacetqueries", + verb="POST" + }[30m])) > 0 "record": "activity:slo_activity_query:error_ratio:rate30m" - "expr": | 1 - ( @@ -351,13 +387,17 @@ le="5" }[1h])) / - clamp_min(sum(rate(apiserver_request_total{ + sum(rate(apiserver_request_duration_seconds_count{ job="activity-apiserver", resource=~"activityqueries|activityfacetqueries", verb="POST" - }[1h])), 0.001) + }[1h])) ) - * on() (activity:slo_activity_query:request_total:rate5m > bool 0) + and sum(rate(apiserver_request_duration_seconds_count{ + job="activity-apiserver", + resource=~"activityqueries|activityfacetqueries", + verb="POST" + }[1h])) > 0 "record": "activity:slo_activity_query:error_ratio:rate1h" - "expr": | 1 - ( @@ -368,13 +408,17 @@ le="5" }[6h])) / - clamp_min(sum(rate(apiserver_request_total{ + sum(rate(apiserver_request_duration_seconds_count{ job="activity-apiserver", resource=~"activityqueries|activityfacetqueries", verb="POST" - }[6h])), 0.001) + }[6h])) ) - * on() 
(activity:slo_activity_query:request_total:rate5m > bool 0) + and sum(rate(apiserver_request_duration_seconds_count{ + job="activity-apiserver", + resource=~"activityqueries|activityfacetqueries", + verb="POST" + }[6h])) > 0 "record": "activity:slo_activity_query:error_ratio:rate6h" - "expr": | 1 - ( @@ -385,13 +429,17 @@ le="5" }[3d])) / - clamp_min(sum(rate(apiserver_request_total{ + sum(rate(apiserver_request_duration_seconds_count{ job="activity-apiserver", resource=~"activityqueries|activityfacetqueries", verb="POST" - }[3d])), 0.001) + }[3d])) ) - * on() (activity:slo_activity_query:request_total:rate5m > bool 0) + and sum(rate(apiserver_request_duration_seconds_count{ + job="activity-apiserver", + resource=~"activityqueries|activityfacetqueries", + verb="POST" + }[3d])) > 0 "record": "activity:slo_activity_query:error_ratio:rate3d" - "expr": | sum(rate(apiserver_request_duration_seconds_bucket{ @@ -402,7 +450,7 @@ }[5m])) "record": "activity:slo_event_query:request_good:rate5m" - "expr": | - sum(rate(apiserver_request_total{ + sum(rate(apiserver_request_duration_seconds_count{ job="activity-apiserver", resource=~"eventqueries|eventfacetqueries", verb="POST" @@ -426,13 +474,17 @@ le="5" }[30m])) / - clamp_min(sum(rate(apiserver_request_total{ + sum(rate(apiserver_request_duration_seconds_count{ job="activity-apiserver", resource=~"eventqueries|eventfacetqueries", verb="POST" - }[30m])), 0.001) + }[30m])) ) - * on() (activity:slo_event_query:request_total:rate5m > bool 0) + and sum(rate(apiserver_request_duration_seconds_count{ + job="activity-apiserver", + resource=~"eventqueries|eventfacetqueries", + verb="POST" + }[30m])) > 0 "record": "activity:slo_event_query:error_ratio:rate30m" - "expr": | 1 - ( @@ -443,13 +495,17 @@ le="5" }[1h])) / - clamp_min(sum(rate(apiserver_request_total{ + sum(rate(apiserver_request_duration_seconds_count{ job="activity-apiserver", resource=~"eventqueries|eventfacetqueries", verb="POST" - }[1h])), 0.001) + }[1h])) ) - * on() 
(activity:slo_event_query:request_total:rate5m > bool 0) + and sum(rate(apiserver_request_duration_seconds_count{ + job="activity-apiserver", + resource=~"eventqueries|eventfacetqueries", + verb="POST" + }[1h])) > 0 "record": "activity:slo_event_query:error_ratio:rate1h" - "expr": | 1 - ( @@ -460,13 +516,17 @@ le="5" }[6h])) / - clamp_min(sum(rate(apiserver_request_total{ + sum(rate(apiserver_request_duration_seconds_count{ job="activity-apiserver", resource=~"eventqueries|eventfacetqueries", verb="POST" - }[6h])), 0.001) + }[6h])) ) - * on() (activity:slo_event_query:request_total:rate5m > bool 0) + and sum(rate(apiserver_request_duration_seconds_count{ + job="activity-apiserver", + resource=~"eventqueries|eventfacetqueries", + verb="POST" + }[6h])) > 0 "record": "activity:slo_event_query:error_ratio:rate6h" - "expr": | 1 - ( @@ -477,13 +537,17 @@ le="5" }[3d])) / - clamp_min(sum(rate(apiserver_request_total{ + sum(rate(apiserver_request_duration_seconds_count{ job="activity-apiserver", resource=~"eventqueries|eventfacetqueries", verb="POST" - }[3d])), 0.001) + }[3d])) ) - * on() (activity:slo_event_query:request_total:rate5m > bool 0) + and sum(rate(apiserver_request_duration_seconds_count{ + job="activity-apiserver", + resource=~"eventqueries|eventfacetqueries", + verb="POST" + }[3d])) > 0 "record": "activity:slo_event_query:error_ratio:rate3d" - "expr": | sum(rate(apiserver_request_total{ @@ -515,12 +579,15 @@ code!~"5.." }[30m])) / - clamp_min(sum(rate(apiserver_request_total{ + sum(rate(apiserver_request_total{ job="activity-apiserver", verb!="WATCH" - }[30m])), 0.001) + }[30m])) ) - * on() (activity:slo_availability:request_total:rate5m > bool 0) + and sum(rate(apiserver_request_total{ + job="activity-apiserver", + verb!="WATCH" + }[30m])) > 0 "record": "activity:slo_availability:error_ratio:rate30m" - "expr": | 1 - ( @@ -530,12 +597,15 @@ code!~"5.." 
}[1h])) / - clamp_min(sum(rate(apiserver_request_total{ + sum(rate(apiserver_request_total{ job="activity-apiserver", verb!="WATCH" - }[1h])), 0.001) + }[1h])) ) - * on() (activity:slo_availability:request_total:rate5m > bool 0) + and sum(rate(apiserver_request_total{ + job="activity-apiserver", + verb!="WATCH" + }[1h])) > 0 "record": "activity:slo_availability:error_ratio:rate1h" - "expr": | 1 - ( @@ -545,12 +615,15 @@ code!~"5.." }[6h])) / - clamp_min(sum(rate(apiserver_request_total{ + sum(rate(apiserver_request_total{ job="activity-apiserver", verb!="WATCH" - }[6h])), 0.001) + }[6h])) ) - * on() (activity:slo_availability:request_total:rate5m > bool 0) + and sum(rate(apiserver_request_total{ + job="activity-apiserver", + verb!="WATCH" + }[6h])) > 0 "record": "activity:slo_availability:error_ratio:rate6h" - "expr": | 1 - ( @@ -560,10 +633,13 @@ code!~"5.." }[3d])) / - clamp_min(sum(rate(apiserver_request_total{ + sum(rate(apiserver_request_total{ job="activity-apiserver", verb!="WATCH" - }[3d])), 0.001) + }[3d])) ) - * on() (activity:slo_availability:request_total:rate5m > bool 0) + and sum(rate(apiserver_request_total{ + job="activity-apiserver", + verb!="WATCH" + }[3d])) > 0 "record": "activity:slo_availability:error_ratio:rate3d" diff --git a/observability/rules/activity-slo-recordings.libsonnet b/observability/rules/activity-slo-recordings.libsonnet index 6dee5869..00c15936 100644 --- a/observability/rules/activity-slo-recordings.libsonnet +++ b/observability/rules/activity-slo-recordings.libsonnet @@ -43,7 +43,7 @@ { record: 'activity:slo_metadata:request_total:rate5m', expr: ||| - sum(rate(apiserver_request_total{ + sum(rate(apiserver_request_duration_seconds_count{ job="activity-apiserver", resource="activitypolicies", verb=~"GET|LIST|PATCH" @@ -76,13 +76,17 @@ le="1" }[30m])) / - clamp_min(sum(rate(apiserver_request_total{ + sum(rate(apiserver_request_duration_seconds_count{ job="activity-apiserver", resource="activitypolicies", verb=~"GET|LIST|PATCH" - 
}[30m])), 0.001) + }[30m])) ) - * on() (activity:slo_metadata:request_total:rate5m > bool 0) + and sum(rate(apiserver_request_duration_seconds_count{ + job="activity-apiserver", + resource="activitypolicies", + verb=~"GET|LIST|PATCH" + }[30m])) > 0 |||, }, @@ -97,13 +101,17 @@ le="1" }[1h])) / - clamp_min(sum(rate(apiserver_request_total{ + sum(rate(apiserver_request_duration_seconds_count{ job="activity-apiserver", resource="activitypolicies", verb=~"GET|LIST|PATCH" - }[1h])), 0.001) + }[1h])) ) - * on() (activity:slo_metadata:request_total:rate5m > bool 0) + and sum(rate(apiserver_request_duration_seconds_count{ + job="activity-apiserver", + resource="activitypolicies", + verb=~"GET|LIST|PATCH" + }[1h])) > 0 |||, }, @@ -118,13 +126,17 @@ le="1" }[6h])) / - clamp_min(sum(rate(apiserver_request_total{ + sum(rate(apiserver_request_duration_seconds_count{ job="activity-apiserver", resource="activitypolicies", verb=~"GET|LIST|PATCH" - }[6h])), 0.001) + }[6h])) ) - * on() (activity:slo_metadata:request_total:rate5m > bool 0) + and sum(rate(apiserver_request_duration_seconds_count{ + job="activity-apiserver", + resource="activitypolicies", + verb=~"GET|LIST|PATCH" + }[6h])) > 0 |||, }, @@ -139,13 +151,17 @@ le="1" }[3d])) / - clamp_min(sum(rate(apiserver_request_total{ + sum(rate(apiserver_request_duration_seconds_count{ job="activity-apiserver", resource="activitypolicies", verb=~"GET|LIST|PATCH" - }[3d])), 0.001) + }[3d])) ) - * on() (activity:slo_metadata:request_total:rate5m > bool 0) + and sum(rate(apiserver_request_duration_seconds_count{ + job="activity-apiserver", + resource="activitypolicies", + verb=~"GET|LIST|PATCH" + }[3d])) > 0 |||, }, @@ -169,7 +185,7 @@ { record: 'activity:slo_audit_query:request_total:rate5m', expr: ||| - sum(rate(apiserver_request_total{ + sum(rate(apiserver_request_duration_seconds_count{ job="activity-apiserver", resource="auditlogqueries", verb="POST" @@ -201,13 +217,17 @@ le="5" }[30m])) / - 
clamp_min(sum(rate(apiserver_request_total{ + sum(rate(apiserver_request_duration_seconds_count{ job="activity-apiserver", resource="auditlogqueries", verb="POST" - }[30m])), 0.001) + }[30m])) ) - * on() (activity:slo_audit_query:request_total:rate5m > bool 0) + and sum(rate(apiserver_request_duration_seconds_count{ + job="activity-apiserver", + resource="auditlogqueries", + verb="POST" + }[30m])) > 0 |||, }, @@ -222,13 +242,17 @@ le="5" }[1h])) / - clamp_min(sum(rate(apiserver_request_total{ + sum(rate(apiserver_request_duration_seconds_count{ job="activity-apiserver", resource="auditlogqueries", verb="POST" - }[1h])), 0.001) + }[1h])) ) - * on() (activity:slo_audit_query:request_total:rate5m > bool 0) + and sum(rate(apiserver_request_duration_seconds_count{ + job="activity-apiserver", + resource="auditlogqueries", + verb="POST" + }[1h])) > 0 |||, }, @@ -243,13 +267,17 @@ le="5" }[6h])) / - clamp_min(sum(rate(apiserver_request_total{ + sum(rate(apiserver_request_duration_seconds_count{ job="activity-apiserver", resource="auditlogqueries", verb="POST" - }[6h])), 0.001) + }[6h])) ) - * on() (activity:slo_audit_query:request_total:rate5m > bool 0) + and sum(rate(apiserver_request_duration_seconds_count{ + job="activity-apiserver", + resource="auditlogqueries", + verb="POST" + }[6h])) > 0 |||, }, @@ -264,13 +292,17 @@ le="5" }[3d])) / - clamp_min(sum(rate(apiserver_request_total{ + sum(rate(apiserver_request_duration_seconds_count{ job="activity-apiserver", resource="auditlogqueries", verb="POST" - }[3d])), 0.001) + }[3d])) ) - * on() (activity:slo_audit_query:request_total:rate5m > bool 0) + and sum(rate(apiserver_request_duration_seconds_count{ + job="activity-apiserver", + resource="auditlogqueries", + verb="POST" + }[3d])) > 0 |||, }, @@ -294,7 +326,7 @@ { record: 'activity:slo_activity_query:request_total:rate5m', expr: ||| - sum(rate(apiserver_request_total{ + sum(rate(apiserver_request_duration_seconds_count{ job="activity-apiserver", 
resource=~"activityqueries|activityfacetqueries", verb="POST" @@ -326,13 +358,17 @@ le="5" }[30m])) / - clamp_min(sum(rate(apiserver_request_total{ + sum(rate(apiserver_request_duration_seconds_count{ job="activity-apiserver", resource=~"activityqueries|activityfacetqueries", verb="POST" - }[30m])), 0.001) + }[30m])) ) - * on() (activity:slo_activity_query:request_total:rate5m > bool 0) + and sum(rate(apiserver_request_duration_seconds_count{ + job="activity-apiserver", + resource=~"activityqueries|activityfacetqueries", + verb="POST" + }[30m])) > 0 |||, }, @@ -347,13 +383,17 @@ le="5" }[1h])) / - clamp_min(sum(rate(apiserver_request_total{ + sum(rate(apiserver_request_duration_seconds_count{ job="activity-apiserver", resource=~"activityqueries|activityfacetqueries", verb="POST" - }[1h])), 0.001) + }[1h])) ) - * on() (activity:slo_activity_query:request_total:rate5m > bool 0) + and sum(rate(apiserver_request_duration_seconds_count{ + job="activity-apiserver", + resource=~"activityqueries|activityfacetqueries", + verb="POST" + }[1h])) > 0 |||, }, @@ -368,13 +408,17 @@ le="5" }[6h])) / - clamp_min(sum(rate(apiserver_request_total{ + sum(rate(apiserver_request_duration_seconds_count{ job="activity-apiserver", resource=~"activityqueries|activityfacetqueries", verb="POST" - }[6h])), 0.001) + }[6h])) ) - * on() (activity:slo_activity_query:request_total:rate5m > bool 0) + and sum(rate(apiserver_request_duration_seconds_count{ + job="activity-apiserver", + resource=~"activityqueries|activityfacetqueries", + verb="POST" + }[6h])) > 0 |||, }, @@ -389,13 +433,17 @@ le="5" }[3d])) / - clamp_min(sum(rate(apiserver_request_total{ + sum(rate(apiserver_request_duration_seconds_count{ job="activity-apiserver", resource=~"activityqueries|activityfacetqueries", verb="POST" - }[3d])), 0.001) + }[3d])) ) - * on() (activity:slo_activity_query:request_total:rate5m > bool 0) + and sum(rate(apiserver_request_duration_seconds_count{ + job="activity-apiserver", + 
resource=~"activityqueries|activityfacetqueries", + verb="POST" + }[3d])) > 0 |||, }, @@ -419,7 +467,7 @@ { record: 'activity:slo_event_query:request_total:rate5m', expr: ||| - sum(rate(apiserver_request_total{ + sum(rate(apiserver_request_duration_seconds_count{ job="activity-apiserver", resource=~"eventqueries|eventfacetqueries", verb="POST" @@ -451,13 +499,17 @@ le="5" }[30m])) / - clamp_min(sum(rate(apiserver_request_total{ + sum(rate(apiserver_request_duration_seconds_count{ job="activity-apiserver", resource=~"eventqueries|eventfacetqueries", verb="POST" - }[30m])), 0.001) + }[30m])) ) - * on() (activity:slo_event_query:request_total:rate5m > bool 0) + and sum(rate(apiserver_request_duration_seconds_count{ + job="activity-apiserver", + resource=~"eventqueries|eventfacetqueries", + verb="POST" + }[30m])) > 0 |||, }, @@ -472,13 +524,17 @@ le="5" }[1h])) / - clamp_min(sum(rate(apiserver_request_total{ + sum(rate(apiserver_request_duration_seconds_count{ job="activity-apiserver", resource=~"eventqueries|eventfacetqueries", verb="POST" - }[1h])), 0.001) + }[1h])) ) - * on() (activity:slo_event_query:request_total:rate5m > bool 0) + and sum(rate(apiserver_request_duration_seconds_count{ + job="activity-apiserver", + resource=~"eventqueries|eventfacetqueries", + verb="POST" + }[1h])) > 0 |||, }, @@ -493,13 +549,17 @@ le="5" }[6h])) / - clamp_min(sum(rate(apiserver_request_total{ + sum(rate(apiserver_request_duration_seconds_count{ job="activity-apiserver", resource=~"eventqueries|eventfacetqueries", verb="POST" - }[6h])), 0.001) + }[6h])) ) - * on() (activity:slo_event_query:request_total:rate5m > bool 0) + and sum(rate(apiserver_request_duration_seconds_count{ + job="activity-apiserver", + resource=~"eventqueries|eventfacetqueries", + verb="POST" + }[6h])) > 0 |||, }, @@ -514,13 +574,17 @@ le="5" }[3d])) / - clamp_min(sum(rate(apiserver_request_total{ + sum(rate(apiserver_request_duration_seconds_count{ job="activity-apiserver", 
resource=~"eventqueries|eventfacetqueries", verb="POST" - }[3d])), 0.001) + }[3d])) ) - * on() (activity:slo_event_query:request_total:rate5m > bool 0) + and sum(rate(apiserver_request_duration_seconds_count{ + job="activity-apiserver", + resource=~"eventqueries|eventfacetqueries", + verb="POST" + }[3d])) > 0 |||, }, @@ -573,12 +637,15 @@ code!~"5.." }[30m])) / - clamp_min(sum(rate(apiserver_request_total{ + sum(rate(apiserver_request_total{ job="activity-apiserver", verb!="WATCH" - }[30m])), 0.001) + }[30m])) ) - * on() (activity:slo_availability:request_total:rate5m > bool 0) + and sum(rate(apiserver_request_total{ + job="activity-apiserver", + verb!="WATCH" + }[30m])) > 0 |||, }, @@ -592,12 +659,15 @@ code!~"5.." }[1h])) / - clamp_min(sum(rate(apiserver_request_total{ + sum(rate(apiserver_request_total{ job="activity-apiserver", verb!="WATCH" - }[1h])), 0.001) + }[1h])) ) - * on() (activity:slo_availability:request_total:rate5m > bool 0) + and sum(rate(apiserver_request_total{ + job="activity-apiserver", + verb!="WATCH" + }[1h])) > 0 |||, }, @@ -611,12 +681,15 @@ code!~"5.." }[6h])) / - clamp_min(sum(rate(apiserver_request_total{ + sum(rate(apiserver_request_total{ job="activity-apiserver", verb!="WATCH" - }[6h])), 0.001) + }[6h])) ) - * on() (activity:slo_availability:request_total:rate5m > bool 0) + and sum(rate(apiserver_request_total{ + job="activity-apiserver", + verb!="WATCH" + }[6h])) > 0 |||, }, @@ -630,12 +703,15 @@ code!~"5.." }[3d])) / - clamp_min(sum(rate(apiserver_request_total{ + sum(rate(apiserver_request_total{ job="activity-apiserver", verb!="WATCH" - }[3d])), 0.001) + }[3d])) ) - * on() (activity:slo_availability:request_total:rate5m > bool 0) + and sum(rate(apiserver_request_total{ + job="activity-apiserver", + verb!="WATCH" + }[3d])) > 0 |||, },