From 39b3f53dcc1b4ab8e60eb6fd30a991b5e6f918ad Mon Sep 17 00:00:00 2001
From: Victor Sollerhed
Date: Mon, 29 Nov 2021 11:32:13 +0100
Subject: [PATCH] Alert NoOutputBytesProcessed should combine outputs

We were seeing issues where we got alerted due to some outputs not
sending data for more than 15 minutes. For example, this is expected
from journald logs on worker nodes in a cluster, thus this adjustment
to the alert.

We now aggregate output per pod and alert if no bytes are sent for 15
minutes.

Signed-off-by: Victor Sollerhed
---
 monitoring/alerts.yaml | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/monitoring/alerts.yaml b/monitoring/alerts.yaml
index 3a91250e3..6df8588ee 100644
--- a/monitoring/alerts.yaml
+++ b/monitoring/alerts.yaml
@@ -2,11 +2,10 @@ groups:
 - name: fluent-bit
   rules:
   - alert: NoOutputBytesProcessed
-    expr: rate(fluentbit_output_proc_bytes_total[5m]) == 0
+    expr: sum(rate(fluentbit_output_proc_bytes_total[5m])) by (pod) == 0
     annotations:
       message: |
-        Fluent Bit instance {{ $labels.instance }}'s output plugin {{ $labels.name }} has not processed any
-        bytes for at least 15 minutes.
+        Fluent Bit pod {{ $labels.pod }} has not processed any output bytes for at least 15 min.
       summary: No Output Bytes Processed
     for: 15m
     labels: