# prometheus_alerts.yaml
"groups":
- "name": "nginx.rules"
"rules":
- "alert": "NginxConfigReloadFailed"
"annotations":
"dashboard_url": "https://grafana.com/d/ingress-nginx-overview-12mk/ingress-nginx-overview?var-job={{ $labels.job }}&var-controller_class={{ $labels.controller_class }}"
"description": "Nginx config reload failed for the controller with the class {{ $labels.controller_class }}."
"summary": "Nginx config reload failed."
"expr": |
sum(
nginx_ingress_controller_config_last_reload_successful{job=~"ingress-nginx-controller-metrics"}
) by (job, controller_class)
== 0
"for": "5m"
"labels":
"severity": "warning"
- "alert": "NginxHighHttp4xxErrorRate"
"annotations":
"dashboard_url": "https://grafana.com/d/ingress-nginx-overview-12mk/ingress-nginx-overview?var-exported_namespace={{ $labels.exported_namespace }}&var-ingress={{ $labels.ingress }}"
"description": "More than 5% HTTP requests with status 4xx for {{ $labels.exported_namespace }}/{{ $labels.ingress }} the past 5m."
"summary": "Nginx high HTTP 4xx error rate."
"expr": |
(sum(rate(nginx_ingress_controller_requests{job=~"ingress-nginx-controller-metrics", status=~"^4.*", ingress!~""}[5m])) by (exported_namespace, ingress) / sum(rate(nginx_ingress_controller_requests{job=~"ingress-nginx-controller-metrics", ingress!~""}[5m])) by (exported_namespace, ingress) * 100) > 5
"for": "1m"
"labels":
"severity": "info"
- "alert": "NginxHighHttp5xxErrorRate"
"annotations":
"dashboard_url": "https://grafana.com/d/ingress-nginx-overview-12mk/ingress-nginx-overview?var-exported_namespace={{ $labels.exported_namespace }}&var-ingress={{ $labels.ingress }}"
"description": "More than 5% HTTP requests with status 5xx for {{ $labels.exported_namespace }}/{{ $labels.ingress }} the past 5m."
"summary": "Nginx high HTTP 5xx error rate."
"expr": |
(sum(rate(nginx_ingress_controller_requests{job=~"ingress-nginx-controller-metrics", status=~"^5.*", ingress!~""}[5m])) by (exported_namespace, ingress) / sum(rate(nginx_ingress_controller_requests{job=~"ingress-nginx-controller-metrics", ingress!~""}[5m])) by (exported_namespace, ingress) * 100) > 5
"for": "1m"
"labels":
"severity": "warning"