Permalink
Browse files

more elastic alarm thresholds for cpu, ram and swap

  • Loading branch information...
1 parent d9c9892 · commit ecfdcb34abc6bfd065e225900715b395f43740c5 · @ktsaou ktsaou committed on Nov 12, 2016
Showing with 10 additions and 10 deletions.
  1. +4 −4 conf.d/health.d/cpu.conf
  2. +2 −2 conf.d/health.d/ram.conf
  3. +4 −4 conf.d/health.d/swap.conf
@@ -4,8 +4,8 @@ template: 10min_cpu_usage
lookup: average -10m unaligned of user,system,nice,softirq,irq,guest,guest_nice
units: %
every: 1m
- warn: $this > (($status >= $WARNING) ? (70) : (80))
- crit: $this > (($status == $CRITICAL) ? (80) : (90))
+ warn: $this > (($status >= $WARNING) ? (75) : (85))
+ crit: $this > (($status == $CRITICAL) ? (85) : (95))
delay: down 15m multiplier 1.5 max 1h
info: average cpu utilization for the last 10 minutes
to: sysadmin
@@ -15,8 +15,8 @@ template: 10min_cpu_iowait
lookup: average -10m unaligned of iowait
units: %
every: 1m
- warn: $this > (($status >= $WARNING) ? (5) : (10))
- crit: $this > (($status == $CRITICAL) ? (20) : (30))
+ warn: $this > (($status >= $WARNING) ? (20) : (40))
+ crit: $this > (($status == $CRITICAL) ? (40) : (50))
delay: down 15m multiplier 1.5 max 1h
info: average CPU wait I/O for the last 10 minutes
to: sysadmin
@@ -4,8 +4,8 @@
calc: $used * 100 / ($used + $cached + $free)
units: %
every: 10s
- warn: $this > (($status >= $WARNING) ? (70) : (80))
- crit: $this > (($status == $CRITICAL) ? (80) : (90))
+ warn: $this > (($status >= $WARNING) ? (80) : (90))
+ crit: $this > (($status == $CRITICAL) ? (90) : (98))
delay: down 15m multiplier 1.5 max 1h
info: system RAM usage
to: sysadmin
@@ -6,8 +6,8 @@
calc: $this / 1024 * 100 / ( $system.ram.used + $system.ram.cached + $system.ram.free )
units: % of RAM
every: 1m
- warn: $this > (($status >= $WARNING) ? (5) : (10))
- crit: $this > (($status == $CRITICAL) ? (15) : (20))
+ warn: $this > (($status >= $WARNING) ? (10) : (20))
+ crit: $this > (($status == $CRITICAL) ? (20) : (30))
delay: up 0 down 15m multiplier 1.5 max 1h
info: the amount of memory swapped in the last 30 minutes, as a percentage of the system RAM
to: sysadmin
@@ -28,8 +28,8 @@
calc: $used * 100 / ( $used + $free )
units: %
every: 10s
- warn: $this > (($status >= $WARNING) ? (60) : (80))
- crit: $this > (($status == $CRITICAL) ? (80) : (90))
+ warn: $this > (($status >= $WARNING) ? (80) : (90))
+ crit: $this > (($status == $CRITICAL) ? (90) : (98))
delay: up 0 down 15m multiplier 1.5 max 1h
info: the percentage of swap memory used
to: sysadmin

0 comments on commit ecfdcb3

Please sign in to comment.