diff --git a/docs/en/setup/backend/backend-alarm.md b/docs/en/setup/backend/backend-alarm.md index cdfa3eee2e8b..95a4cdc995ff 100644 --- a/docs/en/setup/backend/backend-alarm.md +++ b/docs/en/setup/backend/backend-alarm.md @@ -11,6 +11,8 @@ Alarm rule is constituted by following keys [List of all potential metrics name](#list-of-all-potential-metrics-name). - **Include names**. The following entity names are included in this rule. Such as Service name, endpoint name. +- **Exclude names**. The following entity names are excluded from this rule. Such as Service name, + endpoint name. - **Threshold**. The target value. - **OP**. Operator, support `>`, `<`, `=`. Welcome to contribute all OPs. - **Period**. How long should the alarm rule should be checked. This is a time window, which goes with the @@ -43,6 +45,8 @@ rules: include-names: - service_a - service_b + exclude-names: + - service_c threshold: 85 op: < period: 10 diff --git a/oap-server/server-alarm-plugin/src/main/java/org/apache/skywalking/oap/server/core/alarm/provider/AlarmRule.java b/oap-server/server-alarm-plugin/src/main/java/org/apache/skywalking/oap/server/core/alarm/provider/AlarmRule.java index bf077cf428bc..9b40cce5c6b5 100644 --- a/oap-server/server-alarm-plugin/src/main/java/org/apache/skywalking/oap/server/core/alarm/provider/AlarmRule.java +++ b/oap-server/server-alarm-plugin/src/main/java/org/apache/skywalking/oap/server/core/alarm/provider/AlarmRule.java @@ -41,6 +41,7 @@ public class AlarmRule { private String metricsName; private ArrayList includeNames; + private ArrayList excludeNames; private String threshold; private String op; private int period; @@ -66,6 +67,7 @@ public boolean equals(final Object o) { && Objects.equals(alarmRuleName, alarmRule.alarmRuleName) && Objects.equals(metricsName, alarmRule.metricsName) && Objects.equals(includeNames, alarmRule.includeNames) + && Objects.equals(excludeNames, alarmRule.excludeNames) && Objects.equals(threshold, alarmRule.threshold) && 
Objects.equals(op, alarmRule.op) && Objects.equals(message, alarmRule.message); @@ -73,6 +75,6 @@ public boolean equals(final Object o) { @Override public int hashCode() { - return Objects.hash(alarmRuleName, metricsName, includeNames, threshold, op, period, count, silencePeriod, message); + return Objects.hash(alarmRuleName, metricsName, includeNames, excludeNames, threshold, op, period, count, silencePeriod, message); } } diff --git a/oap-server/server-alarm-plugin/src/main/java/org/apache/skywalking/oap/server/core/alarm/provider/RulesReader.java b/oap-server/server-alarm-plugin/src/main/java/org/apache/skywalking/oap/server/core/alarm/provider/RulesReader.java index 8cc9785d0755..74fb9dac1cfa 100644 --- a/oap-server/server-alarm-plugin/src/main/java/org/apache/skywalking/oap/server/core/alarm/provider/RulesReader.java +++ b/oap-server/server-alarm-plugin/src/main/java/org/apache/skywalking/oap/server/core/alarm/provider/RulesReader.java @@ -59,6 +59,7 @@ public Rules readRules() { alarmRule.setMetricsName((String)metricsName); alarmRule.setIncludeNames((ArrayList)settings.getOrDefault("include-names", new ArrayList(0))); + alarmRule.setExcludeNames((ArrayList)settings.getOrDefault("exclude-names", new ArrayList(0))); alarmRule.setThreshold(settings.get("threshold").toString()); alarmRule.setOp((String)settings.get("op")); alarmRule.setPeriod((Integer)settings.getOrDefault("period", 1)); diff --git a/oap-server/server-alarm-plugin/src/main/java/org/apache/skywalking/oap/server/core/alarm/provider/RunningRule.java b/oap-server/server-alarm-plugin/src/main/java/org/apache/skywalking/oap/server/core/alarm/provider/RunningRule.java index a244fff3db4e..6af7524e5686 100644 --- a/oap-server/server-alarm-plugin/src/main/java/org/apache/skywalking/oap/server/core/alarm/provider/RunningRule.java +++ b/oap-server/server-alarm-plugin/src/main/java/org/apache/skywalking/oap/server/core/alarm/provider/RunningRule.java @@ -59,6 +59,7 @@ public class RunningRule { private 
volatile MetricsValueType valueType; private int targetScopeId; private List includeNames; + private List excludeNames; private AlarmMessageFormatter formatter; public RunningRule(AlarmRule alarmRule) { @@ -77,6 +78,7 @@ public RunningRule(AlarmRule alarmRule) { this.silencePeriod = alarmRule.getSilencePeriod(); this.includeNames = alarmRule.getIncludeNames(); + this.excludeNames = alarmRule.getExcludeNames(); this.formatter = new AlarmMessageFormatter(alarmRule.getMessage()); } @@ -98,6 +100,12 @@ public void in(MetaInAlarm meta, Metrics metrics) { } } + if (CollectionUtils.isNotEmpty(excludeNames)) { + if (excludeNames.contains(meta.getName())) { + return; + } + } + if (valueType == null) { if (metrics instanceof LongValueHolder) { valueType = MetricsValueType.LONG; diff --git a/oap-server/server-alarm-plugin/src/test/java/org/apache/skywalking/oap/server/core/alarm/provider/AlarmRuleInitTest.java b/oap-server/server-alarm-plugin/src/test/java/org/apache/skywalking/oap/server/core/alarm/provider/AlarmRuleInitTest.java index debcf5cfb849..8a6434f705e4 100644 --- a/oap-server/server-alarm-plugin/src/test/java/org/apache/skywalking/oap/server/core/alarm/provider/AlarmRuleInitTest.java +++ b/oap-server/server-alarm-plugin/src/test/java/org/apache/skywalking/oap/server/core/alarm/provider/AlarmRuleInitTest.java @@ -34,9 +34,11 @@ public void testInit() { Assert.assertEquals("85", ruleList.get(1).getThreshold()); Assert.assertEquals("endpoint_percent_rule", ruleList.get(0).getAlarmRuleName()); Assert.assertEquals(0, ruleList.get(0).getIncludeNames().size()); + Assert.assertEquals(0, ruleList.get(0).getExcludeNames().size()); Assert.assertEquals("Successful rate of endpoint {name} is lower than 75%", ruleList.get(0).getMessage()); Assert.assertEquals("service_b", ruleList.get(1).getIncludeNames().get(1)); + Assert.assertEquals("service_c", ruleList.get(1).getExcludeNames().get(0)); Assert.assertEquals("Alarm caused by Rule service_percent_rule", 
ruleList.get(1).getMessage()); List rulesWebhooks = rules.getWebhooks(); diff --git a/oap-server/server-alarm-plugin/src/test/java/org/apache/skywalking/oap/server/core/alarm/provider/AlarmRulesWatcherTest.java b/oap-server/server-alarm-plugin/src/test/java/org/apache/skywalking/oap/server/core/alarm/provider/AlarmRulesWatcherTest.java index 572d590f8a34..8535c1d3a702 100644 --- a/oap-server/server-alarm-plugin/src/test/java/org/apache/skywalking/oap/server/core/alarm/provider/AlarmRulesWatcherTest.java +++ b/oap-server/server-alarm-plugin/src/test/java/org/apache/skywalking/oap/server/core/alarm/provider/AlarmRulesWatcherTest.java @@ -52,6 +52,12 @@ public class AlarmRulesWatcherTest { add("2"); } }) + .excludeNames(new ArrayList() { + { + add("3"); + add("4"); + } + }) .message("test") .metricsName("metrics1") .op(">") diff --git a/oap-server/server-alarm-plugin/src/test/java/org/apache/skywalking/oap/server/core/alarm/provider/RunningRuleTest.java b/oap-server/server-alarm-plugin/src/test/java/org/apache/skywalking/oap/server/core/alarm/provider/RunningRuleTest.java index 12d6d99b1014..a07a593cdd36 100644 --- a/oap-server/server-alarm-plugin/src/test/java/org/apache/skywalking/oap/server/core/alarm/provider/RunningRuleTest.java +++ b/oap-server/server-alarm-plugin/src/test/java/org/apache/skywalking/oap/server/core/alarm/provider/RunningRuleTest.java @@ -18,6 +18,7 @@ package org.apache.skywalking.oap.server.core.alarm.provider; +import com.google.common.collect.Lists; import java.util.*; import org.apache.skywalking.oap.server.core.alarm.*; import org.apache.skywalking.oap.server.core.analysis.metrics.*; @@ -181,6 +182,38 @@ public void testSilence() { Assert.assertNotEquals(0, runningRule.check().size()); //alarm } + @Test + public void testExclude() { + AlarmRule alarmRule = new AlarmRule(); + alarmRule.setAlarmRuleName("endpoint_percent_rule"); + alarmRule.setMetricsName("endpoint_percent"); + alarmRule.setOp("<"); + alarmRule.setThreshold("75"); + 
alarmRule.setCount(3); + alarmRule.setPeriod(15); + alarmRule.setMessage("Successful rate of endpoint {name} is lower than 75%"); + alarmRule.setExcludeNames(Lists.newArrayList("Service_123")); + + RunningRule runningRule = new RunningRule(alarmRule); + + long timeInPeriod1 = 201808301434L; + long timeInPeriod2 = 201808301436L; + long timeInPeriod3 = 201808301438L; + + runningRule.in(getMetaInAlarm(123), getMetrics(timeInPeriod1, 70)); + runningRule.in(getMetaInAlarm(123), getMetrics(timeInPeriod2, 71)); + runningRule.in(getMetaInAlarm(123), getMetrics(timeInPeriod3, 74)); + + // check at 201808301440 + Assert.assertEquals(0, runningRule.check().size()); + runningRule.moveTo(TIME_BUCKET_FORMATTER.parseLocalDateTime("201808301441")); + // check at 201808301441 + Assert.assertEquals(0, runningRule.check().size()); + runningRule.moveTo(TIME_BUCKET_FORMATTER.parseLocalDateTime("201808301442")); + // check at 201808301442 + Assert.assertEquals(0, runningRule.check().size()); + } + private MetaInAlarm getMetaInAlarm(int id) { return new MetaInAlarm() { @Override public String getScope() { diff --git a/oap-server/server-alarm-plugin/src/test/resources/alarm-settings.yml b/oap-server/server-alarm-plugin/src/test/resources/alarm-settings.yml index d46b17131515..880e21f17ad6 100755 --- a/oap-server/server-alarm-plugin/src/test/resources/alarm-settings.yml +++ b/oap-server/server-alarm-plugin/src/test/resources/alarm-settings.yml @@ -33,6 +33,8 @@ rules: include-names: - service_a - service_b + exclude-names: + - service_c threshold: 85 op: < period: 10