diff --git a/CHANGES.md b/CHANGES.md index bf08b72c4d..23f81e43e8 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -10,6 +10,7 @@ Release Notes. * Remove redundant `shade.package` property. * Add servicecomb-2.x plugin and Testcase. * Fix NPE in gateway plugin when the timer triggers webflux webclient call. +* Add an optional plugin, trace-sampler-cpu-policy-plugin, which could disable trace collecting in high CPU load. #### Documentation diff --git a/apm-sniffer/apm-agent-core/src/main/java/org/apache/skywalking/apm/agent/core/jvm/JVMService.java b/apm-sniffer/apm-agent-core/src/main/java/org/apache/skywalking/apm/agent/core/jvm/JVMService.java index 4c9b25ddbb..beca07afa5 100644 --- a/apm-sniffer/apm-agent-core/src/main/java/org/apache/skywalking/apm/agent/core/jvm/JVMService.java +++ b/apm-sniffer/apm-agent-core/src/main/java/org/apache/skywalking/apm/agent/core/jvm/JVMService.java @@ -47,6 +47,7 @@ public class JVMService implements BootService, Runnable { private volatile ScheduledFuture collectMetricFuture; private volatile ScheduledFuture sendMetricFuture; private JVMMetricsSender sender; + private volatile double cpuUsagePercent; @Override public void prepare() throws Throwable { @@ -103,9 +104,18 @@ public void run() { jvmBuilder.setThread(ThreadProvider.INSTANCE.getThreadMetrics()); jvmBuilder.setClazz(ClassProvider.INSTANCE.getClassMetrics()); - sender.offer(jvmBuilder.build()); + JVMMetric jvmMetric = jvmBuilder.build(); + sender.offer(jvmMetric); + + // refresh cpu usage percent + cpuUsagePercent = jvmMetric.getCpu().getUsagePercent(); } catch (Exception e) { LOGGER.error(e, "Collect JVM info fail."); } } + + public double getCpuUsagePercent() { + return this.cpuUsagePercent; + } + } diff --git a/apm-sniffer/apm-agent-core/src/main/java/org/apache/skywalking/apm/agent/core/sampling/SamplingService.java b/apm-sniffer/apm-agent-core/src/main/java/org/apache/skywalking/apm/agent/core/sampling/SamplingService.java index 83c29718a4..ada9825342 100644 --- 
a/apm-sniffer/apm-agent-core/src/main/java/org/apache/skywalking/apm/agent/core/sampling/SamplingService.java +++ b/apm-sniffer/apm-agent-core/src/main/java/org/apache/skywalking/apm/agent/core/sampling/SamplingService.java @@ -136,4 +136,4 @@ public void handleSamplingRateChanged() { } } } -} \ No newline at end of file +} diff --git a/apm-sniffer/config/agent.config b/apm-sniffer/config/agent.config index 08b54f6334..e3d545599b 100755 --- a/apm-sniffer/config/agent.config +++ b/apm-sniffer/config/agent.config @@ -270,3 +270,5 @@ plugin.neo4j.trace_cypher_parameters=${SW_PLUGIN_NEO4J_TRACE_CYPHER_PARAMETERS:f plugin.neo4j.cypher_parameters_max_length=${SW_PLUGIN_NEO4J_CYPHER_PARAMETERS_MAX_LENGTH:512} # If set to positive number, the `db.statement` would be truncated to this length, otherwise it would be completely saved, which may cause performance problem. plugin.neo4j.cypher_body_max_length=${SW_PLUGIN_NEO4J_CYPHER_BODY_MAX_LENGTH:2048} +# If set to a positive number and the `trace sampler CPU policy plugin` is activated, traces would not be collected when the agent process CPU usage percent is greater than `plugin.cpupolicy.sample_cpu_usage_percent_limit`. 
+plugin.cpupolicy.sample_cpu_usage_percent_limit=${SW_SAMPLE_CPU_USAGE_PERCENT_LIMIT:-1} diff --git a/apm-sniffer/optional-plugins/pom.xml b/apm-sniffer/optional-plugins/pom.xml index beea677c52..b01bbd2383 100644 --- a/apm-sniffer/optional-plugins/pom.xml +++ b/apm-sniffer/optional-plugins/pom.xml @@ -56,6 +56,7 @@ fastjson-1.2.x-plugin jackson-2.x-plugin shenyu-2.4.x-plugin + trace-sampler-cpu-policy-plugin diff --git a/apm-sniffer/optional-plugins/trace-sampler-cpu-policy-plugin/pom.xml b/apm-sniffer/optional-plugins/trace-sampler-cpu-policy-plugin/pom.xml new file mode 100644 index 0000000000..c33f638868 --- /dev/null +++ b/apm-sniffer/optional-plugins/trace-sampler-cpu-policy-plugin/pom.xml @@ -0,0 +1,45 @@ + + + + + optional-plugins + org.apache.skywalking + 8.11.0-SNAPSHOT + + 4.0.0 + + trace-sampler-cpu-policy-plugin + jar + + apm-trace-cpu-limit-plugin + http://maven.apache.org + + + 1.18.0 + + + + + com.github.stefanbirkner + system-rules + ${ststem-rules.version} + test + + + diff --git a/apm-sniffer/optional-plugins/trace-sampler-cpu-policy-plugin/src/main/java/org/apache/skywalking/apm/plugin/cpu/policy/TraceSamplerCpuPolicyExtendService.java b/apm-sniffer/optional-plugins/trace-sampler-cpu-policy-plugin/src/main/java/org/apache/skywalking/apm/plugin/cpu/policy/TraceSamplerCpuPolicyExtendService.java new file mode 100644 index 0000000000..da8749a8be --- /dev/null +++ b/apm-sniffer/optional-plugins/trace-sampler-cpu-policy-plugin/src/main/java/org/apache/skywalking/apm/plugin/cpu/policy/TraceSamplerCpuPolicyExtendService.java @@ -0,0 +1,76 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.skywalking.apm.plugin.cpu.policy; + +import org.apache.skywalking.apm.agent.core.boot.OverrideImplementor; +import org.apache.skywalking.apm.agent.core.boot.ServiceManager; +import org.apache.skywalking.apm.agent.core.jvm.JVMService; +import org.apache.skywalking.apm.agent.core.logging.api.ILog; +import org.apache.skywalking.apm.agent.core.logging.api.LogManager; +import org.apache.skywalking.apm.agent.core.sampling.SamplingService; +import org.apache.skywalking.apm.plugin.cpu.policy.conf.TraceSamplerCpuPolicyPluginConfig; + +@OverrideImplementor(SamplingService.class) +public class TraceSamplerCpuPolicyExtendService extends SamplingService { + private static final ILog LOGGER = LogManager.getLogger(TraceSamplerCpuPolicyExtendService.class); + + private volatile boolean cpuUsagePercentLimitOn = false; + private volatile JVMService jvmService; + + @Override + public void prepare() { + super.prepare(); + } + + @Override + public void boot() { + super.boot(); + if (TraceSamplerCpuPolicyPluginConfig.Plugin.CpuPolicy.SAMPLE_CPU_USAGE_PERCENT_LIMIT > 0) { + LOGGER.info("TraceSamplerCpuPolicyExtendService cpu usage percent limit open"); + jvmService = ServiceManager.INSTANCE.findService(JVMService.class); + cpuUsagePercentLimitOn = true; + } + } + + @Override + public void onComplete() { + } + + @Override + public void shutdown() { + super.shutdown(); + } + + @Override + public boolean trySampling(final String operationName) { + if (cpuUsagePercentLimitOn) { + double cpuUsagePercent = jvmService.getCpuUsagePercent(); + if 
(cpuUsagePercent > TraceSamplerCpuPolicyPluginConfig.Plugin.CpuPolicy.SAMPLE_CPU_USAGE_PERCENT_LIMIT) { + return false; + } + } + return super.trySampling(operationName); + } + + @Override + public void forceSampled() { + super.forceSampled(); + } + +} diff --git a/apm-sniffer/optional-plugins/trace-sampler-cpu-policy-plugin/src/main/java/org/apache/skywalking/apm/plugin/cpu/policy/conf/TraceSamplerCpuPolicyPluginConfig.java b/apm-sniffer/optional-plugins/trace-sampler-cpu-policy-plugin/src/main/java/org/apache/skywalking/apm/plugin/cpu/policy/conf/TraceSamplerCpuPolicyPluginConfig.java new file mode 100644 index 0000000000..5878f8a5f7 --- /dev/null +++ b/apm-sniffer/optional-plugins/trace-sampler-cpu-policy-plugin/src/main/java/org/apache/skywalking/apm/plugin/cpu/policy/conf/TraceSamplerCpuPolicyPluginConfig.java @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +package org.apache.skywalking.apm.plugin.cpu.policy.conf; + +import org.apache.skywalking.apm.agent.core.boot.PluginConfig; + +public class TraceSamplerCpuPolicyPluginConfig { + public static class Plugin { + @PluginConfig(root = TraceSamplerCpuPolicyPluginConfig.class) + public static class CpuPolicy { + public static double SAMPLE_CPU_USAGE_PERCENT_LIMIT = -1; + } + } +} diff --git a/apm-sniffer/optional-plugins/trace-sampler-cpu-policy-plugin/src/main/resources/META-INF/services/org.apache.skywalking.apm.agent.core.boot.BootService b/apm-sniffer/optional-plugins/trace-sampler-cpu-policy-plugin/src/main/resources/META-INF/services/org.apache.skywalking.apm.agent.core.boot.BootService new file mode 100644 index 0000000000..64f2baa7e2 --- /dev/null +++ b/apm-sniffer/optional-plugins/trace-sampler-cpu-policy-plugin/src/main/resources/META-INF/services/org.apache.skywalking.apm.agent.core.boot.BootService @@ -0,0 +1,19 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# + +org.apache.skywalking.apm.plugin.cpu.policy.TraceSamplerCpuPolicyExtendService diff --git a/apm-sniffer/optional-plugins/trace-sampler-cpu-policy-plugin/src/test/resources/org.apache.skywalking.apm.agent.core.boot.BootService b/apm-sniffer/optional-plugins/trace-sampler-cpu-policy-plugin/src/test/resources/org.apache.skywalking.apm.agent.core.boot.BootService new file mode 100644 index 0000000000..64f2baa7e2 --- /dev/null +++ b/apm-sniffer/optional-plugins/trace-sampler-cpu-policy-plugin/src/test/resources/org.apache.skywalking.apm.agent.core.boot.BootService @@ -0,0 +1,19 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# + +org.apache.skywalking.apm.plugin.cpu.policy.TraceSamplerCpuPolicyExtendService diff --git a/docs/en/setup/service-agent/java-agent/Optional-plugins.md b/docs/en/setup/service-agent/java-agent/Optional-plugins.md index 853e848469..ffc684900f 100644 --- a/docs/en/setup/service-agent/java-agent/Optional-plugins.md +++ b/docs/en/setup/service-agent/java-agent/Optional-plugins.md @@ -20,4 +20,5 @@ Now, we have the following known optional plugins. * Plugin of guava-cache in the optional plugin folder. 
The reason for being an optional plugin is, this plugin enhanced cache framework, generates large number of local spans, which have a potential performance impact. * Plugin of fastjson serialization lib in optional plugin folder. * Plugin of jackson serialization lib in optional plugin folder. -* Plugin of Apache ShenYu(incubating) Gateway 2.4.x in optional plugin folder. Please only activate this plugin when you install agent in Apache ShenYu Gateway. \ No newline at end of file +* Plugin of Apache ShenYu(incubating) Gateway 2.4.x in optional plugin folder. Please only activate this plugin when you install agent in Apache ShenYu Gateway. +* Plugin of trace sampler CPU policy in the optional plugin folder. Please only activate this plugin when you need to disable trace collecting while the agent process CPU usage is too high (over the threshold). diff --git a/docs/en/setup/service-agent/java-agent/configurations.md b/docs/en/setup/service-agent/java-agent/configurations.md index a9b27f516f..54562d7eea 100644 --- a/docs/en/setup/service-agent/java-agent/configurations.md +++ b/docs/en/setup/service-agent/java-agent/configurations.md @@ -110,6 +110,7 @@ This is the properties list supported in `agent/config/agent.config`. | `plugin.neo4j.trace_cypher_parameters` | If set to true, the parameters of the cypher would be collected. | SW_PLUGIN_NEO4J_TRACE_CYPHER_PARAMETERS | `false` | | `plugin.neo4j.cypher_parameters_max_length` | If set to positive number, the `db.cypher.parameters` would be truncated to this length, otherwise it would be completely saved, which may cause performance problem. | SW_PLUGIN_NEO4J_CYPHER_PARAMETERS_MAX_LENGTH | `512` | | `plugin.neo4j.cypher_body_max_length` | If set to positive number, the `db.statement` would be truncated to this length, otherwise it would be completely saved, which may cause performance problem. 
| SW_PLUGIN_NEO4J_CYPHER_BODY_MAX_LENGTH | `2048` | +| `plugin.cpupolicy.sample_cpu_usage_percent_limit` | If set to a positive number and the `trace sampler CPU policy plugin` is activated, traces would not be collected when the agent process CPU usage percent is greater than `plugin.cpupolicy.sample_cpu_usage_percent_limit`. | SW_SAMPLE_CPU_USAGE_PERCENT_LIMIT | `-1` | # Dynamic Configurations All configurations above are static, if you need to change some agent settings at runtime, please read [CDS - Configuration Discovery Service document](configuration-discovery.md) for more details.