From 1fe67416fdb06d713c48c69b4cd5d2f26cd59b82 Mon Sep 17 00:00:00 2001 From: Ray Mattingly Date: Mon, 17 Apr 2023 10:21:14 -0400 Subject: [PATCH 1/4] separate slow query thresholds for Scan requests --- .../apache/hadoop/hbase/ipc/RpcServer.java | 42 +++++++++++++++++-- 1 file changed, 39 insertions(+), 3 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/RpcServer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/RpcServer.java index 19b5327844a4..7c3aea7d5df6 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/RpcServer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/RpcServer.java @@ -176,6 +176,8 @@ public abstract class RpcServer implements RpcServerInterface, ConfigurationObse protected static final String WARN_RESPONSE_TIME = "hbase.ipc.warn.response.time"; protected static final String WARN_RESPONSE_SIZE = "hbase.ipc.warn.response.size"; + protected static final String WARN_RESPONSE_TIME_SCAN = "hbase.ipc.warn.response.time.scan"; + protected static final String WARN_RESPONSE_SIZE_SCAN = "hbase.ipc.warn.response.size.scan"; /** * Minimum allowable timeout (in milliseconds) in rpc request's header. This configuration exists @@ -198,6 +200,8 @@ public abstract class RpcServer implements RpcServerInterface, ConfigurationObse protected final int maxRequestSize; protected volatile int warnResponseTime; protected volatile int warnResponseSize; + protected volatile int warnResponseTimeScan; + protected volatile int warnResponseSizeScan; protected final int minClientRequestTimeout; @@ -277,6 +281,8 @@ public RpcServer(final Server server, final String name, this.warnResponseTime = getWarnResponseTime(conf); this.warnResponseSize = getWarnResponseSize(conf); + this.warnResponseTimeScan = getWarnResponseTimeScan(conf); + this.warnResponseSizeScan = getWarnResponseSizeScan(conf); this.minClientRequestTimeout = conf.getInt(MIN_CLIENT_REQUEST_TIMEOUT, DEFAULT_MIN_CLIENT_REQUEST_TIMEOUT); this.maxRequestSize = conf.getInt(MAX_REQUEST_SIZE, DEFAULT_MAX_REQUEST_SIZE); @@ -326,6 +332,14 @@ private void refreshSlowLogConfiguration(Configuration newConf) { if (warnResponseSize != newWarnResponseSize) { warnResponseSize = newWarnResponseSize; } + int newWarnResponseTimeScan = getWarnResponseTimeScan(newConf); + if (warnResponseTimeScan != newWarnResponseTimeScan) { + warnResponseTimeScan = newWarnResponseTimeScan; + } + int newWarnResponseSizeScan = getWarnResponseSizeScan(newConf); + if (warnResponseSizeScan != newWarnResponseSizeScan) { + warnResponseSizeScan = newWarnResponseSizeScan; + } } private static boolean getIsOnlineLogProviderEnabled(Configuration conf) { @@ -341,6 +355,14 @@ private static int getWarnResponseSize(Configuration conf) { return conf.getInt(WARN_RESPONSE_SIZE, DEFAULT_WARN_RESPONSE_SIZE); } + private static int getWarnResponseTimeScan(Configuration conf) { + return conf.getInt(WARN_RESPONSE_TIME_SCAN, getWarnResponseTime(conf)); + } + + private static int getWarnResponseSizeScan(Configuration conf) { + return conf.getInt(WARN_RESPONSE_SIZE_SCAN, getWarnResponseSize(conf)); + } + protected void initReconfigurable(Configuration confToLoad) { this.allowFallbackToSimpleAuth = confToLoad.getBoolean(FALLBACK_TO_INSECURE_CLIENT_AUTH, false); if (isSecurityEnabled && allowFallbackToSimpleAuth) { @@ -441,9 +463,8 @@ public Pair call(RpcCall call, MonitoredRPCHandler status) metrics.sentResponse(responseSize); // log any RPC responses that are slower than the configured warn // response time or larger than configured warning size - boolean tooSlow = (processingTime > warnResponseTime && warnResponseTime > -1); - boolean tooLarge = (warnResponseSize > -1 - && (responseSize > warnResponseSize || responseBlockSize > warnResponseSize)); + boolean tooSlow = isTooSlow(call, processingTime); + boolean tooLarge = isTooLarge(call, responseSize, responseBlockSize); if (tooSlow || tooLarge) { final String userName = call.getRequestUserName().orElse(StringUtils.EMPTY); // when tagging, we let TooLarge trump TooSmall to keep output simple @@ -560,6 +581,21 @@ void logResponse(Message param, String methodName, String call, boolean tooLarge LOG.warn("(response" + tag + "): " + GSON.toJson(responseInfo)); } + private boolean isTooSlow(RpcCall call, int processingTime) { + long warnResponseTime = call.getParam() instanceof ClientProtos.ScanRequest + ? warnResponseTimeScan + : this.warnResponseTime; + return (processingTime > warnResponseTime && warnResponseTime > -1); + } + + private boolean isTooLarge(RpcCall call, long responseSize, long responseBlockSize) { + long warnResponseSize = call.getParam() instanceof ClientProtos.ScanRequest + ? warnResponseSizeScan + : this.warnResponseSize; + return (warnResponseSize > -1 + && (responseSize > warnResponseSize || responseBlockSize > warnResponseSize)); + } + /** * Truncate to number of chars decided by conf hbase.ipc.trace.log.max.length if TRACE is on else * to 150 chars Refer to Jira HBASE-20826 and HBASE-20942 From e5afa3e9fc84dabfffa41bcc043b93c7f63e1be4 Mon Sep 17 00:00:00 2001 From: Ray Mattingly Date: Thu, 20 Apr 2023 08:48:51 -0400 Subject: [PATCH 2/4] PR feedback: improve scan slowlog threshold naming --- .../apache/hadoop/hbase/ipc/RpcServer.java | 36 +++++++++---------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/RpcServer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/RpcServer.java index 7c3aea7d5df6..f48183d0ee68 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/RpcServer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/RpcServer.java @@ -176,8 +176,8 @@ public abstract class RpcServer implements RpcServerInterface, ConfigurationObse protected static final String WARN_RESPONSE_TIME = "hbase.ipc.warn.response.time"; protected static final String WARN_RESPONSE_SIZE = "hbase.ipc.warn.response.size"; - protected static final String WARN_RESPONSE_TIME_SCAN = "hbase.ipc.warn.response.time.scan"; - protected static final String WARN_RESPONSE_SIZE_SCAN = "hbase.ipc.warn.response.size.scan"; + protected static final String WARN_SCAN_RESPONSE_TIME = "hbase.ipc.warn.response.time.scan"; + protected static final String WARN_SCAN_RESPONSE_SIZE = "hbase.ipc.warn.response.size.scan"; /** * Minimum allowable timeout (in milliseconds) in rpc request's header. This configuration exists @@ -200,8 +200,8 @@ public abstract class RpcServer implements RpcServerInterface, ConfigurationObse protected final int maxRequestSize; protected volatile int warnResponseTime; protected volatile int warnResponseSize; - protected volatile int warnResponseTimeScan; - protected volatile int warnResponseSizeScan; + protected volatile int warnScanResponseTime; + protected volatile int warnScanResponseSize; protected final int minClientRequestTimeout; @@ -281,8 +281,8 @@ public RpcServer(final Server server, final String name, this.warnResponseTime = getWarnResponseTime(conf); this.warnResponseSize = getWarnResponseSize(conf); - this.warnResponseTimeScan = getWarnResponseTimeScan(conf); - this.warnResponseSizeScan = getWarnResponseSizeScan(conf); + this.warnScanResponseTime = getWarnScanResponseTime(conf); + this.warnScanResponseSize = getWarnScanResponseSize(conf); this.minClientRequestTimeout = conf.getInt(MIN_CLIENT_REQUEST_TIMEOUT, DEFAULT_MIN_CLIENT_REQUEST_TIMEOUT); this.maxRequestSize = conf.getInt(MAX_REQUEST_SIZE, DEFAULT_MAX_REQUEST_SIZE); @@ -332,13 +332,13 @@ private void refreshSlowLogConfiguration(Configuration newConf) { if (warnResponseSize != newWarnResponseSize) { warnResponseSize = newWarnResponseSize; } - int newWarnResponseTimeScan = getWarnResponseTimeScan(newConf); - if (warnResponseTimeScan != newWarnResponseTimeScan) { - warnResponseTimeScan = newWarnResponseTimeScan; + int newWarnResponseTimeScan = getWarnScanResponseTime(newConf); + if (warnScanResponseTime != newWarnResponseTimeScan) { + warnScanResponseTime = newWarnResponseTimeScan; } - int newWarnResponseSizeScan = getWarnResponseSizeScan(newConf); - if (warnResponseSizeScan != newWarnResponseSizeScan) { - warnResponseSizeScan = newWarnResponseSizeScan; + int newWarnScanResponseSize = getWarnScanResponseSize(newConf); + if (warnScanResponseSize != newWarnScanResponseSize) { + warnScanResponseSize = newWarnScanResponseSize; } } @@ -355,12 +355,12 @@ private static int getWarnResponseSize(Configuration conf) { return conf.getInt(WARN_RESPONSE_SIZE, DEFAULT_WARN_RESPONSE_SIZE); } - private static int getWarnResponseTimeScan(Configuration conf) { - return conf.getInt(WARN_RESPONSE_TIME_SCAN, getWarnResponseTime(conf)); + private static int getWarnScanResponseTime(Configuration conf) { + return conf.getInt(WARN_SCAN_RESPONSE_TIME, getWarnResponseTime(conf)); } - private static int getWarnResponseSizeScan(Configuration conf) { - return conf.getInt(WARN_RESPONSE_SIZE_SCAN, getWarnResponseSize(conf)); + private static int getWarnScanResponseSize(Configuration conf) { + return conf.getInt(WARN_SCAN_RESPONSE_SIZE, getWarnResponseSize(conf)); } protected void initReconfigurable(Configuration confToLoad) { @@ -583,14 +583,14 @@ void logResponse(Message param, String methodName, String call, boolean tooLarge private boolean isTooSlow(RpcCall call, int processingTime) { long warnResponseTime = call.getParam() instanceof ClientProtos.ScanRequest - ? warnResponseTimeScan + ? warnScanResponseTime : this.warnResponseTime; return (processingTime > warnResponseTime && warnResponseTime > -1); } private boolean isTooLarge(RpcCall call, long responseSize, long responseBlockSize) { long warnResponseSize = call.getParam() instanceof ClientProtos.ScanRequest - ? warnResponseSizeScan + ? warnScanResponseSize : this.warnResponseSize; return (warnResponseSize > -1 && (responseSize > warnResponseSize || responseBlockSize > warnResponseSize)); From f870a97bec899e93d0cf3a0a40de9d249852ff04 Mon Sep 17 00:00:00 2001 From: Ray Mattingly Date: Fri, 21 Apr 2023 11:45:05 -0400 Subject: [PATCH 3/4] update ops mgt doc --- src/main/asciidoc/_chapters/ops_mgt.adoc | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/main/asciidoc/_chapters/ops_mgt.adoc b/src/main/asciidoc/_chapters/ops_mgt.adoc index 84386520b0dd..1cebf9b61c86 100644 --- a/src/main/asciidoc/_chapters/ops_mgt.adoc +++ b/src/main/asciidoc/_chapters/ops_mgt.adoc @@ -1904,7 +1904,7 @@ It is also prepended with identifying tags `(responseTooSlow)`, `(responseTooLar ==== Configuration -There are two configuration knobs that can be used to adjust the thresholds for when queries are logged. +There are four configuration knobs that can be used to adjust the thresholds for when queries are logged. * `hbase.ipc.warn.response.time` Maximum number of milliseconds that a query can be run without being logged. Defaults to 10000, or 10 seconds. @@ -1912,6 +1912,12 @@ There are two configuration knobs that can be used to adjust the thresholds for * `hbase.ipc.warn.response.size` Maximum byte size of response that a query can return without being logged. Defaults to 100 megabytes. Can be set to -1 to disable logging by size. +* `hbase.ipc.warn.response.time.scan` Maximum number of milliseconds that a Scan can be run without being logged. + Defaults to the `hbase.ipc.warn.response.time` value. + Can be set to -1 to disable logging by time. +* `hbase.ipc.warn.response.size.scan` Maximum byte size of response that a Scan can return without being logged. + Defaults to the `hbase.ipc.warn.response.size` value. + Can be set to -1 to disable logging by size. ==== Metrics From cbf62386ceed4df12f1e36c6fd1ba338eddc1ae4 Mon Sep 17 00:00:00 2001 From: Ray Mattingly Date: Fri, 21 Apr 2023 12:30:19 -0400 Subject: [PATCH 4/4] more details on slow query log configuration doc --- src/main/asciidoc/_chapters/ops_mgt.adoc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/main/asciidoc/_chapters/ops_mgt.adoc b/src/main/asciidoc/_chapters/ops_mgt.adoc index 1cebf9b61c86..34fb028f094f 100644 --- a/src/main/asciidoc/_chapters/ops_mgt.adoc +++ b/src/main/asciidoc/_chapters/ops_mgt.adoc @@ -1904,7 +1904,9 @@ It is also prepended with identifying tags `(responseTooSlow)`, `(responseTooLar ==== Configuration -There are four configuration knobs that can be used to adjust the thresholds for when queries are logged. +There are four configuration knobs that can be used to adjust the thresholds for when queries are logged. Two of these knobs +control the size and time thresholds for all queries. Because Scans can often be larger and slower than other types of +queries, there are two additional knobs which can control size and time thresholds for Scans specifically. * `hbase.ipc.warn.response.time` Maximum number of milliseconds that a query can be run without being logged. Defaults to 10000, or 10 seconds.