Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

HBASE-27535: Separate slowlog thresholds for scans vs other requests #5188

Merged
merged 4 commits into from
Apr 22, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,8 @@ public abstract class RpcServer implements RpcServerInterface, ConfigurationObse

/**
 * Config key for the processing-time threshold (milliseconds) above which a response is logged
 * as too slow. A value of -1 disables the time check.
 */
protected static final String WARN_RESPONSE_TIME = "hbase.ipc.warn.response.time";
/**
 * Config key for the response-size threshold (bytes) above which a response is logged as too
 * large. A value of -1 disables the size check.
 */
protected static final String WARN_RESPONSE_SIZE = "hbase.ipc.warn.response.size";
/**
 * Config key for the too-slow threshold applied to Scan requests only. When unset, the value
 * configured under {@link #WARN_RESPONSE_TIME} is used.
 */
protected static final String WARN_SCAN_RESPONSE_TIME = "hbase.ipc.warn.response.time.scan";
/**
 * Config key for the too-large threshold applied to Scan requests only. When unset, the value
 * configured under {@link #WARN_RESPONSE_SIZE} is used.
 */
protected static final String WARN_SCAN_RESPONSE_SIZE = "hbase.ipc.warn.response.size.scan";

/**
* Minimum allowable timeout (in milliseconds) in rpc request's header. This configuration exists
Expand All @@ -198,6 +200,8 @@ public abstract class RpcServer implements RpcServerInterface, ConfigurationObse
protected final int maxRequestSize;
// Slow-log thresholds. Each is initialised from the configuration in the constructor and
// reassigned by refreshSlowLogConfiguration when the newly loaded value differs.
/** Time threshold used by isTooSlow for requests that are not Scans. */
protected volatile int warnResponseTime;
/** Size threshold used by isTooLarge for requests that are not Scans. */
protected volatile int warnResponseSize;
/** Time threshold used by isTooSlow when the request is a ClientProtos.ScanRequest. */
protected volatile int warnScanResponseTime;
/** Size threshold used by isTooLarge when the request is a ClientProtos.ScanRequest. */
protected volatile int warnScanResponseSize;

protected final int minClientRequestTimeout;

Expand Down Expand Up @@ -277,6 +281,8 @@ public RpcServer(final Server server, final String name,

this.warnResponseTime = getWarnResponseTime(conf);
this.warnResponseSize = getWarnResponseSize(conf);
this.warnScanResponseTime = getWarnScanResponseTime(conf);
this.warnScanResponseSize = getWarnScanResponseSize(conf);
this.minClientRequestTimeout =
conf.getInt(MIN_CLIENT_REQUEST_TIMEOUT, DEFAULT_MIN_CLIENT_REQUEST_TIMEOUT);
this.maxRequestSize = conf.getInt(MAX_REQUEST_SIZE, DEFAULT_MAX_REQUEST_SIZE);
Expand Down Expand Up @@ -326,6 +332,14 @@ private void refreshSlowLogConfiguration(Configuration newConf) {
if (warnResponseSize != newWarnResponseSize) {
warnResponseSize = newWarnResponseSize;
}
int newWarnResponseTimeScan = getWarnScanResponseTime(newConf);
if (warnScanResponseTime != newWarnResponseTimeScan) {
warnScanResponseTime = newWarnResponseTimeScan;
}
int newWarnScanResponseSize = getWarnScanResponseSize(newConf);
if (warnScanResponseSize != newWarnScanResponseSize) {
warnScanResponseSize = newWarnScanResponseSize;
}
}

private static boolean getIsOnlineLogProviderEnabled(Configuration conf) {
Expand All @@ -341,6 +355,14 @@ private static int getWarnResponseSize(Configuration conf) {
return conf.getInt(WARN_RESPONSE_SIZE, DEFAULT_WARN_RESPONSE_SIZE);
}

/**
 * Resolves the too-slow threshold that applies to Scan requests.
 * <p>
 * The value stored under {@link #WARN_SCAN_RESPONSE_TIME} is preferred; the general threshold
 * from {@link #getWarnResponseTime(Configuration)} is supplied as the default.
 * @param conf configuration to read the thresholds from
 * @return the scan-specific time threshold, or the general one when no scan value is configured
 */
private static int getWarnScanResponseTime(Configuration conf) {
  // Computed first so it can serve as the default for the scan-specific lookup.
  final int generalThreshold = getWarnResponseTime(conf);
  return conf.getInt(WARN_SCAN_RESPONSE_TIME, generalThreshold);
}

/**
 * Resolves the too-large threshold that applies to Scan requests.
 * <p>
 * The value stored under {@link #WARN_SCAN_RESPONSE_SIZE} is preferred; the general threshold
 * from {@link #getWarnResponseSize(Configuration)} is supplied as the default.
 * @param conf configuration to read the thresholds from
 * @return the scan-specific size threshold, or the general one when no scan value is configured
 */
private static int getWarnScanResponseSize(Configuration conf) {
  // Computed first so it can serve as the default for the scan-specific lookup.
  final int generalThreshold = getWarnResponseSize(conf);
  return conf.getInt(WARN_SCAN_RESPONSE_SIZE, generalThreshold);
}

protected void initReconfigurable(Configuration confToLoad) {
this.allowFallbackToSimpleAuth = confToLoad.getBoolean(FALLBACK_TO_INSECURE_CLIENT_AUTH, false);
if (isSecurityEnabled && allowFallbackToSimpleAuth) {
Expand Down Expand Up @@ -441,9 +463,8 @@ public Pair<Message, CellScanner> call(RpcCall call, MonitoredRPCHandler status)
metrics.sentResponse(responseSize);
// log any RPC responses that are slower than the configured warn
// response time or larger than configured warning size
boolean tooSlow = (processingTime > warnResponseTime && warnResponseTime > -1);
boolean tooLarge = (warnResponseSize > -1
&& (responseSize > warnResponseSize || responseBlockSize > warnResponseSize));
boolean tooSlow = isTooSlow(call, processingTime);
boolean tooLarge = isTooLarge(call, responseSize, responseBlockSize);
if (tooSlow || tooLarge) {
final String userName = call.getRequestUserName().orElse(StringUtils.EMPTY);
// when tagging, we let TooLarge trump TooSlow to keep output simple
Expand Down Expand Up @@ -560,6 +581,21 @@ void logResponse(Message param, String methodName, String call, boolean tooLarge
LOG.warn("(response" + tag + "): " + GSON.toJson(responseInfo));
}

/**
 * Decides whether a call's processing time exceeds the applicable slow-log threshold.
 * <p>
 * Scan requests are compared against {@code warnScanResponseTime}; every other request is
 * compared against {@code warnResponseTime}. A threshold of -1 or lower disables the check.
 * @param call           the call whose request parameter selects the threshold
 * @param processingTime the processing time measured for the call
 * @return {@code true} if the check is enabled and the processing time is above the threshold
 */
private boolean isTooSlow(RpcCall call, int processingTime) {
  // Copy the volatile field once so both comparisons below see the same value.
  final long threshold;
  if (call.getParam() instanceof ClientProtos.ScanRequest) {
    threshold = warnScanResponseTime;
  } else {
    threshold = warnResponseTime;
  }
  if (threshold <= -1) {
    return false;
  }
  return processingTime > threshold;
}

/**
 * Decides whether a call's response exceeds the applicable size threshold.
 * <p>
 * Scan requests are compared against {@code warnScanResponseSize}; every other request is
 * compared against {@code warnResponseSize}. A threshold of -1 or lower disables the check.
 * @param call              the call whose request parameter selects the threshold
 * @param responseSize      size of the response
 * @param responseBlockSize block size associated with the response
 * @return {@code true} if the check is enabled and either size is above the threshold
 */
private boolean isTooLarge(RpcCall call, long responseSize, long responseBlockSize) {
  // Copy the volatile field once so every comparison below sees the same value.
  final long limit;
  if (call.getParam() instanceof ClientProtos.ScanRequest) {
    limit = warnScanResponseSize;
  } else {
    limit = warnResponseSize;
  }
  if (limit <= -1) {
    return false;
  }
  return responseSize > limit || responseBlockSize > limit;
}

/**
* Truncate to number of chars decided by conf hbase.ipc.trace.log.max.length if TRACE is on else
* to 150 chars Refer to Jira HBASE-20826 and HBASE-20942
Expand Down
10 changes: 9 additions & 1 deletion src/main/asciidoc/_chapters/ops_mgt.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -1904,14 +1904,22 @@ It is also prepended with identifying tags `(responseTooSlow)`, `(responseTooLar

==== Configuration

There are two configuration knobs that can be used to adjust the thresholds for when queries are logged.
There are four configuration knobs that can be used to adjust the thresholds for when queries are logged. Two of these knobs
control the size and time thresholds for all queries. Because Scans can often be larger and slower than other types of
queries, there are two additional knobs which can control size and time thresholds for Scans specifically.

* `hbase.ipc.warn.response.time` Maximum number of milliseconds that a query can be run without being logged.
Defaults to 10000, or 10 seconds.
Can be set to -1 to disable logging by time.
* `hbase.ipc.warn.response.size` Maximum byte size of response that a query can return without being logged.
Defaults to 100 megabytes.
Can be set to -1 to disable logging by size.
* `hbase.ipc.warn.response.time.scan` Maximum number of milliseconds that a Scan can run without being logged.
Defaults to the `hbase.ipc.warn.response.time` value.
Can be set to -1 to disable logging of Scans by time.
* `hbase.ipc.warn.response.size.scan` Maximum byte size of response that a Scan can return without being logged.
Defaults to the `hbase.ipc.warn.response.size` value.
Can be set to -1 to disable logging of Scans by size.

==== Metrics

Expand Down