Skip to content

Commit

Permalink
ZOOKEEPER-3858: Add metrics to track server unavailable time
Browse files Browse the repository at this point in the history
Author: Jie Huang <jiehuang@fb.com>

Reviewers: Michael Han <hanm@apache.org>

Closes #1378 from jhuan31/ZOOKEEPER-3858
  • Loading branch information
Jie Huang authored and hanm committed Oct 1, 2020
1 parent bd06861 commit e4c175f
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,9 @@ private ServerMetrics(MetricsProvider metricsProvider) {
SESSIONLESS_CONNECTIONS_EXPIRED = metricsContext.getCounter("sessionless_connections_expired");
STALE_SESSIONS_EXPIRED = metricsContext.getCounter("stale_sessions_expired");

UNAVAILABLE_TIME = metricsContext.getSummary("unavailable_time", DetailLevel.BASIC);
LEADER_UNAVAILABLE_TIME = metricsContext.getSummary("leader_unavailable_time", DetailLevel.BASIC);

/*
* Number of requests that are in the session queue.
*/
Expand Down Expand Up @@ -310,6 +313,9 @@ private ServerMetrics(MetricsProvider metricsProvider) {
public final Counter SESSIONLESS_CONNECTIONS_EXPIRED;
public final Counter STALE_SESSIONS_EXPIRED;

// Time elapsed between the start of an unavailability window and the peer's ZAB state
// becoming BROADCAST; one sample is added by setZabState each time a window is closed.
// NOTE(review): unit is whatever Time.currentElapsedTime() returns (presumably ms) — confirm.
public final Summary UNAVAILABLE_TIME;
// Same sample as UNAVAILABLE_TIME, but only added when the peer state is LEADING at the
// moment the window is closed.
public final Summary LEADER_UNAVAILABLE_TIME;

// Connection throttling related
public final Summary CONNECTION_TOKEN_DEFICIT;
public final Counter CONNECTION_REJECTED;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@
import org.apache.zookeeper.common.AtomicFileWritingIdiom;
import org.apache.zookeeper.common.AtomicFileWritingIdiom.WriterStatement;
import org.apache.zookeeper.common.QuorumX509Util;
import org.apache.zookeeper.common.Time;
import org.apache.zookeeper.common.X509Exception;
import org.apache.zookeeper.jmx.MBeanRegistry;
import org.apache.zookeeper.jmx.ZKMBeanInfo;
Expand Down Expand Up @@ -530,6 +531,7 @@ public enum LearnerType {
*/
public long start_fle, end_fle; // fle = fast leader election
public static final String FLE_TIME_UNIT = "MS";
// Start of the current unavailability window, taken from Time.currentElapsedTime().
// 0 means no window is open: the main loop in run() stamps it whenever it is 0, and
// setZabState resets it to 0 after recording the metric on the transition to BROADCAST.
private long unavailableStartTime;

/*
* Default value of peer is participant
Expand Down Expand Up @@ -859,6 +861,14 @@ public synchronized void setPeerState(ServerState newState) {
}

/**
 * Switches this peer to the given ZAB state and logs the resulting detailed peer state.
 *
 * <p>When an unavailability window is open ({@code unavailableStartTime != 0}) and the new
 * state is {@code BROADCAST}, the time elapsed since the window opened is added to
 * {@code UNAVAILABLE_TIME}, and additionally to {@code LEADER_UNAVAILABLE_TIME} if this peer
 * is currently {@code LEADING}. The window is then closed by resetting the start time to 0.
 *
 * @param zabState the ZAB state to switch to
 */
public void setZabState(ZabState zabState) {
    final boolean windowOpen = unavailableStartTime != 0;
    if (windowOpen && zabState == ZabState.BROADCAST) {
        final long elapsed = Time.currentElapsedTime() - unavailableStartTime;
        ServerMetrics.getMetrics().UNAVAILABLE_TIME.add(elapsed);

        final boolean leading = getPeerState() == ServerState.LEADING;
        if (leading) {
            ServerMetrics.getMetrics().LEADER_UNAVAILABLE_TIME.add(elapsed);
        }

        // Close the window; 0 is the "no window open" sentinel.
        unavailableStartTime = 0;
    }

    this.zabState.set(zabState);
    LOG.info("Peer state changed: {}", getDetailedPeerState());
}
Expand Down Expand Up @@ -1369,6 +1379,10 @@ public void run() {
* Main loop
*/
while (running) {
if (unavailableStartTime == 0) {
unavailableStartTime = Time.currentElapsedTime();
}

switch (getPeerState()) {
case LOOKING:
LOG.info("LOOKING");
Expand Down

0 comments on commit e4c175f

Please sign in to comment.