Skip to content

Commit

Permalink
HDDS-1502. Add metrics for Ozone Ratis performance.Contributed by Sha…
Browse files Browse the repository at this point in the history
…shikant Banerjee(#833).
  • Loading branch information
bshashikant committed May 30, 2019
1 parent 2b303e9 commit 18c1eeb
Show file tree
Hide file tree
Showing 3 changed files with 82 additions and 20 deletions.
Expand Up @@ -37,13 +37,18 @@ public class CSMMetrics {

// ratis op metrics metrics
private @Metric MutableCounterLong numWriteStateMachineOps;
private @Metric MutableCounterLong numReadStateMachineOps;
private @Metric MutableCounterLong numQueryStateMachineOps;
private @Metric MutableCounterLong numApplyTransactionOps;
private @Metric MutableCounterLong numReadStateMachineOps;
private @Metric MutableCounterLong numBytesWrittenCount;
private @Metric MutableCounterLong numBytesCommittedCount;

// Failure Metrics
private @Metric MutableCounterLong numWriteStateMachineFails;
private @Metric MutableCounterLong numReadStateMachineFails;
private @Metric MutableCounterLong numQueryStateMachineFails;
private @Metric MutableCounterLong numApplyTransactionFails;
private @Metric MutableCounterLong numReadStateMachineFails;
private @Metric MutableCounterLong numReadStateMachineMissCount;

public CSMMetrics() {
}
Expand All @@ -59,6 +64,10 @@ public void incNumWriteStateMachineOps() {
numWriteStateMachineOps.incr();
}

public void incNumQueryStateMachineOps() {
numQueryStateMachineOps.incr();
}

public void incNumReadStateMachineOps() {
numReadStateMachineOps.incr();
}
Expand All @@ -71,10 +80,26 @@ public void incNumWriteStateMachineFails() {
numWriteStateMachineFails.incr();
}

public void incNumQueryStateMachineFails() {
numQueryStateMachineFails.incr();
}

public void incNumBytesWrittenCount(long value) {
numBytesWrittenCount.incr(value);
}

public void incNumBytesCommittedCount(long value) {
numBytesCommittedCount.incr(value);
}

public void incNumReadStateMachineFails() {
numReadStateMachineFails.incr();
}

public void incNumReadStateMachineMissCount() {
numReadStateMachineMissCount.incr();
}

public void incNumApplyTransactionsFails() {
numApplyTransactionFails.incr();
}
Expand All @@ -85,8 +110,8 @@ public long getNumWriteStateMachineOps() {
}

@VisibleForTesting
public long getNumReadStateMachineOps() {
return numReadStateMachineOps.value();
public long getNumQueryStateMachineOps() {
return numQueryStateMachineOps.value();
}

@VisibleForTesting
Expand All @@ -100,15 +125,36 @@ public long getNumWriteStateMachineFails() {
}

@VisibleForTesting
public long getNumReadStateMachineFails() {
return numReadStateMachineFails.value();
public long getNumQueryStateMachineFails() {
return numQueryStateMachineFails.value();
}

@VisibleForTesting
public long getNumApplyTransactionsFails() {
return numApplyTransactionFails.value();
}

@VisibleForTesting
public long getNumReadStateMachineFails() {
return numReadStateMachineFails.value();
}

@VisibleForTesting
public long getNumReadStateMachineMissCount() {
return numReadStateMachineMissCount.value();
}

@VisibleForTesting
public long getNumBytesWrittenCount() {
return numBytesWrittenCount.value();
}

@VisibleForTesting
public long getNumBytesCommittedCount() {
return numBytesCommittedCount.value();
}


public void unRegister() {
MetricsSystem ms = DefaultMetricsSystem.instance();
ms.unregisterSource(SOURCE_NAME);
Expand Down
Expand Up @@ -391,6 +391,8 @@ private CompletableFuture<Message> handleWriteChunk(
// Remove the future once it finishes execution from the
// writeChunkFutureMap.
writeChunkFuture.thenApply(r -> {
metrics.incNumBytesWrittenCount(
requestProto.getWriteChunk().getChunkData().getLen());
writeChunkFutureMap.remove(entryIndex);
LOG.debug("writeChunk writeStateMachineData completed: blockId " + write
.getBlockID() + " logIndex " + entryIndex + " chunkName " + write
Expand Down Expand Up @@ -438,12 +440,12 @@ public CompletableFuture<Message> writeStateMachineData(LogEntryProto entry) {
@Override
public CompletableFuture<Message> query(Message request) {
try {
metrics.incNumReadStateMachineOps();
metrics.incNumQueryStateMachineOps();
final ContainerCommandRequestProto requestProto =
getContainerCommandRequestProto(request.getContent());
return CompletableFuture.completedFuture(runCommand(requestProto, null));
} catch (IOException e) {
metrics.incNumReadStateMachineFails();
metrics.incNumQueryStateMachineFails();
return completeExceptionally(e);
}
}
Expand Down Expand Up @@ -520,10 +522,14 @@ public CompletableFuture<Void> flushStateMachineData(long index) {
public CompletableFuture<ByteString> readStateMachineData(
LogEntryProto entry) {
StateMachineLogEntryProto smLogEntryProto = entry.getStateMachineLogEntry();
metrics.incNumReadStateMachineOps();
if (!getStateMachineData(smLogEntryProto).isEmpty()) {
return CompletableFuture.completedFuture(ByteString.EMPTY);
}
try {
// the stateMachine data is not present in the stateMachine cache,
// increment the stateMachine cache miss count
metrics.incNumReadStateMachineMissCount();
final ContainerCommandRequestProto requestProto =
getContainerCommandRequestProto(
entry.getStateMachineLogEntry().getLogData());
Expand All @@ -537,6 +543,7 @@ public CompletableFuture<ByteString> readStateMachineData(
getCachedStateMachineData(entry.getIndex(), entry.getTerm(),
requestProto));
} catch (ExecutionException e) {
metrics.incNumReadStateMachineFails();
future.completeExceptionally(e);
}
return future;
Expand All @@ -547,6 +554,7 @@ public CompletableFuture<ByteString> readStateMachineData(
+ " cannot have state machine data");
}
} catch (Exception e) {
metrics.incNumReadStateMachineFails();
LOG.error("unable to read stateMachineData:" + e);
return completeExceptionally(e);
}
Expand Down Expand Up @@ -618,6 +626,10 @@ public CompletableFuture<Message> applyTransaction(TransactionContext trx) {
applyTransactionCompletionMap
.put(index, trx.getLogEntry().getTerm());
Preconditions.checkState(previous == null);
if (cmdType == Type.WriteChunk || cmdType == Type.PutSmallFile) {
metrics.incNumBytesCommittedCount(
requestProto.getWriteChunk().getChunkData().getLen());
}
updateLastApplied();
});
return future;
Expand Down
Expand Up @@ -14,8 +14,10 @@
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
= GenericTestUtils.getTestDir("dfs").getAbsolutePath() + File.separator;
*/
package org.apache.hadoop.ozone.container.common.transport.server.ratis;
package org.apache.hadoop.ozone.container.common.transport.server.ratis;

import static org.apache.hadoop.test.MetricsAsserts.assertCounter;
import static org.apache.hadoop.test.MetricsAsserts.getMetrics;
Expand All @@ -29,9 +31,9 @@
import org.apache.hadoop.hdds.protocol.DatanodeDetails;
import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos;
import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos
.ContainerCommandRequestProto;
.ContainerCommandRequestProto;
import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos
.ContainerCommandResponseProto;
.ContainerCommandResponseProto;
import org.apache.hadoop.hdds.scm.*;
import org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException;
import org.apache.hadoop.hdds.scm.pipeline.Pipeline;
Expand All @@ -42,7 +44,7 @@
import org.apache.hadoop.ozone.container.common.interfaces.ContainerDispatcher;
import org.apache.hadoop.ozone.container.common.interfaces.Handler;
import org.apache.hadoop.ozone.container.common.transport.server
.XceiverServerSpi;
.XceiverServerSpi;
import org.apache.hadoop.ozone.web.utils.OzoneUtils;
import org.apache.hadoop.test.GenericTestUtils;
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
Expand All @@ -57,13 +59,11 @@
import org.junit.Test;
import org.junit.Assert;

/**
* This class tests the metrics of ContainerStateMachine.
*/
public class TestCSMMetrics {
static final String TEST_DIR
= GenericTestUtils.getTestDir("dfs").getAbsolutePath() + File.separator;

/**
* This class tests the metrics of ContainerStateMachine.
*/
public class TestCSMMetrics {
static final String TEST_DIR
@FunctionalInterface
interface CheckedBiFunction<LEFT, RIGHT, OUT, THROWABLE extends Throwable> {
OUT apply(LEFT left, RIGHT right) throws THROWABLE;
Expand Down Expand Up @@ -112,6 +112,8 @@ static void runContainerStateMachineMetrics(
assertCounter("NumWriteStateMachineOps", 0L, metric);
assertCounter("NumReadStateMachineOps", 0L, metric);
assertCounter("NumApplyTransactionOps", 0L, metric);
assertCounter("NumBytesWrittenCount", 0L, metric);
assertCounter("NumBytesCommittedCount", 0L, metric);

// Write Chunk
BlockID blockID = ContainerTestHelper.getTestBlockID(ContainerTestHelper.
Expand All @@ -127,7 +129,9 @@ static void runContainerStateMachineMetrics(
metric = getMetrics(CSMMetrics.SOURCE_NAME +
RaftGroupId.valueOf(pipeline.getId().getId()).toString());
assertCounter("NumWriteStateMachineOps", 1L, metric);
assertCounter("NumBytesWrittenCount", 1024L, metric);
assertCounter("NumApplyTransactionOps", 1L, metric);
assertCounter("NumBytesCommittedCount", 1024L, metric);

//Read Chunk
ContainerProtos.ContainerCommandRequestProto readChunkRequest =
Expand All @@ -139,7 +143,7 @@ static void runContainerStateMachineMetrics(

metric = getMetrics(CSMMetrics.SOURCE_NAME +
RaftGroupId.valueOf(pipeline.getId().getId()).toString());
assertCounter("NumReadStateMachineOps", 1L, metric);
assertCounter("NumQueryStateMachineOps", 1L, metric);
assertCounter("NumApplyTransactionOps", 1L, metric);
} finally {
if (client != null) {
Expand Down

0 comments on commit 18c1eeb

Please sign in to comment.