From affc2d3ddfb8e54d69678642251f91584ee3a2c0 Mon Sep 17 00:00:00 2001 From: Dan Lambright Date: Sun, 12 May 2024 14:29:49 -0400 Subject: [PATCH 1/2] Only emit version vector counters when enabled. --- fdbserver/GrvProxyServer.actor.cpp | 21 +++++----- .../include/fdbserver/MasterData.actor.h | 6 +-- fdbserver/masterserver.actor.cpp | 39 ++++++++++--------- 3 files changed, 35 insertions(+), 31 deletions(-) diff --git a/fdbserver/GrvProxyServer.actor.cpp b/fdbserver/GrvProxyServer.actor.cpp index 4b4781aabe2..b5df26330ae 100644 --- a/fdbserver/GrvProxyServer.actor.cpp +++ b/fdbserver/GrvProxyServer.actor.cpp @@ -199,7 +199,7 @@ struct GrvProxyData { Optional latencyBandConfig; double lastStartCommit; double lastCommitLatency; - LatencySample versionVectorSizeOnGRVReply; + LatencySample* versionVectorSizeOnGRVReply; int updateCommitRequests; NotifiedDouble lastCommitTime; @@ -234,13 +234,16 @@ struct GrvProxyData { Reference const> db) : dbgid(dbgid), stats(dbgid), master(master), getConsistentReadVersion(getConsistentReadVersion), cx(openDBOnServer(db, TaskPriority::DefaultEndpoint, LockAware::True)), db(db), lastStartCommit(0), - lastCommitLatency(SERVER_KNOBS->REQUIRED_MIN_RECOVERY_DURATION), - versionVectorSizeOnGRVReply("VersionVectorSizeOnGRVReply", - dbgid, - SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL, - SERVER_KNOBS->LATENCY_SKETCH_ACCURACY), - updateCommitRequests(0), lastCommitTime(0), version(0), minKnownCommittedVersion(invalidVersion), - tagThrottler(CLIENT_KNOBS->PROXY_MAX_TAG_THROTTLE_DURATION) {} + lastCommitLatency(SERVER_KNOBS->REQUIRED_MIN_RECOVERY_DURATION), updateCommitRequests(0), lastCommitTime(0), + version(0), minKnownCommittedVersion(invalidVersion), + tagThrottler(CLIENT_KNOBS->PROXY_MAX_TAG_THROTTLE_DURATION) { + if (SERVER_KNOBS->ENABLE_VERSION_VECTOR) { + versionVectorSizeOnGRVReply = new LatencySample("VersionVectorSizeOnGRVReply", + dbgid, + SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL, + SERVER_KNOBS->LATENCY_SKETCH_ACCURACY); + } + } }; ACTOR Future healthMetricsRequestServer(GrvProxyInterface grvProxy, @@ -748,7 +751,7 @@ ACTOR Future sendGrvReplies(Future replyFuture, reply.tagThrottleInfo.clear(); if (SERVER_KNOBS->ENABLE_VERSION_VECTOR) { grvProxyData->ssVersionVectorCache.getDelta(request.maxVersion, reply.ssVersionVectorDelta); - grvProxyData->versionVectorSizeOnGRVReply.addMeasurement(reply.ssVersionVectorDelta.size()); + grvProxyData->versionVectorSizeOnGRVReply->addMeasurement(reply.ssVersionVectorDelta.size()); } reply.proxyId = grvProxyData->dbgid; reply.proxyTagThrottledDuration = request.proxyTagThrottledDuration; diff --git a/fdbserver/include/fdbserver/MasterData.actor.h b/fdbserver/include/fdbserver/MasterData.actor.h index 24a0555a98b..9485751a67f 100644 --- a/fdbserver/include/fdbserver/MasterData.actor.h +++ b/fdbserver/include/fdbserver/MasterData.actor.h @@ -127,11 +127,11 @@ struct SWIFT_CXX_REF_MASTERDATA MasterData : NonCopyable, ReferenceCounted> addActor; diff --git a/fdbserver/masterserver.actor.cpp b/fdbserver/masterserver.actor.cpp index 0e0fb2e33ed..d1c66feddd2 100644 --- a/fdbserver/masterserver.actor.cpp +++ b/fdbserver/masterserver.actor.cpp @@ -101,7 +101,7 @@ SWIFT_ACTOR Future waitForPrev(Reference self, ReportRawCommit state double startTime = now(); wait(self->liveCommittedVersion.whenAtLeast(req.prevVersion.get())); double latency = now() - startTime; - self->waitForPrevLatencies.addMeasurement(latency); + self->waitForPrevLatencies->addMeasurement(latency); ++self->waitForPrevCommitRequests; updateLiveCommittedVersion(self, req); req.reply.send(Void()); @@ -113,8 +113,7 @@ SWIFT_ACTOR Future waitForPrev(Reference self, ReportRawCommit ACTOR Future waitForPrev(Reference self, ReportRawCommittedVersionRequest req) { state double startTime = now(); wait(self->liveCommittedVersion.whenAtLeast(req.prevVersion.get())); - double latency = now() - startTime; - self->waitForPrevLatencies.addMeasurement(latency); + self->waitForPrevLatencies->addMeasurement(now() - startTime); ++self->waitForPrevCommitRequests; updateLiveCommittedVersion(self, req); req.reply.send(Void()); @@ -263,21 +262,8 @@ MasterData::MasterData(Reference const> const& dbInfo, getCommitVersionRequests("GetCommitVersionRequests", cc), getLiveCommittedVersionRequests("GetLiveCommittedVersionRequests", cc), reportLiveCommittedVersionRequests("ReportLiveCommittedVersionRequests", cc), - versionVectorTagUpdates("VersionVectorTagUpdates", - dbgid, - SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL, - SERVER_KNOBS->LATENCY_SKETCH_ACCURACY), waitForPrevCommitRequests("WaitForPrevCommitRequests", cc), - nonWaitForPrevCommitRequests("NonWaitForPrevCommitRequests", cc), - versionVectorSizeOnCVReply("VersionVectorSizeOnCVReply", - dbgid, - SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL, - SERVER_KNOBS->LATENCY_SKETCH_ACCURACY), - waitForPrevLatencies("WaitForPrevLatencies", - dbgid, - SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL, - SERVER_KNOBS->LATENCY_SKETCH_ACCURACY), - addActor(addActor) { + nonWaitForPrevCommitRequests("NonWaitForPrevCommitRequests", cc), addActor(addActor) { logger = cc.traceCounters("MasterMetrics", dbgid, SERVER_KNOBS->WORKER_LOGGING_INTERVAL, "MasterMetrics"); if (forceRecovery && !myInterface.locality.dcId().present()) { TraceEvent(SevError, "ForcedRecoveryRequiresDcID").log(); @@ -286,6 +272,21 @@ MasterData::MasterData(Reference const> const& dbInfo, balancer = resolutionBalancer.resolutionBalancing(); locality = tagLocalityInvalid; + if (SERVER_KNOBS->ENABLE_VERSION_VECTOR) { + versionVectorTagUpdates = new LatencySample("VersionVectorTagUpdates", + dbgid, + SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL, + SERVER_KNOBS->LATENCY_SKETCH_ACCURACY); + versionVectorSizeOnCVReply = new LatencySample("VersionVectorSizeOnCVReply", + dbgid, + SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL, + SERVER_KNOBS->LATENCY_SKETCH_ACCURACY); + waitForPrevLatencies = new LatencySample("WaitForPrevLatencies", + dbgid, + SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL, + SERVER_KNOBS->LATENCY_SKETCH_ACCURACY); + } + #ifdef WITH_SWIFT using namespace fdbserver_swift; // FIXME(swift): can we make a cleaner init? @@ -352,7 +353,7 @@ void updateLiveCommittedVersionCxx(Reference self, ReportRawCommitte int8_t primaryLocality = SERVER_KNOBS->ENABLE_VERSION_VECTOR_HA_OPTIMIZATION ? self->locality : tagLocalityInvalid; self->ssVersionVector.setVersion(req.writtenTags.get(), req.version, primaryLocality); - self->versionVectorTagUpdates.addMeasurement(req.writtenTags.get().size()); + self->versionVectorTagUpdates->addMeasurement(req.writtenTags.get().size()); } auto curTime = now(); // add debug here to change liveCommittedVersion to time bound of now() @@ -409,7 +410,7 @@ ACTOR Future serveLiveCommittedVersionCxx(Reference self) { reply.minKnownCommittedVersion = self->minKnownCommittedVersion; if (SERVER_KNOBS->ENABLE_VERSION_VECTOR) { self->ssVersionVector.getDelta(req.maxVersion, reply.ssVersionVectorDelta); - self->versionVectorSizeOnCVReply.addMeasurement(reply.ssVersionVectorDelta.size()); + self->versionVectorSizeOnCVReply->addMeasurement(reply.ssVersionVectorDelta.size()); } req.reply.send(reply); } From a66621ce644b168e7bbeca47a3279777de628862 Mon Sep 17 00:00:00 2001 From: Dan Lambright Date: Mon, 13 May 2024 13:15:33 -0400 Subject: [PATCH 2/2] Respond to review comments --- fdbserver/GrvProxyServer.actor.cpp | 2 +- fdbserver/include/fdbserver/MasterData.actor.h | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/fdbserver/GrvProxyServer.actor.cpp b/fdbserver/GrvProxyServer.actor.cpp index b5df26330ae..91961710af5 100644 --- a/fdbserver/GrvProxyServer.actor.cpp +++ b/fdbserver/GrvProxyServer.actor.cpp @@ -199,7 +199,7 @@ struct GrvProxyData { Optional latencyBandConfig; double lastStartCommit; double lastCommitLatency; - LatencySample* versionVectorSizeOnGRVReply; + LatencySample* versionVectorSizeOnGRVReply = nullptr; int updateCommitRequests; NotifiedDouble lastCommitTime; diff --git a/fdbserver/include/fdbserver/MasterData.actor.h b/fdbserver/include/fdbserver/MasterData.actor.h index 9485751a67f..ff7a20b9c91 100644 --- a/fdbserver/include/fdbserver/MasterData.actor.h +++ b/fdbserver/include/fdbserver/MasterData.actor.h @@ -127,11 +127,11 @@ struct SWIFT_CXX_REF_MASTERDATA MasterData : NonCopyable, ReferenceCounted> addActor;