Skip to content

Commit

Permalink
Only emit version vector counters when enabled.
Browse files: browse the repository at this point in the history
  • Loading branch information
Dan Lambright committed May 12, 2024
1 parent 5d6333f commit 965390d
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 31 deletions.
21 changes: 12 additions & 9 deletions fdbserver/GrvProxyServer.actor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ struct GrvProxyData {
Optional<LatencyBandConfig> latencyBandConfig;
double lastStartCommit;
double lastCommitLatency;
LatencySample versionVectorSizeOnGRVReply;
LatencySample* versionVectorSizeOnGRVReply;
int updateCommitRequests;
NotifiedDouble lastCommitTime;

Expand Down Expand Up @@ -234,13 +234,16 @@ struct GrvProxyData {
Reference<AsyncVar<ServerDBInfo> const> db)
: dbgid(dbgid), stats(dbgid), master(master), getConsistentReadVersion(getConsistentReadVersion),
cx(openDBOnServer(db, TaskPriority::DefaultEndpoint, LockAware::True)), db(db), lastStartCommit(0),
lastCommitLatency(SERVER_KNOBS->REQUIRED_MIN_RECOVERY_DURATION),
versionVectorSizeOnGRVReply("VersionVectorSizeOnGRVReply",
dbgid,
SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL,
SERVER_KNOBS->LATENCY_SKETCH_ACCURACY),
updateCommitRequests(0), lastCommitTime(0), version(0), minKnownCommittedVersion(invalidVersion),
tagThrottler(CLIENT_KNOBS->PROXY_MAX_TAG_THROTTLE_DURATION) {}
lastCommitLatency(SERVER_KNOBS->REQUIRED_MIN_RECOVERY_DURATION), updateCommitRequests(0), lastCommitTime(0),
version(0), minKnownCommittedVersion(invalidVersion),
tagThrottler(CLIENT_KNOBS->PROXY_MAX_TAG_THROTTLE_DURATION) {
if (SERVER_KNOBS->ENABLE_VERSION_VECTOR) {
versionVectorSizeOnGRVReply = new LatencySample("VersionVectorSizeOnGRVReply",
dbgid,
SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL,
SERVER_KNOBS->LATENCY_SKETCH_ACCURACY);
}
}
};

ACTOR Future<Void> healthMetricsRequestServer(GrvProxyInterface grvProxy,
Expand Down Expand Up @@ -748,7 +751,7 @@ ACTOR Future<Void> sendGrvReplies(Future<GetReadVersionReply> replyFuture,
reply.tagThrottleInfo.clear();
if (SERVER_KNOBS->ENABLE_VERSION_VECTOR) {
grvProxyData->ssVersionVectorCache.getDelta(request.maxVersion, reply.ssVersionVectorDelta);
grvProxyData->versionVectorSizeOnGRVReply.addMeasurement(reply.ssVersionVectorDelta.size());
grvProxyData->versionVectorSizeOnGRVReply->addMeasurement(reply.ssVersionVectorDelta.size());
}
reply.proxyId = grvProxyData->dbgid;
reply.proxyTagThrottledDuration = request.proxyTagThrottledDuration;
Expand Down
6 changes: 3 additions & 3 deletions fdbserver/include/fdbserver/MasterData.actor.h
Original file line number Diff line number Diff line change
Expand Up @@ -127,11 +127,11 @@ struct SWIFT_CXX_REF_MASTERDATA MasterData : NonCopyable, ReferenceCounted<Maste
CounterValue reportLiveCommittedVersionRequests;
// This counter gives an estimate of the number of non-empty peeks that storage servers
// should do from tlogs (in the worst case, ignoring blocking peek timeouts).
LatencySample versionVectorTagUpdates;
LatencySample* versionVectorTagUpdates;
CounterValue waitForPrevCommitRequests;
CounterValue nonWaitForPrevCommitRequests;
LatencySample versionVectorSizeOnCVReply;
LatencySample waitForPrevLatencies;
LatencySample* versionVectorSizeOnCVReply;
LatencySample* waitForPrevLatencies;

PromiseStream<Future<Void>> addActor;

Expand Down
39 changes: 20 additions & 19 deletions fdbserver/masterserver.actor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ SWIFT_ACTOR Future<Void> waitForPrev(Reference<MasterData> self, ReportRawCommit
state double startTime = now();
wait(self->liveCommittedVersion.whenAtLeast(req.prevVersion.get()));
double latency = now() - startTime;
self->waitForPrevLatencies.addMeasurement(latency);
self->waitForPrevLatencies->addMeasurement(latency);
++self->waitForPrevCommitRequests;
updateLiveCommittedVersion(self, req);
req.reply.send(Void());
Expand All @@ -113,8 +113,7 @@ SWIFT_ACTOR Future<Void> waitForPrev(Reference<MasterData> self, ReportRawCommit
ACTOR Future<Void> waitForPrev(Reference<MasterData> self, ReportRawCommittedVersionRequest req) {
state double startTime = now();
wait(self->liveCommittedVersion.whenAtLeast(req.prevVersion.get()));
double latency = now() - startTime;
self->waitForPrevLatencies.addMeasurement(latency);
self->waitForPrevLatencies->addMeasurement(now() - startTime);
++self->waitForPrevCommitRequests;
updateLiveCommittedVersion(self, req);
req.reply.send(Void());
Expand Down Expand Up @@ -263,21 +262,8 @@ MasterData::MasterData(Reference<AsyncVar<ServerDBInfo> const> const& dbInfo,
getCommitVersionRequests("GetCommitVersionRequests", cc),
getLiveCommittedVersionRequests("GetLiveCommittedVersionRequests", cc),
reportLiveCommittedVersionRequests("ReportLiveCommittedVersionRequests", cc),
versionVectorTagUpdates("VersionVectorTagUpdates",
dbgid,
SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL,
SERVER_KNOBS->LATENCY_SKETCH_ACCURACY),
waitForPrevCommitRequests("WaitForPrevCommitRequests", cc),
nonWaitForPrevCommitRequests("NonWaitForPrevCommitRequests", cc),
versionVectorSizeOnCVReply("VersionVectorSizeOnCVReply",
dbgid,
SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL,
SERVER_KNOBS->LATENCY_SKETCH_ACCURACY),
waitForPrevLatencies("WaitForPrevLatencies",
dbgid,
SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL,
SERVER_KNOBS->LATENCY_SKETCH_ACCURACY),
addActor(addActor) {
nonWaitForPrevCommitRequests("NonWaitForPrevCommitRequests", cc), addActor(addActor) {
logger = cc.traceCounters("MasterMetrics", dbgid, SERVER_KNOBS->WORKER_LOGGING_INTERVAL, "MasterMetrics");
if (forceRecovery && !myInterface.locality.dcId().present()) {
TraceEvent(SevError, "ForcedRecoveryRequiresDcID").log();
Expand All @@ -286,6 +272,21 @@ MasterData::MasterData(Reference<AsyncVar<ServerDBInfo> const> const& dbInfo,
balancer = resolutionBalancer.resolutionBalancing();
locality = tagLocalityInvalid;

if (SERVER_KNOBS->ENABLE_VERSION_VECTOR) {
versionVectorTagUpdates = new LatencySample("VersionVectorTagUpdates",
dbgid,
SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL,
SERVER_KNOBS->LATENCY_SKETCH_ACCURACY);
versionVectorSizeOnCVReply = new LatencySample("VersionVectorSizeOnCVReply",
dbgid,
SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL,
SERVER_KNOBS->LATENCY_SKETCH_ACCURACY);
waitForPrevLatencies = new LatencySample("WaitForPrevLatencies",
dbgid,
SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL,
SERVER_KNOBS->LATENCY_SKETCH_ACCURACY);
}

#ifdef WITH_SWIFT
using namespace fdbserver_swift;
// FIXME(swift): can we make a cleaner init?
Expand Down Expand Up @@ -352,7 +353,7 @@ void updateLiveCommittedVersionCxx(Reference<MasterData> self, ReportRawCommitte
int8_t primaryLocality =
SERVER_KNOBS->ENABLE_VERSION_VECTOR_HA_OPTIMIZATION ? self->locality : tagLocalityInvalid;
self->ssVersionVector.setVersion(req.writtenTags.get(), req.version, primaryLocality);
self->versionVectorTagUpdates.addMeasurement(req.writtenTags.get().size());
self->versionVectorTagUpdates->addMeasurement(req.writtenTags.get().size());
}
auto curTime = now();
// add debug here to change liveCommittedVersion to time bound of now()
Expand Down Expand Up @@ -409,7 +410,7 @@ ACTOR Future<Void> serveLiveCommittedVersionCxx(Reference<MasterData> self) {
reply.minKnownCommittedVersion = self->minKnownCommittedVersion;
if (SERVER_KNOBS->ENABLE_VERSION_VECTOR) {
self->ssVersionVector.getDelta(req.maxVersion, reply.ssVersionVectorDelta);
self->versionVectorSizeOnCVReply.addMeasurement(reply.ssVersionVectorDelta.size());
self->versionVectorSizeOnCVReply->addMeasurement(reply.ssVersionVectorDelta.size());
}
req.reply.send(reply);
}
Expand Down

0 comments on commit 965390d

Please sign in to comment.