Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Only emit version vector counters when enabled. #11385

Merged
merged 2 commits into from
May 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
21 changes: 12 additions & 9 deletions fdbserver/GrvProxyServer.actor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ struct GrvProxyData {
Optional<LatencyBandConfig> latencyBandConfig;
double lastStartCommit;
double lastCommitLatency;
LatencySample versionVectorSizeOnGRVReply;
LatencySample* versionVectorSizeOnGRVReply = nullptr;
int updateCommitRequests;
NotifiedDouble lastCommitTime;

Expand Down Expand Up @@ -234,13 +234,16 @@ struct GrvProxyData {
Reference<AsyncVar<ServerDBInfo> const> db)
: dbgid(dbgid), stats(dbgid), master(master), getConsistentReadVersion(getConsistentReadVersion),
cx(openDBOnServer(db, TaskPriority::DefaultEndpoint, LockAware::True)), db(db), lastStartCommit(0),
lastCommitLatency(SERVER_KNOBS->REQUIRED_MIN_RECOVERY_DURATION),
versionVectorSizeOnGRVReply("VersionVectorSizeOnGRVReply",
dbgid,
SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL,
SERVER_KNOBS->LATENCY_SKETCH_ACCURACY),
updateCommitRequests(0), lastCommitTime(0), version(0), minKnownCommittedVersion(invalidVersion),
tagThrottler(CLIENT_KNOBS->PROXY_MAX_TAG_THROTTLE_DURATION) {}
lastCommitLatency(SERVER_KNOBS->REQUIRED_MIN_RECOVERY_DURATION), updateCommitRequests(0), lastCommitTime(0),
version(0), minKnownCommittedVersion(invalidVersion),
tagThrottler(CLIENT_KNOBS->PROXY_MAX_TAG_THROTTLE_DURATION) {
if (SERVER_KNOBS->ENABLE_VERSION_VECTOR) {
versionVectorSizeOnGRVReply = new LatencySample("VersionVectorSizeOnGRVReply",
dbgid,
SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL,
SERVER_KNOBS->LATENCY_SKETCH_ACCURACY);
}
}
};

ACTOR Future<Void> healthMetricsRequestServer(GrvProxyInterface grvProxy,
Expand Down Expand Up @@ -748,7 +751,7 @@ ACTOR Future<Void> sendGrvReplies(Future<GetReadVersionReply> replyFuture,
reply.tagThrottleInfo.clear();
if (SERVER_KNOBS->ENABLE_VERSION_VECTOR) {
grvProxyData->ssVersionVectorCache.getDelta(request.maxVersion, reply.ssVersionVectorDelta);
grvProxyData->versionVectorSizeOnGRVReply.addMeasurement(reply.ssVersionVectorDelta.size());
grvProxyData->versionVectorSizeOnGRVReply->addMeasurement(reply.ssVersionVectorDelta.size());
}
reply.proxyId = grvProxyData->dbgid;
reply.proxyTagThrottledDuration = request.proxyTagThrottledDuration;
Expand Down
6 changes: 3 additions & 3 deletions fdbserver/include/fdbserver/MasterData.actor.h
Original file line number Diff line number Diff line change
Expand Up @@ -127,11 +127,11 @@ struct SWIFT_CXX_REF_MASTERDATA MasterData : NonCopyable, ReferenceCounted<Maste
CounterValue reportLiveCommittedVersionRequests;
// This counter gives an estimate of the number of non-empty peeks that storage servers
// should do from tlogs (in the worst case, ignoring blocking peek timeouts).
LatencySample versionVectorTagUpdates;
LatencySample* versionVectorTagUpdates = nullptr;
CounterValue waitForPrevCommitRequests;
CounterValue nonWaitForPrevCommitRequests;
LatencySample versionVectorSizeOnCVReply;
LatencySample waitForPrevLatencies;
LatencySample* versionVectorSizeOnCVReply = nullptr;
LatencySample* waitForPrevLatencies = nullptr;

PromiseStream<Future<Void>> addActor;

Expand Down
39 changes: 20 additions & 19 deletions fdbserver/masterserver.actor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ SWIFT_ACTOR Future<Void> waitForPrev(Reference<MasterData> self, ReportRawCommit
state double startTime = now();
wait(self->liveCommittedVersion.whenAtLeast(req.prevVersion.get()));
double latency = now() - startTime;
self->waitForPrevLatencies.addMeasurement(latency);
self->waitForPrevLatencies->addMeasurement(latency);
++self->waitForPrevCommitRequests;
updateLiveCommittedVersion(self, req);
req.reply.send(Void());
Expand All @@ -113,8 +113,7 @@ SWIFT_ACTOR Future<Void> waitForPrev(Reference<MasterData> self, ReportRawCommit
ACTOR Future<Void> waitForPrev(Reference<MasterData> self, ReportRawCommittedVersionRequest req) {
state double startTime = now();
wait(self->liveCommittedVersion.whenAtLeast(req.prevVersion.get()));
double latency = now() - startTime;
self->waitForPrevLatencies.addMeasurement(latency);
self->waitForPrevLatencies->addMeasurement(now() - startTime);
++self->waitForPrevCommitRequests;
updateLiveCommittedVersion(self, req);
req.reply.send(Void());
Expand Down Expand Up @@ -263,21 +262,8 @@ MasterData::MasterData(Reference<AsyncVar<ServerDBInfo> const> const& dbInfo,
getCommitVersionRequests("GetCommitVersionRequests", cc),
getLiveCommittedVersionRequests("GetLiveCommittedVersionRequests", cc),
reportLiveCommittedVersionRequests("ReportLiveCommittedVersionRequests", cc),
versionVectorTagUpdates("VersionVectorTagUpdates",
dbgid,
SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL,
SERVER_KNOBS->LATENCY_SKETCH_ACCURACY),
waitForPrevCommitRequests("WaitForPrevCommitRequests", cc),
nonWaitForPrevCommitRequests("NonWaitForPrevCommitRequests", cc),
versionVectorSizeOnCVReply("VersionVectorSizeOnCVReply",
dbgid,
SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL,
SERVER_KNOBS->LATENCY_SKETCH_ACCURACY),
waitForPrevLatencies("WaitForPrevLatencies",
dbgid,
SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL,
SERVER_KNOBS->LATENCY_SKETCH_ACCURACY),
addActor(addActor) {
nonWaitForPrevCommitRequests("NonWaitForPrevCommitRequests", cc), addActor(addActor) {
logger = cc.traceCounters("MasterMetrics", dbgid, SERVER_KNOBS->WORKER_LOGGING_INTERVAL, "MasterMetrics");
if (forceRecovery && !myInterface.locality.dcId().present()) {
TraceEvent(SevError, "ForcedRecoveryRequiresDcID").log();
Expand All @@ -286,6 +272,21 @@ MasterData::MasterData(Reference<AsyncVar<ServerDBInfo> const> const& dbInfo,
balancer = resolutionBalancer.resolutionBalancing();
locality = tagLocalityInvalid;

if (SERVER_KNOBS->ENABLE_VERSION_VECTOR) {
versionVectorTagUpdates = new LatencySample("VersionVectorTagUpdates",
dbgid,
SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL,
SERVER_KNOBS->LATENCY_SKETCH_ACCURACY);
versionVectorSizeOnCVReply = new LatencySample("VersionVectorSizeOnCVReply",
dbgid,
SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL,
SERVER_KNOBS->LATENCY_SKETCH_ACCURACY);
waitForPrevLatencies = new LatencySample("WaitForPrevLatencies",
dbgid,
SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL,
SERVER_KNOBS->LATENCY_SKETCH_ACCURACY);
}

#ifdef WITH_SWIFT
using namespace fdbserver_swift;
// FIXME(swift): can we make a cleaner init?
Expand Down Expand Up @@ -352,7 +353,7 @@ void updateLiveCommittedVersionCxx(Reference<MasterData> self, ReportRawCommitte
int8_t primaryLocality =
SERVER_KNOBS->ENABLE_VERSION_VECTOR_HA_OPTIMIZATION ? self->locality : tagLocalityInvalid;
self->ssVersionVector.setVersion(req.writtenTags.get(), req.version, primaryLocality);
self->versionVectorTagUpdates.addMeasurement(req.writtenTags.get().size());
self->versionVectorTagUpdates->addMeasurement(req.writtenTags.get().size());
}
auto curTime = now();
// add debug here to change liveCommittedVersion to time bound of now()
Expand Down Expand Up @@ -409,7 +410,7 @@ ACTOR Future<Void> serveLiveCommittedVersionCxx(Reference<MasterData> self) {
reply.minKnownCommittedVersion = self->minKnownCommittedVersion;
if (SERVER_KNOBS->ENABLE_VERSION_VECTOR) {
self->ssVersionVector.getDelta(req.maxVersion, reply.ssVersionVectorDelta);
self->versionVectorSizeOnCVReply.addMeasurement(reply.ssVersionVectorDelta.size());
self->versionVectorSizeOnCVReply->addMeasurement(reply.ssVersionVectorDelta.size());
}
req.reply.send(reply);
}
Expand Down