From be4ee005861aea52df43770d5da44bfdeecf8469 Mon Sep 17 00:00:00 2001 From: Erik Grinaker Date: Sun, 14 May 2023 20:55:00 +0000 Subject: [PATCH 1/2] kvserver: move `testingDisableQuiescence` to `replica_raft_quiesce.go` Epic: none Release note: None --- pkg/kv/kvserver/replica.go | 3 --- pkg/kv/kvserver/replica_raft_quiesce.go | 4 ++++ 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/pkg/kv/kvserver/replica.go b/pkg/kv/kvserver/replica.go index 7e1d3868db83..0cc25a6e2caf 100644 --- a/pkg/kv/kvserver/replica.go +++ b/pkg/kv/kvserver/replica.go @@ -41,7 +41,6 @@ import ( "github.com/cockroachdb/cockroach/pkg/storage" "github.com/cockroachdb/cockroach/pkg/storage/enginepb" "github.com/cockroachdb/cockroach/pkg/util" - "github.com/cockroachdb/cockroach/pkg/util/envutil" "github.com/cockroachdb/cockroach/pkg/util/grunning" "github.com/cockroachdb/cockroach/pkg/util/hlc" "github.com/cockroachdb/cockroach/pkg/util/log" @@ -85,8 +84,6 @@ const ( defaultReplicaRaftMuWarnThreshold = 500 * time.Millisecond ) -var testingDisableQuiescence = envutil.EnvOrDefaultBool("COCKROACH_DISABLE_QUIESCENCE", false) - // StrictGCEnforcement controls whether requests are rejected based on the GC // threshold and the current GC TTL (true) or just based on the GC threshold // (false). diff --git a/pkg/kv/kvserver/replica_raft_quiesce.go b/pkg/kv/kvserver/replica_raft_quiesce.go index 2a10dd7c927a..b690706d6003 100644 --- a/pkg/kv/kvserver/replica_raft_quiesce.go +++ b/pkg/kv/kvserver/replica_raft_quiesce.go @@ -20,12 +20,16 @@ import ( "github.com/cockroachdb/cockroach/pkg/kv/kvserver/raftlog" "github.com/cockroachdb/cockroach/pkg/roachpb" "github.com/cockroachdb/cockroach/pkg/settings/cluster" + "github.com/cockroachdb/cockroach/pkg/util/envutil" "github.com/cockroachdb/cockroach/pkg/util/log" "github.com/cockroachdb/cockroach/pkg/util/timeutil" "go.etcd.io/raft/v3" "go.etcd.io/raft/v3/raftpb" ) +// testingDisableQuiescence disables replica quiescence. +var testingDisableQuiescence = envutil.EnvOrDefaultBool("COCKROACH_DISABLE_QUIESCENCE", false) + func (r *Replica) quiesceLocked(ctx context.Context, lagging laggingReplicaSet) { if !r.mu.quiescent { if log.V(3) { From da5be736ddb6579055b237b33c61024a3e4a0d70 Mon Sep 17 00:00:00 2001 From: Erik Grinaker Date: Sun, 14 May 2023 21:01:36 +0000 Subject: [PATCH 2/2] kvserver: delay range quiescence This patch only quiesces ranges after 6 ticks (3 seconds) without any proposals, configurable via `COCKROACH_QUIESCE_AFTER_TICKS`. Unquiescence incurs a proposal, which has a non-negligible cost, and on low-latency clusters with steady write load this may otherwise (un)quiesce ranges very frequently, as often as every tick. Epic: none Release note (performance improvement): ranges now only quiesce after 3 seconds without proposals, to avoid frequent unquiescence which incurs an additional Raft proposal. This is configurable via `COCKROACH_QUIESCE_AFTER_TICKS` which defaults to 6. --- pkg/kv/kvserver/replica.go | 3 ++ pkg/kv/kvserver/replica_proposal_buf.go | 1 + pkg/kv/kvserver/replica_proposal_quota.go | 1 + pkg/kv/kvserver/replica_raft.go | 6 +++ pkg/kv/kvserver/replica_raft_quiesce.go | 14 ++++++ pkg/kv/kvserver/replica_test.go | 52 +++++++++++++++++------ 6 files changed, 64 insertions(+), 13 deletions(-) diff --git a/pkg/kv/kvserver/replica.go b/pkg/kv/kvserver/replica.go index 0cc25a6e2caf..1eedfd5e3ecd 100644 --- a/pkg/kv/kvserver/replica.go +++ b/pkg/kv/kvserver/replica.go @@ -741,6 +741,9 @@ type Replica struct { // Counts calls to Replica.tick() ticks int + // lastProposalAtTicks tracks the time of the last proposal, in ticks. + lastProposalAtTicks int + // Counts Raft messages refused due to queue congestion. droppedMessages int diff --git a/pkg/kv/kvserver/replica_proposal_buf.go b/pkg/kv/kvserver/replica_proposal_buf.go index d4486161d4fd..e32428da2028 100644 --- a/pkg/kv/kvserver/replica_proposal_buf.go +++ b/pkg/kv/kvserver/replica_proposal_buf.go @@ -1188,6 +1188,7 @@ func (rp *replicaProposer) registerProposalLocked(p *ProposalData) { if p.createdAtTicks == 0 { p.createdAtTicks = rp.mu.ticks } + rp.mu.lastProposalAtTicks = rp.mu.ticks // monotonically increasing if prev := rp.mu.proposals[p.idKey]; prev != nil && prev != p { log.Fatalf(rp.store.AnnotateCtx(context.Background()), "two proposals under same ID:\n%+v,\n%+v", prev, p) } diff --git a/pkg/kv/kvserver/replica_proposal_quota.go b/pkg/kv/kvserver/replica_proposal_quota.go index 346b1317f41e..52d8e6b0bd77 100644 --- a/pkg/kv/kvserver/replica_proposal_quota.go +++ b/pkg/kv/kvserver/replica_proposal_quota.go @@ -114,6 +114,7 @@ func (r *Replica) updateProposalQuotaRaftMuLocked( ) r.mu.lastUpdateTimes = make(map[roachpb.ReplicaID]time.Time) r.mu.lastUpdateTimes.updateOnBecomeLeader(r.mu.state.Desc.Replicas().Descriptors(), timeutil.Now()) + r.mu.lastProposalAtTicks = r.mu.ticks // delay imminent quiescence } else if r.mu.proposalQuota != nil { // We're becoming a follower. // We unblock all ongoing and subsequent quota acquisition goroutines diff --git a/pkg/kv/kvserver/replica_raft.go b/pkg/kv/kvserver/replica_raft.go index c0b9fe986cb2..0c8ef9ac2f29 100644 --- a/pkg/kv/kvserver/replica_raft.go +++ b/pkg/kv/kvserver/replica_raft.go @@ -553,6 +553,12 @@ func (r *Replica) hasPendingProposalQuotaRLocked() bool { return !r.mu.proposalQuota.Full() } +// ticksSinceLastProposalRLocked returns the number of ticks since the last +// proposal. +func (r *Replica) ticksSinceLastProposalRLocked() int { + return r.mu.ticks - r.mu.lastProposalAtTicks +} + // isRaftLeader returns true if this replica believes it is the current // Raft leader. // diff --git a/pkg/kv/kvserver/replica_raft_quiesce.go b/pkg/kv/kvserver/replica_raft_quiesce.go index b690706d6003..148cd073758e 100644 --- a/pkg/kv/kvserver/replica_raft_quiesce.go +++ b/pkg/kv/kvserver/replica_raft_quiesce.go @@ -27,6 +27,12 @@ import ( "go.etcd.io/raft/v3/raftpb" ) +// quiesceAfterTicks is the number of ticks without proposals after which ranges +// should quiesce. Unquiescing incurs a raft proposal which has a non-neglible +// cost, and low-latency clusters may otherwise (un)quiesce very frequently, +// e.g. on every tick. +var quiesceAfterTicks = envutil.EnvOrDefaultInt("COCKROACH_QUIESCE_AFTER_TICKS", 6) + // testingDisableQuiescence disables replica quiescence. var testingDisableQuiescence = envutil.EnvOrDefaultBool("COCKROACH_DISABLE_QUIESCENCE", false) @@ -95,6 +101,7 @@ func (r *Replica) maybeUnquiesceAndWakeLeaderLocked() bool { // Propose an empty command which will wake the leader. data := raftlog.EncodeRaftCommand(raftlog.EntryEncodingStandardWithoutAC, makeIDKey(), nil) _ = r.mu.internalRaftGroup.Propose(data) + r.mu.lastProposalAtTicks = r.mu.ticks // delay imminent quiescence return true } @@ -205,6 +212,7 @@ type quiescer interface { hasRaftReadyRLocked() bool hasPendingProposalsRLocked() bool hasPendingProposalQuotaRLocked() bool + ticksSinceLastProposalRLocked() int mergeInProgressRLocked() bool isDestroyedRLocked() (DestroyReason, error) } @@ -291,6 +299,12 @@ func shouldReplicaQuiesce( } return nil, nil, false } + if ticks := q.ticksSinceLastProposalRLocked(); ticks < quiesceAfterTicks { + if log.V(4) { + log.Infof(ctx, "not quiescing: proposed %d ticks ago", ticks) + } + return nil, nil, false + } // Fast path: don't quiesce expiration-based leases, since they'll likely be // renewed soon. The lease may not be ours, but in that case we wouldn't be // able to quiesce anyway (see leaseholder condition below). diff --git a/pkg/kv/kvserver/replica_test.go b/pkg/kv/kvserver/replica_test.go index 30b8d29ed3d7..c537128ff093 100644 --- a/pkg/kv/kvserver/replica_test.go +++ b/pkg/kv/kvserver/replica_test.go @@ -9951,17 +9951,18 @@ func TestApplyPaginatedCommittedEntries(t *testing.T) { } type testQuiescer struct { - st *cluster.Settings - storeID roachpb.StoreID - desc roachpb.RangeDescriptor - numProposals int - pendingQuota bool - status *raftSparseStatus - lastIndex kvpb.RaftIndex - raftReady bool - leaseStatus kvserverpb.LeaseStatus - mergeInProgress bool - isDestroyed bool + st *cluster.Settings + storeID roachpb.StoreID + desc roachpb.RangeDescriptor + numProposals int + pendingQuota bool + ticksSinceLastProposal int + status *raftSparseStatus + lastIndex kvpb.RaftIndex + raftReady bool + leaseStatus kvserverpb.LeaseStatus + mergeInProgress bool + isDestroyed bool // Not used to implement quiescer, but used by tests. livenessMap livenesspb.IsLiveMap @@ -10008,6 +10009,10 @@ func (q *testQuiescer) hasPendingProposalQuotaRLocked() bool { return q.pendingQuota } +func (q *testQuiescer) ticksSinceLastProposalRLocked() int { + return q.ticksSinceLastProposal +} + func (q *testQuiescer) mergeInProgressRLocked() bool { return q.mergeInProgress } @@ -10058,8 +10063,9 @@ func TestShouldReplicaQuiesce(t *testing.T) { 3: {Match: logIndex}, }, }, - lastIndex: logIndex, - raftReady: false, + lastIndex: logIndex, + raftReady: false, + ticksSinceLastProposal: quiesceAfterTicks, leaseStatus: kvserverpb.LeaseStatus{ State: kvserverpb.LeaseState_VALID, Lease: roachpb.Lease{ @@ -10106,6 +10112,26 @@ func TestShouldReplicaQuiesce(t *testing.T) { q.pendingQuota = true return q }) + test(true, func(q *testQuiescer) *testQuiescer { + q.ticksSinceLastProposal = quiesceAfterTicks // quiesce on quiesceAfterTicks + return q + }) + test(true, func(q *testQuiescer) *testQuiescer { + q.ticksSinceLastProposal = quiesceAfterTicks + 1 // quiesce above quiesceAfterTicks + return q + }) + test(false, func(q *testQuiescer) *testQuiescer { + q.ticksSinceLastProposal = quiesceAfterTicks - 1 // don't quiesce below quiesceAfterTicks + return q + }) + test(false, func(q *testQuiescer) *testQuiescer { + q.ticksSinceLastProposal = 0 // don't quiesce on 0 + return q + }) + test(false, func(q *testQuiescer) *testQuiescer { + q.ticksSinceLastProposal = -1 // don't quiesce on negative (shouldn't happen) + return q + }) test(false, func(q *testQuiescer) *testQuiescer { q.mergeInProgress = true return q