From 85a02f75b169303373cf38bd7134fcb14544135a Mon Sep 17 00:00:00 2001 From: chris erway <51567+cce@users.noreply.github.com> Date: Tue, 12 Apr 2022 16:02:08 -0400 Subject: [PATCH] add counters around dropping pseudonode and voteVerifier tasks, and slow pseudonode responses (#3861) --- agreement/cryptoVerifier.go | 5 +++++ agreement/pseudonode.go | 9 +++++++++ 2 files changed, 14 insertions(+) diff --git a/agreement/cryptoVerifier.go b/agreement/cryptoVerifier.go index ff8a6d6aa4..cf6c466e5d 100644 --- a/agreement/cryptoVerifier.go +++ b/agreement/cryptoVerifier.go @@ -22,8 +22,12 @@ import ( "github.com/algorand/go-algorand/logging" "github.com/algorand/go-algorand/protocol" + "github.com/algorand/go-algorand/util/metrics" ) +var voteVerifierOutFullCounter = metrics.MakeCounter( + metrics.MetricName{Name: "algod_agreement_vote_verifier_responses_dropped", Description: "Number of voteVerifier responses dropped due to full channel"}) + // TODO put these in config const ( voteParallelism = 16 @@ -210,6 +214,7 @@ func (c *poolCryptoVerifier) voteFillWorker(toBundleWait chan<- bundleFuture) { select { case c.votes.out <- asyncVerifyVoteResponse{index: votereq.TaskIndex, err: err, cancelled: true}: default: + voteVerifierOutFullCounter.Inc(nil) c.log.Infof("poolCryptoVerifier.voteFillWorker unable to write failed enqueue response to output channel") } } diff --git a/agreement/pseudonode.go b/agreement/pseudonode.go index f52854d6a5..bdaa2f359d 100644 --- a/agreement/pseudonode.go +++ b/agreement/pseudonode.go @@ -29,6 +29,7 @@ import ( "github.com/algorand/go-algorand/logging/logspec" "github.com/algorand/go-algorand/logging/telemetryspec" "github.com/algorand/go-algorand/protocol" + "github.com/algorand/go-algorand/util/metrics" ) // TODO put these in config @@ -43,6 +44,9 @@ var errPseudonodeVerifierClosedChannel = errors.New("crypto verifier closed the var errPseudonodeNoVotes = errors.New("no valid participation keys to generate votes for given round") var errPseudonodeNoProposals = errors.New("no valid participation keys to generate proposals for given round") +var pseudonodeBacklogFullByType = metrics.NewTagCounter("algod_agreement_pseudonode_tasks_dropped_{TAG}", "Number of pseudonode tasks dropped per type") +var pseudonodeResultTimeoutsByType = metrics.NewTagCounter("algod_agreement_pseudonode_tasks_timeouts_{TAG}", "Number of pseudonode task result timeouts per type") + // A pseudonode creates proposals and votes with a KeyManager which holds participation keys. // // It constructs these messages as if they arrived from an external source and were verified. @@ -176,6 +180,7 @@ func (n asyncPseudonode) MakeProposals(ctx context.Context, r round, p period) ( return proposalTask.outputChannel(), nil default: proposalTask.close() + pseudonodeBacklogFullByType.Add("proposal", 1) return nil, fmt.Errorf("unable to make proposal for (%d, %d): %w", r, p, errPseudonodeBacklogFull) } } @@ -193,6 +198,7 @@ func (n asyncPseudonode) MakeVotes(ctx context.Context, r round, p period, s ste return proposalTask.outputChannel(), nil default: proposalTask.close() + pseudonodeBacklogFullByType.Add("vote", 1) return nil, fmt.Errorf("unable to make vote for (%d, %d, %d): %w", r, p, s, errPseudonodeBacklogFull) } } @@ -474,6 +480,7 @@ verifiedVotesLoop: return case <-outputTimeout: // we've been waiting for too long for this vote to be written to the output. + pseudonodeResultTimeoutsByType.Add("vote", 1) t.node.log.Warnf("pseudonode.makeVotes: unable to write vote to output channel for round %d, period %d", t.round, t.period) outputTimeout = nil } @@ -577,6 +584,7 @@ verifiedVotesLoop: return case <-outputTimeout: // we've been waiting for too long for this vote to be written to the output. + pseudonodeResultTimeoutsByType.Add("pvote", 1) t.node.log.Warnf("pseudonode.makeProposals: unable to write proposal vote to output channel for round %d, period %d", t.round, t.period) outputTimeout = nil } @@ -597,6 +605,7 @@ verifiedPayloadsLoop: return case <-outputTimeout: // we've been waiting for too long for this vote to be written to the output. + pseudonodeResultTimeoutsByType.Add("ppayload", 1) t.node.log.Warnf("pseudonode.makeProposals: unable to write proposal payload to output channel for round %d, period %d", t.round, t.period) outputTimeout = nil }