Skip to content

Commit

Permalink
fix(consensus): prevote nil upon timeout when Proposal is missing (co…
Browse files Browse the repository at this point in the history
…metbft#2218)

This bug was possibly introduced by
cometbft#1203. This PR refactored the
consensus operation, propose -> prevote transition (`defaultDoPrevote`
method), to match the pseudo-code.

The refactoring assumed that when `cs.ProposalBlock` is unset (nil), so
should be `cs.Proposal` (meaning that the node didn't receive the round
proposal). In this case, the node must prevote nil.

The added test unit describes a scenario where `cs.Proposal` is nil (not
received), but the block ` cs.ProposalBlock` is received because the
node sees a Polka for a block, then receives the full associated block.
The node must prevote nil in this scenario, in line with cometbft#1203. But the
lack of the test on `cs.Proposal` leads to a bug.

This bug does not affect previous releases except for `v1.0`.

---

- [ ] Tests written/updated
- [ ] Changelog entry added in `.changelog` (we use
[unclog](https://github.com/informalsystems/unclog) to manage our
changelog)
- [ ] Updated relevant documentation (`docs/` or `spec/`) and code
comments
- [ ] Title follows the [Conventional
Commits](https://www.conventionalcommits.org/en/v1.0.0/) spec
  • Loading branch information
cason authored and roy-dydx committed Apr 14, 2024
1 parent 5da9820 commit 8d218ea
Show file tree
Hide file tree
Showing 3 changed files with 148 additions and 3 deletions.
4 changes: 3 additions & 1 deletion consensus/common_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,9 @@ func validatePrevote(t *testing.T, cs *State, round int32, privVal *validatorStu
panic(fmt.Sprintf("Expected prevote to be for nil, got %X", vote.BlockID.Hash))
}
} else {
if !bytes.Equal(vote.BlockID.Hash, blockHash) {
if vote.BlockID.Hash == nil {
panic(fmt.Sprintf("Expected prevote to be for %X, got <nil>", blockHash))
} else if !bytes.Equal(vote.BlockID.Hash, blockHash) {
panic(fmt.Sprintf("Expected prevote to be for %X, got %X", blockHash, vote.BlockID.Hash))
}
}
Expand Down
8 changes: 6 additions & 2 deletions consensus/state.go
Original file line number Diff line number Diff line change
Expand Up @@ -1347,8 +1347,8 @@ func (cs *State) defaultDoPrevote(height int64, round int32) {
logger := cs.Logger.With("height", height, "round", round)

// We did not receive a proposal within this round. (and thus executing this from a timeout)
if cs.ProposalBlock == nil {
logger.Debug("prevote step: ProposalBlock is nil; prevoting nil")
if cs.Proposal == nil || cs.ProposalBlock == nil {
logger.Debug("prevote step: Proposal or ProposalBlock is nil; prevoting nil")
cs.signAddVote(cmtproto.PrevoteType, nil, types.PartSetHeader{}, nil)
return
}
Expand Down Expand Up @@ -2041,6 +2041,10 @@ func (cs *State) addProposalBlockPart(msg *BlockPartMessage, peerID p2p.ID) (add
cs.metrics.DuplicateBlockPart.Add(1)
}

count, total := cs.ProposalBlockParts.Count(), cs.ProposalBlockParts.Total()
cs.Logger.Debug("receive block part", "height", height, "round", round,
"index", part.Index, "count", count, "total", total, "from", peerID)

maxBytes := cs.state.ConsensusParams.Block.MaxBytes
if maxBytes == -1 {
maxBytes = int64(types.MaxBlockSizeBytes)
Expand Down
139 changes: 139 additions & 0 deletions consensus/state_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ x * TestStateLockPOLDoesNotUnlock 4 vals, one precommits, other 3 polka nil at
next round, so we precommit nil but maintain lock
x * TestStateLockMissingProposalWhenPOLSeenDoesNotUpdateLock - 4 vals, 1 misses proposal but sees POL.
x * TestStateLockMissingProposalWhenPOLSeenDoesNotUnlock - 4 vals, 1 misses proposal but sees POL.
* TestStateLockMissingProposalWhenPOLForLockedBlock - 4 vals, 1 misses proposal but sees POL for locked block.
x * TestStateMissingProposalValidBlockReceivedTimeout - 4 vals, 1 misses proposal but receives full block.
x * TestStateLockPOLSafety1 - 4 vals. We shouldn't change lock based on polka at earlier round
x * TestStateLockPOLSafety2 - 4 vals. After unlocking, we shouldn't relock based on polka at earlier round
x * TestStatePrevotePOLFromPreviousRound 4 vals, prevote a proposal if a POL was seen for it in a previous round.
Expand Down Expand Up @@ -1247,6 +1249,143 @@ func TestStateLockMissingProposalWhenPOLSeenDoesNotUpdateLock(t *testing.T) {
validatePrecommit(t, cs1, round, 0, vss[0], nil, firstBlockHash)
}

// TestStateLockMissingProposalWhenPOLForLockedBlock tests that observing
// a two thirds majority for a block that matches the validator's locked block
// causes a validator to update its lock round and Precommit the locked block.
func TestStateLockMissingProposalWhenPOLForLockedBlock(t *testing.T) {
cs1, vss := randState(4)
vs2, vs3, vs4 := vss[1], vss[2], vss[3]
height, round := cs1.Height, cs1.Round

timeoutWaitCh := subscribe(cs1.eventBus, types.EventQueryTimeoutWait)
proposalCh := subscribe(cs1.eventBus, types.EventQueryCompleteProposal)
pv1, err := cs1.privValidator.GetPubKey()
require.NoError(t, err)
addr := pv1.Address()
voteCh := subscribeToVoter(cs1, addr)
newRoundCh := subscribe(cs1.eventBus, types.EventQueryNewRound)

/*
Round 0:
cs1 creates a proposal for block B.
Send a prevote for B from each of the validators to cs1.
Send a precommit for nil from all of the validators to cs1.
This ensures that cs1 will lock on B in this round but not commit it.
*/
t.Log("### Starting Round 0")
startTestRound(cs1, height, round)

ensureNewRound(newRoundCh, height, round)
ensureNewProposal(proposalCh, height, round)
rs := cs1.GetRoundState()
blockHash := rs.ProposalBlock.Hash()
blockParts := rs.ProposalBlockParts.Header()

ensurePrevote(voteCh, height, round) // prevote

signAddVotes(cs1, cmtproto.PrevoteType, blockHash, blockParts, false, vs2, vs3, vs4)

ensurePrecommit(voteCh, height, round) // our precommit
// the proposed block should now be locked and our precommit added
validatePrecommit(t, cs1, round, round, vss[0], blockHash, blockHash)

// add precommits from the rest
signAddVotes(cs1, cmtproto.PrecommitType, nil, types.PartSetHeader{}, true, vs2, vs3, vs4)

// timeout to new round
ensureNewTimeout(timeoutWaitCh, height, round, cs1.config.Precommit(round).Nanoseconds())

/*
Round 1:
The same block B is re-proposed, but it is not sent to cs1.
Send a prevote for B from each of the validators to cs1.
Check that cs1 precommits B, since B matches its locked value.
Check that cs1 maintains its lock on block B but updates its locked round.
*/
t.Log("### Starting Round 1")
incrementRound(vs2, vs3, vs4)
round++

ensureNewRound(newRoundCh, height, round)

// prevote for nil since the proposal was not seen (although it matches the locked block)
ensurePrevote(voteCh, height, round)
validatePrevote(t, cs1, round, vss[0], nil)

// now lets add prevotes from everyone else for the locked block
signAddVotes(cs1, cmtproto.PrevoteType, blockHash, blockParts, false, vs2, vs3, vs4)

ensurePrecommit(voteCh, height, round)

// the validator precommits the block, because it matches its locked block,
// maintains the same locked block and updates its locked round
validatePrecommit(t, cs1, round, 1, vss[0], blockHash, blockHash)

// NOTE: this behavior is inconsistent with Tendermint consensus pseudo-code.
// In the pseudo-code, if a process does not receive the proposal (and block) for
// the current round, it cannot Precommit the proposed block ID, even thought it
// sees a POL for that block that matches the locked value (block).
}

// TestStateMissingProposalValidBlockReceivedTimeout tests if a node that
// misses the round's Proposal but receives a Polka for a block and the full
// block will not prevote for the valid block because the Proposal was missing.
func TestStateMissingProposalValidBlockReceivedTimeout(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()

cs1, vss := randState(4)
height, round := cs1.Height, cs1.Round

timeoutProposeCh := subscribe(cs1.eventBus, types.EventQueryTimeoutPropose)
voteCh := subscribe(cs1.eventBus, types.EventQueryVote)
validBlockCh := subscribe(cs1.eventBus, types.EventQueryValidBlock)

// Produce a block
block, err := cs1.createProposalBlock(ctx)
require.NoError(t, err)
blockParts, err := block.MakePartSet(types.BlockPartSizeBytes)
require.NoError(t, err)
blockID := types.BlockID{
Hash: block.Hash(),
PartSetHeader: blockParts.Header(),
}

// Skip round 0 and start consensus threads
round++
incrementRound(vss[1:]...)
startTestRound(cs1, height, round)

// Receive prevotes(height, round=1, blockID) from all other validators.
for i := 1; i < len(vss); i++ {
signAddVotes(cs1, cmtproto.PrevoteType, blockID.Hash, blockID.PartSetHeader, false, vss[i])
ensurePrevote(voteCh, height, round)
}

// We have polka for blockID so we can accept the associated full block.
for i := 0; i < int(blockParts.Total()); i++ {
err := cs1.AddProposalBlockPart(height, round, blockParts.GetPart(i), "peer")
require.NoError(t, err)
}
ensureNewValidBlock(validBlockCh, height, round)

// We don't prevote right now because we didn't receive the round's
// Proposal. Wait for the propose timeout.
ensureNewTimeout(timeoutProposeCh, height, round, cs1.config.Propose(round).Nanoseconds())

rs := cs1.GetRoundState()
assert.Equal(t, rs.ValidRound, round)
assert.Equal(t, rs.ValidBlock.Hash(), blockID.Hash)

// Since we didn't see the round's Proposal, we should prevote nil.
// NOTE: introduced by https://github.com/cometbft/cometbft/pull/1203.
// In branches v0.{34,37,38}.x, the node prevotes for the valid block.
ensurePrevote(voteCh, height, round)
validatePrevote(t, cs1, round, vss[0], nil)
}

// TestStateLockDoesNotLockOnOldProposal tests that observing
// a two thirds majority for a block does not cause a validator to lock on the
// block if a proposal was not seen for that block in the current round, but
Expand Down

0 comments on commit 8d218ea

Please sign in to comment.