Skip to content

Commit

Permalink
Add logging for NthNextHmy panic (#4341)
Browse files Browse the repository at this point in the history
* improve logging

* [consensus] do not try to use negative index

* Revert "do not try to use negative index"

This reverts commit b434fd3. We have
fixed the cause of the issue, which was time drift on a new cloud
provider's nodes. See `systemd-timesyncd.service`.

Even if this fix had been merged, it would likely not have solved the
problem, because nodes with the correct time would pick a different
leader than nodes with time drift. In other words, the view change
would not have gone through.

* improve logging
  • Loading branch information
MaxMustermann2 committed Jan 20, 2023
1 parent 8e6bbd0 commit 20e4892
Show file tree
Hide file tree
Showing 6 changed files with 20 additions and 3 deletions.
3 changes: 3 additions & 0 deletions consensus/quorum/one-node-staked-vote.go
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,9 @@ func (v *stakedVoteWeight) IsQuorumAchievedByMask(mask *bls_cosi.Mask) bool {
if currentTotalPower == nil {
return false
}
const msg = "[IsQuorumAchievedByMask] Voting power: need %+v, have %+v"
utils.Logger().Debug().
Msgf(msg, threshold, currentTotalPower)
return (*currentTotalPower).GT(threshold)
}

Expand Down
5 changes: 5 additions & 0 deletions consensus/quorum/quorum.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (
"github.com/harmony-one/harmony/consensus/votepower"
bls_cosi "github.com/harmony-one/harmony/crypto/bls"
shardingconfig "github.com/harmony-one/harmony/internal/configs/sharding"
"github.com/harmony-one/harmony/internal/utils"
"github.com/harmony-one/harmony/multibls"
"github.com/harmony-one/harmony/numeric"
"github.com/harmony-one/harmony/shard"
Expand Down Expand Up @@ -224,6 +225,10 @@ func (s *cIdentities) NthNextHmy(instance shardingconfig.Instance, pubKey *bls.P
idx := s.IndexOf(pubKey.Bytes)
if idx != -1 {
found = true
} else {
utils.Logger().Error().
Str("key", pubKey.Bytes.Hex()).
Msg("[NthNextHmy] pubKey not found")
}
numNodes := instance.NumHarmonyOperatedNodesPerShard()
// sanity check to avoid out of bound access
Expand Down
4 changes: 4 additions & 0 deletions consensus/view_change.go
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,10 @@ func (consensus *Consensus) getNextViewID() (uint64, time.Duration) {

// timestamp messed up in current validator node
if curTimestamp <= blockTimestamp {
consensus.getLogger().Error().
Int64("curTimestamp", curTimestamp).
Int64("blockTimestamp", blockTimestamp).
Msg("[getNextViewID] timestamp of block too high")
return consensus.fallbackNextViewID()
}
// diff only increases, since view change timeout is shorter than
Expand Down
6 changes: 4 additions & 2 deletions consensus/view_change_construct.go
Original file line number Diff line number Diff line change
Expand Up @@ -444,7 +444,8 @@ func (vc *viewChange) InitPayload(
vc.nilBitmap[viewID] = nilBitmap
}
if err := vc.nilBitmap[viewID].SetKey(key.Pub.Bytes, true); err != nil {
vc.getLogger().Warn().Str("key", key.Pub.Bytes.Hex()).Msg("[InitPayload] nilBitmap setkey failed")
vc.getLogger().Warn().Err(err).
Str("key", key.Pub.Bytes.Hex()).Msg("[InitPayload] nilBitmap setkey failed")
continue
}
if _, ok := vc.nilSigs[viewID]; !ok {
Expand Down Expand Up @@ -475,7 +476,8 @@ func (vc *viewChange) InitPayload(
vc.viewIDBitmap[viewID] = viewIDBitmap
}
if err := vc.viewIDBitmap[viewID].SetKey(key.Pub.Bytes, true); err != nil {
vc.getLogger().Warn().Str("key", key.Pub.Bytes.Hex()).Msg("[InitPayload] viewIDBitmap setkey failed")
vc.getLogger().Warn().Err(err).
Str("key", key.Pub.Bytes.Hex()).Msg("[InitPayload] viewIDBitmap setkey failed")
continue
}
if _, ok := vc.viewIDSigs[viewID]; !ok {
Expand Down
1 change: 1 addition & 0 deletions core/blockchain_impl.go
Original file line number Diff line number Diff line change
Expand Up @@ -428,6 +428,7 @@ func (bc *BlockChainImpl) ValidateNewBlock(block *types.Block, beaconChain Block
bc, block.Header(),
); err != nil {
utils.Logger().Error().
Uint64("blockNum", block.NumberU64()).
Str("blockHash", block.Hash().Hex()).
Err(err).
Msg("[ValidateNewBlock] Cannot verify vrf for the new block")
Expand Down
4 changes: 3 additions & 1 deletion core/rawdb/accessors_offchain.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,9 @@ func ReadShardState(
) (*shard.State, error) {
data, err := db.Get(shardStateKey(epoch))
if err != nil {
return nil, errors.New(MsgNoShardStateFromDB)
return nil, errors.Errorf(
MsgNoShardStateFromDB, "epoch: %d", epoch,
)
}
ss, err2 := shard.DecodeWrapper(data)
if err2 != nil {
Expand Down

0 comments on commit 20e4892

Please sign in to comment.