From 20e4892fd7fc0da9b5582668b9d31e8e7106b43e Mon Sep 17 00:00:00 2001 From: Max <82761650+MaxMustermann2@users.noreply.github.com> Date: Fri, 20 Jan 2023 19:25:22 +0000 Subject: [PATCH] Add logging for `NthNextHmy` panic (#4341) * improve logging * [consensus] do not try to use negative index * Revert "do not try to use negative index" This reverts commit b434fd3f4af39f32650e909292cc0123bedba86e. We have fixed the cause of the issue, which was time drift on a new cloud provider's nodes. See `systemd-timesyncd.service` Even if this fix had been merged, it would likely not have solved the problem given those nodes with the correct time would pick a different leader from those with time drift. Or, in other words, the view change would not have gone through. * improve logging --- consensus/quorum/one-node-staked-vote.go | 3 +++ consensus/quorum/quorum.go | 5 +++++ consensus/view_change.go | 4 ++++ consensus/view_change_construct.go | 6 ++++-- core/blockchain_impl.go | 1 + core/rawdb/accessors_offchain.go | 4 +++- 6 files changed, 20 insertions(+), 3 deletions(-) diff --git a/consensus/quorum/one-node-staked-vote.go b/consensus/quorum/one-node-staked-vote.go index b51af6a15d..e3a45540a1 100644 --- a/consensus/quorum/one-node-staked-vote.go +++ b/consensus/quorum/one-node-staked-vote.go @@ -157,6 +157,9 @@ func (v *stakedVoteWeight) IsQuorumAchievedByMask(mask *bls_cosi.Mask) bool { if currentTotalPower == nil { return false } + const msg = "[IsQuorumAchievedByMask] Voting power: need %+v, have %+v" + utils.Logger().Debug(). + Msgf(msg, threshold, currentTotalPower) return (*currentTotalPower).GT(threshold) } diff --git a/consensus/quorum/quorum.go b/consensus/quorum/quorum.go index b279d8ebfe..867fd99677 100644 --- a/consensus/quorum/quorum.go +++ b/consensus/quorum/quorum.go @@ -12,6 +12,7 @@ import ( "github.com/harmony-one/harmony/consensus/votepower" bls_cosi "github.com/harmony-one/harmony/crypto/bls" shardingconfig "github.com/harmony-one/harmony/internal/configs/sharding" + "github.com/harmony-one/harmony/internal/utils" "github.com/harmony-one/harmony/multibls" "github.com/harmony-one/harmony/numeric" "github.com/harmony-one/harmony/shard" @@ -224,6 +225,10 @@ func (s *cIdentities) NthNextHmy(instance shardingconfig.Instance, pubKey *bls.P idx := s.IndexOf(pubKey.Bytes) if idx != -1 { found = true + } else { + utils.Logger().Error(). + Str("key", pubKey.Bytes.Hex()). + Msg("[NthNextHmy] pubKey not found") } numNodes := instance.NumHarmonyOperatedNodesPerShard() // sanity check to avoid out of bound access diff --git a/consensus/view_change.go b/consensus/view_change.go index 7e3be93a7d..5bfd49f83d 100644 --- a/consensus/view_change.go +++ b/consensus/view_change.go @@ -142,6 +142,10 @@ func (consensus *Consensus) getNextViewID() (uint64, time.Duration) { // timestamp messed up in current validator node if curTimestamp <= blockTimestamp { + consensus.getLogger().Error(). + Int64("curTimestamp", curTimestamp). + Int64("blockTimestamp", blockTimestamp). + Msg("[getNextViewID] timestamp of block too high") return consensus.fallbackNextViewID() } // diff only increases, since view change timeout is shorter than diff --git a/consensus/view_change_construct.go b/consensus/view_change_construct.go index 71f9385a99..b818507eb7 100644 --- a/consensus/view_change_construct.go +++ b/consensus/view_change_construct.go @@ -444,7 +444,8 @@ func (vc *viewChange) InitPayload( vc.nilBitmap[viewID] = nilBitmap } if err := vc.nilBitmap[viewID].SetKey(key.Pub.Bytes, true); err != nil { - vc.getLogger().Warn().Str("key", key.Pub.Bytes.Hex()).Msg("[InitPayload] nilBitmap setkey failed") + vc.getLogger().Warn().Err(err). + Str("key", key.Pub.Bytes.Hex()).Msg("[InitPayload] nilBitmap setkey failed") continue } if _, ok := vc.nilSigs[viewID]; !ok { @@ -475,7 +476,8 @@ func (vc *viewChange) InitPayload( vc.viewIDBitmap[viewID] = viewIDBitmap } if err := vc.viewIDBitmap[viewID].SetKey(key.Pub.Bytes, true); err != nil { - vc.getLogger().Warn().Str("key", key.Pub.Bytes.Hex()).Msg("[InitPayload] viewIDBitmap setkey failed") + vc.getLogger().Warn().Err(err). + Str("key", key.Pub.Bytes.Hex()).Msg("[InitPayload] viewIDBitmap setkey failed") continue } if _, ok := vc.viewIDSigs[viewID]; !ok { diff --git a/core/blockchain_impl.go b/core/blockchain_impl.go index 9fa618c442..73f37f8623 100644 --- a/core/blockchain_impl.go +++ b/core/blockchain_impl.go @@ -428,6 +428,7 @@ func (bc *BlockChainImpl) ValidateNewBlock(block *types.Block, beaconChain Block bc, block.Header(), ); err != nil { utils.Logger().Error(). + Uint64("blockNum", block.NumberU64()). Str("blockHash", block.Hash().Hex()). Err(err). Msg("[ValidateNewBlock] Cannot verify vrf for the new block") diff --git a/core/rawdb/accessors_offchain.go b/core/rawdb/accessors_offchain.go index c95cf73f55..5324aa0e50 100644 --- a/core/rawdb/accessors_offchain.go +++ b/core/rawdb/accessors_offchain.go @@ -21,7 +21,9 @@ func ReadShardState( ) (*shard.State, error) { data, err := db.Get(shardStateKey(epoch)) if err != nil { - return nil, errors.New(MsgNoShardStateFromDB) + return nil, errors.Errorf( + MsgNoShardStateFromDB, "epoch: %d", epoch, + ) } ss, err2 := shard.DecodeWrapper(data) if err2 != nil {