Skip to content

Commit

Permalink
Merge pull request #5694 from ethereum-optimism/felipe/prevent-ban-ou…
Browse files Browse the repository at this point in the history
…t-of-sync

feat(proxyd): prevent banning out-of-sync backend
  • Loading branch information
OptimismBot committed May 15, 2023
2 parents 1382586 + 25dd1ce commit 06933ea
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 22 deletions.
51 changes: 29 additions & 22 deletions proxyd/consensus_poller.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ type backendState struct {
latestBlockNumber hexutil.Uint64
latestBlockHash string
peerCount uint64
inSync bool

lastUpdate time.Time

Expand Down Expand Up @@ -215,7 +216,13 @@ func (cp *ConsensusPoller) UpdateBackend(ctx context.Context, be *Backend) {
RecordConsensusBackendBanned(be, banned)

if banned {
log.Debug("skipping backend banned", "backend", be.Name)
log.Debug("skipping backend - banned", "backend", be.Name)
return
}

// if backend exhausted rate limit we'll skip it for now
if be.IsRateLimited() {
log.Debug("skipping backend - rate limited", "backend", be.Name)
return
}

Expand All @@ -228,42 +235,34 @@ func (cp *ConsensusPoller) UpdateBackend(ctx context.Context, be *Backend) {

// if backend it not in sync we'll check again after ban
inSync, err := cp.isInSync(ctx, be)
if err != nil || !inSync {
log.Warn("backend banned - not in sync", "backend", be.Name)
cp.Ban(be)
return
}
RecordConsensusBackendInSync(be, inSync)

// if backend exhausted rate limit we'll skip it for now
if be.IsRateLimited() {
return
RecordConsensusBackendInSync(be, err == nil && inSync)
if err != nil {
log.Warn("error updating backend sync state", "name", be.Name, "err", err)
}

var peerCount uint64
if !be.skipPeerCountCheck {
peerCount, err = cp.getPeerCount(ctx, be)
if err != nil {
log.Warn("error updating backend", "name", be.Name, "err", err)
return
log.Warn("error updating backend peer count", "name", be.Name, "err", err)
}
RecordConsensusBackendPeerCount(be, peerCount)
}

latestBlockNumber, latestBlockHash, err := cp.fetchBlock(ctx, be, "latest")
if err != nil {
log.Warn("error updating backend", "name", be.Name, "err", err)
return
}

changed, updateDelay := cp.setBackendState(be, peerCount, latestBlockNumber, latestBlockHash)
changed, updateDelay := cp.setBackendState(be, peerCount, inSync, latestBlockNumber, latestBlockHash)

if changed {
RecordBackendLatestBlock(be, latestBlockNumber)
RecordConsensusBackendUpdateDelay(be, updateDelay)
log.Debug("backend state updated",
"name", be.Name,
"peerCount", peerCount,
"inSync", inSync,
"latestBlockNumber", latestBlockNumber,
"latestBlockHash", latestBlockHash,
"updateDelay", updateDelay)
Expand All @@ -280,11 +279,14 @@ func (cp *ConsensusPoller) UpdateBackendGroupConsensus(ctx context.Context) {

// find the highest block, in order to use it defining the highest non-lagging ancestor block
for _, be := range cp.backendGroup.Backends {
peerCount, backendLatestBlockNumber, _, lastUpdate, _ := cp.getBackendState(be)
peerCount, inSync, backendLatestBlockNumber, _, lastUpdate, _ := cp.getBackendState(be)

if !be.skipPeerCountCheck && peerCount < cp.minPeerCount {
continue
}
if !inSync {
continue
}
if lastUpdate.Add(cp.maxUpdateThreshold).Before(time.Now()) {
continue
}
Expand All @@ -296,11 +298,14 @@ func (cp *ConsensusPoller) UpdateBackendGroupConsensus(ctx context.Context) {

// find the highest common ancestor block
for _, be := range cp.backendGroup.Backends {
peerCount, backendLatestBlockNumber, backendLatestBlockHash, lastUpdate, _ := cp.getBackendState(be)
peerCount, inSync, backendLatestBlockNumber, backendLatestBlockHash, lastUpdate, _ := cp.getBackendState(be)

if !be.skipPeerCountCheck && peerCount < cp.minPeerCount {
continue
}
if !inSync {
continue
}
if lastUpdate.Add(cp.maxUpdateThreshold).Before(time.Now()) {
continue
}
Expand Down Expand Up @@ -351,12 +356,12 @@ func (cp *ConsensusPoller) UpdateBackendGroupConsensus(ctx context.Context) {
- not lagging
*/

peerCount, latestBlockNumber, _, lastUpdate, bannedUntil := cp.getBackendState(be)
peerCount, inSync, latestBlockNumber, _, lastUpdate, bannedUntil := cp.getBackendState(be)
notUpdated := lastUpdate.Add(cp.maxUpdateThreshold).Before(time.Now())
isBanned := time.Now().Before(bannedUntil)
notEnoughPeers := !be.skipPeerCountCheck && peerCount < cp.minPeerCount
lagging := latestBlockNumber < proposedBlock
if !be.IsHealthy() || be.IsRateLimited() || !be.Online() || notUpdated || isBanned || notEnoughPeers || lagging {
if !be.IsHealthy() || be.IsRateLimited() || !be.Online() || notUpdated || isBanned || notEnoughPeers || lagging || !inSync {
filteredBackendsNames = append(filteredBackendsNames, be.Name)
continue
}
Expand Down Expand Up @@ -498,23 +503,25 @@ func (cp *ConsensusPoller) isInSync(ctx context.Context, be *Backend) (result bo
return res, nil
}

func (cp *ConsensusPoller) getBackendState(be *Backend) (peerCount uint64, blockNumber hexutil.Uint64, blockHash string, lastUpdate time.Time, bannedUntil time.Time) {
func (cp *ConsensusPoller) getBackendState(be *Backend) (peerCount uint64, inSync bool, blockNumber hexutil.Uint64, blockHash string, lastUpdate time.Time, bannedUntil time.Time) {
bs := cp.backendState[be]
defer bs.backendStateMux.Unlock()
bs.backendStateMux.Lock()
peerCount = bs.peerCount
inSync = bs.inSync
blockNumber = bs.latestBlockNumber
blockHash = bs.latestBlockHash
lastUpdate = bs.lastUpdate
bannedUntil = bs.bannedUntil
bs.backendStateMux.Unlock()
return
}

func (cp *ConsensusPoller) setBackendState(be *Backend, peerCount uint64, blockNumber hexutil.Uint64, blockHash string) (changed bool, updateDelay time.Duration) {
func (cp *ConsensusPoller) setBackendState(be *Backend, peerCount uint64, inSync bool, blockNumber hexutil.Uint64, blockHash string) (changed bool, updateDelay time.Duration) {
bs := cp.backendState[be]
bs.backendStateMux.Lock()
changed = bs.latestBlockHash != blockHash
bs.peerCount = peerCount
bs.inSync = inSync
bs.latestBlockNumber = blockNumber
bs.latestBlockHash = blockHash
updateDelay = time.Since(bs.lastUpdate)
Expand Down
3 changes: 3 additions & 0 deletions proxyd/integration_tests/consensus_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ func TestConsensus(t *testing.T) {
consensusGroup := bg.Consensus.GetConsensusGroup()

require.NotContains(t, consensusGroup, be)
require.False(t, bg.Consensus.IsBanned(be))
require.Equal(t, 1, len(consensusGroup))
})

Expand Down Expand Up @@ -132,6 +133,7 @@ func TestConsensus(t *testing.T) {
be := backend(bg, "node1")
require.NotNil(t, be)
require.NotContains(t, consensusGroup, be)
require.False(t, bg.Consensus.IsBanned(be))
require.Equal(t, 1, len(consensusGroup))
})

Expand Down Expand Up @@ -232,6 +234,7 @@ func TestConsensus(t *testing.T) {
consensusGroup := bg.Consensus.GetConsensusGroup()

require.NotContains(t, consensusGroup, be)
require.False(t, bg.Consensus.IsBanned(be))
require.Equal(t, 1, len(consensusGroup))
})

Expand Down

0 comments on commit 06933ea

Please sign in to comment.