Skip to content

Commit

Permalink
p2p-governor: exp. backoff in failures of cold to warm transition
Browse files Browse the repository at this point in the history
  • Loading branch information
coot committed Mar 2, 2021
1 parent 0846170 commit 80e6ed4
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 15 deletions.
Expand Up @@ -13,6 +13,7 @@ import qualified Data.Set as Set

import Control.Concurrent.JobPool (Job(..))
import Control.Monad.Class.MonadSTM
import Control.Monad.Class.MonadTime
import Control.Exception (SomeException)

import Ouroboros.Network.PeerSelection.Types
Expand Down Expand Up @@ -102,6 +103,13 @@ belowTarget actions
availableToConnect = KnownPeers.availableToConnect knownPeers


-- | Base unit of the retry delay applied after a failed cold-to-warm
-- promotion.  The failure handler multiplies it by a power of two and
-- converts the product to 'DiffTime' with 'fromIntegral', so the value is
-- a number of seconds (the shortest delay is 5s).
baseColdPeerRetryDiffTime :: Int
baseColdPeerRetryDiffTime = 5

-- | Upper bound on the exponent used for the backoff: the delay stops
-- doubling once the exponent reaches this value, which caps it at
-- @2 ^ 5 * baseColdPeerRetryDiffTime@, i.e. 160s.
maxColdPeerRetryBackoff :: Int
maxColdPeerRetryBackoff = 5


jobPromoteColdPeer :: forall peeraddr peerconn m.
(Monad m, Ord peeraddr)
=> PeerSelectionActions peeraddr peerconn m
Expand All @@ -112,16 +120,28 @@ jobPromoteColdPeer PeerSelectionActions{peerStateActions = PeerStateActions {est
where
-- Builds the 'Completion' for an exception @e@ raised while promoting
-- @peeraddr@.  This diff hunk shows two bodies back to back: the one the
-- commit removes, followed by the one it adds.
handler :: SomeException -> Completion m peeraddr peerconn
handler e =
-- Body removed by this commit: bump the peer's fail count and drop it
-- from 'inProgressPromoteCold'; the current time is ignored.
Completion $ \st _now -> Decision {
decisionTrace = TracePromoteColdFailed peeraddr e,
decisionState = st {
knownPeers = KnownPeers.incrementFailCount
peeraddr (knownPeers st),
inProgressPromoteCold = Set.delete peeraddr
(inProgressPromoteCold st)
},
decisionJobs = []
}
-- Body added by this commit: as above, and additionally derives a retry
-- delay from the new fail count, traces it, and passes @now + delay@ to
-- 'KnownPeers.setConnectTime' for this peer.
Completion $ \st now ->
let (failCount, knownPeers') = KnownPeers.incrementFailCount
peeraddr
(knownPeers st)

-- exponential backoff: 5s, 10s, 20s, 40s, 80s, 160s.
-- The exponent is @failCount - 1@ capped at 'maxColdPeerRetryBackoff';
-- the listed sequence assumes the first failure yields a count of 1
-- (TODO confirm the counter starts at 0).
delay :: DiffTime
delay = fromIntegral $
2 ^ (pred failCount `min` maxColdPeerRetryBackoff) * baseColdPeerRetryDiffTime
in
Decision {
decisionTrace = TracePromoteColdFailed peeraddr delay e,
decisionState = st {
-- presumably the earliest time a new connection attempt is allowed;
-- 'setConnectTime' is not shown here, verify against KnownPeers.
knownPeers = KnownPeers.setConnectTime
(Set.singleton peeraddr)
(delay `addTime` now)
knownPeers',
inProgressPromoteCold = Set.delete peeraddr
(inProgressPromoteCold st)
},
decisionJobs = []
}

job :: m (Completion m peeraddr peerconn)
job = do
Expand Down
Expand Up @@ -419,7 +419,7 @@ data TracePeerSelection peeraddr =
| TraceGossipResults [(peeraddr, Either SomeException [peeraddr])] --TODO: classify failures
| TraceForgetColdPeers Int Int (Set peeraddr) -- target, actual, selected
| TracePromoteColdPeers Int Int (Set peeraddr)
| TracePromoteColdFailed peeraddr SomeException
| TracePromoteColdFailed peeraddr DiffTime SomeException
| TracePromoteColdDone peeraddr
| TracePromoteWarmPeers Int Int (Set peeraddr)
| TracePromoteWarmFailed peeraddr SomeException
Expand Down
Expand Up @@ -283,12 +283,15 @@ setCurrentTime now knownPeers@KnownPeers {
-- Adds one to 'knownPeerFailCount' of @peeraddr@ in 'allPeers'.  The hunk
-- interleaves the pre-commit and post-commit lines; they are labelled below.
incrementFailCount :: Ord peeraddr
=> peeraddr
-> KnownPeers peeraddr
-- result type before this commit: just the updated 'KnownPeers'
-> KnownPeers peeraddr
-- result type after this commit: the incremented fail count as well
-> (Int, KnownPeers peeraddr)
incrementFailCount peeraddr knownPeers@KnownPeers{allPeers} =
-- NOTE(review): if this is 'Control.Exception.assert' it is dropped when
-- assertions are disabled, leaving the membership precondition unchecked.
assert (peeraddr `Map.member` allPeers) $
-- body before this commit
knownPeers {
allPeers = Map.adjust incr peeraddr allPeers
}
-- body after this commit; @Map.update (Just . incr)@ never deletes the
-- key, so the map is changed exactly as by @Map.adjust incr@
let allPeers' = Map.update (Just . incr) peeraddr allPeers
in ( -- since the `peeraddr` is assumed to be part of `allPeers` the `Map.!`
-- is safe
knownPeerFailCount (allPeers' Map.! peeraddr)
, knownPeers { allPeers = allPeers' }
)
where
-- record update that bumps the counter by one
incr kpi = kpi { knownPeerFailCount = knownPeerFailCount kpi + 1 }

Expand Down

0 comments on commit 80e6ed4

Please sign in to comment.