Skip to content

Commit

Permalink
p2p-governor: exp. backoff in failures of cold to warm transition
Browse files Browse the repository at this point in the history
  • Loading branch information
coot committed Jan 28, 2021
1 parent 593ca63 commit 21ea99f
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 15 deletions.
Expand Up @@ -13,6 +13,7 @@ import qualified Data.Set as Set

import Control.Concurrent.JobPool (Job(..))
import Control.Monad.Class.MonadSTM
import Control.Monad.Class.MonadTime
import Control.Exception (SomeException)

import Ouroboros.Network.PeerSelection.Types
Expand Down Expand Up @@ -102,6 +103,13 @@ belowTarget actions
availableToConnect = KnownPeers.availableToConnect knownPeers


-- | Base delay, in seconds, of the backoff applied after a failed cold peer
-- promotion.  It is the delay used after the first failure and is multiplied
-- by a power of two for later failures.
baseColdPeerRetryDiffTime :: Int
baseColdPeerRetryDiffTime = 5

-- | Upper bound on the exponent of the cold peer retry backoff: the delay
-- stops growing at @2 ^ maxColdPeerRetryBackoff * baseColdPeerRetryDiffTime@
-- (160 seconds with the current values).
maxColdPeerRetryBackoff :: Int
maxColdPeerRetryBackoff = 5


jobPromoteColdPeer :: forall peeraddr peerconn m.
(Monad m, Ord peeraddr)
=> PeerSelectionActions peeraddr peerconn m
Expand All @@ -112,16 +120,28 @@ jobPromoteColdPeer PeerSelectionActions{peerStateActions = PeerStateActions {est
where
-- Completion for a failed cold -> warm promotion attempt; @e@ is the exception
-- reported in the 'TracePromoteColdFailed' trace.  It increments the peer's
-- fail count and removes the peer from 'inProgressPromoteCold'.
handler :: SomeException -> Completion m peeraddr peerconn
handler e =
Completion $ \st _now -> Decision {
decisionTrace = TracePromoteColdFailed peeraddr e,
decisionState = st {
knownPeers = KnownPeers.incrementFailCount
peeraddr (knownPeers st),
inProgressPromoteCold = Set.delete peeraddr
(inProgressPromoteCold st)
},
decisionJobs = []
}
Completion $ \st now ->
-- The delay sequence below relies on 'failCount' already including this
-- failure (i.e. being >= 1), so that the first failure gives exponent 0.
let (failCount, knownPeers') = KnownPeers.incrementFailCount
peeraddr
(knownPeers st)

-- exponential backoff: 5s, 10s, 20s, 40s, 80s, 160s.
-- The exponent is @failCount - 1@ capped at 'maxColdPeerRetryBackoff';
-- @^@ binds tighter than @*@, so the value is
-- @(2 ^ exponent) * baseColdPeerRetryDiffTime@ seconds, converted to
-- 'DiffTime' by 'fromIntegral'.
delay :: DiffTime
delay = fromIntegral $
2 ^ (pred failCount `min` maxColdPeerRetryBackoff) * baseColdPeerRetryDiffTime
in
Decision {
-- the computed delay is included in the trace
decisionTrace = TracePromoteColdFailed peeraddr delay e,
decisionState = st {
-- NOTE(review): presumably this is the earliest time at which the peer
-- may be selected for connection again -- confirm against
-- 'KnownPeers.setConnectTime'.
knownPeers = KnownPeers.setConnectTime
(Set.singleton peeraddr)
(delay `addTime` now)
knownPeers',
inProgressPromoteCold = Set.delete peeraddr
(inProgressPromoteCold st)
},
decisionJobs = []
}

job :: m (Completion m peeraddr peerconn)
job = do
Expand Down
Expand Up @@ -419,7 +419,7 @@ data TracePeerSelection peeraddr =
| TraceGossipResults [(peeraddr, Either SomeException [peeraddr])] --TODO: classify failures
| TraceForgetColdPeers Int Int (Set peeraddr) -- target, actual, selected
| TracePromoteColdPeers Int Int (Set peeraddr)
| TracePromoteColdFailed peeraddr SomeException
| TracePromoteColdFailed peeraddr DiffTime SomeException
| TracePromoteColdDone peeraddr
| TracePromoteWarmPeers Int Int (Set peeraddr)
| TracePromoteWarmFailed peeraddr SomeException
Expand Down
Expand Up @@ -36,6 +36,7 @@ import qualified Data.Set as Set
import Data.Set (Set)
import qualified Data.Map.Strict as Map
import Data.Map.Strict (Map)
import Data.Maybe (fromJust)
import Data.Semigroup (Min (..))
import qualified Data.OrdPSQ as PSQ
import Data.OrdPSQ (OrdPSQ)
Expand Down Expand Up @@ -283,12 +284,15 @@ setCurrentTime now knownPeers@KnownPeers {
-- Increment the fail count of a known peer.  With the tuple result type, the
-- peer's fail count is returned together with the updated 'KnownPeers'.
incrementFailCount :: Ord peeraddr
=> peeraddr
-> KnownPeers peeraddr
-> KnownPeers peeraddr
-> (Int, KnownPeers peeraddr)
incrementFailCount peeraddr knownPeers@KnownPeers{allPeers} =
-- precondition: the peer must already be present in 'allPeers'
assert (peeraddr `Map.member` allPeers) $
knownPeers {
allPeers = Map.adjust incr peeraddr allPeers
}
-- 'Map.updateLookupWithKey' ("Data.Map.Strict") returns the updated value
-- when the entry is kept, so the count returned below is the one after the
-- increment.
case Map.updateLookupWithKey (\_ -> Just . incr) peeraddr allPeers of
(mbPeerInfo, allPeers') ->
-- `fromJust` relies on `peeraddr` being a member of `allPeers`, which is
-- the precondition checked by the `assert` above.
-- NOTE(review): if assertions are compiled out and the precondition is
-- violated, `fromJust` will throw when the count is forced.
(knownPeerFailCount (fromJust mbPeerInfo),
knownPeers { allPeers = allPeers' })
where
-- add one to the peer's fail count, leaving the other fields untouched
incr kpi = kpi { knownPeerFailCount = knownPeerFailCount kpi + 1 }

Expand Down

0 comments on commit 21ea99f

Please sign in to comment.