Skip to content

Commit

Permalink
Improve retry logic in metadata thread
Browse files Browse the repository at this point in the history
  • Loading branch information
hasufell committed Nov 27, 2020
1 parent 7353ac9 commit f9f8825
Showing 1 changed file with 52 additions and 35 deletions.
87 changes: 52 additions & 35 deletions lib/shelley/src/Cardano/Wallet/Shelley/Pools.hs
Expand Up @@ -130,6 +130,8 @@ import Control.Monad.Trans.Except
( ExceptT (..), mapExceptT, runExceptT, throwE, withExceptT )
import Control.Monad.Trans.State
( State, evalState, state )
import Control.Retry
( RetryStatus (..), constantDelay, retrying )
import Control.Tracer
( Tracer, contramap, traceWith )
import Data.Bifunctor
Expand Down Expand Up @@ -740,44 +742,53 @@ monitorMetadata gcStatus tr sp db@(DBLayer{..}) = do
settings <- atomically readSettings
manager <- newManager defaultManagerSettings

case poolMetadataSource settings of
healthy <- case poolMetadataSource settings of
FetchSMASH uri -> do
let loop = do
let checkHealth _ = do
r <- healthCheck (Just trFetch) (unSmashServer uri) manager
case r of
(Just health)
| isHealthy health -> pure ()
_ -> do
let ms = (* 1_000_000)
let sleepTime = ms 60
threadDelay sleepTime
loop
loop
_ -> pure ()

let fetcher fetchStrategies = fetchFromRemote trFetch fetchStrategies manager
loop getPoolMetadata = forever $ do
(refs, successes) <- getPoolMetadata
when (null refs || null successes) $ do
traceWith tr $ MsgFetchTakeBreak blockFrequency
threadDelay blockFrequency

case poolMetadataSource settings of
FetchNone -> do
STM.atomically $ writeTVar gcStatus NotApplicable
loop (pure ([], [])) -- TODO: exit loop?
FetchDirect -> do
STM.atomically $ writeTVar gcStatus NotApplicable
loop (fetchThem $ fetcher [identityUrlBuilder])
FetchSMASH (unSmashServer -> uri) -> do
STM.atomically $ writeTVar gcStatus NotStarted
let getDelistedPools =
fetchDelistedPools trFetch uri manager
tid <- forkFinally
(gcDelistedPools gcStatus tr db getDelistedPools)
onExit
flip finally (killThread tid) $
loop (fetchThem $ fetcher [registryUrlBuilder uri])
(Just health) -> pure $ isHealthy health
_ -> pure False

maxRetries = 8
retryCheck RetryStatus{rsIterNumber} b
| rsIterNumber < maxRetries = pure (not b)
| otherwise = pure False

ms = (* 1_000_000)
baseSleepTime = ms 15

retrying (constantDelay baseSleepTime) retryCheck checkHealth

-- no smash configured, equivalent to healthy
_ -> pure True

if healthy
then do
let fetcher fetchStrategies = fetchFromRemote trFetch fetchStrategies manager
loop getPoolMetadata = forever $ do
(refs, successes) <- getPoolMetadata
when (null refs || null successes) $ do
traceWith tr $ MsgFetchTakeBreak blockFrequency
threadDelay blockFrequency

case poolMetadataSource settings of
FetchNone -> do
STM.atomically $ writeTVar gcStatus NotApplicable
loop (pure ([], [])) -- TODO: exit loop?
FetchDirect -> do
STM.atomically $ writeTVar gcStatus NotApplicable
loop (fetchThem $ fetcher [identityUrlBuilder])
FetchSMASH (unSmashServer -> uri) -> do
STM.atomically $ writeTVar gcStatus NotStarted
let getDelistedPools =
fetchDelistedPools trFetch uri manager
tid <- forkFinally
(gcDelistedPools gcStatus tr db getDelistedPools)
onExit
flip finally (killThread tid) $
loop (fetchThem $ fetcher [registryUrlBuilder uri])
else traceWith tr MsgSMASHUnreachable

where
trFetch = contramap MsgFetchPoolMetadata tr
Expand Down Expand Up @@ -863,6 +874,7 @@ data StakePoolLog
| MsgGCThreadKilled
| MsgGCUserInterrupt
| MsgGCUnhandledException Text
| MsgSMASHUnreachable
deriving (Show, Eq)

data PoolGarbageCollectionInfo = PoolGarbageCollectionInfo
Expand Down Expand Up @@ -895,6 +907,7 @@ instance HasSeverityAnnotation StakePoolLog where
MsgGCThreadKilled{} -> Debug
MsgGCUserInterrupt{} -> Debug
MsgGCUnhandledException{} -> Debug
MsgSMASHUnreachable{} -> Warning

instance ToText StakePoolLog where
toText = \case
Expand Down Expand Up @@ -947,3 +960,7 @@ instance ToText StakePoolLog where
MsgGCUserInterrupt -> "GC thread has exited: killed by user."
MsgGCUnhandledException err ->
"GC thread has exited unexpectedly: " <> err
MsgSMASHUnreachable -> mconcat
["The SMASH server is unreachable or unhealthy."
, "Metadata monitoring thread aborting."
]

0 comments on commit f9f8825

Please sign in to comment.