From fccb250ff7628ad0d2a30f2aa36a4f35f542be1b Mon Sep 17 00:00:00 2001 From: KtorZ Date: Mon, 3 Aug 2020 12:50:57 +0200 Subject: [PATCH 1/2] revised SQL query returning next pool metadata to fetch There are two fundamental changes here: a) This now does a LEFT JOIN on the fetch attempts table on BOTH the metadata_hash and metadata_url. Before, we would discard metadata based solely on their metadata_hash. Now, we select on the joined table directly and keep all pool metadata with either no fetch attempts, or a retry_after that is prior to the current datetime. b) It now _sorts_ the result based on their retry_after date, the most "urgent" first. This is to cope with the arbitrary batch limit of 100 pools and make sure that all metadata are eventually fetched. Before, we could potentially retry fetching the same metadata over and over again until they expire. --- lib/core/src/Cardano/Pool/DB/Sqlite.hs | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/lib/core/src/Cardano/Pool/DB/Sqlite.hs b/lib/core/src/Cardano/Pool/DB/Sqlite.hs index c3324980489..fec41571d1f 100644 --- a/lib/core/src/Cardano/Pool/DB/Sqlite.hs +++ b/lib/core/src/Cardano/Pool/DB/Sqlite.hs @@ -268,22 +268,32 @@ newDBLayer trace fp timeInterpreter = do let fetchAttempts = tableName (DBField PoolFetchAttemptsMetadataHash) let metadata = tableName (DBField PoolMetadataHash) let query = T.unwords - [ "SELECT" - , metadataUrl, ",", metadataHash - , "FROM", registrations + [ "SELECT", "a." <> metadataUrl, ",", "a." <> metadataHash + , "FROM", registrations, "AS a" + , "LEFT JOIN", fetchAttempts, "AS b" + , "ON" + , "a." <> metadataUrl, "=", "b." <> metadataUrl, "AND" + , "a." <> metadataHash, "=", "b." <> metadataHash , "WHERE" - , metadataHash, "NOT", "IN" -- Successfully fetched metadata + -- Successfully fetched metadata + , "a." 
<> metadataHash, "NOT", "IN" , "(" , "SELECT", metadataHash , "FROM", metadata , ")" , "AND" - , metadataHash, "NOT", "IN" -- Recently failed urls + -- Discard recent failed attempts , "(" - , "SELECT", metadataHash - , "FROM", fetchAttempts - , "WHERE", retryAfter, ">=", "datetime('now')" + , retryAfter, "<", "datetime('now')" + , "OR" + , retryAfter, "IS NULL" , ")" + -- Important, since we have a limit, we order all results by + -- earliest "retry_after", so that we are sure that all + -- metadata gets _eventually_ processed. + -- + -- Note that `NULL` is smaller than everything. + , "ORDER BY", retryAfter, "ASC" , "LIMIT", nLimit , ";" ] From 5c22399b59f55b55409a44b8269fe5359296a2ed Mon Sep 17 00:00:00 2001 From: KtorZ Date: Mon, 3 Aug 2020 13:58:48 +0200 Subject: [PATCH 2/2] use 'Either InvalidUrlException' as a base monad for requestFromURI Again... errors from 'requestFromURI' are thrown via 'MonadThrow', which, in the case of 'ExceptT', throws in the base monad, which is IO in this case. --- lib/core/src/Cardano/Pool/Metadata.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/core/src/Cardano/Pool/Metadata.hs b/lib/core/src/Cardano/Pool/Metadata.hs index d4b87e56c44..e5a70d227c9 100644 --- a/lib/core/src/Cardano/Pool/Metadata.hs +++ b/lib/core/src/Cardano/Pool/Metadata.hs @@ -169,7 +169,7 @@ fetchFromRemote tr builders manager url hash = runExceptTLog $ do getChunk :: URI -> ExceptT String IO (Maybe ByteString) getChunk uri = do - req <- requestFromURI uri + req <- withExceptT show $ except $ requestFromURI uri liftIO $ traceWith tr $ MsgFetchPoolMetadata hash uri ExceptT $ handle fromIOException