From f305cf77b64dd4125da66b43f6791f053f0ca86c Mon Sep 17 00:00:00 2001 From: Alexander Block Date: Wed, 27 Feb 2019 14:10:12 +0100 Subject: [PATCH] Multiple fixes and optimizations for LLMQs and ChainLocks (#2724) * Indicate success when signing was unnecessary * Fix typo in name of LLMQ_400_60 * Move RemoveAskFor call for CLSIGs into ProcessNewChainLock In case we got INV items for the same CLSIG that we recreated through HandleNewRecoveredSig, (re-)requesting of the CLSIG from other peers becomes unnecessary. * Move Cleanup() call in CChainLocksHandler::UpdatedBlockTip up We bail out early in a few situations from this method, so that Cleanup() might not be called while its at the bottom. * Bail out from CChainLocksHandler::UpdatedBlockTip if we already got the CLSIG * Call RemoveAskFor when QFCOMMITMENT was received Otherwise we might end up re-requesting it for a very long time when the commitment INV was received shortly before it got mined. * Call RemoveSigSharesForSession when a recovered sig is received Otherwise we end up with session data in node states lingering around until a fake "timeout" occurs (can be seen in the logs). * Better handling of false-positive conflicts in CSigningManager The old code was emitting a lot of messages in logs as it treated sigs for exactly the same session as a conflict. This commit fixes this by looking at the signHash before logging. Also handle a corner-case where a recovered sig might be deleted between the HasRecoveredSigForId and GetRecoveredSigById call. * Don't run into session timeout when sig shares come in slow Instead of just tracking when the first share was received, we now also track when the last (non-duplicate) share was received. Sessios will now timeout 5 minutes after the first share arrives, or 1 minute after the last one arrived. --- src/chainparams.cpp | 2 +- src/llmq/quorums_blockprocessor.cpp | 6 +++++ src/llmq/quorums_chainlocks.cpp | 20 ++++++++++------ src/llmq/quorums_init.cpp | 2 ++ src/llmq/quorums_signing.cpp | 35 +++++++++++++++++++++------- src/llmq/quorums_signing_shares.cpp | 36 +++++++++++++++++++++++++---- src/llmq/quorums_signing_shares.h | 13 ++++++++--- 7 files changed, 90 insertions(+), 24 deletions(-) diff --git a/src/chainparams.cpp b/src/chainparams.cpp index 40a837afb0f15..2cc5aafb2f888 100644 --- a/src/chainparams.cpp +++ b/src/chainparams.cpp @@ -145,7 +145,7 @@ static Consensus::LLMQParams llmq50_60 = { static Consensus::LLMQParams llmq400_60 = { .type = Consensus::LLMQ_400_60, - .name = "llmq_400_51", + .name = "llmq_400_60", .size = 400, .minSize = 300, .threshold = 240, diff --git a/src/llmq/quorums_blockprocessor.cpp b/src/llmq/quorums_blockprocessor.cpp index bd322bcaafd2f..49f4425b2afca 100644 --- a/src/llmq/quorums_blockprocessor.cpp +++ b/src/llmq/quorums_blockprocessor.cpp @@ -31,6 +31,12 @@ void CQuorumBlockProcessor::ProcessMessage(CNode* pfrom, const std::string& strC CFinalCommitment qc; vRecv >> qc; + auto hash = ::SerializeHash(qc); + { + LOCK(cs_main); + connman.RemoveAskFor(hash); + } + if (qc.IsNull()) { LOCK(cs_main); LogPrintf("CQuorumBlockProcessor::%s -- null commitment from peer=%d\n", __func__, pfrom->id); diff --git a/src/llmq/quorums_chainlocks.cpp b/src/llmq/quorums_chainlocks.cpp index 7ab7226fffffa..51e85b8ebfd6c 100644 --- a/src/llmq/quorums_chainlocks.cpp +++ b/src/llmq/quorums_chainlocks.cpp @@ -75,17 +75,17 @@ void CChainLocksHandler::ProcessMessage(CNode* pfrom, const std::string& strComm auto hash = ::SerializeHash(clsig); - { - LOCK(cs_main); - connman.RemoveAskFor(hash); - } - ProcessNewChainLock(pfrom->id, clsig, hash); } } void CChainLocksHandler::ProcessNewChainLock(NodeId from, const llmq::CChainLockSig& clsig, const uint256& hash) { + { + LOCK(cs_main); + g_connman->RemoveAskFor(hash); + } + { LOCK(cs); if (!seenChainLocks.emplace(hash, GetTimeMillis()).second) { @@ -188,6 +188,8 @@ void CChainLocksHandler::UpdatedBlockTip(const CBlockIndex* pindexNew, const CBl return; } + Cleanup(); + // DIP8 defines a process called "Signing attempts" which should run before the CLSIG is finalized // To simplify the initial implementation, we skip this process and directly try to create a CLSIG // This will fail when multiple blocks compete, but we accept this for the initial implementation. @@ -199,6 +201,12 @@ void CChainLocksHandler::UpdatedBlockTip(const CBlockIndex* pindexNew, const CBl { LOCK(cs); + if (bestChainLockBlockIndex == pindexNew) { + // we first got the CLSIG, then the header, and then the block was connected. + // In this case there is no need to continue here. + return; + } + if (InternalHasConflictingChainLock(pindexNew->nHeight, pindexNew->GetBlockHash())) { if (!inEnforceBestChainLock) { // we accepted this block when there was no lock yet, but now a conflicting lock appeared. Invalidate it. @@ -224,8 +232,6 @@ void CChainLocksHandler::UpdatedBlockTip(const CBlockIndex* pindexNew, const CBl } quorumSigningManager->AsyncSignIfMember(Params().GetConsensus().llmqChainLocks, requestId, msgHash); - - Cleanup(); } // WARNING: cs_main and cs should not be held! diff --git a/src/llmq/quorums_init.cpp b/src/llmq/quorums_init.cpp index 5b3c4aaceb196..ec03b46403e9b 100644 --- a/src/llmq/quorums_init.cpp +++ b/src/llmq/quorums_init.cpp @@ -58,6 +58,7 @@ void StartLLMQSystem() quorumDKGSessionManager->StartMessageHandlerPool(); } if (quorumSigSharesManager) { + quorumSigSharesManager->RegisterAsRecoveredSigsListener(); quorumSigSharesManager->StartWorkerThread(); } if (chainLocksHandler) { @@ -72,6 +73,7 @@ void StopLLMQSystem() } if (quorumSigSharesManager) { quorumSigSharesManager->StopWorkerThread(); + quorumSigSharesManager->UnregisterAsRecoveredSigsListener(); } if (quorumDKGSessionManager) { quorumDKGSessionManager->StopMessageHandlerPool(); diff --git a/src/llmq/quorums_signing.cpp b/src/llmq/quorums_signing.cpp index b8a364c54edda..acd7fef165644 100644 --- a/src/llmq/quorums_signing.cpp +++ b/src/llmq/quorums_signing.cpp @@ -500,11 +500,25 @@ void CSigningManager::ProcessRecoveredSig(NodeId nodeId, const CRecoveredSig& re signHash.ToString(), recoveredSig.id.ToString(), recoveredSig.msgHash.ToString(), nodeId); if (db.HasRecoveredSigForId(llmqType, recoveredSig.id)) { - // this should really not happen, as each masternode is participating in only one vote, - // even if it's a member of multiple quorums. so a majority is only possible on one quorum and one msgHash per id - LogPrintf("CSigningManager::%s -- conflicting recoveredSig for id=%s, msgHash=%s\n", __func__, - recoveredSig.id.ToString(), recoveredSig.msgHash.ToString()); - return; + CRecoveredSig otherRecoveredSig; + if (db.GetRecoveredSigById(llmqType, recoveredSig.id, otherRecoveredSig)) { + auto otherSignHash = CLLMQUtils::BuildSignHash(recoveredSig); + if (signHash != otherSignHash) { + // this should really not happen, as each masternode is participating in only one vote, + // even if it's a member of multiple quorums. so a majority is only possible on one quorum and one msgHash per id + LogPrintf("CSigningManager::%s -- conflicting recoveredSig for signHash=%s, id=%s, msgHash=%s, otherSignHash=%s\n", __func__, + signHash.ToString(), recoveredSig.id.ToString(), recoveredSig.msgHash.ToString(), otherSignHash.ToString()); + } else { + // Looks like we're trying to process a recSig that is already known. This might happen if the same + // recSig comes in through regular QRECSIG messages and at the same time through some other message + // which allowed to reconstruct a recSig (e.g. IXLOCK). In this case, just bail out. + } + return; + } else { + // This case is very unlikely. It can only happen when cleanup caused this specific recSig to vanish + // between the HasRecoveredSigForId and GetRecoveredSigById call. If that happens, treat it as if we + // never had that recSig + } } db.WriteRecoveredSig(recoveredSig); @@ -559,14 +573,19 @@ bool CSigningManager::AsyncSignIfMember(Consensus::LLMQType llmqType, const uint if (db.HasVotedOnId(llmqType, id)) { uint256 prevMsgHash; db.GetVoteForId(llmqType, id, prevMsgHash); - LogPrintf("CSigningManager::%s -- already voted for id=%s and msgHash=%s. Not voting on conflicting msgHash=%s\n", __func__, - id.ToString(), prevMsgHash.ToString(), msgHash.ToString()); + if (msgHash != prevMsgHash) { + LogPrintf("CSigningManager::%s -- already voted for id=%s and msgHash=%s. Not voting on conflicting msgHash=%s\n", __func__, + id.ToString(), prevMsgHash.ToString(), msgHash.ToString()); + } else { + LogPrintf("CSigningManager::%s -- already voted for id=%s and msgHash=%s. Not voting again.\n", __func__, + id.ToString(), prevMsgHash.ToString()); + } return false; } if (db.HasRecoveredSigForId(llmqType, id)) { // no need to sign it if we already have a recovered sig - return false; + return true; } db.WriteVoteForId(llmqType, id, msgHash); } diff --git a/src/llmq/quorums_signing_shares.cpp b/src/llmq/quorums_signing_shares.cpp index 183a28ef7d7d3..b11bf3e202dea 100644 --- a/src/llmq/quorums_signing_shares.cpp +++ b/src/llmq/quorums_signing_shares.cpp @@ -179,6 +179,16 @@ void CSigSharesManager::StopWorkerThread() } } +void CSigSharesManager::RegisterAsRecoveredSigsListener() +{ + quorumSigningManager->RegisterRecoveredSigsListener(this); +} + +void CSigSharesManager::UnregisterAsRecoveredSigsListener() +{ + quorumSigningManager->UnregisterRecoveredSigsListener(this); +} + void CSigSharesManager::InterruptWorkerThread() { workInterrupt(); @@ -557,7 +567,16 @@ void CSigSharesManager::ProcessSigShare(NodeId nodeId, const CSigShare& sigShare } sigSharesToAnnounce.Add(sigShare.GetKey(), true); - firstSeenForSessions.emplace(sigShare.GetSignHash(), GetTimeMillis()); + + auto it = timeSeenForSessions.find(sigShare.GetSignHash()); + if (it == timeSeenForSessions.end()) { + auto t = GetTimeMillis(); + // insert first-seen and last-seen time + timeSeenForSessions.emplace(sigShare.GetSignHash(), std::make_pair(t, t)); + } else { + // update last-seen time + it->second.second = GetTimeMillis(); + } if (!quorumNodes.empty()) { // don't announce and wait for other nodes to request this share and directly send it to them @@ -958,11 +977,12 @@ void CSigSharesManager::Cleanup() // Remove sessions which timed out std::unordered_set timeoutSessions; - for (auto& p : firstSeenForSessions) { + for (auto& p : timeSeenForSessions) { auto& signHash = p.first; - int64_t time = p.second; + int64_t firstSeenTime = p.second.first; + int64_t lastSeenTime = p.second.second; - if (now - time >= SIGNING_SESSION_TIMEOUT) { + if (now - firstSeenTime >= SESSION_TOTAL_TIMEOUT || now - lastSeenTime >= SESSION_NEW_SHARES_TIMEOUT) { timeoutSessions.emplace(signHash); } } @@ -1044,7 +1064,7 @@ void CSigSharesManager::RemoveSigSharesForSession(const uint256& signHash) sigSharesRequested.EraseAllForSignHash(signHash); sigSharesToAnnounce.EraseAllForSignHash(signHash); sigShares.EraseAllForSignHash(signHash); - firstSeenForSessions.erase(signHash); + timeSeenForSessions.erase(signHash); } void CSigSharesManager::RemoveBannedNodeStates() @@ -1192,4 +1212,10 @@ void CSigSharesManager::Sign(const CQuorumCPtr& quorum, const uint256& id, const ProcessSigShare(-1, sigShare, *g_connman, quorum); } +void CSigSharesManager::HandleNewRecoveredSig(const llmq::CRecoveredSig& recoveredSig) +{ + LOCK(cs); + RemoveSigSharesForSession(CLLMQUtils::BuildSignHash(recoveredSig)); +} + } diff --git a/src/llmq/quorums_signing_shares.h b/src/llmq/quorums_signing_shares.h index ecbd9d0c517c9..c843a98901a3a 100644 --- a/src/llmq/quorums_signing_shares.h +++ b/src/llmq/quorums_signing_shares.h @@ -332,9 +332,10 @@ class CSigSharesNodeState void RemoveSession(const uint256& signHash); }; -class CSigSharesManager +class CSigSharesManager : public CRecoveredSigsListener { - static const int64_t SIGNING_SESSION_TIMEOUT = 60 * 1000; + static const int64_t SESSION_NEW_SHARES_TIMEOUT = 60 * 1000; + static const int64_t SESSION_TOTAL_TIMEOUT = 5 * 60 * 1000; static const int64_t SIG_SHARE_REQUEST_TIMEOUT = 5 * 1000; private: @@ -344,7 +345,9 @@ class CSigSharesManager CThreadInterrupt workInterrupt; SigShareMap sigShares; - std::unordered_map firstSeenForSessions; + + // stores time of first and last receivedSigShare. Used to detect timeouts + std::unordered_map> timeSeenForSessions; std::unordered_map nodeStates; SigShareMap> sigSharesRequested; @@ -363,6 +366,8 @@ class CSigSharesManager void StartWorkerThread(); void StopWorkerThread(); + void RegisterAsRecoveredSigsListener(); + void UnregisterAsRecoveredSigsListener(); void InterruptWorkerThread(); public: @@ -371,6 +376,8 @@ class CSigSharesManager void AsyncSign(const CQuorumCPtr& quorum, const uint256& id, const uint256& msgHash); void Sign(const CQuorumCPtr& quorum, const uint256& id, const uint256& msgHash); + void HandleNewRecoveredSig(const CRecoveredSig& recoveredSig); + private: void ProcessMessageSigSharesInv(CNode* pfrom, const CSigSharesInv& inv, CConnman& connman); void ProcessMessageGetSigShares(CNode* pfrom, const CSigSharesInv& inv, CConnman& connman);