Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[p2p] Add Peer struct for per-peer data in net processing #19607

Merged
merged 4 commits into from
Aug 28, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
160 changes: 98 additions & 62 deletions src/net_processing.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -276,12 +276,6 @@ struct CNodeState {
const CService address;
//! Whether we have a fully established connection.
bool fCurrentlyConnected;
//! Accumulated misbehaviour score for this peer.
int nMisbehavior;
//! Whether this peer should be disconnected and marked as discouraged (unless it has the noban permission).
bool m_should_discourage;
//! String name of this peer (debugging/logging purposes).
const std::string name;
jnewbery marked this conversation as resolved.
Show resolved Hide resolved
//! The best known block we know this peer has announced.
const CBlockIndex *pindexBestKnownBlock;
//! The hash of the last unknown block this peer has announced.
Expand Down Expand Up @@ -430,13 +424,10 @@ struct CNodeState {
//! Whether this peer relays txs via wtxid
bool m_wtxid_relay{false};

CNodeState(CAddress addrIn, std::string addrNameIn, bool is_inbound, bool is_manual) :
address(addrIn), name(std::move(addrNameIn)), m_is_inbound(is_inbound),
m_is_manual_connection (is_manual)
CNodeState(CAddress addrIn, bool is_inbound, bool is_manual)
: address(addrIn), m_is_inbound(is_inbound), m_is_manual_connection(is_manual)
{
fCurrentlyConnected = false;
nMisbehavior = 0;
m_should_discourage = false;
pindexBestKnownBlock = nullptr;
hashLastUnknownBlock.SetNull();
pindexLastCommonBlock = nullptr;
Expand Down Expand Up @@ -474,6 +465,50 @@ static CNodeState *State(NodeId pnode) EXCLUSIVE_LOCKS_REQUIRED(cs_main) {
return &it->second;
}

/**
* Data structure for an individual peer. This struct is not protected by
* cs_main since it does not contain validation-critical data.
*
* Memory is owned by shared pointers and this object is destructed when
* the refcount drops to zero.
*
* TODO: move most members from CNodeState to this structure.
* TODO: move remaining application-layer data members from CNode to this structure.
*/
struct Peer {
/** Same id as the CNode object for this peer */
const NodeId m_id{0};

/** Protects misbehavior data members */
jnewbery marked this conversation as resolved.
Show resolved Hide resolved
Mutex m_misbehavior_mutex;
jnewbery marked this conversation as resolved.
Show resolved Hide resolved
/** Accumulated misbehavior score for this peer */
int m_misbehavior_score GUARDED_BY(m_misbehavior_mutex){0};
/** Whether this peer should be disconnected and marked as discouraged (unless it has the noban permission). */
bool m_should_discourage GUARDED_BY(m_misbehavior_mutex){false};

Peer(NodeId id) : m_id(id) {}
};

using PeerRef = std::shared_ptr<Peer>;

/**
* Map of all Peer objects, keyed by peer id. This map is protected
* by the global g_peer_mutex. Once a shared pointer reference is
* taken, the lock may be released. Individual fields are protected by
* their own locks.
*/
/** Guards g_peer_map. Given internal linkage, like the map it protects,
 *  so that neither symbol is visible outside this translation unit. */
static Mutex g_peer_mutex;
static std::map<NodeId, PeerRef> g_peer_map GUARDED_BY(g_peer_mutex);
jnewbery marked this conversation as resolved.
Show resolved Hide resolved

/** Get a shared pointer to the Peer object.
* May return nullptr if the Peer object can't be found. */
static PeerRef GetPeerRef(NodeId id)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So even apart from not using cs_main, there's another design change happening here in how we use these objects, when compared with CNodeState -- no lock on the map is maintained in order to have a shared_ptr to the peer object.

That means that our code needs to be able to handle having the entry in the map erased out from under it, correct? That might be worth mentioning somewhere as a design consideration for future code.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, that's possible. Do you think the comment on the map needs expanding? It currently says "Once a shared pointer reference is taken, the lock may be released. Individual fields are protected by their own locks."

{
LOCK(g_peer_mutex);
auto it = g_peer_map.find(id);
return it != g_peer_map.end() ? it->second : nullptr;
}

static void UpdatePreferredDownload(const CNode& node, CNodeState* state) EXCLUSIVE_LOCKS_REQUIRED(cs_main)
{
nPreferredDownload -= state->fPreferredDownload;
Expand Down Expand Up @@ -839,7 +874,12 @@ void PeerLogicValidation::InitializeNode(CNode *pnode) {
NodeId nodeid = pnode->GetId();
{
LOCK(cs_main);
mapNodeState.emplace_hint(mapNodeState.end(), std::piecewise_construct, std::forward_as_tuple(nodeid), std::forward_as_tuple(addr, std::move(addrName), pnode->IsInboundConn(), pnode->IsManualConn()));
mapNodeState.emplace_hint(mapNodeState.end(), std::piecewise_construct, std::forward_as_tuple(nodeid), std::forward_as_tuple(addr, pnode->IsInboundConn(), pnode->IsManualConn()));
}
{
PeerRef peer = std::make_shared<Peer>(nodeid);
LOCK(g_peer_mutex);
g_peer_map.emplace_hint(g_peer_map.end(), nodeid, std::move(peer));
}
if(!pnode->IsInboundConn())
PushNodeVersion(*pnode, *connman, GetTime());
Expand Down Expand Up @@ -868,13 +908,21 @@ void PeerLogicValidation::ReattemptInitialBroadcast(CScheduler& scheduler) const
void PeerLogicValidation::FinalizeNode(NodeId nodeid, bool& fUpdateConnectionTime) {
fUpdateConnectionTime = false;
LOCK(cs_main);
int misbehavior{0};
{
PeerRef peer = GetPeerRef(nodeid);
jnewbery marked this conversation as resolved.
Show resolved Hide resolved
assert(peer != nullptr);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is mostly a question for future stuff that gets moved in but... What if the refcount is more than 1 here? Do we continue tearing it down?

As a contrived (bad) example: what if someone bumps the misbehavior score after it's been evaluated here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We'll remove the Peer shared ptr from g_peer_map, but we don't destruct the object until the refcount drops to 0. Your contrived example is fine. There would have to be another thread currently in Misbehaving() holding a shared ptr to the Peer object. We'd remove it from the map, the other thread would increment the misbehaving score, and then the Peer object would be destructed when that thread left the Misbehaving() function.

I've taken a look at the future commits in https://github.com/jnewbery/bitcoin/tree/2020-06-cs-main-split and I can't see any problems with this style of teardown.

Copy link
Member

@theuni theuni Aug 13, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My point here was that calls to Misbehaving() might go unobserved, because the score may have already been evaluated before the other thread bumped it. This is not possible with the current behavior because of cs_main: once we're in FinalizeNode, we're guaranteed that no other thread is accessing it (unless they've cached the CNodeState*, which would be a bug).

Obviously we don't have any threads doing that currently, so I'm wondering if we want to continue to enforce that invariant with a:

// This isn't a guarantee but it's close enough
assert(peer.use_count() <= 1);

I'm not too bothered if there's a rare misbehaving bump that gets missed. My concern is that Peer's destructor may eventually gain some functionality, and we'll want to know if we need to keep track of which thread deletes it.

Edit: Trying my point one more time. As I see it, this PR removes the guarantee that every call into Misbehaving() will be reflected upon evaluation in FinalizeNode(). That's not a huge deal, but it should at least be noted in case more important guarantees are removed by future moves into Peer.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks @theuni. This is a very good point. I'll add a comment about not relying on this being the final state of Peer in a future commit.

misbehavior = WITH_LOCK(peer->m_misbehavior_mutex, return peer->m_misbehavior_score);
LOCK(g_peer_mutex);
g_peer_map.erase(nodeid);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this and mapNodeState.erase(nodeid) happen at the same time, under the same cs_main lock?

What are the consequences of them being out of sync? (Still reachable by State() but not by GetPeerRef())

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No bad consequences, but I've moved the Peer cleanup underneath the cs_main lock anyway to make this simpler to think about.

}
CNodeState *state = State(nodeid);
assert(state != nullptr);

if (state->fSyncStarted)
nSyncStarted--;

if (state->nMisbehavior == 0 && state->fCurrentlyConnected) {
if (misbehavior == 0 && state->fCurrentlyConnected) {
fUpdateConnectionTime = true;
}

Expand Down Expand Up @@ -904,17 +952,23 @@ void PeerLogicValidation::FinalizeNode(NodeId nodeid, bool& fUpdateConnectionTim
}

bool GetNodeStateStats(NodeId nodeid, CNodeStateStats &stats) {
LOCK(cs_main);
CNodeState *state = State(nodeid);
if (state == nullptr)
return false;
stats.nMisbehavior = state->nMisbehavior;
stats.nSyncHeight = state->pindexBestKnownBlock ? state->pindexBestKnownBlock->nHeight : -1;
stats.nCommonHeight = state->pindexLastCommonBlock ? state->pindexLastCommonBlock->nHeight : -1;
for (const QueuedBlock& queue : state->vBlocksInFlight) {
if (queue.pindex)
stats.vHeightInFlight.push_back(queue.pindex->nHeight);
{
LOCK(cs_main);
CNodeState* state = State(nodeid);
if (state == nullptr)
return false;
stats.nSyncHeight = state->pindexBestKnownBlock ? state->pindexBestKnownBlock->nHeight : -1;
stats.nCommonHeight = state->pindexLastCommonBlock ? state->pindexLastCommonBlock->nHeight : -1;
for (const QueuedBlock& queue : state->vBlocksInFlight) {
if (queue.pindex)
stats.vHeightInFlight.push_back(queue.pindex->nHeight);
}
}

PeerRef peer = GetPeerRef(nodeid);
if (peer == nullptr) return false;
stats.m_misbehavior_score = WITH_LOCK(peer->m_misbehavior_mutex, return peer->m_misbehavior_score);

return true;
}

Expand Down Expand Up @@ -1058,21 +1112,21 @@ unsigned int LimitOrphanTxSize(unsigned int nMaxOrphans)
* Increment peer's misbehavior score. If the new value >= DISCOURAGEMENT_THRESHOLD, mark the node
* to be discouraged, meaning the peer might be disconnected and added to the discouragement filter.
*/
void Misbehaving(const NodeId pnode, const int howmuch, const std::string& message) EXCLUSIVE_LOCKS_REQUIRED(cs_main)
void Misbehaving(const NodeId pnode, const int howmuch, const std::string& message)
{
assert(howmuch > 0);

CNodeState* const state = State(pnode);
if (state == nullptr) return;
PeerRef peer = GetPeerRef(pnode);
if (peer == nullptr) return;

state->nMisbehavior += howmuch;
LOCK(peer->m_misbehavior_mutex);
peer->m_misbehavior_score += howmuch;
const std::string message_prefixed = message.empty() ? "" : (": " + message);
if (state->nMisbehavior >= DISCOURAGEMENT_THRESHOLD && state->nMisbehavior - howmuch < DISCOURAGEMENT_THRESHOLD)
{
LogPrint(BCLog::NET, "Misbehaving: peer=%d (%d -> %d) DISCOURAGE THRESHOLD EXCEEDED%s\n", pnode, state->nMisbehavior - howmuch, state->nMisbehavior, message_prefixed);
state->m_should_discourage = true;
if (peer->m_misbehavior_score >= DISCOURAGEMENT_THRESHOLD && peer->m_misbehavior_score - howmuch < DISCOURAGEMENT_THRESHOLD) {
LogPrint(BCLog::NET, "Misbehaving: peer=%d (%d -> %d) DISCOURAGE THRESHOLD EXCEEDED%s\n", pnode, peer->m_misbehavior_score - howmuch, peer->m_misbehavior_score, message_prefixed);
peer->m_should_discourage = true;
} else {
LogPrint(BCLog::NET, "Misbehaving: peer=%d (%d -> %d)%s\n", pnode, state->nMisbehavior - howmuch, state->nMisbehavior, message_prefixed);
LogPrint(BCLog::NET, "Misbehaving: peer=%d (%d -> %d)%s\n", pnode, peer->m_misbehavior_score - howmuch, peer->m_misbehavior_score, message_prefixed);
}
}

Expand All @@ -1094,7 +1148,6 @@ static bool MaybePunishNodeForBlock(NodeId nodeid, const BlockValidationState& s
case BlockValidationResult::BLOCK_CONSENSUS:
case BlockValidationResult::BLOCK_MUTATED:
if (!via_compact_block) {
LOCK(cs_main);
Misbehaving(nodeid, 100, message);
return true;
}
Expand All @@ -1118,18 +1171,12 @@ static bool MaybePunishNodeForBlock(NodeId nodeid, const BlockValidationState& s
case BlockValidationResult::BLOCK_INVALID_HEADER:
case BlockValidationResult::BLOCK_CHECKPOINT:
case BlockValidationResult::BLOCK_INVALID_PREV:
{
LOCK(cs_main);
Misbehaving(nodeid, 100, message);
}
Misbehaving(nodeid, 100, message);
return true;
// Conflicting (but not necessarily invalid) data or different policy:
case BlockValidationResult::BLOCK_MISSING_PREV:
{
// TODO: Handle this much more gracefully (10 DoS points is super arbitrary)
LOCK(cs_main);
Misbehaving(nodeid, 10, message);
}
// TODO: Handle this much more gracefully (10 DoS points is super arbitrary)
Misbehaving(nodeid, 10, message);
return true;
case BlockValidationResult::BLOCK_RECENT_CONSENSUS_CHANGE:
case BlockValidationResult::BLOCK_TIME_FUTURE:
Expand All @@ -1153,11 +1200,8 @@ static bool MaybePunishNodeForTx(NodeId nodeid, const TxValidationState& state,
break;
// The node is providing invalid data:
case TxValidationResult::TX_CONSENSUS:
{
LOCK(cs_main);
Misbehaving(nodeid, 100, message);
return true;
}
Misbehaving(nodeid, 100, message);
return true;
// Conflicting (but not necessarily invalid) data or different policy:
case TxValidationResult::TX_RECENT_CONSENSUS_CHANGE:
case TxValidationResult::TX_INPUTS_NOT_STANDARD:
Expand Down Expand Up @@ -1804,7 +1848,6 @@ inline void static SendBlockTransactions(const CBlock& block, const BlockTransac
BlockTransactions resp(req);
for (size_t i = 0; i < req.indexes.size(); i++) {
if (req.indexes[i] >= block.vtx.size()) {
LOCK(cs_main);
Misbehaving(pfrom.GetId(), 100, "getblocktxn with out-of-bounds tx indices");
return;
}
Expand Down Expand Up @@ -2325,7 +2368,6 @@ void ProcessMessage(
// Each connection can only send one version message
if (pfrom.nVersion != 0)
{
LOCK(cs_main);
Misbehaving(pfrom.GetId(), 1, "redundant version message");
return;
}
Expand Down Expand Up @@ -2485,7 +2527,6 @@ void ProcessMessage(

if (pfrom.nVersion == 0) {
// Must have a version message before anything else
LOCK(cs_main);
Misbehaving(pfrom.GetId(), 1, "non-version message before version handshake");
return;
}
Expand Down Expand Up @@ -2552,7 +2593,6 @@ void ProcessMessage(

if (!pfrom.fSuccessfullyConnected) {
// Must have a verack message before anything else
LOCK(cs_main);
Misbehaving(pfrom.GetId(), 1, "non-verack message before version handshake");
return;
}
Expand All @@ -2566,7 +2606,6 @@ void ProcessMessage(
}
if (vAddr.size() > MAX_ADDR_TO_SEND)
{
LOCK(cs_main);
Misbehaving(pfrom.GetId(), 20, strprintf("addr message size = %u", vAddr.size()));
return;
}
Expand Down Expand Up @@ -2645,7 +2684,6 @@ void ProcessMessage(
vRecv >> vInv;
if (vInv.size() > MAX_INV_SZ)
{
LOCK(cs_main);
Misbehaving(pfrom.GetId(), 20, strprintf("inv message size = %u", vInv.size()));
return;
}
Expand Down Expand Up @@ -2721,7 +2759,6 @@ void ProcessMessage(
vRecv >> vInv;
if (vInv.size() > MAX_INV_SZ)
{
LOCK(cs_main);
Misbehaving(pfrom.GetId(), 20, strprintf("getdata message size = %u", vInv.size()));
return;
}
Expand Down Expand Up @@ -3446,7 +3483,6 @@ void ProcessMessage(
// Bypass the normal CBlock deserialization, as we don't want to risk deserializing 2000 full blocks.
unsigned int nCount = ReadCompactSize(vRecv);
if (nCount > MAX_HEADERS_RESULTS) {
LOCK(cs_main);
Misbehaving(pfrom.GetId(), 20, strprintf("headers message size = %u", nCount));
return;
}
Expand Down Expand Up @@ -3648,7 +3684,6 @@ void ProcessMessage(
if (!filter.IsWithinSizeConstraints())
{
// There is no excuse for sending a too-large filter
LOCK(cs_main);
Misbehaving(pfrom.GetId(), 100, "too-large bloom filter");
}
else if (pfrom.m_tx_relay != nullptr)
Expand Down Expand Up @@ -3682,7 +3717,6 @@ void ProcessMessage(
}
}
if (bad) {
LOCK(cs_main);
Misbehaving(pfrom.GetId(), 100, "bad filteradd message");
}
return;
Expand Down Expand Up @@ -3768,15 +3802,17 @@ void ProcessMessage(
bool PeerLogicValidation::MaybeDiscourageAndDisconnect(CNode& pnode)
{
const NodeId peer_id{pnode.GetId()};
PeerRef peer = GetPeerRef(peer_id);
jnewbery marked this conversation as resolved.
Show resolved Hide resolved
if (peer == nullptr) return false;

{
LOCK(cs_main);
CNodeState& state = *State(peer_id);
LOCK(peer->m_misbehavior_mutex);

// There's nothing to do if the m_should_discourage flag isn't set
if (!state.m_should_discourage) return false;
if (!peer->m_should_discourage) return false;

state.m_should_discourage = false;
} // cs_main
peer->m_should_discourage = false;
} // peer.m_misbehavior_mutex

if (pnode.HasPermission(PF_NOBAN)) {
// We never disconnect or discourage peers for bad behavior if they have the NOBAN permission flag
Expand Down
2 changes: 1 addition & 1 deletion src/net_processing.h
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ class PeerLogicValidation final : public CValidationInterface, public NetEventsI
};

struct CNodeStateStats {
int nMisbehavior = 0;
int m_misbehavior_score = 0;
int nSyncHeight = -1;
int nCommonHeight = -1;
std::vector<int> vHeightInFlight;
Expand Down
2 changes: 1 addition & 1 deletion src/rpc/net.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ static UniValue getpeerinfo(const JSONRPCRequest& request)
if (fStateStats) {
if (IsDeprecatedRPCEnabled("banscore")) {
// banscore is deprecated in v0.21 for removal in v0.22
obj.pushKV("banscore", statestats.nMisbehavior);
obj.pushKV("banscore", statestats.m_misbehavior_score);
}
obj.pushKV("synced_headers", statestats.nSyncHeight);
obj.pushKV("synced_blocks", statestats.nCommonHeight);
Expand Down
20 changes: 4 additions & 16 deletions src/test/denialofservice_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -232,10 +232,7 @@ BOOST_AUTO_TEST_CASE(peer_discouragement)
peerLogic->InitializeNode(&dummyNode1);
dummyNode1.nVersion = 1;
dummyNode1.fSuccessfullyConnected = true;
{
LOCK(cs_main);
Misbehaving(dummyNode1.GetId(), DISCOURAGEMENT_THRESHOLD); // Should be discouraged
}
Misbehaving(dummyNode1.GetId(), DISCOURAGEMENT_THRESHOLD); // Should be discouraged
{
LOCK(dummyNode1.cs_sendProcessing);
BOOST_CHECK(peerLogic->SendMessages(&dummyNode1));
Expand All @@ -249,20 +246,14 @@ BOOST_AUTO_TEST_CASE(peer_discouragement)
peerLogic->InitializeNode(&dummyNode2);
dummyNode2.nVersion = 1;
dummyNode2.fSuccessfullyConnected = true;
{
LOCK(cs_main);
Misbehaving(dummyNode2.GetId(), DISCOURAGEMENT_THRESHOLD - 1);
}
Misbehaving(dummyNode2.GetId(), DISCOURAGEMENT_THRESHOLD - 1);
{
LOCK(dummyNode2.cs_sendProcessing);
BOOST_CHECK(peerLogic->SendMessages(&dummyNode2));
}
BOOST_CHECK(!banman->IsDiscouraged(addr2)); // 2 not discouraged yet...
BOOST_CHECK(banman->IsDiscouraged(addr1)); // ... but 1 still should be
{
LOCK(cs_main);
Misbehaving(dummyNode2.GetId(), 1); // 2 reaches discouragement threshold
}
Misbehaving(dummyNode2.GetId(), 1); // 2 reaches discouragement threshold
{
LOCK(dummyNode2.cs_sendProcessing);
BOOST_CHECK(peerLogic->SendMessages(&dummyNode2));
Expand Down Expand Up @@ -292,10 +283,7 @@ BOOST_AUTO_TEST_CASE(DoS_bantime)
dummyNode.nVersion = 1;
dummyNode.fSuccessfullyConnected = true;

{
LOCK(cs_main);
Misbehaving(dummyNode.GetId(), DISCOURAGEMENT_THRESHOLD);
}
Misbehaving(dummyNode.GetId(), DISCOURAGEMENT_THRESHOLD);
{
LOCK(dummyNode.cs_sendProcessing);
BOOST_CHECK(peerLogic->SendMessages(&dummyNode));
Expand Down