Skip to content

Commit

Permalink
Integrate ASN bucketing in Addrman and add tests
Browse files Browse the repository at this point in the history
Instead of using /16 netgroups to bucket nodes in Addrman for connection
diversification, ASN, which better represents an actor in terms
of network-layer infrastructure, is used.
For testing, asmap.raw is used. It represents a minimal
asmap needed for testing purposes.

Github-Pull: bitcoin#16702
Rebased-From: ec45646
  • Loading branch information
naumenkogs authored and luke-jr committed Feb 15, 2020
1 parent b42b932 commit 050811d
Show file tree
Hide file tree
Showing 11 changed files with 473 additions and 78 deletions.
12 changes: 11 additions & 1 deletion src/Makefile.test.include
Expand Up @@ -46,7 +46,8 @@ JSON_TEST_FILES = \
test/data/tx_invalid.json \
test/data/tx_valid.json

RAW_TEST_FILES =
RAW_TEST_FILES = \
test/data/asmap.raw

GENERATED_TEST_FILES = $(JSON_TEST_FILES:.json=.json.h) $(RAW_TEST_FILES:.raw=.raw.h)

Expand Down Expand Up @@ -379,3 +380,12 @@ endif
echo "};};"; \
} > "$@.new" && mv -f "$@.new" "$@"
@echo "Generated $@"

%.raw.h: %.raw
@$(MKDIR_P) $(@D)
@{ \
echo "static unsigned const char $(*F)_raw[] = {" && \
$(HEXDUMP) -v -e '8/1 "0x%02x, "' -e '"\n"' $< | $(SED) -e 's/0x ,//g' && \
echo "};"; \
} > "$@.new" && mv -f "$@.new" "$@"
@echo "Generated $@"
47 changes: 35 additions & 12 deletions src/addrman.cpp
Expand Up @@ -8,17 +8,17 @@
#include <hash.h>
#include <serialize.h>

int CAddrInfo::GetTriedBucket(const uint256& nKey) const
int CAddrInfo::GetTriedBucket(const uint256& nKey, const std::vector<bool> &asmap) const
{
uint64_t hash1 = (CHashWriter(SER_GETHASH, 0) << nKey << GetKey()).GetCheapHash();
uint64_t hash2 = (CHashWriter(SER_GETHASH, 0) << nKey << GetGroup() << (hash1 % ADDRMAN_TRIED_BUCKETS_PER_GROUP)).GetCheapHash();
uint64_t hash2 = (CHashWriter(SER_GETHASH, 0) << nKey << GetGroup(asmap) << (hash1 % ADDRMAN_TRIED_BUCKETS_PER_GROUP)).GetCheapHash();
return hash2 % ADDRMAN_TRIED_BUCKET_COUNT;
}

int CAddrInfo::GetNewBucket(const uint256& nKey, const CNetAddr& src) const
int CAddrInfo::GetNewBucket(const uint256& nKey, const CNetAddr& src, const std::vector<bool> &asmap) const
{
std::vector<unsigned char> vchSourceGroupKey = src.GetGroup();
uint64_t hash1 = (CHashWriter(SER_GETHASH, 0) << nKey << GetGroup() << vchSourceGroupKey).GetCheapHash();
std::vector<unsigned char> vchSourceGroupKey = src.GetGroup(asmap);
uint64_t hash1 = (CHashWriter(SER_GETHASH, 0) << nKey << GetGroup(asmap) << vchSourceGroupKey).GetCheapHash();
uint64_t hash2 = (CHashWriter(SER_GETHASH, 0) << nKey << vchSourceGroupKey << (hash1 % ADDRMAN_NEW_BUCKETS_PER_SOURCE_GROUP)).GetCheapHash();
return hash2 % ADDRMAN_NEW_BUCKET_COUNT;
}
Expand Down Expand Up @@ -153,7 +153,7 @@ void CAddrMan::MakeTried(CAddrInfo& info, int nId)
assert(info.nRefCount == 0);

// which tried bucket to move the entry to
int nKBucket = info.GetTriedBucket(nKey);
int nKBucket = info.GetTriedBucket(nKey, m_asmap);
int nKBucketPos = info.GetBucketPosition(nKey, false, nKBucket);

// first make space to add it (the existing tried entry there is moved to new, deleting whatever is there).
Expand All @@ -169,7 +169,7 @@ void CAddrMan::MakeTried(CAddrInfo& info, int nId)
nTried--;

// find which new bucket it belongs to
int nUBucket = infoOld.GetNewBucket(nKey);
int nUBucket = infoOld.GetNewBucket(nKey, m_asmap);
int nUBucketPos = infoOld.GetBucketPosition(nKey, true, nUBucket);
ClearNew(nUBucket, nUBucketPos);
assert(vvNew[nUBucket][nUBucketPos] == -1);
Expand Down Expand Up @@ -233,7 +233,7 @@ void CAddrMan::Good_(const CService& addr, bool test_before_evict, int64_t nTime
return;

// which tried bucket to move the entry to
int tried_bucket = info.GetTriedBucket(nKey);
int tried_bucket = info.GetTriedBucket(nKey, m_asmap);
int tried_bucket_pos = info.GetBucketPosition(nKey, false, tried_bucket);

// Will moving this address into tried evict another entry?
Expand Down Expand Up @@ -301,7 +301,7 @@ bool CAddrMan::Add_(const CAddress& addr, const CNetAddr& source, int64_t nTimeP
fNew = true;
}

int nUBucket = pinfo->GetNewBucket(nKey, source);
int nUBucket = pinfo->GetNewBucket(nKey, source, m_asmap);
int nUBucketPos = pinfo->GetBucketPosition(nKey, true, nUBucket);
if (vvNew[nUBucket][nUBucketPos] != nId) {
bool fInsert = vvNew[nUBucket][nUBucketPos] == -1;
Expand Down Expand Up @@ -439,7 +439,7 @@ int CAddrMan::Check_()
if (vvTried[n][i] != -1) {
if (!setTried.count(vvTried[n][i]))
return -11;
if (mapInfo[vvTried[n][i]].GetTriedBucket(nKey) != n)
if (mapInfo[vvTried[n][i]].GetTriedBucket(nKey, m_asmap) != n)
return -17;
if (mapInfo[vvTried[n][i]].GetBucketPosition(nKey, false, n) != i)
return -18;
Expand Down Expand Up @@ -545,7 +545,7 @@ void CAddrMan::ResolveCollisions_()
CAddrInfo& info_new = mapInfo[id_new];

// Which tried bucket to move the entry to.
int tried_bucket = info_new.GetTriedBucket(nKey);
int tried_bucket = info_new.GetTriedBucket(nKey, m_asmap);
int tried_bucket_pos = info_new.GetBucketPosition(nKey, false, tried_bucket);
if (!info_new.IsValid()) { // id_new may no longer map to a valid address
erase_collision = true;
Expand Down Expand Up @@ -609,10 +609,33 @@ CAddrInfo CAddrMan::SelectTriedCollision_()
CAddrInfo& newInfo = mapInfo[id_new];

// which tried bucket to move the entry to
int tried_bucket = newInfo.GetTriedBucket(nKey);
int tried_bucket = newInfo.GetTriedBucket(nKey, m_asmap);
int tried_bucket_pos = newInfo.GetBucketPosition(nKey, false, tried_bucket);

int id_old = vvTried[tried_bucket][tried_bucket_pos];

return mapInfo[id_old];
}

std::vector<bool> CAddrMan::DecodeAsmap(fs::path path)
{
std::vector<bool> bits;
FILE *filestr = fsbridge::fopen(path, "rb");
CAutoFile file(filestr, SER_DISK, CLIENT_VERSION);
if (file.IsNull()) {
LogPrintf("Failed to open asmap file from disk.\n");
return bits;
}
fseek(filestr, 0, SEEK_END);
int length = ftell(filestr);
LogPrintf("Opened asmap file %s (%d bytes) from disk.\n", path, length);
fseek(filestr, 0, SEEK_SET);
char cur_byte;
for (int i = 0; i < length; ++i) {
file >> cur_byte;
for (int bit = 0; bit < 8; ++bit) {
bits.push_back((cur_byte >> bit) & 1);
}
}
return bits;
}
101 changes: 76 additions & 25 deletions src/addrman.h
Expand Up @@ -12,11 +12,17 @@
#include <sync.h>
#include <timedata.h>
#include <util/system.h>
#include <clientversion.h>

#include <map>
#include <set>
#include <stdint.h>
#include <vector>
#include <iostream>
#include <streams.h>
#include <fs.h>
#include <hash.h>


/**
* Extended statistics about a CAddress
Expand Down Expand Up @@ -72,15 +78,15 @@ class CAddrInfo : public CAddress
}

//! Calculate in which "tried" bucket this entry belongs
int GetTriedBucket(const uint256 &nKey) const;
int GetTriedBucket(const uint256 &nKey, const std::vector<bool> &asmap) const;

//! Calculate in which "new" bucket this entry belongs, given a certain source
int GetNewBucket(const uint256 &nKey, const CNetAddr& src) const;
int GetNewBucket(const uint256 &nKey, const CNetAddr& src, const std::vector<bool> &asmap) const;

//! Calculate in which "new" bucket this entry belongs, using its default source
int GetNewBucket(const uint256 &nKey) const
int GetNewBucket(const uint256 &nKey, const std::vector<bool> &asmap) const
{
return GetNewBucket(nKey, source);
return GetNewBucket(nKey, source, asmap);
}

//! Calculate in which position of a bucket to store this entry.
Expand Down Expand Up @@ -174,6 +180,7 @@ static const int64_t ADDRMAN_TEST_WINDOW = 40*60; // 40 minutes
*/
class CAddrMan
{
friend class CAddrManTest;
protected:
//! critical section to protect the inner data structures
mutable CCriticalSection cs;
Expand Down Expand Up @@ -268,9 +275,29 @@ class CAddrMan
void SetServices_(const CService &addr, ServiceFlags nServices) EXCLUSIVE_LOCKS_REQUIRED(cs);

public:
// Compressed IP->ASN mapping, loaded from a file when a node starts.
// Should be always empty if no file was provided.
// This mapping is then used for bucketing nodes in Addrman.
//
// If asmap is provided, nodes will be bucketed by
// AS they belong to, in order to make impossible for a node
// to connect to several nodes hosted in a single AS.
// This is done in response to Erebus attack, but also to generally
// diversify the connections every node creates,
// especially useful when a large fraction of nodes
// operate under a couple of cloud providers.
//
// If a new asmap was provided, the existing records
// would be re-bucketed accordingly.
std::vector<bool> m_asmap;

// Read asmap from provided binary file
static std::vector<bool> DecodeAsmap(fs::path path);


/**
* serialized format:
* * version byte (currently 1)
* * version byte (1 for pre-asmap files, 2 for files including asmap version)
* * 0x20 + nKey (serialized as if it were a vector, for backward compatibility)
* * nNew
* * nTried
Expand Down Expand Up @@ -302,7 +329,7 @@ class CAddrMan
{
LOCK(cs);

unsigned char nVersion = 1;
unsigned char nVersion = 2;
s << nVersion;
s << ((unsigned char)32);
s << nKey;
Expand Down Expand Up @@ -345,6 +372,13 @@ class CAddrMan
}
}
}
// Store asmap version after bucket entries so that it
// can be ignored by older clients for backward compatibility.
uint256 asmap_version;
if (m_asmap.size() != 0) {
asmap_version = SerializeHash(m_asmap);
}
s << asmap_version;
}

template<typename Stream>
Expand All @@ -353,7 +387,6 @@ class CAddrMan
LOCK(cs);

Clear();

unsigned char nVersion;
s >> nVersion;
unsigned char nKeySize;
Expand Down Expand Up @@ -383,16 +416,6 @@ class CAddrMan
mapAddr[info] = n;
info.nRandomPos = vRandom.size();
vRandom.push_back(n);
if (nVersion != 1 || nUBuckets != ADDRMAN_NEW_BUCKET_COUNT) {
// In case the new table data cannot be used (nVersion unknown, or bucket count wrong),
// immediately try to give them a reference based on their primary source address.
int nUBucket = info.GetNewBucket(nKey);
int nUBucketPos = info.GetBucketPosition(nKey, true, nUBucket);
if (vvNew[nUBucket][nUBucketPos] == -1) {
vvNew[nUBucket][nUBucketPos] = n;
info.nRefCount++;
}
}
}
nIdCount = nNew;

Expand All @@ -401,7 +424,7 @@ class CAddrMan
for (int n = 0; n < nTried; n++) {
CAddrInfo info;
s >> info;
int nKBucket = info.GetTriedBucket(nKey);
int nKBucket = info.GetTriedBucket(nKey, m_asmap);
int nKBucketPos = info.GetBucketPosition(nKey, false, nKBucket);
if (vvTried[nKBucket][nKBucketPos] == -1) {
info.nRandomPos = vRandom.size();
Expand All @@ -417,20 +440,48 @@ class CAddrMan
}
nTried -= nLost;

// Deserialize positions in the new table (if possible).
// Store positions in the new table buckets to apply later (if possible).
std::map<int, int> entryToBucket; // Represents which entry belonged to which bucket when serializing

for (int bucket = 0; bucket < nUBuckets; bucket++) {
int nSize = 0;
s >> nSize;
for (int n = 0; n < nSize; n++) {
int nIndex = 0;
s >> nIndex;
if (nIndex >= 0 && nIndex < nNew) {
CAddrInfo &info = mapInfo[nIndex];
int nUBucketPos = info.GetBucketPosition(nKey, true, bucket);
if (nVersion == 1 && nUBuckets == ADDRMAN_NEW_BUCKET_COUNT && vvNew[bucket][nUBucketPos] == -1 && info.nRefCount < ADDRMAN_NEW_BUCKETS_PER_ADDRESS) {
info.nRefCount++;
vvNew[bucket][nUBucketPos] = nIndex;
}
entryToBucket[nIndex] = bucket;
}
}
}

uint256 supplied_asmap_version;
if (m_asmap.size() != 0) {
supplied_asmap_version = SerializeHash(m_asmap);
}
uint256 serialized_asmap_version;
if (nVersion > 1) {
s >> serialized_asmap_version;
}

for (int n = 0; n < nNew; n++) {
CAddrInfo &info = mapInfo[n];
int bucket = entryToBucket[n];
int nUBucketPos = info.GetBucketPosition(nKey, true, bucket);
if (nVersion == 2 && nUBuckets == ADDRMAN_NEW_BUCKET_COUNT && vvNew[bucket][nUBucketPos] == -1 &&
info.nRefCount < ADDRMAN_NEW_BUCKETS_PER_ADDRESS && serialized_asmap_version == supplied_asmap_version) {
// Bucketing has not changed, using existing bucket positions for the new table
vvNew[bucket][nUBucketPos] = n;
info.nRefCount++;
} else {
// In case the new table data cannot be used (nVersion unknown, bucket count wrong or new asmap),
// try to give them a reference based on their primary source address.
LogPrint(BCLog::ADDRMAN, "Bucketing method was updated, re-bucketing addrman entries from disk\n");
bucket = info.GetNewBucket(nKey, m_asmap);
nUBucketPos = info.GetBucketPosition(nKey, true, bucket);
if (vvNew[bucket][nUBucketPos] == -1) {
vvNew[bucket][nUBucketPos] = n;
info.nRefCount++;
}
}
}
Expand Down
26 changes: 26 additions & 0 deletions src/init.cpp
Expand Up @@ -51,7 +51,11 @@
#include <util/threadnames.h>
#include <util/translation.h>
#include <util/validation.h>
#include <util/asmap.h>
#include <validation.h>
#include <hash.h>


#include <validationinterface.h>
#include <walletinitinterface.h>

Expand Down Expand Up @@ -99,6 +103,8 @@ std::unique_ptr<BanMan> g_banman;

static const char* FEE_ESTIMATES_FILENAME="fee_estimates.dat";

static const char* DEFAULT_ASMAP_FILENAME="ip_asn.map";

/**
* The PID file facilities.
*/
Expand Down Expand Up @@ -429,6 +435,7 @@ void SetupServerArgs()
gArgs.AddArg("-peertimeout=<n>", strprintf("Specify p2p connection timeout in seconds. This option determines the amount of time a peer may be inactive before the connection to it is dropped. (minimum: 1, default: %d)", DEFAULT_PEER_CONNECT_TIMEOUT), ArgsManager::ALLOW_ANY | ArgsManager::DEBUG_ONLY, OptionsCategory::CONNECTION);
gArgs.AddArg("-torcontrol=<ip>:<port>", strprintf("Tor control port to use if onion listening enabled (default: %s)", DEFAULT_TOR_CONTROL), ArgsManager::ALLOW_ANY, OptionsCategory::CONNECTION);
gArgs.AddArg("-torpassword=<pass>", "Tor control port password (default: empty)", ArgsManager::ALLOW_ANY, OptionsCategory::CONNECTION);
gArgs.AddArg("-asmap=<file>", "Specify asn mapping used for bucketing of the peers. Path should be relative to the -datadir path.", ArgsManager::ALLOW_ANY, OptionsCategory::CONNECTION);
#ifdef USE_UPNP
#if USE_UPNP
gArgs.AddArg("-upnp", "Use UPnP to map the listening port (default: 1 when listening and no -proxy)", ArgsManager::ALLOW_ANY, OptionsCategory::CONNECTION);
Expand Down Expand Up @@ -1811,6 +1818,25 @@ bool AppInitMain(InitInterfaces& interfaces)
return false;
}

// Read asmap file if configured
if (gArgs.IsArgSet("-asmap")) {
std::string asmap_file = gArgs.GetArg("-asmap", "");
if (asmap_file.empty()) {
asmap_file = DEFAULT_ASMAP_FILENAME;
}
const fs::path asmap_path = GetDataDir() / asmap_file;
std::vector<bool> asmap = CAddrMan::DecodeAsmap(asmap_path);
if (asmap.size() == 0) {
InitError(strprintf(_("Could not find or parse specified asmap: '%s'").translated, asmap_path));
return false;
}
g_connman->SetAsmap(asmap);
const uint256 asmap_version = SerializeHash(asmap);
LogPrintf("Using asmap version %s for IP bucketing.\n", asmap_version.ToString());
} else {
LogPrintf("Using /16 prefix for IP bucketing.\n");
}

// ********************************************************* Step 13: finished

SetRPCWarmupFinished();
Expand Down

0 comments on commit 050811d

Please sign in to comment.