Skip to content

Commit b46e7c2

Browse files
committed
Merge pull request #6064
f46a680 Better mruset unit test (Pieter Wuille) d4d5022 Use ring buffer of set iterators instead of deque of copies in mruset (Pieter Wuille) d81cff3 Replace mruset setAddrKnown with CRollingBloomFilter addrKnown (Gavin Andresen) 69a5f8b Rolling bloom filter class (Gavin Andresen)
2 parents 8a10000 + f46a680 commit b46e7c2

File tree

8 files changed

+255
-118
lines changed

8 files changed

+255
-118
lines changed

src/bloom.cpp

Lines changed: 67 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -21,22 +21,33 @@
2121
using namespace std;
2222

2323
/**
 * Construct a bloom filter sized for nElements entries at false-positive
 * rate nFPRate.
 *
 * Sizing (see https://en.wikipedia.org/wiki/Bloom_filter for the formulas):
 *  - ideal size in bits      = -nElements * log(fp rate) / ln(2)^2
 *  - ideal number of hashes  = filter size in bits * ln(2) / nElements
 *
 * Parameters that would produce a filter larger than the protocol limits
 * are not honoured: the size is capped at MAX_BLOOM_FILTER_SIZE * 8 bits
 * and the hash count at MAX_HASH_FUNCS.
 *
 * nElements is used as a divisor when computing the hash count, so it must
 * be non-zero. nHashFuncs reads vData.size(); this relies on vData being
 * initialized first (i.e. declared before nHashFuncs in the class --
 * confirm against bloom.h).
 */
CBloomFilter::CBloomFilter(unsigned int nElements, double nFPRate, unsigned int nTweakIn, unsigned char nFlagsIn) :
    vData(std::min(static_cast<unsigned int>(-1 / LN2SQUARED * nElements * log(nFPRate)),
                   MAX_BLOOM_FILTER_SIZE * 8) / 8),
    isFull(false),
    isEmpty(false),
    nHashFuncs(std::min(static_cast<unsigned int>(vData.size() * 8 / nElements * LN2),
                        MAX_HASH_FUNCS)),
    nTweak(nTweakIn),
    nFlags(nFlagsIn)
{
}
42+
43+
// Private constructor used by CRollingBloomFilter
44+
CBloomFilter::CBloomFilter(unsigned int nElements, double nFPRate, unsigned int nTweakIn) :
45+
vData((unsigned int)(-1 / LN2SQUARED * nElements * log(nFPRate)) / 8),
46+
isFull(false),
47+
isEmpty(true),
48+
nHashFuncs((unsigned int)(vData.size() * 8 / nElements * LN2)),
49+
nTweak(nTweakIn),
50+
nFlags(BLOOM_UPDATE_NONE)
4051
{
4152
}
4253

@@ -197,3 +208,43 @@ void CBloomFilter::UpdateEmptyFull()
197208
isFull = full;
198209
isEmpty = empty;
199210
}
211+
212+
/**
 * Implemented using two bloom filters, each sized for 2 * nElements entries.
 * They are filled up and cleared in a staggered fashion, every nElements
 * insertions, so at least one of them always contains the last nElements
 * items inserted.
 *
 * Both filters receive the same fpRate and nTweak. Initializers are listed
 * in member declaration order (nBloomSize, nInsertions, b1, b2).
 */
CRollingBloomFilter::CRollingBloomFilter(unsigned int nElements, double fpRate, unsigned int nTweak) :
    nBloomSize(nElements * 2),
    nInsertions(0),
    b1(nElements * 2, fpRate, nTweak),
    b2(nElements * 2, fpRate, nTweak)
{
}
222+
223+
void CRollingBloomFilter::insert(const std::vector<unsigned char>& vKey)
224+
{
225+
if (nInsertions == 0) {
226+
b1.clear();
227+
} else if (nInsertions == nBloomSize / 2) {
228+
b2.clear();
229+
}
230+
b1.insert(vKey);
231+
b2.insert(vKey);
232+
if (++nInsertions == nBloomSize) {
233+
nInsertions = 0;
234+
}
235+
}
236+
237+
bool CRollingBloomFilter::contains(const std::vector<unsigned char>& vKey) const
238+
{
239+
if (nInsertions < nBloomSize / 2) {
240+
return b2.contains(vKey);
241+
}
242+
return b1.contains(vKey);
243+
}
244+
245+
void CRollingBloomFilter::clear()
246+
{
247+
b1.clear();
248+
b2.clear();
249+
nInsertions = 0;
250+
}

src/bloom.h

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,10 @@ class CBloomFilter
5353

5454
unsigned int Hash(unsigned int nHashNum, const std::vector<unsigned char>& vDataToHash) const;
5555

56+
// Private constructor for CRollingBloomFilter, no restrictions on size
57+
CBloomFilter(unsigned int nElements, double nFPRate, unsigned int nTweak);
58+
friend class CRollingBloomFilter;
59+
5660
public:
5761
/**
5862
* Creates a new bloom filter which will provide the given fp rate when filled with the given number of elements
@@ -97,4 +101,28 @@ class CBloomFilter
97101
void UpdateEmptyFull();
98102
};
99103

104+
/**
105+
* RollingBloomFilter is a probabilistic "keep track of most recently inserted" set.
106+
* Construct it with the number of items to keep track of, and a false-positive rate.
107+
*
108+
* contains(item) will always return true if item was one of the last N things
109+
* insert()'ed ... but may also return true for items that were not inserted.
110+
*/
111+
class CRollingBloomFilter
112+
{
113+
public:
114+
CRollingBloomFilter(unsigned int nElements, double nFPRate, unsigned int nTweak);
115+
116+
void insert(const std::vector<unsigned char>& vKey);
117+
bool contains(const std::vector<unsigned char>& vKey) const;
118+
119+
void clear();
120+
121+
private:
122+
unsigned int nBloomSize;
123+
unsigned int nInsertions;
124+
CBloomFilter b1, b2;
125+
};
126+
127+
100128
#endif // BITCOIN_BLOOM_H

src/main.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3995,7 +3995,7 @@ bool static ProcessMessage(CNode* pfrom, string strCommand, CDataStream& vRecv,
39953995
{
39963996
LOCK(cs_vNodes);
39973997
// Use deterministic randomness to send to the same nodes for 24 hours
3998-
// at a time so the setAddrKnowns of the chosen nodes prevent repeats
3998+
// at a time so the addrKnowns of the chosen nodes prevent repeats
39993999
static uint256 hashSalt;
40004000
if (hashSalt.IsNull())
40014001
hashSalt = GetRandHash();
@@ -4779,9 +4779,9 @@ bool SendMessages(CNode* pto, bool fSendTrickle)
47794779
LOCK(cs_vNodes);
47804780
BOOST_FOREACH(CNode* pnode, vNodes)
47814781
{
4782-
// Periodically clear setAddrKnown to allow refresh broadcasts
4782+
// Periodically clear addrKnown to allow refresh broadcasts
47834783
if (nLastRebroadcast)
4784-
pnode->setAddrKnown.clear();
4784+
pnode->addrKnown.clear();
47854785

47864786
// Rebroadcast our address
47874787
AdvertizeLocal(pnode);
@@ -4799,9 +4799,9 @@ bool SendMessages(CNode* pto, bool fSendTrickle)
47994799
vAddr.reserve(pto->vAddrToSend.size());
48004800
BOOST_FOREACH(const CAddress& addr, pto->vAddrToSend)
48014801
{
4802-
// returns true if wasn't already contained in the set
4803-
if (pto->setAddrKnown.insert(addr).second)
4802+
if (!pto->addrKnown.contains(addr.GetKey()))
48044803
{
4804+
pto->addrKnown.insert(addr.GetKey());
48054805
vAddr.push_back(addr);
48064806
// receiver rejects addr messages larger than 1000
48074807
if (vAddr.size() >= 1000)

src/mruset.h

Lines changed: 16 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
1-
// Copyright (c) 2012 The Bitcoin Core developers
1+
// Copyright (c) 2012-2015 The Bitcoin Core developers
22
// Distributed under the MIT software license, see the accompanying
33
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
44

55
#ifndef BITCOIN_MRUSET_H
66
#define BITCOIN_MRUSET_H
77

8-
#include <deque>
98
#include <set>
9+
#include <vector>
1010
#include <utility>
1111

1212
/** STL-like set container that only keeps the most recent N elements. */
@@ -22,11 +22,13 @@ class mruset
2222

2323
protected:
2424
std::set<T> set;
25-
std::deque<T> queue;
26-
size_type nMaxSize;
25+
std::vector<iterator> order;
26+
size_type first_used;
27+
size_type first_unused;
28+
const size_type nMaxSize;
2729

2830
public:
29-
mruset(size_type nMaxSizeIn = 0) { nMaxSize = nMaxSizeIn; }
31+
mruset(size_type nMaxSizeIn = 1) : nMaxSize(nMaxSizeIn) { clear(); }
3032
iterator begin() const { return set.begin(); }
3133
iterator end() const { return set.end(); }
3234
size_type size() const { return set.size(); }
@@ -36,7 +38,9 @@ class mruset
3638
void clear()
3739
{
3840
set.clear();
39-
queue.clear();
41+
order.assign(nMaxSize, set.end());
42+
first_used = 0;
43+
first_unused = 0;
4044
}
4145
bool inline friend operator==(const mruset<T>& a, const mruset<T>& b) { return a.set == b.set; }
4246
bool inline friend operator==(const mruset<T>& a, const std::set<T>& b) { return a.set == b; }
@@ -45,25 +49,17 @@ class mruset
4549
{
4650
std::pair<iterator, bool> ret = set.insert(x);
4751
if (ret.second) {
48-
if (nMaxSize && queue.size() == nMaxSize) {
49-
set.erase(queue.front());
50-
queue.pop_front();
52+
if (set.size() == nMaxSize + 1) {
53+
set.erase(order[first_used]);
54+
order[first_used] = set.end();
55+
if (++first_used == nMaxSize) first_used = 0;
5156
}
52-
queue.push_back(x);
57+
order[first_unused] = ret.first;
58+
if (++first_unused == nMaxSize) first_unused = 0;
5359
}
5460
return ret;
5561
}
5662
size_type max_size() const { return nMaxSize; }
57-
size_type max_size(size_type s)
58-
{
59-
if (s)
60-
while (queue.size() > s) {
61-
set.erase(queue.front());
62-
queue.pop_front();
63-
}
64-
nMaxSize = s;
65-
return nMaxSize;
66-
}
6763
};
6864

6965
#endif // BITCOIN_MRUSET_H

src/net.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1905,7 +1905,10 @@ bool CAddrDB::Read(CAddrMan& addr)
19051905
unsigned int ReceiveFloodSize() { return 1000*GetArg("-maxreceivebuffer", 5*1000); }
19061906
unsigned int SendBufferSize() { return 1000*GetArg("-maxsendbuffer", 1*1000); }
19071907

1908-
CNode::CNode(SOCKET hSocketIn, CAddress addrIn, std::string addrNameIn, bool fInboundIn) : ssSend(SER_NETWORK, INIT_PROTO_VERSION), setAddrKnown(5000)
1908+
CNode::CNode(SOCKET hSocketIn, CAddress addrIn, std::string addrNameIn, bool fInboundIn) :
1909+
ssSend(SER_NETWORK, INIT_PROTO_VERSION),
1910+
addrKnown(5000, 0.001, insecure_rand()),
1911+
setInventoryKnown(SendBufferSize() / 1000)
19091912
{
19101913
nServices = 0;
19111914
hSocket = hSocketIn;
@@ -1934,7 +1937,6 @@ CNode::CNode(SOCKET hSocketIn, CAddress addrIn, std::string addrNameIn, bool fIn
19341937
nStartingHeight = -1;
19351938
fGetAddr = false;
19361939
fRelayTxes = false;
1937-
setInventoryKnown.max_size(SendBufferSize() / 1000);
19381940
pfilter = new CBloomFilter();
19391941
nPingNonceSent = 0;
19401942
nPingUsecStart = 0;

src/net.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -300,7 +300,7 @@ class CNode
300300

301301
// flood relay
302302
std::vector<CAddress> vAddrToSend;
303-
mruset<CAddress> setAddrKnown;
303+
CRollingBloomFilter addrKnown;
304304
bool fGetAddr;
305305
std::set<uint256> setKnown;
306306

@@ -380,15 +380,15 @@ class CNode
380380

381381
void AddAddressKnown(const CAddress& addr)
382382
{
383-
setAddrKnown.insert(addr);
383+
addrKnown.insert(addr.GetKey());
384384
}
385385

386386
void PushAddress(const CAddress& addr)
387387
{
388388
// Known checking here is only to save space from duplicates.
389389
// SendMessages will filter it again for knowns that were added
390390
// after addresses were pushed.
391-
if (addr.IsValid() && !setAddrKnown.count(addr)) {
391+
if (addr.IsValid() && !addrKnown.contains(addr.GetKey())) {
392392
if (vAddrToSend.size() >= MAX_ADDR_TO_SEND) {
393393
vAddrToSend[insecure_rand() % vAddrToSend.size()] = addr;
394394
} else {

src/test/bloom_tests.cpp

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
#include "clientversion.h"
99
#include "key.h"
1010
#include "merkleblock.h"
11+
#include "random.h"
1112
#include "serialize.h"
1213
#include "streams.h"
1314
#include "uint256.h"
@@ -459,4 +460,81 @@ BOOST_AUTO_TEST_CASE(merkle_block_4_test_update_none)
459460
BOOST_CHECK(!filter.contains(COutPoint(uint256S("0x02981fa052f0481dbc5868f4fc2166035a10f27a03cfd2de67326471df5bc041"), 0)));
460461
}
461462

463+
static std::vector<unsigned char> RandomData()
464+
{
465+
uint256 r = GetRandHash();
466+
return std::vector<unsigned char>(r.begin(), r.end());
467+
}
468+
469+
BOOST_AUTO_TEST_CASE(rolling_bloom)
{
    // Remember the last 100 entries, 1% false-positive rate.
    CRollingBloomFilter rb1(100, 0.01, 0);

    // Overfill the filter with far more than 100 entries.
    static const int DATASIZE=399;
    std::vector<unsigned char> data[DATASIZE];
    for (int idx = 0; idx < DATASIZE; ++idx) {
        data[idx] = RandomData();
        rb1.insert(data[idx]);
    }
    // The most recent 100 insertions must all still be reported.
    for (int idx = DATASIZE - 100; idx < DATASIZE; ++idx) {
        BOOST_CHECK(rb1.contains(data[idx]));
    }

    // With a 1% false-positive rate we expect roughly 100 hits when probing
    // 10,000 random keys. The worst-case false-positive behaviour occurs
    // when the filter is as full as possible, i.e. when the number of
    // insertions is one less than a multiple of nElements*2
    // (here 399 = 2 * 200 - 1).
    unsigned int nHits = 0;
    for (int probe = 0; probe < 10000; ++probe) {
        if (rb1.contains(RandomData())) {
            ++nHits;
        }
    }
    // Run test_bitcoin with --log_level=message to see BOOST_TEST_MESSAGEs:
    BOOST_TEST_MESSAGE("RollingBloomFilter got " << nHits << " false positives (~100 expected)");

    // A false-positive count outside this range is insanely unlikely.
    BOOST_CHECK(nHits > 25);
    BOOST_CHECK(nHits < 175);

    // clear() must make the filter forget even the newest entry.
    BOOST_CHECK(rb1.contains(data[DATASIZE-1]));
    rb1.clear();
    BOOST_CHECK(!rb1.contains(data[DATASIZE-1]));

    // Roll through the data again; before each insertion the entry added
    // 100 steps earlier must still be remembered.
    for (int idx = 0; idx < DATASIZE; ++idx) {
        if (idx >= 100) {
            BOOST_CHECK(rb1.contains(data[idx-100]));
        }
        rb1.insert(data[idx]);
    }

    // Push 999 more random entries through the filter.
    for (int extra = 0; extra < 999; ++extra) {
        rb1.insert(RandomData());
    }
    // Sanity check that the filter is rolling rather than just filling up:
    // the old data should now only match as false positives.
    nHits = 0;
    for (int idx = 0; idx < DATASIZE; ++idx) {
        if (rb1.contains(data[idx])) {
            ++nHits;
        }
    }
    // Expect about 5 false positives; more than 100 means something is
    // definitely broken.
    BOOST_TEST_MESSAGE("RollingBloomFilter got " << nHits << " false positives (~5 expected)");
    BOOST_CHECK(nHits < 100);

    // Remember the last 1000 entries, 0.1% false-positive rate.
    CRollingBloomFilter rb2(1000, 0.001, 0);
    for (int idx = 0; idx < DATASIZE; ++idx) {
        rb2.insert(data[idx]);
    }
    // DATASIZE is below 1000, so there is room for every entry.
    for (int idx = 0; idx < DATASIZE; ++idx) {
        BOOST_CHECK(rb2.contains(data[idx]));
    }
}
539+
462540
BOOST_AUTO_TEST_SUITE_END()

0 commit comments

Comments
 (0)