Skip to content

Commit

Permalink
Make BloomFilter (BF) and ScalableBloomFilter (SBF) variable names easier to understand
Browse files: browse the repository at this point in the history
  • Loading branch information
jaybaird committed Mar 24, 2009
1 parent a4239da commit 5e62565
Showing 1 changed file with 23 additions and 22 deletions.
45 changes: 23 additions & 22 deletions in pybloom/pybloom.py
Expand Up @@ -5,28 +5,29 @@
Requires the bitarray library: http://pypi.python.org/pypi/bitarray/ Requires the bitarray library: http://pypi.python.org/pypi/bitarray/
>>> from pybloom import BloomFilter >>> from pybloom import BloomFilter
>>> filter = BloomFilter(bits=8192, probability=0.001) >>> f = BloomFilter(bits=8192, probability=0.001)
>>> [filter.add(x) for x in range(10)] >>> for i in xrange(0, f.capacity):
[False, False, False, False, False, False, False, False, False, False] ... _ = f.add(i)
>>> all([(x in filter) for x in range(10)]) ...
>>> 500 in f
True True
>>> 10 in filter >>> f.capacity in f
False False
>>> 5 in filter >>> abs((len(sbf) / 100000.0) - 1.0) <= f.probability
True True
>>> from pybloom import ScalableBloomFilter >>> from pybloom import ScalableBloomFilter
>>> sbf = ScalableBloomFilter(mode=ScalableBloomFilter.SMALL_SET_GROWTH) >>> sbf = ScalableBloomFilter(mode=ScalableBloomFilter.SMALL_SET_GROWTH)
>>> for i in xrange(0, 100000): >>> for i in xrange(0, 100000):
... _ = sbf.add(i) ... _ = sbf.add(i)
... ...
>>> (sum([f.m for f in sbf.filters]) / 8) / 1024.0 >>> (sum([f.bits for f in sbf.filters]) / 8) / 1024.0
255.0 255.0
>>> sbf.capacity >>> sbf.capacity
133100 133100
>>> len(sbf) >>> len(sbf)
94609 94609
>>> abs((len(sbf) / 100000.0) - 1.0) <= sbf.p >>> abs((len(sbf) / 100000.0) - 1.0) <= sbf.probability
True True
# len(sbf) may not equal the entire input length. 0.006% error is well # len(sbf) may not equal the entire input length. 0.006% error is well
# below the default 0.1% error threshold # below the default 0.1% error threshold
Expand Down Expand Up @@ -84,13 +85,13 @@ def __init__(self, bits, probability=0.001):
raise ValueError("Bits must be a power of two.") raise ValueError("Bits must be a power of two.")
if not probability or probability < 0: if not probability or probability < 0:
raise ValueError("Probability must be a decimal less than 0.") raise ValueError("Probability must be a decimal less than 0.")
self.m = bits self.bits = bits
self.p = probability self.probability = probability
self.k = int(round(math.log(1/self.p, 2))) self.hashes = int(round(math.log(1/self.probability, 2)))
self.capacity = int(round(self.m * pow(math.log(2), 2) / self.capacity = int(round(self.bits * pow(math.log(2), 2) /
abs(math.log(self.p)))) abs(math.log(self.probability))))
self.count = 0 self.count = 0
self.filter = bitarray.bitarray(self.m) self.filter = bitarray.bitarray(self.bits)
self.filter.setall(False) self.filter.setall(False)


def __contains__(self, key): def __contains__(self, key):
Expand All @@ -104,7 +105,7 @@ def __contains__(self, key):
""" """
if not isinstance(key, list): if not isinstance(key, list):
hashes = fnv_hashes(key, self.k, self.m) hashes = fnv_hashes(key, self.hashes, self.bits)
else: else:
hashes = key hashes = key


Expand All @@ -128,7 +129,7 @@ def add(self, key):
True True
""" """
h = fnv_hashes(key, self.k, self.m) h = fnv_hashes(key, self.hashes, self.bits)
if h in self: if h in self:
return True return True
for k in h: for k in h:
Expand Down Expand Up @@ -170,10 +171,10 @@ def __init__(self, bits=8192, probability=0.001, mode=SMALL_SET_GROWTH):
raise ValueError("Bits must be a power of two.") raise ValueError("Bits must be a power of two.")
if not probability or probability < 0: if not probability or probability < 0:
raise ValueError("Probability must be a decimal less than 0.") raise ValueError("Probability must be a decimal less than 0.")
self.s = mode self.scale = mode
self.r = 0.9 self.ratio = 0.9
self.m = bits self.bits = bits
self.p = probability self.probability = probability
self.filters = [BloomFilter(bits=bits, probability=probability)] self.filters = [BloomFilter(bits=bits, probability=probability)]
self.filter = self.filters[0] self.filter = self.filters[0]


Expand Down Expand Up @@ -210,8 +211,8 @@ def add(self, key):
if dupe: if dupe:
return dupe return dupe
if self.filter.count == self.filter.capacity: if self.filter.count == self.filter.capacity:
prob = self.filter.p * self.r prob = self.filter.probability * self.ratio
bits = self.m * pow(self.s, len(self.filters)) bits = self.bits * pow(self.scale, len(self.filters))
new_filter = BloomFilter(bits=bits, probability=prob) new_filter = BloomFilter(bits=bits, probability=prob)
self.filter = new_filter self.filter = new_filter
self.filters = [new_filter] + self.filters self.filters = [new_filter] + self.filters
Expand Down

0 comments on commit 5e62565

Please sign in to comment.