Skip to content

Commit

Permalink
Implement make_hashes as a generator
Browse files Browse the repository at this point in the history
Because hashes are now generated lazily, a lookup can skip computing the remaining hashes as soon as one bit is found unset. Result is 30% faster.
  • Loading branch information
nick pisarro committed Mar 23, 2014
1 parent 881cd92 commit 8d7c3a0
Showing 1 changed file with 13 additions and 12 deletions.
25 changes: 13 additions & 12 deletions pybloom/pybloom.py
Expand Up @@ -69,19 +69,22 @@ def make_hashfuncs(num_slices, num_bits):
num_salts, extra = divmod(num_slices, len(fmt))
if extra:
num_salts += 1
salts = [hashfn(hashfn(pack('I', i)).digest()) for i in xrange(num_salts)]
salts = tuple(hashfn(hashfn(pack('I', i)).digest()) for i in xrange(num_salts))
def _make_hashfuncs(key):
if isinstance(key, unicode):
key = key.encode('utf-8')
else:
key = str(key)
rval = []
i = 0
for salt in salts:
h = salt.copy()
h.update(key)
rval.extend(uint % num_bits for uint in unpack(fmt, h.digest()))
del rval[num_slices:]
return rval
for uint in unpack(fmt, h.digest()):
yield uint % num_bits
i += 1
if i >= num_slices:
return

return _make_hashfuncs


Expand Down Expand Up @@ -146,10 +149,7 @@ def __contains__(self, key):
"""
bits_per_slice = self.bits_per_slice
bitarray = self.bitarray
if not isinstance(key, list):
hashes = self.make_hashes(key)
else:
hashes = key
hashes = self.make_hashes(key)
offset = 0
for k in hashes:
if not bitarray[offset + k]:
Expand All @@ -175,16 +175,17 @@ def add(self, key, skip_check=False):
bitarray = self.bitarray
bits_per_slice = self.bits_per_slice
hashes = self.make_hashes(key)
if not skip_check and hashes in self:
return True
found_all_bits = True
if self.count > self.capacity:
raise IndexError("BloomFilter is at capacity")
offset = 0
for k in hashes:
if not skip_check and found_all_bits and not bitarray[offset + k]:
found_all_bits = False
self.bitarray[offset + k] = True
offset += bits_per_slice
self.count += 1
return False
return not skip_check and found_all_bits

def copy(self):
"""Return a copy of this bloom filter.
Expand Down

0 comments on commit 8d7c3a0

Please sign in to comment.