Skip to content

Commit

Permalink
Merge pull request #11 from grandinquisitor/master
Browse files Browse the repository at this point in the history
Implement make_hashes as a generator
  • Loading branch information
jaybaird committed Jun 10, 2014
2 parents f12e7b8 + ecc9615 commit 9f968c7
Showing 1 changed file with 25 additions and 15 deletions.
40 changes: 25 additions & 15 deletions pybloom/pybloom.py
Expand Up @@ -73,19 +73,22 @@ def make_hashfuncs(num_slices, num_bits):
num_salts, extra = divmod(num_slices, len(fmt))
if extra:
num_salts += 1
salts = [hashfn(hashfn(pack('I', i)).digest()) for i in xrange(num_salts)]
salts = tuple(hashfn(hashfn(pack('I', i)).digest()) for i in xrange(num_salts))
def _make_hashfuncs(key):
if isinstance(key, unicode):
key = key.encode('utf-8')
else:
key = str(key)
rval = []
i = 0
for salt in salts:
h = salt.copy()
h.update(key)
rval.extend(uint % num_bits for uint in unpack(fmt, h.digest()))
del rval[num_slices:]
return rval
for uint in unpack(fmt, h.digest()):
yield uint % num_bits
i += 1
if i >= num_slices:
return

return _make_hashfuncs


Expand Down Expand Up @@ -150,10 +153,7 @@ def __contains__(self, key):
"""
bits_per_slice = self.bits_per_slice
bitarray = self.bitarray
if not isinstance(key, list):
hashes = self.make_hashes(key)
else:
hashes = key
hashes = self.make_hashes(key)
offset = 0
for k in hashes:
if not bitarray[offset + k]:
Expand All @@ -174,21 +174,31 @@ def add(self, key, skip_check=False):
False
>>> b.add("hello")
True
>>> b.count
1
"""
bitarray = self.bitarray
bits_per_slice = self.bits_per_slice
hashes = self.make_hashes(key)
if not skip_check and hashes in self:
return True
found_all_bits = True
if self.count > self.capacity:
raise IndexError("BloomFilter is at capacity")
offset = 0
for k in hashes:
if not skip_check and found_all_bits and not bitarray[offset + k]:
found_all_bits = False
self.bitarray[offset + k] = True
offset += bits_per_slice
self.count += 1
return False

if skip_check:
self.count += 1
return False
elif not found_all_bits:
self.count += 1
return False
else:
return True

def copy(self):
"""Return a copy of this bloom filter.
Expand Down Expand Up @@ -370,7 +380,7 @@ def add(self, key):
@property
def capacity(self):
"""Returns the total capacity for all filters in this SBF"""
return sum([f.capacity for f in self.filters])
return sum(f.capacity for f in self.filters)

@property
def count(self):
Expand Down Expand Up @@ -419,7 +429,7 @@ def fromfile(cls, f):

def __len__(self):
"""Returns the total number of elements stored in this SBF"""
return sum([f.count for f in self.filters])
return sum(f.count for f in self.filters)


if __name__ == "__main__":
Expand Down

0 comments on commit 9f968c7

Please sign in to comment.