Permalink
Browse files

add exception for when set operations are used on filters of different sizes or error rates
  • Loading branch information...
1 parent 0688743 commit 3692fadc6aa061b098dabfa45eab62237b50b725 @jaybaird committed Mar 11, 2011
Showing with 45 additions and 9 deletions.
  1. +12 −4 pybloom/pybloom.py
  2. +33 −5 pybloom/tests.py
View
16 pybloom/pybloom.py
@@ -44,7 +44,8 @@
__version__ = '1.1'
__author__ = "Jay Baird <jay@mochimedia.com>, Bob Ippolito <bob@redivi.com>,\
- Marius Eriksen <marius@monkey.org>, Alex Brassetvik <alex@brasetvik.com>"
+ Marius Eriksen <marius@monkey.org>,\
+ Alex Brassetvik <alex@brasetvik.com>"
def make_hashfuncs(num_slices, num_bits):
if num_bits >= (1 << 31):
@@ -132,7 +133,6 @@ def _setup(self, error_rate, num_slices, bits_per_slice, capacity, count):
self.capacity = capacity
self.num_bits = num_slices * bits_per_slice
self.count = count
- #print '\n'.join('%s = %s' % tpl for tpl in sorted(self.__dict__.items()))
self.make_hashes = make_hashfuncs(self.num_slices, self.bits_per_slice)
def __contains__(self, key):
@@ -197,6 +197,10 @@ def copy(self):
def union(self, other):
""" Calculates the union of the two underlying bitarrays and returns
a new bloom filter object."""
+ if self.capacity != other.capacity or \
+ self.error_rate != other.error_rate:
+ raise ValueError("Unioning filters requires both filters to have \
+both the same capacity and error rate")
new_bloom = self.copy()
new_bloom.bitarray = new_bloom.bitarray | other.bitarray
return new_bloom
@@ -207,13 +211,17 @@ def __or__(self, other):
def intersection(self, other):
""" Calculates the intersection of the two underlying bitarrays and returns
a new bloom filter object."""
+ if self.capacity != other.capacity or \
+ self.error_rate != other.error_rate:
+ raise ValueError("Intersecting filters requires both filters to \
+have equal capacity and error rate")
new_bloom = self.copy()
new_bloom.bitarray = new_bloom.bitarray & other.bitarray
return new_bloom
-
+
def __and__(self, other):
return self.intersection(other)
-
+
def tofile(self, f):
"""Write the bloom filter to file object `f'. Underlying bits
are written as machine values. This is much more space
View
38 pybloom/tests.py
@@ -5,7 +5,7 @@
import tempfile
from pybloom import BloomFilter, ScalableBloomFilter
from unittest import TestSuite
-
+
def additional_tests():
proj_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
readme_fn = os.path.join(proj_dir, 'README.txt')
@@ -25,8 +25,8 @@ def test_union(self):
bloom_two.add(char)
new_bloom = bloom_one.union(bloom_two)
for char in chars:
- assert(char in new_bloom)
-
+ self.assert_(char in new_bloom)
+
def test_intersection(self):
bloom_one = BloomFilter(100, 0.001)
bloom_two = BloomFilter(100, 0.001)
@@ -37,9 +37,37 @@ def test_intersection(self):
bloom_two.add(char)
new_bloom = bloom_one.intersection(bloom_two)
for char in chars[:len(chars)/2]:
- assert(char in new_bloom)
+ self.assert_(char in new_bloom)
for char in chars[len(chars)/2:]:
- assert(char not in new_bloom)
+ self.assert_(char not in new_bloom)
+
+ def test_intersection_capacity_fail(self):
+ bloom_one = BloomFilter(1000, 0.001)
+ bloom_two = BloomFilter(100, 0.001)
+ def _run():
+ new_bloom = bloom_one.intersection(bloom_two)
+ self.assertRaises(ValueError, _run)
+
+ def test_union_capacity_fail(self):
+ bloom_one = BloomFilter(1000, 0.001)
+ bloom_two = BloomFilter(100, 0.001)
+ def _run():
+ new_bloom = bloom_one.union(bloom_two)
+ self.assertRaises(ValueError, _run)
+
+ def test_intersection_k_fail(self):
+ bloom_one = BloomFilter(100, 0.001)
+ bloom_two = BloomFilter(100, 0.01)
+ def _run():
+ new_bloom = bloom_one.intersection(bloom_two)
+ self.assertRaises(ValueError, _run)
+
+ def test_union_k_fail(self):
+ bloom_one = BloomFilter(100, 0.01)
+ bloom_two = BloomFilter(100, 0.001)
+ def _run():
+ new_bloom = bloom_one.union(bloom_two)
+ self.assertRaises(ValueError, _run)
class Serialization(unittest.TestCase):
SIZE = 12345

0 comments on commit 3692fad

Please sign in to comment.