Permalink
Browse files

re #2, add changelog, union, intersection and copy to BloomFilter, no…

…t yet available in SBF. Bumped version number to 1.1
  • Loading branch information...
1 parent e505911 commit 0688743c21e9d13c5ce4e0f27d5170aa9b7e464f @jaybaird committed Mar 11, 2011
Showing with 59 additions and 2 deletions.
  1. +3 −0 CHANGES.txt
  2. +1 −1 LICENSE.txt
  3. +28 −1 pybloom/pybloom.py
  4. +27 −0 pybloom/tests.py
View
@@ -0,0 +1,3 @@
+Changes in 1.1
+==============
+Added copy, intersection and union functions to BloomFilter
View
@@ -1,4 +1,4 @@
-Copyright (c) <2009> <Jay Baird and Bob Ippolito>
+Copyright (c) <2011> <Jay Baird and Bob Ippolito>
Permission is hereby granted, free of charge, to any person
obtaining a copy of this software and associated documentation
View
@@ -42,7 +42,7 @@
except ImportError:
raise ImportError('pybloom requires bitarray >= 0.3.4')
-__version__ = '1.0.3'
+__version__ = '1.1'
__author__ = "Jay Baird <jay@mochimedia.com>, Bob Ippolito <bob@redivi.com>,\
Marius Eriksen <marius@monkey.org>, Alex Brassetvik <alex@brasetvik.com>"
@@ -187,6 +187,33 @@ def add(self, key, skip_check=False):
self.count += 1
return False
+ def copy(self):
+ """Return a new BloomFilter that duplicates this filter's bits.
+
+ The new filter is constructed with this filter's `capacity` and
+ `error_rate`, and its `bitarray` is replaced by a copy of this
+ filter's `bitarray`.
+
+ NOTE(review): `count` is not assigned here, so the copy keeps
+ whatever value the constructor gives it -- confirm this is intended.
+ """
+ new_filter = BloomFilter(self.capacity, self.error_rate)
+ new_filter.bitarray = self.bitarray.copy()
+ return new_filter
+
+ def union(self, other):
+ """ Calculates the union of the two underlying bitarrays (bitwise
+ OR of `self.bitarray` and `other.bitarray`) and returns the result
+ as a new bloom filter object built from `self.copy()`.
+
+ NOTE(review): nothing here checks that `other` has the same
+ capacity and error rate as `self` -- presumably required for the
+ result to be meaningful; confirm against callers."""
+ new_bloom = self.copy()
+ new_bloom.bitarray = new_bloom.bitarray | other.bitarray
+ return new_bloom
+
+ def __or__(self, other):
+ """Operator form of `union`: `a | b` returns `a.union(b)`."""
+ return self.union(other)
+
+ def intersection(self, other):
+ """ Calculates the intersection of the two underlying bitarrays
+ (bitwise AND of `self.bitarray` and `other.bitarray`) and returns
+ the result as a new bloom filter object built from `self.copy()`.
+
+ NOTE(review): nothing here checks that `other` has the same
+ capacity and error rate as `self` -- presumably required for the
+ result to be meaningful; confirm against callers."""
+ new_bloom = self.copy()
+ new_bloom.bitarray = new_bloom.bitarray & other.bitarray
+ return new_bloom
+
+ def __and__(self, other):
+ """Operator form of `intersection`: `a & b` returns `a.intersection(b)`."""
+ return self.intersection(other)
+
def tofile(self, f):
"""Write the bloom filter to file object `f'. Underlying bits
are written as machine values. This is much more space
View
@@ -14,6 +14,33 @@ def additional_tests():
suite.addTest(doctest.DocFileSuite(readme_fn, module_relative=False))
return suite
+class TestUnionIntersection(unittest.TestCase):
+ """Exercises BloomFilter.union and BloomFilter.intersection using the
+ lowercase letters 'a'..'z' as keys."""
+ def test_union(self):
+ """The union of two filters holding the two halves of a-z must
+ report every letter as present."""
+ bloom_one = BloomFilter(100, 0.001)
+ bloom_two = BloomFilter(100, 0.001)
+ # chr(97)..chr(122) are the 26 letters 'a'..'z'.
+ chars = [chr(i) for i in range(97, 123)]
+ # NOTE(review): `len(chars)/2` is used as a slice index, so it must
+ # evaluate to an int (integer-division semantics) -- confirm the
+ # targeted Python version.
+ # Second half of the alphabet goes into bloom_one ...
+ for char in chars[len(chars)/2:]:
+ bloom_one.add(char)
+ # ... and the first half into bloom_two.
+ for char in chars[:len(chars)/2]:
+ bloom_two.add(char)
+ new_bloom = bloom_one.union(bloom_two)
+ for char in chars:
+ assert(char in new_bloom)
+
+ def test_intersection(self):
+ """The intersection of a filter holding all of a-z with one holding
+ only the first half must contain the first half and not the second."""
+ bloom_one = BloomFilter(100, 0.001)
+ bloom_two = BloomFilter(100, 0.001)
+ chars = [chr(i) for i in range(97, 123)]
+ # bloom_one holds every letter; bloom_two only the first half.
+ for char in chars:
+ bloom_one.add(char)
+ for char in chars[:len(chars)/2]:
+ bloom_two.add(char)
+ new_bloom = bloom_one.intersection(bloom_two)
+ for char in chars[:len(chars)/2]:
+ assert(char in new_bloom)
+ # NOTE(review): this negative check assumes none of these keys is
+ # reported as a false positive by the filter -- confirm it is
+ # deterministic for these inputs.
+ for char in chars[len(chars)/2:]:
+ assert(char not in new_bloom)
+
class Serialization(unittest.TestCase):
SIZE = 12345
EXPECTED = set([random.randint(0, 10000100) for _ in xrange(SIZE)])

0 comments on commit 0688743

Please sign in to comment.