-
Notifications
You must be signed in to change notification settings - Fork 9
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
first pass at an expanding bloom filter!
- Loading branch information
Showing
5 changed files
with
139 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
''' Expanding BloomFilter, python implementation | ||
License: MIT | ||
Author: Tyler Barrus (barrust@gmail.com) | ||
URL: https://github.com/barrust/pyprobables | ||
''' | ||
from __future__ import (unicode_literals, absolute_import, print_function) | ||
|
||
from . bloom import (BloomFilter) | ||
|
||
|
||
class ExpandingBloomFilter(object):
    ''' Expanding Bloom Filter: a list of bloom filters to which a new
        filter is appended once the newest one has had its estimated number
        of elements added

        Args:
            est_elements (int): The number of elements each underlying \
            bloom filter is built for; also the growth threshold
            false_positive_rate (float): The false positive rate handed to \
            each underlying bloom filter
            hash_function (function): Hashing strategy handed to each \
            underlying bloom filter
        Note:
            Every underlying filter is built from the same parameters; the \
            hashes calculated by the first filter are reused to query and \
            update all of the others '''

    def __init__(self, est_elements=None, false_positive_rate=None,
                 hash_function=None):
        ''' store the settings and build the initial bloom filter '''
        self._blooms = list()
        self.__fpr = false_positive_rate
        self.__est_elements = est_elements
        self.__hash_func = hash_function
        self.__added_elements = 0  # number of add calls, stored or not
        # add in the initial bloom filter!
        self.__add_bloom_filter()

    def __contains__(self, key):
        ''' support the `in` operator; same result as `check(key)` '''
        return self.check(key)

    @property
    def blooms(self):
        ''' list(BloomFilter): The underlying bloom filters, oldest first

            Note:
                This is the internal list itself, not a copy '''
        return self._blooms

    @property
    def false_positive_rate(self):
        ''' float: The false positive rate given at construction '''
        return self.__fpr

    @property
    def estimated_elements(self):
        ''' int: The estimated elements per underlying bloom filter, as \
            given at construction '''
        return self.__est_elements

    @property
    def elements_added(self):
        ''' int: The number of times an add was requested; this includes \
            requests that were skipped because the key already appeared \
            to be present '''
        return self.__added_elements

    def __add_bloom_filter(self):
        ''' build a new bloom and add it on! '''
        # Pass by keyword so that the hash function cannot end up in a
        # different positional slot of the BloomFilter constructor.
        blm = BloomFilter(est_elements=self.__est_elements,
                          false_positive_rate=self.__fpr,
                          hash_function=self.__hash_func)
        self._blooms.append(blm)

    def __check_for_growth(self):
        ''' append a new bloom filter once the newest one has had at least
            the estimated number of elements added '''
        if self._blooms[-1].elements_added >= self.__est_elements:
            self.__add_bloom_filter()

    def check(self, key):
        ''' Check to see if the key is in any of the bloom filters

            Args:
                key (str): The element to be checked
            Returns:
                bool: True if any underlying bloom filter reports the key \
                as present; False otherwise '''
        hashes = self._blooms[0].hashes(key)
        return self.check_alt(hashes)

    def check_alt(self, hashes):
        ''' Check the bloom filters using a pre-calculated hash representation

            Args:
                hashes (list): The hash representation of the element, as \
                returned by the `hashes` method of an underlying filter
            Returns:
                bool: True if any underlying bloom filter reports the \
                hashes as present; False otherwise '''
        return any(blm.check_alt(hashes) for blm in self._blooms)

    def add(self, key, force=False):
        ''' Add the key if it isn't already in one of the bloom filters

            Args:
                key (str): The element to be inserted
                force (bool): If True, insert into the newest bloom filter \
                even when the key already appears to be present '''
        hashes = self._blooms[0].hashes(key)
        self.add_alt(hashes, force)

    def add_alt(self, hashes, force=False):
        ''' Add an element using its pre-calculated hash representation

            Args:
                hashes (list): The hash representation of the element, as \
                returned by the `hashes` method of an underlying filter
                force (bool): If True, insert into the newest bloom filter \
                even when the hashes already appear to be present '''
        # Counted before the membership test on purpose: the total tracks
        # every add request, not only those that were actually stored.
        self.__added_elements += 1
        if force or not self.check_alt(hashes):
            # grow first so the element lands in a filter that has room
            self.__check_for_growth()
            self._blooms[-1].add_alt(hashes)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
# -*- coding: utf-8 -*- | ||
''' Unittest class ''' | ||
from __future__ import (unicode_literals, absolute_import, print_function) | ||
import unittest | ||
from probables import (ExpandingBloomFilter) | ||
|
||
class TestExpandingBloomFilter(unittest.TestCase):
    ''' Unit tests for the expanding bloom filter '''

    def _build_grown_filter(self):
        ''' build a filter sized for 25 elements, grow it with 100 numeric
            keys (not forced), add the two known phrases, and verify that
            it expanded beyond two underlying bloom filters '''
        ebf = ExpandingBloomFilter(est_elements=25, false_positive_rate=0.05)
        # expand it out some first!
        for num in range(100):
            ebf.add("{}".format(num))
        ebf.add('this is a test')
        ebf.add('this is another test')
        self.assertGreater(len(ebf.blooms), 2)
        return ebf

    def test_ebf_init(self):
        ''' a freshly built filter holds exactly one bloom filter '''
        ebf = ExpandingBloomFilter(est_elements=10, false_positive_rate=0.05)
        self.assertEqual(len(ebf.blooms), 1)

    def test_ebf_add_lots(self):
        ''' forcing in 100 elements at 10 per filter yields 10 filters '''
        ebf = ExpandingBloomFilter(est_elements=10, false_positive_rate=0.05)
        keys = ["{}".format(num) for num in range(100)]
        for key in keys:
            ebf.add(key, True)
        self.assertEqual(len(ebf.blooms), 10)

    def test_ebf_add_lots_without_force(self):
        ''' adding "lots" without force: keys that look already present are
            not stored, but every add is still counted '''
        ebf = ExpandingBloomFilter(est_elements=10, false_positive_rate=0.05)
        # simulate false positives... notice it didn't grow a few...
        keys = ["{}".format(num) for num in range(120)]
        for key in keys:
            ebf.add(key)
        self.assertEqual(len(ebf.blooms), 10)
        self.assertEqual(ebf.elements_added, 120)

    def test_ebf_check(self):
        ''' check() finds the added phrases and rejects the others '''
        ebf = self._build_grown_filter()
        expectations = (
            ('this is a test', True),
            ('this is another test', True),
            ('this is yet another test', False),
            ('this is not another test', False),
        )
        for phrase, expected in expectations:
            self.assertEqual(ebf.check(phrase), expected)

    def test_ebf_contains(self):
        ''' the "in" operator finds the added phrases and rejects the
            others '''
        ebf = self._build_grown_filter()
        expectations = (
            ('this is a test', True),
            ('this is another test', True),
            ('this is yet another test', False),
            ('this is not another test', False),
        )
        for phrase, expected in expectations:
            self.assertEqual(phrase in ebf, expected)