Permalink
Browse files

removed default fns, moved make-optimal so it compiles, fixed tests …

…added helpers for the permuted fns
  • Loading branch information...
1 parent fb1472a commit 319f735970fde9e657c7b883845d27a525a21982 @rn-superg rn-superg committed May 21, 2010
Showing with 24 additions and 20 deletions.
  1. +1 −1 README.textile
  2. +14 −9 src/com/github/kyleburton/clj_bloom.clj
  3. +9 −10 test/com/github/kyleburton/clj_bloom_test.clj
View
@@ -162,7 +162,7 @@ h1. References
* "Bloom Filters: A Powerful Tool":http://www.rubyinside.com/bloom-filters-a-powerful-tool-599.html
* "Counting Bloom Filter implemented in Ruby":http://github.com/igrigorik/bloomfilter
* "Scalable Data-sets: Bloom Filters in Ruby":http://www.igvita.com/2008/12/27/scalable-datasets-bloom-filters-in-ruby/
-
+* "Bloom Filters - the math":http://pages.cs.wisc.edu/~cao/papers/summary-cache/node8.html
h1. License
@@ -49,14 +49,6 @@
(defn make-bloom-filter [num-bits hash-fn]
(struct bloom-filter hash-fn num-bits (java.util.BitSet. num-bits) (atom 0)))
-(defn make-optimal-filter [entries prob & [hash-fn]]
- (let [[m k] (optimal-n-and-k entries prob)]
- (make-bloom-filter
- m
- (make-permuted-hash-fn
- (or hash-fn make-hash-fn-crc32)
- (map str (range 0 k))))))
-
(defn add! [filter #^String string]
(reset! (:insertions filter)
(inc @(:insertions filter)))
@@ -105,4 +97,17 @@
;; (optimal-n-and-k 300000 0.01) [2875518 7]
;; (optimal-n-and-k 300000 0.001) [4313277 10]
-;; (* 9.6 300000) 2880000.0
+;; (* 9.6 300000) 2880000.0
+
+(defn make-optimal-filter [entries prob & [hash-fn]]
+ (let [[m k] (optimal-n-and-k entries prob)]
+ (make-bloom-filter
+ m
+ (make-permuted-hash-fn
+ (or hash-fn make-hash-fn-crc32)
+ (map str (range 0 k))))))
+
+(defn make-crc32 [k] (make-permuted-hash-fn make-hash-fn-crc32 (map str (range 0 k))))
+(defn make-adler32 [k] (make-permuted-hash-fn make-hash-fn-adler32 (map str (range 0 k))))
+(defn make-md5 [k] (make-permuted-hash-fn make-hash-fn-md5 (map str (range 0 k))))
+(defn make-sha1 [k] (make-permuted-hash-fn make-hash-fn-sha1 (map str (range 0 k))))
@@ -12,20 +12,20 @@
(deftest make-bloom-filter-test
(testing "creating a bloom filter"
(is (thrown? Exception (bf/make-bloom-filter)))
- (is (bf/make-bloom-filter 1024)))
+ (is (bf/make-bloom-filter 1024 (bf/make-crc32 5))))
(testing "new bloom filters should be empty"
- (is (.isEmpty (:bitarray (bf/make-bloom-filter 1024))))))
+ (is (.isEmpty (:bitarray (bf/make-bloom-filter 1024 (bf/make-crc32 5)))))))
(deftest add-test
(testing "add shoud not be empty"
- (let [filter (bf/make-bloom-filter 1024)]
+ (let [filter (bf/make-bloom-filter 1024 (bf/make-crc32 5))]
(bf/add! filter "foo")
(is (not (.isEmpty (:bitarray filter))))
(is (= 1 (bf/insertions filter))))))
(deftest include?-test
(testing "after adding, a string should be in the filter"
- (let [filter (bf/make-bloom-filter 1024)]
+ (let [filter (bf/make-bloom-filter 1024 (bf/make-crc32 5))]
(is (not (bf/include? filter "foo")))
(bf/add! filter "foo")
(is (bf/include? filter "foo"))
@@ -36,14 +36,13 @@
(testing "The core hash functions should produce different reuslts"
(dorun
(doseq [pair (cmb/combinations
- [(sort (bf/*hash-code-fn* "foo" 100))
- (sort (bf/*crc32-fn* "foo" 100))
- (sort (bf/*adler32-fn* "foo" 100))
- (sort (bf/*md5-fn* "foo" 100))
- (sort (bf/*sha1-fn* "Foo" 100))]
+ [(sort ((bf/make-permuted-hash-fn bf/make-hash-fn-hash-code ["1" "2" "3" "4" "5"]) "foo" 100))
+ (sort ((bf/make-permuted-hash-fn bf/make-hash-fn-crc32 ["1" "2" "3" "4" "5"]) "foo" 100))
+ (sort ((bf/make-permuted-hash-fn bf/make-hash-fn-adler32 ["1" "2" "3" "4" "5"]) "foo" 100))
+ (sort ((bf/make-permuted-hash-fn bf/make-hash-fn-md5 ["1" "2" "3" "4" "5"]) "foo" 100))
+ (sort ((bf/make-permuted-hash-fn bf/make-hash-fn-sha1 ["1" "2" "3" "4" "5"]) "foo" 100))]
2)]
(is (not (= (first pair) (second pair))))))))
-;; (cmb/combinations [1 2 3] 2)

0 comments on commit 319f735

Please sign in to comment.