Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

simple bloom impl

  • Loading branch information...
commit df85c589bceee08f86ca058c068145404f9960be 0 parents
@rn-superg rn-superg authored
4 .gitignore
@@ -0,0 +1,4 @@
+pom.xml
+*jar
+lib
+classes
17 README.textile
@@ -0,0 +1,17 @@
+h1. clj-bloom
+
+"Bloom Filter":http://en.wikipedia.org/wiki/Bloom_filter implementation in Clojure. Based loosely on "Jeff Foster's Implementation":http://github.com/fffej/clojure-snippets/blob/master/bloom.clj
+
+h1. Usage
+
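+ (use 'com.github.kyleburton.clj-bloom)
+ (def flt (make-bloom-filter 1024))
+ (add! flt "foo")
+ (include? flt "foo") ;=> true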
+
+h1. Installation
+
+Instructions for Leiningen...
+
+Instructions for Maven...
+
+h1. License
+
+Same license as Clojure: the "Eclipse Public License 1.0":http://www.eclipse.org/legal/epl-v10.html
49 examples/words.clj
@@ -0,0 +1,49 @@
+(ns words
+ (:require
+ [ clojure.contrib.duck-streams :as ds]
+ [com.github.kyleburton.clj-bloom :as bf]))
+
+;; Path of the line-per-word file that `run` reads and loads into the filter.
+(def *words-file* "/usr/share/dict/words")
+
+(defn make-hash-fn-crc32
+  "Returns a hash fn of [s bits] that lower-cases s with the salt x appended
+  and maps the CRC32 checksum of the result into the range [0, bits).
+  The returned fn shares one CRC32 instance across calls, so concurrent
+  calls of the same fn may interfere with each other."
+  [#^String x]
+  (let [crc (java.util.zip.CRC32.)]
+    (fn [#^String s bits]
+      (.reset crc)
+      ;; Name the charset explicitly: the no-arg .getBytes uses the platform
+      ;; default, which makes the hash differ from machine to machine.
+      (.update crc (.getBytes (.toLowerCase (str s x)) "UTF-8"))
+      (mod (.getValue crc) bits))))
+
+(defn make-hash-fn-adler32
+  "Returns a hash fn of [s bits] that lower-cases s with the salt x appended
+  and maps the Adler-32 checksum of the result into the range [0, bits).
+  The returned fn shares one Adler32 instance across calls, so concurrent
+  calls of the same fn may interfere with each other."
+  [#^String x]
+  (let [adler (java.util.zip.Adler32.)]
+    (fn [#^String s bits]
+      (.reset adler)
+      ;; Name the charset explicitly: the no-arg .getBytes uses the platform
+      ;; default, which makes the hash differ from machine to machine.
+      (.update adler (.getBytes (.toLowerCase (str s x)) "UTF-8"))
+      (mod (.getValue adler) bits))))
+
+
+(defn run
+  "Builds a (* 10 1024 1024)-bit bloom filter that uses hash-fns, adds every
+  lower-cased line of *words-file* to it, then prints HIT or MISS for each
+  word of a fixed sample sentence. Returns nil."
+  [hash-fns]
+  (let [filter (bf/make-bloom-filter (* 10 1024 1024) hash-fns)]
+    ;; doseq is already eager and returns nil, so no dorun wrapper is needed.
+    (doseq [#^String line (ds/read-lines *words-file*)]
+      (bf/add! filter (.toLowerCase line)))
+    (doseq [#^String w (.split "The quick brown ornithopter hyper-jumped over the lazy trollusk" "\\s+")]
+      (if (bf/include? filter (.toLowerCase w))
+        (prn (format "HIT: '%s' in the filter" w))
+        (prn (format "MISS: '%s' not in the filter" w))))))
+
+;; Recorded timings: CRC32:12s, hashCode:11s, Adler32:12s, md5:13s, sha1:14s
+;; (time (run))
+;;
+;; Time `run` once per hash-function family, printing the family label first.
+(let [salts ["1" "2" "3" "4" "5"]]
+  (doseq [[label hash-fns] [["fn:hashCode" bf/*default-hash-fns*]
+                            ["fn:adler32" (map make-hash-fn-adler32 salts)]
+                            ["fn:crc32" (map make-hash-fn-crc32 salts)]
+                            ["fn:md5" (map bf/make-hash-fn-md5 salts)]
+                            ["fn:sha1" (map bf/make-hash-fn-sha1 salts)]]]
+    (prn label)
+    (time (run hash-fns))))
6 project.clj
@@ -0,0 +1,6 @@
+(defproject com.github.kyleburton/clj-bloom "1.0.0-SNAPSHOT"
+  :description "Bloom filter implementation in Clojure."
+  :dependencies
+  [[org.clojure/clojure "1.1.0"]
+   [org.clojure/clojure-contrib "1.1.0"]]
+  ;; swank-clojure is editor tooling only; keep it out of the runtime
+  ;; dependencies that users of this library would otherwise inherit.
+  :dev-dependencies
+  [[swank-clojure "1.2.1"]])
79 src/com/github/kyleburton/clj_bloom.clj
@@ -0,0 +1,79 @@
+(ns com.github.kyleburton.clj-bloom)
+
+(defn make-hash-fn-hash-code
+  "Returns a hash fn of [s bits] that appends the salt x to s and maps the
+  String hashCode of the result into the range [0, bits)."
+  [#^String x]
+  (fn [#^String s bits]
+    (let [salted (str s x)]
+      ;; mod (unlike rem) stays non-negative for negative hash codes.
+      (mod (.hashCode salted) bits))))
+
+(defn make-hash-fn-crc32
+  "Returns a hash fn of [s bits] that appends the salt x to s and maps the
+  CRC32 checksum of the result into the range [0, bits).
+  The returned fn shares one CRC32 instance across calls, so concurrent
+  calls of the same fn may interfere with each other."
+  [#^String x]
+  (let [crc (java.util.zip.CRC32.)]
+    (fn [#^String s bits]
+      (.reset crc)
+      ;; Name the charset explicitly: the no-arg .getBytes uses the platform
+      ;; default, which makes the hash differ from machine to machine.
+      (.update crc (.getBytes (str s x) "UTF-8"))
+      (mod (.getValue crc) bits))))
+
+(defn make-hash-fn-adler32
+  "Returns a hash fn of [s bits] that appends the salt x to s and maps the
+  Adler-32 checksum of the result into the range [0, bits).
+  The returned fn shares one Adler32 instance across calls, so concurrent
+  calls of the same fn may interfere with each other."
+  [#^String x]
+  (let [adler (java.util.zip.Adler32.)]
+    (fn [#^String s bits]
+      (.reset adler)
+      ;; Name the charset explicitly: the no-arg .getBytes uses the platform
+      ;; default, which makes the hash differ from machine to machine.
+      (.update adler (.getBytes (str s x) "UTF-8"))
+      (mod (.getValue adler) bits))))
+
+(defn make-hash-fn-md5
+  "Returns a hash fn of [s bits] that appends the salt x to s, reads the MD5
+  digest of the result as a non-negative integer and reduces it modulo bits.
+  The returned fn shares one MessageDigest instance across calls, so
+  concurrent calls of the same fn may interfere with each other."
+  [#^String x]
+  (let [md5 (java.security.MessageDigest/getInstance "MD5")]
+    (fn [#^String s bits]
+      (.reset md5)
+      ;; Name the charset explicitly: the no-arg .getBytes uses the platform
+      ;; default, which makes the hash differ from machine to machine.
+      (.update md5 (.getBytes (str s x) "UTF-8"))
+      (.longValue
+        ;; signum 1 => interpret the digest bytes as an unsigned magnitude.
+        (.mod (java.math.BigInteger. 1 (.digest md5))
+              (java.math.BigInteger/valueOf bits))))))
+
+
+(defn make-hash-fn-sha1
+  "Returns a hash fn of [s bits] that appends the salt x to s, reads the
+  SHA-1 digest of the result as a non-negative integer and reduces it
+  modulo bits.
+  The returned fn shares one MessageDigest instance across calls, so
+  concurrent calls of the same fn may interfere with each other."
+  [#^String x]
+  ;; \"SHA-1\" is the standard JCA algorithm name; \"SHA1\" is only an alias.
+  (let [sha1 (java.security.MessageDigest/getInstance "SHA-1")]
+    (fn [#^String s bits]
+      (.reset sha1)
+      ;; Name the charset explicitly: the no-arg .getBytes uses the platform
+      ;; default, which makes the hash differ from machine to machine.
+      (.update sha1 (.getBytes (str s x) "UTF-8"))
+      (.longValue
+        ;; signum 1 => interpret the digest bytes as an unsigned magnitude.
+        (.mod (java.math.BigInteger. 1 (.digest sha1))
+              (java.math.BigInteger/valueOf bits))))))
+
+;; Hash fns used when make-bloom-filter is given only a size: five
+;; hashCode-based hashers salted "1" through "5".
+(def *default-hash-fns*
+  (vec (map make-hash-fn-hash-code ["1" "2" "3" "4" "5"])))
+
+;; A bloom filter: its hash fns, its size in bits, and the backing bit
+;; storage (a java.util.BitSet when built by make-bloom-filter).
+(defstruct bloom-filter :hash-fns :num-bits :bitarray)
+
+(defn make-bloom-filter
+  "Creates an empty bloom-filter struct backed by a fresh java.util.BitSet
+  of num-bits bits. hash-fns is a collection of fns of [string num-bits]
+  returning a bit index; it defaults to *default-hash-fns*."
+  ([num-bits]
+     (make-bloom-filter num-bits *default-hash-fns*))
+  ([num-bits hash-fns]
+     (struct-map bloom-filter
+       :hash-fns hash-fns
+       :num-bits num-bits
+       :bitarray (java.util.BitSet. num-bits))))
+
+(defn add!
+  "Sets, for each hash fn of filter, the bit that the fn maps string to.
+  Mutates the filter's bit array in place and returns nil."
+  [filter #^String string]
+  ;; Look the fields up once and hint the BitSet so .set is not reflective.
+  (let [#^java.util.BitSet bitarray (:bitarray filter)
+        num-bits (:num-bits filter)]
+    ;; doseq is eager and returns nil; wrapping it in dorun added nothing.
+    (doseq [hfn (:hash-fns filter)]
+      (.set bitarray (int (hfn string num-bits))))))
+
+
+(defn include?
+  "Returns true when every hash fn of filter maps string to a bit that is
+  set in the filter's bit array, and false as soon as one such bit is clear.
+  A filter with no hash fns returns true for every string."
+  [filter #^String string]
+  ;; Look the fields up once and hint the BitSet so .get is not reflective.
+  (let [#^java.util.BitSet bitarray (:bitarray filter)
+        num-bits (:num-bits filter)]
+    ;; every? stops at the first clear bit, like the explicit loop it replaces.
+    (every? (fn [hfn] (.get bitarray (int (hfn string num-bits))))
+            (:hash-fns filter))))
+
+
+
+
30 test/com/github/kyleburton/clj_bloom_test.clj
@@ -0,0 +1,30 @@
+(ns com.github.kyleburton.clj-bloom-test
+ (:require [com.github.kyleburton.clj-bloom :as bf])
+ (:use [clojure.test]))
+
+;; Construction needs at least a size; a new filter has no bits set but
+;; does carry hash fns.
+(deftest make-bloom-filter-test
+  (testing "creating a bloom filter"
+    (is (thrown? Exception (bf/make-bloom-filter)))
+    (is (bf/make-bloom-filter 1024)))
+  (testing "new bloom filters should be empty"
+    (let [fresh (bf/make-bloom-filter 1024)]
+      (is (.isEmpty (:bitarray fresh)))
+      (is (not (nil? (:hash-fns fresh)))))))
+
+;; Adding a string must set at least one bit in the backing bit array.
+(deftest add-test
+  (testing "after add!, the filter should not be empty"
+    (let [filter (bf/make-bloom-filter 1024)]
+      (bf/add! filter "foo")
+      (is (not (.isEmpty (:bitarray filter)))))))
+
+;; (add-test)
+
+;; Membership is false before add! and true after it. The final "bar" check
+;; passes only while "bar" does not collide with "foo" in this 1024-bit
+;; filter under the default hash fns.
+(deftest include?-test
+  (testing "after adding, a string should be in the filter"
+    (let [bloom (bf/make-bloom-filter 1024)]
+      (is (not (bf/include? bloom "foo")))
+      (bf/add! bloom "foo")
+      (is (bf/include? bloom "foo"))
+      (is (not (bf/include? bloom "bar"))))))
+
+;; (include?-test)
+
Please sign in to comment.
Something went wrong with that request. Please try again.