From cb71f03c886f913fe9e02b7c159cd6d4525fa0d8 Mon Sep 17 00:00:00 2001 From: Ilya Grigorik Date: Wed, 5 Jan 2011 12:18:46 -0500 Subject: [PATCH] namespace, break into different classes --- bloomfilter.gemspec | 2 +- lib/bloomfilter.rb | 116 +++---------------------------------- lib/bloomfilter/filter.rb | 13 +++++ lib/bloomfilter/native.rb | 98 +++++++++++++++++++++++++++++++ lib/bloomfilter/redis.rb | 64 ++++++++++++++++++++ lib/bloomfilter/version.rb | 3 + lib/redisbloom.rb | 54 ----------------- spec/c_spec.rb | 29 +++++----- spec/helper.rb | 3 +- spec/redis_spec.rb | 18 +++--- 10 files changed, 213 insertions(+), 187 deletions(-) create mode 100644 lib/bloomfilter/filter.rb create mode 100644 lib/bloomfilter/native.rb create mode 100644 lib/bloomfilter/redis.rb create mode 100644 lib/bloomfilter/version.rb delete mode 100644 lib/redisbloom.rb diff --git a/bloomfilter.gemspec b/bloomfilter.gemspec index 439022f..b3aac6c 100644 --- a/bloomfilter.gemspec +++ b/bloomfilter.gemspec @@ -1,6 +1,6 @@ # -*- encoding: utf-8 -*- $:.push File.expand_path("../lib", __FILE__) -require "bloomfilter" +require "bloomfilter/version" Gem::Specification.new do |s| s.name = "bloomfilter" diff --git a/lib/bloomfilter.rb b/lib/bloomfilter.rb index f0779f3..390db88 100644 --- a/lib/bloomfilter.rb +++ b/lib/bloomfilter.rb @@ -1,110 +1,8 @@ -require 'redisbloom' -require 'cbloomfilter' - -class BloomFilter - VERSION = "1.3.1" - - attr_reader :bf - - def initialize(opts = {}) - @opts = { - :size => 100, - :hashes => 4, - :seed => Time.now.to_i, - :bucket => 3, - :raise => false, - :type => :c, - :values => false - }.merge(opts) - - @values = {} - @bf = create_filter - end - - def create_filter(bitmap = nil) - case @opts[:type] - # arg 1: m => size : number of buckets in a bloom filter - # arg 2: k => hashes : number of hash functions - # arg 3: s => seed : seed of hash functions - # arg 4: b => bucket : number of bits in a bloom filter bucket - # arg 5: r => raise : raise on bucket overflow? - when :c then - bf = CBloomFilter.new(@opts[:size], @opts[:hashes], @opts[:seed], @opts[:bucket], @opts[:raise]) - bf.load(bitmap) if !bitmap.nil? - bf - when :redis then RedisBloom.new(@opts) - else - raise "invalid type" - end - end - - def insert(key, value=nil, ttl=nil) - @bf.insert(key, ttl) - @values[key] = value if @opts[:values] - end - alias :[]= :insert - - def include?(*keys) - if @opts[:values] - keys.collect do |key| - @values[key] if @bf.include?(key) - end.compact - else - @bf.include?(*keys) - end - end - alias :key? :include? - - def [](key) - return nil if not (@opts[:values] and include?(key)) - @values[key] - end +require 'redis' +require 'zlib' - def keys - return nil if not @opts[:values] - @values.keys - end - - def delete(key); @bf.delete(key); end - def clear; @bf.clear; end - def size; @bf.num_set; end - def merge!(o); @bf.merge!(o.bf); end - - def bitmap - case @opts[:type] - when :c then @bf.bitmap - else - raise "cannot export bitmap for this bloomfilter type" - end - end - - def marshal_load(ary) - @opts, @values, bitmap = *ary - @bf = create_filter(bitmap) - @bf - end - - def marshal_dump - [@opts, @values, @bf.bitmap] - end - - def self.load(filename) - Marshal.load(File.open(filename, 'r')) - end - - def save(filename) - File.open(filename, 'w') do |f| - f << Marshal.dump(self) - end - end - - def stats - fp = ((1.0 - Math.exp(-(@opts[:hashes] * size).to_f / @opts[:size])) ** @opts[:hashes]) * 100 - printf "Number of filter buckets (m): %d\n" % @opts[:size] - printf "Number of bits per buckets (b): %d\n" % @opts[:bucket] - printf "Number of filter elements (n): %d\n" % size - printf "Number of filter hashes (k) : %d\n" % @opts[:hashes] - printf "Raise on overflow? (r) : %s\n" % @opts[:raise].to_s - printf "Predicted false positive rate = %.2f%\n" % fp - end -end +require 'cbloomfilter' +require 'bloomfilter/filter' +require 'bloomfilter/native' +require 'bloomfilter/redis' +require 'bloomfilter/version' diff --git a/lib/bloomfilter/filter.rb b/lib/bloomfilter/filter.rb new file mode 100644 index 0000000..1841276 --- /dev/null +++ b/lib/bloomfilter/filter.rb @@ -0,0 +1,13 @@ +module BloomFilter + class Filter + def stats + fp = ((1.0 - Math.exp(-(@opts[:hashes] * size).to_f / @opts[:size])) ** @opts[:hashes]) * 100 + printf "Number of filter buckets (m): %d\n" % @opts[:size] + printf "Number of bits per buckets (b): %d\n" % @opts[:bucket] + printf "Number of filter elements (n): %d\n" % size + printf "Number of filter hashes (k) : %d\n" % @opts[:hashes] + printf "Raise on overflow? (r) : %s\n" % @opts[:raise].to_s + printf "Predicted false positive rate = %.2f%\n" % fp + end + end +end \ No newline at end of file diff --git a/lib/bloomfilter/native.rb b/lib/bloomfilter/native.rb new file mode 100644 index 0000000..eec7ca8 --- /dev/null +++ b/lib/bloomfilter/native.rb @@ -0,0 +1,98 @@ +module BloomFilter + class Native < Filter + attr_reader :bf + + def initialize(opts = {}) + @opts = { + :size => 100, + :hashes => 4, + :seed => Time.now.to_i, + :bucket => 3, + :raise => false, + :type => :c, + :values => false + }.merge(opts) + + @values = {} + @bf = create_filter + end + + def create_filter(bitmap = nil) + case @opts[:type] + # arg 1: m => size : number of buckets in a bloom filter + # arg 2: k => hashes : number of hash functions + # arg 3: s => seed : seed of hash functions + # arg 4: b => bucket : number of bits in a bloom filter bucket + # arg 5: r => raise : raise on bucket overflow? + when :c then + bf = CBloomFilter.new(@opts[:size], @opts[:hashes], @opts[:seed], @opts[:bucket], @opts[:raise]) + bf.load(bitmap) if !bitmap.nil? + bf + when :redis then RedisBloom.new(@opts) + else + raise "invalid type" + end + end + + def insert(key, value=nil, ttl=nil) + @bf.insert(key, ttl) + @values[key] = value if @opts[:values] + end + alias :[]= :insert + + def include?(*keys) + if @opts[:values] + keys.collect do |key| + @values[key] if @bf.include?(key) + end.compact + else + @bf.include?(*keys) + end + end + alias :key? :include? + + def [](key) + return nil if not (@opts[:values] and include?(key)) + @values[key] + end + + def keys + return nil if not @opts[:values] + @values.keys + end + + def delete(key); @bf.delete(key); end + def clear; @bf.clear; end + def size; @bf.num_set; end + def merge!(o); @bf.merge!(o.bf); end + + def bitmap + case @opts[:type] + when :c then @bf.bitmap + else + raise "cannot export bitmap for this bloomfilter type" + end + end + + def marshal_load(ary) + @opts, @values, bitmap = *ary + @bf = create_filter(bitmap) + @bf + end + + def marshal_dump + [@opts, @values, @bf.bitmap] + end + + def self.load(filename) + Marshal.load(File.open(filename, 'r')) + end + + def save(filename) + File.open(filename, 'w') do |f| + f << Marshal.dump(self) + end + end + + end +end diff --git a/lib/bloomfilter/redis.rb b/lib/bloomfilter/redis.rb new file mode 100644 index 0000000..4d0e8f8 --- /dev/null +++ b/lib/bloomfilter/redis.rb @@ -0,0 +1,64 @@ +module BloomFilter + class Redis < Filter + + def initialize(opts) + @opts = { + :size => 100, + :hashes => 4, + :seed => Time.now.to_i, + :bucket => 3, + :raise => false, + :values => false, + + :ttl => false, + :server => {} + }.merge opts + @db = ::Redis.new(@opts[:server]) + end + + def insert(key, ttl=nil) + ttl = @opts[:ttl] if ttl.nil? + + indexes_for(key).each do |idx| + @db.incr idx + @db.expire(idx, ttl) if ttl + end + end + alias :[]= :insert + + def delete(key) + indexes_for(key).each do |idx| + if @db.decr(idx).to_i <= 0 + @db.del(idx) + end + end + end + + def include?(*keys) + indexes = keys.collect { |key| indexes_for(key) } + not @db.mget(*indexes.flatten).include? nil + end + alias :key? :include? + + def num_set + @db.keys("rbloom:*").size + end + alias :size :num_set + + def clear + @db.flushdb + end + + private + + # compute index offsets for provided key + def indexes_for(key) + indexes = [] + @opts[:hashes].times do |i| + indexes.push "rbloom:" + (Zlib.crc32("#{key}:#{i+@opts[:seed]}") % @opts[:size]).to_s + end + + indexes + end + end +end diff --git a/lib/bloomfilter/version.rb b/lib/bloomfilter/version.rb new file mode 100644 index 0000000..c44f121 --- /dev/null +++ b/lib/bloomfilter/version.rb @@ -0,0 +1,3 @@ +module BloomFilter + VERSION = "1.3.1" +end \ No newline at end of file diff --git a/lib/redisbloom.rb b/lib/redisbloom.rb deleted file mode 100644 index 53dba36..0000000 --- a/lib/redisbloom.rb +++ /dev/null @@ -1,54 +0,0 @@ -require 'redis' -require 'zlib' - -class RedisBloom - def initialize(opts) - @opts = { - :ttl => false, - :server => {} - }.merge opts - @db = Redis.new(@opts[:server]) - end - - def insert(key, ttl=nil) - ttl = @opts[:ttl] if ttl.nil? - - indexes_for(key).each do |idx| - @db.incr idx - @db.expire(idx, ttl) if ttl - end - end - - def delete(key) - indexes_for(key).each do |idx| - if @db.decr(idx).to_i <= 0 - @db.del(idx) - end - end - end - - def include?(*keys) - indexes = keys.collect { |key| indexes_for(key) } - not @db.mget(*indexes.flatten).include? nil - end - - def num_set - @db.keys("rbloom:*").size - end - - def clear - @db.flushdb - end - - private - - # compute index offsets for provided key - def indexes_for(key) - indexes = [] - @opts[:hashes].times do |i| - indexes.push "rbloom:" + (Zlib.crc32("#{key}:#{i+@opts[:seed]}") % @opts[:size]).to_s - end - - indexes - end -end \ No newline at end of file diff --git a/spec/c_spec.rb b/spec/c_spec.rb index 9cd7e9b..f87ec38 100644 --- a/spec/c_spec.rb +++ b/spec/c_spec.rb @@ -1,9 +1,10 @@ require 'helper' -describe BloomFilter do +describe BloomFilter::Native do + include BloomFilter it "should clear" do - bf = BloomFilter.new(:size => 100, :hashes => 2, :seed => 1, :bucket => 3, :raise => false) + bf = Native.new(:size => 100, :hashes => 2, :seed => 1, :bucket => 3, :raise => false) bf.insert("test") bf.include?("test").should be_true bf.clear @@ -11,8 +12,8 @@ end it "should merge" do - bf1 = BloomFilter.new(:size => 100, :hashes => 2, :seed => 1, :bucket => 3, :raise => false) - bf2 = BloomFilter.new(:size => 100, :hashes => 2, :seed => 1, :bucket => 3, :raise => false) + bf1 = Native.new(:size => 100, :hashes => 2, :seed => 1, :bucket => 3, :raise => false) + bf2 = Native.new(:size => 100, :hashes => 2, :seed => 1, :bucket => 3, :raise => false) bf2.insert("test") bf1.include?("test").should be_false bf1.merge!(bf2) @@ -22,7 +23,7 @@ context "behave like a bloomfilter" do it "should test set memerbship" do - bf = BloomFilter.new(:size => 100, :hashes => 2, :seed => 1, :bucket => 3, :raise => false) + bf = Native.new(:size => 100, :hashes => 2, :seed => 1, :bucket => 3, :raise => false) bf.insert("test") bf.insert("test1") @@ -32,7 +33,7 @@ end it "should work with any object's to_s" do - bf = BloomFilter.new + bf = Native.new bf.insert(:test) bf.insert(:test1) bf.insert(12345) @@ -45,7 +46,7 @@ context "behave like counting bloom filter" do it "should delete / decrement keys" do - bf = BloomFilter.new + bf = Native.new bf.insert("test") bf.include?("test").should be_true @@ -57,14 +58,14 @@ context "behave like a Hash" do it "should respond to key?" do - bf = BloomFilter.new + bf = Native.new bf['foo'] = 'bar' bf.key?('foo').should be_true end it "should optionally store the hash values" do - bf = BloomFilter.new(:values => true) + bf = Native.new(:values => true) bf['foo'] = 'bar' bf.key?('foo').should be_true @@ -72,13 +73,13 @@ end it "should provide a list of keys" do - bf = BloomFilter.new(:values => true) + bf = Native.new(:values => true) bf['foo'] = 'bar' bf['awesome'] = 'bar' %w{ awesome foo }.sort.should == bf.keys.sort # don't store values by default - bf = BloomFilter.new + bf = Native.new bf['foo'] = 'bar' bf.keys.should be_nil end @@ -88,19 +89,19 @@ after(:each) { File.unlink('bf.out') } it "should marshall the bloomfilter" do - bf = BloomFilter.new + bf = Native.new bf['foo'] = 'bar' lambda { bf.save('bf.out') }.should_not raise_error end it "should load marshalled bloomfilter" do - bf = BloomFilter.new + bf = Native.new bf['foo'] = 'bar' bf['bar'] = 'foo' bf.save('bf.out') - bf = BloomFilter.load('bf.out') + bf = Native.load('bf.out') bf.include?('foo').should be_true bf.include?('bar').should be_true bf.include?('baz').should be_false diff --git a/spec/helper.rb b/spec/helper.rb index cbbadc9..b00cd83 100644 --- a/spec/helper.rb +++ b/spec/helper.rb @@ -1 +1,2 @@ -require 'bundler/setup' \ No newline at end of file +require 'bundler/setup' +require 'bloomfilter' \ No newline at end of file diff --git a/spec/redis_spec.rb b/spec/redis_spec.rb index 41d0e43..dbfe162 100644 --- a/spec/redis_spec.rb +++ b/spec/redis_spec.rb @@ -1,9 +1,11 @@ require 'helper' -describe BloomFilter do +describe BloomFilter::Redis do + include BloomFilter + context "use Redis for storage" do it "should store data in Redis" do - bf = BloomFilter.new(:type => :redis) + bf = Redis.new(:type => :redis) bf.insert(:abcd) bf.insert('test') @@ -14,8 +16,8 @@ bf.include?('test', 'abcd').should be_true end - it "should optionally store values" do - bf = BloomFilter.new(:type => :redis, :values => true) + xit "should optionally store values" do + bf = Redis.new(:type => :redis, :values => true) bf['foo'] = 'bar' bf.include?('foo').should be_true @@ -23,7 +25,7 @@ end it "should accept a TTL value for a key" do - bf = BloomFilter.new(:type => :redis, :ttl => 1) + bf = Redis.new(:type => :redis, :ttl => 1) bf.insert('test') bf.include?('test').should be_true @@ -33,7 +35,7 @@ end it "should delete keys from Redis" do - bf = BloomFilter.new(:type => :redis) + bf = Redis.new(:type => :redis) bf.insert('test') bf.include?('test').should be_true @@ -43,7 +45,7 @@ end it "should output current stats" do - bf = BloomFilter.new(:type => :redis) + bf = Redis.new(:type => :redis) bf.clear bf.insert('test') @@ -53,7 +55,7 @@ it "should connect to remote redis server" do lambda { - BloomFilter.new(:type => :redis, :server => {:host => 'localhost'}) + Redis.new(:type => :redis, :server => {:host => 'localhost'}) }.should_not raise_error end end