Permalink
Browse files

Merge branch 'union-and-intersection' of git://github.com/apohllo/bloomfilter-rb

Conflicts:
	ext/cbloomfilter/cbloomfilter.c
	lib/bloomfilter/native.rb
	spec/native_spec.rb
  • Loading branch information...
igrigorik committed Mar 17, 2012
2 parents 808ee11 + 504427b commit ba20cc9eb4f23e88c9e44acf93c4fe0a37bf52a8
Showing with 133 additions and 3 deletions.
  1. +54 −3 ext/cbloomfilter/cbloomfilter.c
  2. +29 −0 lib/bloomfilter/native.rb
  3. +50 −0 spec/native_spec.rb
@@ -168,6 +168,12 @@ static VALUE bf_r(VALUE self) {
return bf->r == 0 ? Qfalse : Qtrue;
}
/*
 * Reader for the filter's `s` field, returned to Ruby as a Fixnum.
 * NOTE(review): `s` is presumably the hash seed -- confirm against bf_s_new.
 */
static VALUE bf_s(VALUE self) {
    struct BloomFilter *filter;

    Data_Get_Struct(self, struct BloomFilter, filter);
    return INT2FIX(filter->s);
}
static VALUE bf_set_bits(VALUE self){
struct BloomFilter *bf;
int i,j,count = 0;
@@ -212,15 +218,59 @@ static VALUE bf_insert(VALUE self, VALUE key) {
/*
 * Merges `other` into `self` in place: every byte of self's bit array is
 * OR-ed with the byte at the same offset in other's bit array.
 *
 * Raises ArgumentError when the two bit arrays differ in length.
 * Returns nil.
 */
static VALUE bf_merge(VALUE self, VALUE other) {
    struct BloomFilter *bf, *target;
    int i; /* declared once, ahead of any statement */

    Data_Get_Struct(self, struct BloomFilter, bf);
    Data_Get_Struct(other, struct BloomFilter, target);

    /* The loop below indexes target->ptr up to bf->bytes, so a shorter
     * `other` would be read past the end of its buffer. */
    if (bf->bytes != target->bytes) {
        rb_raise(rb_eArgError, "bloom filters must have the same size");
    }

    for (i = 0; i < bf->bytes; i++) {
        bf->ptr[i] |= target->ptr[i];
    }

    return Qnil;
}
/*
 * Intersection: returns a new filter whose bit array is the byte-wise AND
 * of self's and other's bit arrays. Only the new filter's buffer is written.
 *
 * The result is created through bf_s_new using self's m, k, s, b and r
 * values and self's Ruby class.
 *
 * Raises ArgumentError when the two bit arrays differ in length.
 */
static VALUE bf_and(VALUE self, VALUE other) {
    struct BloomFilter *bf, *bf_other, *target;
    VALUE klass, obj, args[5];
    int i;

    Data_Get_Struct(self, struct BloomFilter, bf);
    Data_Get_Struct(other, struct BloomFilter, bf_other);

    /* The combining loop reads bf->bytes bytes from both operands, so a
     * shorter `other` would be read past the end of its buffer. */
    if (bf->bytes != bf_other->bytes) {
        rb_raise(rb_eArgError, "bloom filters must have the same size");
    }

    args[0] = INT2FIX(bf->m);
    args[1] = INT2FIX(bf->k);
    args[2] = INT2FIX(bf->s);
    args[3] = INT2FIX(bf->b);
    /* NOTE(review): bf_r exposes r to Ruby as Qtrue/Qfalse, while it is
     * forwarded here as a Fixnum -- confirm bf_s_new accepts this form. */
    args[4] = INT2FIX(bf->r);

    klass = rb_funcall(self, rb_intern("class"), 0);
    obj = bf_s_new(5, args, klass);
    Data_Get_Struct(obj, struct BloomFilter, target);

    /* Assumes the new filter's buffer holds at least bf->bytes bytes, since
     * it was built from self's parameters -- TODO confirm in bf_s_new. */
    for (i = 0; i < bf->bytes; i++) {
        target->ptr[i] = bf->ptr[i] & bf_other->ptr[i];
    }

    return obj;
}
/*
 * Union: returns a new filter whose bit array is the byte-wise OR of
 * self's and other's bit arrays. Only the new filter's buffer is written.
 *
 * The result is created through bf_s_new using self's m, k, s, b and r
 * values and self's Ruby class.
 *
 * Raises ArgumentError when the two bit arrays differ in length.
 */
static VALUE bf_or(VALUE self, VALUE other) {
    struct BloomFilter *bf, *bf_other, *target;
    VALUE klass, obj, args[5];
    int i;

    Data_Get_Struct(self, struct BloomFilter, bf);
    Data_Get_Struct(other, struct BloomFilter, bf_other);

    /* The combining loop reads bf->bytes bytes from both operands, so a
     * shorter `other` would be read past the end of its buffer. */
    if (bf->bytes != bf_other->bytes) {
        rb_raise(rb_eArgError, "bloom filters must have the same size");
    }

    args[0] = INT2FIX(bf->m);
    args[1] = INT2FIX(bf->k);
    args[2] = INT2FIX(bf->s);
    args[3] = INT2FIX(bf->b);
    /* NOTE(review): bf_r exposes r to Ruby as Qtrue/Qfalse, while it is
     * forwarded here as a Fixnum -- confirm bf_s_new accepts this form. */
    args[4] = INT2FIX(bf->r);

    klass = rb_funcall(self, rb_intern("class"), 0);
    obj = bf_s_new(5, args, klass);
    Data_Get_Struct(obj, struct BloomFilter, target);

    /* Assumes the new filter's buffer holds at least bf->bytes bytes, since
     * it was built from self's parameters -- TODO confirm in bf_s_new. */
    for (i = 0; i < bf->bytes; i++) {
        target->ptr[i] = bf->ptr[i] | bf_other->ptr[i];
    }

    return obj;
}
static VALUE bf_delete(VALUE self, VALUE key) {
int index, seed;
int i, len, m, k, s;
@@ -343,19 +393,20 @@ void Init_cbloomfilter(void) {
rb_define_method(cBloomFilter, "b", bf_b, 0);
rb_define_method(cBloomFilter, "r", bf_r, 0);
rb_define_method(cBloomFilter, "set_bits", bf_set_bits, 0);
rb_define_method(cBloomFilter, "s", bf_s, 0);
rb_define_method(cBloomFilter, "insert", bf_insert, 1);
rb_define_method(cBloomFilter, "delete", bf_delete, 1);
rb_define_method(cBloomFilter, "include?", bf_include, -1);
rb_define_method(cBloomFilter, "clear", bf_clear, 0);
rb_define_method(cBloomFilter, "merge!", bf_merge, 1);
rb_define_method(cBloomFilter, "&", bf_and, 1);
rb_define_method(cBloomFilter, "|", bf_or, 1);
rb_define_method(cBloomFilter, "to_s", bf_to_s, 0);
rb_define_method(cBloomFilter, "bitmap", bf_bitmap, 0);
rb_define_method(cBloomFilter, "load", bf_load, 1);
/* functions that have not been implemented, yet */
// rb_define_method(cBloomFilter, "&", bf_and, 1);
// rb_define_method(cBloomFilter, "|", bf_or, 1);
// rb_define_method(cBloomFilter, "<=>", bf_cmp, 1);
}
View
@@ -41,6 +41,26 @@ def set_bits
@bf.set_bits
end
# Computes the intersection of two Bloom filters.
#
# Both filters must pass the +same_parameters?+ check; otherwise
# +ArgumentError+ is raised.
#
# Returns a new instance of the receiver's class that wraps the result
# of applying +&+ to the two underlying filters.
def &(o)
  raise ArgumentError, "cannot intersect Bloom filters with different parameters" unless same_parameters?(o)

  # A fresh instance is created without arguments and its underlying
  # filter is then replaced by the computed intersection.
  result = self.class.new
  result.instance_variable_set(:@bf, @bf & o.bf)
  result
end
# Computes the union of two Bloom filters.
#
# Both filters must pass the +same_parameters?+ check; otherwise
# +ArgumentError+ is raised.
#
# Returns a new instance of the receiver's class that wraps the result
# of applying +|+ to the two underlying filters.
def |(o)
  raise ArgumentError, "cannot unite Bloom filters with different parameters" unless same_parameters?(o)

  # A fresh instance is created without arguments and its underlying
  # filter is then replaced by the computed union.
  result = self.class.new
  result.instance_variable_set(:@bf, @bf | o.bf)
  result
end
# Returns whatever the underlying filter object (+@bf+) reports as its
# +bitmap+; this method is a plain delegator.
def bitmap
@bf.bitmap
end
@@ -66,5 +86,14 @@ def save(filename)
end
end
protected
# Tells whether the +o+ther filter's underlying filter reports the same
# +m+, +k+, +s+ and +b+ values as this one's.
def same_parameters?(o)
  [:m, :k, :s, :b].all? do |param|
    @bf.send(param) == o.bf.send(param)
  end
end
end
end
View
@@ -55,6 +55,56 @@
bf.insert("test")
bf.set_bits.should == 1
end
# Only the key inserted into both operands may be reported by the result.
it "should return intersection with other filter" do
  left = Native.new(:seed => 1)
  %w(test test1).each { |key| left.insert(key) }

  right = Native.new(:seed => 1)
  %w(test test2).each { |key| right.insert(key) }

  common = left & right

  common.include?("test").should be_true
  %w(test1 test2).each { |key| common.include?(key).should be_false }
end
# Filters created with different :size values must not be intersected.
it "should raise an exception when intersection is to be computed for incompatible filters" do
  small = Native.new(:size => 10)
  large = Native.new(:size => 20)
  [small, large].each { |filter| filter.insert("test") }

  lambda { small & large }.should raise_error(ArgumentError)
end
# Every key inserted into either operand must be reported by the result.
it "should return union with other filter" do
  left = Native.new(:seed => 1)
  %w(test test1).each { |key| left.insert(key) }

  right = Native.new(:seed => 1)
  %w(test test2).each { |key| right.insert(key) }

  combined = left | right

  %w(test test1 test2).each { |key| combined.include?(key).should be_true }
end
# Filters created with different :size values must not be united.
it "should raise an exception when union is to be computed for incompatible filters" do
  small = Native.new(:size => 10)
  large = Native.new(:size => 20)
  [small, large].each { |filter| filter.insert("test") }

  lambda { small | large }.should raise_error(ArgumentError)
end
end
context "behave like counting bloom filter" do

0 comments on commit ba20cc9

Please sign in to comment.