Skip to content
Browse files

Bloom filter: replace array with faster custom representation.

  • Loading branch information...
1 parent 1c6e0df commit ed65b5a468710a2e11b906e678e0195f1052dd9a @eriksoe eriksoe committed Sep 25, 2012
Showing with 47 additions and 1 deletion.
  1. +4 −1 src/hanoidb_bloom.erl
  2. +43 −0 src/hanoidb_sparse_bitmap.erl
View
5 src/hanoidb_bloom.erl
@@ -188,14 +188,15 @@ set_bits(Mask, I1, I, [H|T], Acc) ->
%%%========== Dispatch to appropriate representation:
%% Create an empty bitmask, picking the representation from LogN.
%% For LogN >= 20 the sparse bitmap is used; note that it is given LogN
%% itself, whereas the array-based call it replaces in this diff and the
%% dense bitmap are both given the bit count 1 bsl LogN.
bitmask_new(LogN) ->
if LogN >= 20 -> % Use sparse representation.
- bitarray_new(1 bsl LogN);
+ hanoidb_sparse_bitmap:new(LogN);
true -> % Use dense representation.
hanoidb_dense_bitmap:new(1 bsl LogN)
end.
bitmask_set(I, BM) ->
case element(1,BM) of
array -> bitarray_set(I, BM);
+ sparse_bitmap -> hanoidb_sparse_bitmap:set(I, BM);
dense_bitmap_ets -> hanoidb_dense_bitmap:set(I, BM);
dense_bitmap ->
%% Surprise - we need to mutate a built representation:
@@ -206,12 +207,14 @@ bitmask_set(I, BM) ->
%% Dispatch on the representation tag held in the first tuple element of BM.
%% `array' and `sparse_bitmap' values are returned unchanged, while a
%% `dense_bitmap_ets' value is passed to hanoidb_dense_bitmap:build/1.
%% Any other tag fails with case_clause.
bitmask_build(BM) ->
case element(1,BM) of
array -> BM;
+ sparse_bitmap -> BM;
dense_bitmap_ets -> hanoidb_dense_bitmap:build(BM)
end.
%% Look up position I in bitmask BM by delegating to the module that owns
%% the representation named by the tag in the first tuple element of BM.
%% Both dense tags (`dense_bitmap_ets' and `dense_bitmap') are served by
%% hanoidb_dense_bitmap:member/2. Any other tag fails with case_clause.
bitmask_get(I, BM) ->
case element(1,BM) of
array -> bitarray_get(I, BM);
+ sparse_bitmap -> hanoidb_sparse_bitmap:member(I, BM);
dense_bitmap_ets -> hanoidb_dense_bitmap:member(I, BM);
dense_bitmap -> hanoidb_dense_bitmap:member(I, BM)
end.
View
43 src/hanoidb_sparse_bitmap.erl
@@ -0,0 +1,43 @@
+-module(hanoidb_sparse_bitmap).
+-export([new/1, set/2, member/2]).
+
+-define(REPR_NAME, sparse_bitmap).
+
+%% Create an empty sparse bitmap.
+%% Bits must be a positive integer; it is kept in the returned tuple and
+%% set/2 and member/2 start their tree descent at bit number Bits-1.
+%% The result is {?REPR_NAME, Bits, Tree} with Tree = [] for "no bits set".
+new(Bits) when is_integer(Bits) andalso Bits >= 1 ->
+    EmptyTree = [],
+    {?REPR_NAME, Bits, EmptyTree}.
+
+%% Return a bitmap equal to the given one but with bit N set.
+%%
+%% N must be an integer in 0 .. (1 bsl Bits) - 1; anything else fails with
+%% function_clause. The range check matters because the tree code trusts
+%% its input: an out-of-range N would either be stored verbatim as a lone
+%% element or travel unchanged down to a leaf, where `1 bsl N' would build
+%% an arbitrarily large integer.
+set(N, {?REPR_NAME, Bits, Tree})
+  when is_integer(N), N >= 0, N < (1 bsl Bits) ->
+    %% The descent starts by testing the most significant index bit.
+    TopBit = 1 bsl (Bits - 1),
+    {?REPR_NAME, Bits, set_to_tree(N, TopBit, Tree)}.
+
+%% Insert N into the subtree whose most significant index bit is HighestBit.
+%%
+%% Node shapes:
+%%   * HighestBit < 32 (leaf): [] when empty, otherwise an integer bitmask
+%%     in which bit number N is set for every stored N.
+%%   * HighestBit >= 32: [] when empty, a bare integer when exactly one
+%%     element is stored, or a cons cell [Lo | Hi] where Lo holds the
+%%     elements with HighestBit clear and Hi those with it set (stored with
+%%     that bit removed).
+set_to_tree(N, HighestBit, []) when HighestBit < 32 ->
+    1 bsl N;
+set_to_tree(N, HighestBit, Mask) when HighestBit < 32 ->
+    (1 bsl N) bor Mask;
+set_to_tree(N, _HighestBit, []) ->
+    %% First element of an empty inner node: keep it as a lone integer.
+    N;
+set_to_tree(N, HighestBit, [Lo | Hi]) ->
+    pushdown(N, HighestBit, Lo, Hi);
+set_to_tree(N, _HighestBit, Lone) when Lone =:= N ->
+    %% Already present as the lone element; nothing to change.
+    N;
+set_to_tree(N, HighestBit, Lone) when is_integer(Lone) ->
+    %% A second, different element arrives: expand the lone element into a
+    %% proper [Lo | Hi] node first, then insert N into that node.
+    Expanded = pushdown(Lone, HighestBit, [], []),
+    set_to_tree(N, HighestBit, Expanded).
+
+%% Insert N into the half of the node [Lo | Hi] selected by HighestBit and
+%% return the rebuilt cons cell. Elements going into Hi have HighestBit
+%% stripped; the child level is HighestBit bsr 1.
+pushdown(N, HighestBit, Lo, Hi) ->
+    ChildBit = HighestBit bsr 1,
+    case N band HighestBit of
+        0 ->
+            [set_to_tree(N, ChildBit, Lo) | Hi];
+        TopBit ->
+            [Lo | set_to_tree(N bxor TopBit, ChildBit, Hi)]
+    end.
+
+%% Return true when bit N is set in the bitmap, false otherwise.
+%% The lookup walks the tree from the most significant index bit,
+%% 1 bsl (Bits-1), downwards.
+member(N, {?REPR_NAME, Bits, Tree}) ->
+    TopBit = 1 bsl (Bits - 1),
+    member_in_tree(N, TopBit, Tree).
+
+%% Test whether N is stored in the subtree whose most significant index bit
+%% is HighestBit.
+%%   * []                      - empty subtree, never a member.
+%%   * integer, HighestBit<32  - leaf bitmask; check bit number N.
+%%   * [Lo | Hi]               - descend into Lo when HighestBit is clear in
+%%                               N, else into Hi with that bit stripped.
+%%   * integer, HighestBit>=32 - lone element; member only if equal to N.
+member_in_tree(_N, _HighestBit, []) ->
+    false;
+member_in_tree(N, HighestBit, Mask) when HighestBit < 32 ->
+    Mask band (1 bsl N) =/= 0;
+member_in_tree(N, HighestBit, [Lo | Hi]) ->
+    ChildBit = HighestBit bsr 1,
+    case N band HighestBit of
+        0 -> member_in_tree(N, ChildBit, Lo);
+        TopBit -> member_in_tree(N bxor TopBit, ChildBit, Hi)
+    end;
+member_in_tree(N, _HighestBit, Lone) when is_integer(Lone) ->
+    Lone =:= N.

0 comments on commit ed65b5a

Please sign in to comment.
Something went wrong with that request. Please try again.