Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Browse files

Merge pull request #17 from eriksoe/bloom-sparse-representation

Bloom sparse representation
  • Loading branch information...
commit 5ee9ff4a42460d0273099cfe198ea3e85098495f 2 parents 4592bc0 + ed65b5a
@krestenkrab krestenkrab authored
View
52 src/hanoidb_bloom.erl
@@ -87,7 +87,7 @@ bloom(Mode, N, E) ->
M = 1 bsl Mb,
D = trunc(log(1-P) / log(1-1/M)),
#bloom{e=E, n=D, mb=Mb, size = 0,
- a = [bitarray_new(1 bsl Mb) || _ <- lists:seq(1, K)]}.
+ a = [bitmask_new(Mb) || _ <- lists:seq(1, K)]}.
log2(X) -> log(X) / log(2).
@@ -148,7 +148,7 @@ masked_pair(Mask, X, Y) -> {X band Mask, Y band Mask}.
all_set(_Mask, _I1, _I, []) -> true;
all_set(Mask, I1, I, [H|T]) ->
- case bitarray_get(I, H) of
+ case bitmask_get(I, H) of
true -> all_set(Mask, I1, (I+I1) band Mask, T);
false -> false
end.
@@ -182,8 +182,44 @@ hash_add(Hashes, #bloom{mb=Mb, a=A, size=Size} = B) ->
set_bits(_Mask, _I1, _I, [], Acc) -> lists:reverse(Acc);
set_bits(Mask, I1, I, [H|T], Acc) ->
- set_bits(Mask, I1, (I+I1) band Mask, T, [bitarray_set(I, H) | Acc]).
+ set_bits(Mask, I1, (I+I1) band Mask, T, [bitmask_set(I, H) | Acc]).
+
+%%%========== Dispatch to appropriate representation:
+%% Create an empty bitmap addressing 2^LogN bits, choosing the
+%% representation from the size: from 2^20 bits upwards the sparse tree
+%% (hanoidb_sparse_bitmap) is used, below that the dense ETS-backed
+%% bitmap (hanoidb_dense_bitmap).
+bitmask_new(LogN) when LogN >= 20 ->
+    hanoidb_sparse_bitmap:new(LogN);
+bitmask_new(LogN) ->
+    hanoidb_dense_bitmap:new(1 bsl LogN).
+
+%% Set bit I in bitmap BM and return the updated bitmap.
+%% The representation is identified by the tag in the first tuple
+%% element; each tag is routed to the module that owns that form.
+bitmask_set(I, BM) ->
+    Tag = element(1, BM),
+    case Tag of
+        dense_bitmap ->
+            %% A built (immutable) dense row has to be turned back into
+            %% its mutable ETS form before a bit can be set.
+            Mutable = hanoidb_dense_bitmap:unbuild(BM),
+            hanoidb_dense_bitmap:set(I, Mutable);
+        dense_bitmap_ets ->
+            hanoidb_dense_bitmap:set(I, BM);
+        sparse_bitmap ->
+            hanoidb_sparse_bitmap:set(I, BM);
+        array ->
+            bitarray_set(I, BM)
+    end.
+
+%%% Convert a bitmap to its external (serializable) form.
+%%%
+%%% `array' and `sparse_bitmap' values are plain terms and are returned
+%%% unchanged. A mutable `dense_bitmap_ets' handle is turned into its
+%%% `dense_bitmap' row by hanoidb_dense_bitmap:build/1. A `dense_bitmap'
+%%% row is already in external form and is returned as is; without this
+%%% clause, re-encoding a bloom that came out of decode/1 (and was never
+%%% modified) would crash with a case_clause error, even though
+%%% bitmask_get/2 and bitmask_set/2 both accept that form.
+bitmask_build(BM) ->
+    case element(1,BM) of
+        array -> BM;
+        sparse_bitmap -> BM;
+        dense_bitmap -> BM;
+        dense_bitmap_ets -> hanoidb_dense_bitmap:build(BM)
+    end.
+
+%% Test whether bit I is set in bitmap BM; returns true or false.
+%% Both dense forms (mutable ETS handle and built row) are answered by
+%% hanoidb_dense_bitmap:member/2.
+bitmask_get(I, BM) ->
+    case element(1, BM) of
+        Dense when Dense =:= dense_bitmap_ets; Dense =:= dense_bitmap ->
+            hanoidb_dense_bitmap:member(I, BM);
+        sparse_bitmap ->
+            hanoidb_sparse_bitmap:member(I, BM);
+        array ->
+            bitarray_get(I, BM)
+    end.
+
+%%%========== Bitarray representation - suitable for sparse arrays ==========
%% Create a bit array for N bits: an `array' of (N-1) div ?W + 1 cells,
%% every cell defaulting to 0 (no bits set).
%% NOTE(review): ?W is defined outside this hunk; presumably the number
%% of bits stored per cell - confirm against the macro definition.
bitarray_new(N) -> array:new((N-1) div ?W + 1, {default, 0}).
bitarray_set(I, A) ->
@@ -197,12 +233,20 @@ bitarray_get(I, A) ->
V = array:get(AI, A),
V band (1 bsl (I rem ?W)) =/= 0.
+%%%^^^^^^^^^^ Bitarray representation - suitable for sparse arrays ^^^^^^^^^^
+
encode(Bloom) ->
- zlib:gzip(term_to_binary(Bloom)).
+ zlib:gzip(term_to_binary(bloom_build(Bloom))).
%% Inverse of encode/1: gunzip the binary, then deserialize the Erlang
%% term it contains.
decode(Bin) ->
    Unzipped = zlib:gunzip(Bin),
    binary_to_term(Unzipped).
+%%% Convert a filter to its external (serializable) form.
+%%% For a #bloom{} every bitmap in field `a' is passed through
+%%% bitmask_build/1; for an #sbf{} every bloom in field `b' is converted
+%%% recursively.
+bloom_build(#bloom{a=Bitmasks} = Bloom) ->
+    Built = [bitmask_build(BM) || BM <- Bitmasks],
+    Bloom#bloom{a=Built};
+bloom_build(#sbf{b=Blooms} = Sbf) ->
+    Built = [bloom_build(B) || B <- Blooms],
+    Sbf#sbf{b=Built}.
+
%% UNIT TESTS
-ifdef(TEST).
View
43 src/hanoidb_dense_bitmap.erl
@@ -0,0 +1,43 @@
+-module(hanoidb_dense_bitmap).
+
+-export([new/1, set/2, build/1, unbuild/1, member/2]).
+-define(BITS_PER_CELL, 32).
+
+-define(REPR_NAME, dense_bitmap).
+
+%% Dense bitmap with two forms:
+%%  * mutable: {dense_bitmap_ets, N, Width, Tab}, where Tab is a private
+%%    ETS table holding a single row keyed by the atom dense_bitmap;
+%%  * built:   that row itself, the tuple {dense_bitmap, Cell1, ..., CellW},
+%%    each cell an integer holding ?BITS_PER_CELL bits.
+
+%% Create an empty mutable bitmap for N bits.
+new(N) ->
+    Tab = ets:new(dense_bitmap, [private, set]),
+    Width = 1 + (N-1) div ?BITS_PER_CELL,
+    EmptyRow = erlang:make_tuple(Width+1, 0, [{1,?REPR_NAME}]),
+    ets:insert(Tab, EmptyRow),
+    {dense_bitmap_ets, N, Width, Tab}.
+
+%% Set bit I in a mutable bitmap. The table is updated in place and the
+%% same handle is returned.
+set(I, {dense_bitmap_ets, _, _, Tab} = DBM) ->
+    {Cell, Bit} = locate(I),
+    Old = ets:lookup_element(Tab, ?REPR_NAME, Cell),
+    ets:update_element(Tab, ?REPR_NAME, {Cell, Old bor Bit}),
+    DBM.
+
+%% Turn a mutable bitmap into its built row. The ETS table is deleted,
+%% so the handle passed in must not be used afterwards.
+build({dense_bitmap_ets, _, _, Tab}) ->
+    [Row] = ets:lookup(Tab, ?REPR_NAME),
+    ets:delete(Tab),
+    Row.
+
+%% Turn a built row back into a mutable bitmap backed by a fresh private
+%% ETS table. The size fields of the handle are not reconstructed and
+%% are returned as `undefined'.
+unbuild(Row) when element(1, Row) =:= ?REPR_NAME ->
+    Tab = ets:new(dense_bitmap, [private, set]),
+    ets:insert(Tab, Row),
+    {dense_bitmap_ets, undefined, undefined, Tab}.
+
+%% Test whether bit I is set; accepts both the built row and the mutable
+%% handle.
+member(I, Row) when element(1, Row) =:= ?REPR_NAME ->
+    {Cell, Bit} = locate(I),
+    element(Cell, Row) band Bit =/= 0;
+member(I, {dense_bitmap_ets, _, _, Tab}) ->
+    {Cell, Bit} = locate(I),
+    ets:lookup_element(Tab, ?REPR_NAME, Cell) band Bit =/= 0.
+
+%% Map bit index I to {tuple position of its cell, bit value inside the
+%% cell}. Position 1 of the row is the tag, so cells start at 2.
+locate(I) ->
+    {2 + I div ?BITS_PER_CELL, 1 bsl (I rem ?BITS_PER_CELL)}.
View
43 src/hanoidb_sparse_bitmap.erl
@@ -0,0 +1,43 @@
+-module(hanoidb_sparse_bitmap).
+-export([new/1, set/2, member/2]).
+
+-define(REPR_NAME, sparse_bitmap).
+
+%% Sparse bitmap: {sparse_bitmap, Bits, Tree} for indices of Bits bits.
+%% Tree is walked from the highest index bit downwards and is one of:
+%%  * []          - no members in this subtree;
+%%  * [Lo|Hi]     - cons cell splitting on the current highest bit
+%%                  (Lo: bit clear, Hi: bit set, with that bit removed);
+%%  * an integer  - while the current highest bit is >= 32: the single
+%%                  member of this subtree, stored unexpanded;
+%%                  once it is < 32: a bit mask of the members.
+
+%% Create an empty bitmap for Bits-bit indices.
+new(Bits) when is_integer(Bits), Bits>0 ->
+    {?REPR_NAME, Bits, []}.
+
+%% Return the bitmap with index N added.
+set(N, {?REPR_NAME, Bits, Tree}) ->
+    TopBit = 1 bsl (Bits-1),
+    {?REPR_NAME, Bits, set_to_tree(N, TopBit, Tree)}.
+
+%% Bottom level: the subtree is a plain bit mask ([] meaning empty).
+set_to_tree(N, HighestBit, []) when HighestBit<32 ->
+    1 bsl N;
+set_to_tree(N, HighestBit, Mask) when HighestBit<32 ->
+    (1 bsl N) bor Mask;
+%% Empty subtree: store the single member unexpanded.
+set_to_tree(N, _HighestBit, []) -> N;
+set_to_tree(N, HighestBit, [TLo|THi]) ->
+    pushdown(N, HighestBit, TLo, THi);
+%% The single stored member is N itself: nothing to do.
+set_to_tree(N, _HighestBit, N) -> N;
+%% A different single member: expand it into a cons cell, then insert N.
+set_to_tree(N, HighestBit, M) when is_integer(M) ->
+    Expanded = pushdown(M, HighestBit, [], []),
+    set_to_tree(N, HighestBit, Expanded).
+
+%% Insert N into the half of the cell [TLo|THi] selected by HighestBit.
+pushdown(N, HighestBit, TLo, THi) ->
+    case N band HighestBit of
+        0 ->
+            [set_to_tree(N, HighestBit bsr 1, TLo) | THi];
+        NHigh ->
+            [TLo | set_to_tree(N bxor NHigh, HighestBit bsr 1, THi)]
+    end.
+
+%% Test whether index N is in the bitmap.
+member(N, {?REPR_NAME, Bits, Tree}) ->
+    TopBit = 1 bsl (Bits-1),
+    member_in_tree(N, TopBit, Tree).
+
+member_in_tree(_N, _HighestBit, []) ->
+    false;
+member_in_tree(N, HighestBit, Mask) when HighestBit<32 ->
+    Mask band (1 bsl N) =/= 0;
+member_in_tree(N, _HighestBit, M) when is_integer(M) ->
+    N =:= M;
+member_in_tree(N, HighestBit, [TLo|THi]) ->
+    case N band HighestBit of
+        0 ->
+            member_in_tree(N, HighestBit bsr 1, TLo);
+        NHigh ->
+            member_in_tree(N bxor NHigh, HighestBit bsr 1, THi)
+    end.
Please sign in to comment.
Something went wrong with that request. Please try again.