Skip to content

Commit

Permalink
Bloom filter: Add faster, ETS-based build representation for dense bi…
Browse files Browse the repository at this point in the history
…tmaps.
  • Loading branch information
esstrifork committed Sep 25, 2012
1 parent d727be2 commit 12148a7
Show file tree
Hide file tree
Showing 2 changed files with 63 additions and 4 deletions.
28 changes: 24 additions & 4 deletions src/hanoidb_bloom.erl
Expand Up @@ -187,16 +187,30 @@ set_bits(Mask, I1, I, [H|T], Acc) ->

%%%========== Dispatch to appropriate representation:
%% Create an empty bitmask holding 2^LogN bits.
%% The representation is chosen by size: for LogN >= 20 the sparse
%% bitarray is used, for anything smaller the dense bitmap from
%% hanoidb_dense_bitmap.
bitmask_new(LogN) when LogN >= 20 ->
    %% Use sparse representation.
    bitarray_new(1 bsl LogN);
bitmask_new(LogN) ->
    %% Use dense representation.
    hanoidb_dense_bitmap:new(1 bsl LogN).

%% Set bit I in the bitmask BM and return the updated bitmask.
%% Dispatches on the representation tag stored in the first tuple element.
bitmask_set(I, BM) ->
    case element(1,BM) of
        array            -> bitarray_set(I, BM);
        dense_bitmap_ets -> hanoidb_dense_bitmap:set(I, BM)
    end.

%%% Convert to external form.
%%% - array:            returned unchanged.
%%% - dense_bitmap_ets: converted by hanoidb_dense_bitmap:build/1.
%%% - dense_bitmap:     already in the built form (the form bitmask_get/2
%%%                     also accepts), so it is returned unchanged. Without
%%%                     this clause, building a bitmask a second time would
%%%                     fail with a case_clause error.
bitmask_build(BM) ->
    case element(1,BM) of
        array            -> BM;
        dense_bitmap     -> BM;
        dense_bitmap_ets -> hanoidb_dense_bitmap:build(BM)
    end.

%% Test whether bit I is set in the bitmask BM.
%% Accepts the sparse array form as well as both dense forms: the ETS-backed
%% one (dense_bitmap_ets) and the built tuple (dense_bitmap).
bitmask_get(I, BM) ->
    case element(1,BM) of
        array            -> bitarray_get(I, BM);
        dense_bitmap_ets -> hanoidb_dense_bitmap:member(I, BM);
        dense_bitmap     -> hanoidb_dense_bitmap:member(I, BM)
    end.

%%%========== Bitarray representation - suitable for sparse arrays ==========
Expand All @@ -216,11 +230,17 @@ bitarray_get(I, A) ->
%%%^^^^^^^^^^ Bitarray representation - suitable for sparse arrays ^^^^^^^^^^

%% Serialize a bloom filter to a gzip-compressed binary.
%% The filter is first converted to its external form with bloom_build/1,
%% then passed through term_to_binary/1 and zlib:gzip/1.
encode(Bloom) ->
    External = bloom_build(Bloom),
    zlib:gzip(term_to_binary(External)).

%% Turn a gzip-compressed, term_to_binary-encoded binary back into a term.
%% NOTE(review): binary_to_term/1 is called without the [safe] option;
%% confirm that Bin only ever comes from a trusted source.
decode(Bin) ->
    Unzipped = zlib:gunzip(Bin),
    binary_to_term(Unzipped).

%%% Convert a filter to its external form.
%%% For a #bloom{} every bitmask in field `a' is passed through
%%% bitmask_build/1; for an #sbf{} every element of field `b' is converted
%%% recursively.
bloom_build(#bloom{a=Masks}=Filter) ->
    BuiltMasks = [bitmask_build(Mask) || Mask <- Masks],
    Filter#bloom{a=BuiltMasks};
bloom_build(#sbf{b=Filters}=Scalable) ->
    BuiltFilters = [bloom_build(Inner) || Inner <- Filters],
    Scalable#sbf{b=BuiltFilters}.

%% UNIT TESTS

-ifdef(TEST).
Expand Down
39 changes: 39 additions & 0 deletions src/hanoidb_dense_bitmap.erl
@@ -0,0 +1,39 @@
-module(hanoidb_dense_bitmap).

%% Dense bitmap with two forms:
%%  * build form: {dense_bitmap_ets, N, Width, Tab}, where Tab is a private
%%    ETS table holding a single row that is updated in place by set/2;
%%  * built form: that row itself, a tuple {dense_bitmap, Cell1, ..., CellW}
%%    of integer cells, as returned by build/1.

-export([new/1, set/2, build/1, member/2]).

%% Number of bits stored in each integer cell of the row.
-define(BITS_PER_CELL, 32).

%% Tag in position 1 of the row; it also serves as the row's ETS key.
-define(REPR_NAME, dense_bitmap).

%% Create an empty bitmap with room for N bits, in build form.
new(N) ->
    Table = ets:new(dense_bitmap, [private, set]),
    CellCount = 1 + (N-1) div ?BITS_PER_CELL,
    %% Position 1 holds the tag; the CellCount cells after it start at 0.
    EmptyRow = erlang:make_tuple(CellCount+1, 0, [{1,?REPR_NAME}]),
    ets:insert(Table, EmptyRow),
    {dense_bitmap_ets, N, CellCount, Table}.

%% Set bit I in a build-form bitmap. The handle is returned unchanged,
%% because the update happens inside the ETS table.
set(I, {dense_bitmap_ets, _N, _CellCount, Table}=Handle) ->
    {Pos, Mask} = cell_and_mask(I),
    Current = ets:lookup_element(Table, ?REPR_NAME, Pos),
    ets:update_element(Table, ?REPR_NAME, {Pos, Current bor Mask}),
    Handle.

%% Convert a build-form bitmap to its built form: fetch the row, delete the
%% ETS table and return the row. The handle must not be used afterwards.
build({dense_bitmap_ets, _N, _CellCount, Table}) ->
    [BuiltRow] = ets:lookup(Table, ?REPR_NAME),
    ets:delete(Table),
    BuiltRow.

%% Test whether bit I is set. Works on the built form (first clause) and on
%% the build form (second clause).
member(I, BuiltRow) when element(1, BuiltRow) =:= ?REPR_NAME ->
    {Pos, Mask} = cell_and_mask(I),
    (element(Pos, BuiltRow) band Mask) =/= 0;
member(I, {dense_bitmap_ets, _N, _CellCount, Table}) ->
    {Pos, Mask} = cell_and_mask(I),
    (ets:lookup_element(Table, ?REPR_NAME, Pos) band Mask) =/= 0.

%% Map bit index I to {tuple position of its cell, mask of the bit within
%% that cell}. Cells start at position 2 because position 1 holds the tag.
cell_and_mask(I) ->
    Pos = 2 + I div ?BITS_PER_CELL,
    Mask = 1 bsl (I rem ?BITS_PER_CELL),
    {Pos, Mask}.

0 comments on commit 12148a7

Please sign in to comment.