Skip to content

Commit

Permalink
MB-6866 Use different thresholds for kp and kv nodes
Browse files Browse the repository at this point in the history
For views, since values are mostly raw JSON, compression
gains are high; therefore the chunk threshold (uncompressed
size) can be raised, which decreases the number of kv nodes
in the trees. For kp nodes, because they all have a 1024-bit
bitmask in their reductions, their branching factor was
a bit low, which made the trees deeper.

These changes made queries and compaction faster for both
the generic btree tests and the evperf lucky8 views (lucky8.conf
and lucky8-3d.conf). Inserts and updates also get a small speedup.

Change-Id: I2a94ec6005b881c81fd5295a2c89c7657cc6b19f
Reviewed-on: http://review.couchbase.org/21484
Reviewed-by: Damien Katz <damien@couchbase.com>
Reviewed-by: Volker Mische <volker.mische@gmail.com>
Reviewed-by: Farshid Ghods <farshid@couchbase.com>
Tested-by: Farshid Ghods <farshid@couchbase.com>
  • Loading branch information
fdmanana authored and Farshid Ghods committed Oct 11, 2012
1 parent b6454ca commit c62a4b2
Show file tree
Hide file tree
Showing 7 changed files with 66 additions and 28 deletions.
6 changes: 4 additions & 2 deletions src/couch_set_view/src/couch_set_view_group.erl
Expand Up @@ -38,7 +38,8 @@
pid()) -> no_return()).

-define(TIMEOUT, 3000).
-define(BTREE_CHUNK_THRESHOLD, 5120).
-define(BTREE_KV_CHUNK_THRESHOLD, 7168).
-define(BTREE_KP_CHUNK_THRESHOLD, 6144).

-define(root_dir(State), element(1, State#state.init_args)).
-define(set_name(State), element(2, State#state.init_args)).
Expand Down Expand Up @@ -1619,7 +1620,8 @@ init_group(Fd, Group, IndexHeader) ->
First, Rest)
end,
BtreeOptions = [
{chunk_threshold, ?BTREE_CHUNK_THRESHOLD},
{kv_chunk_threshold, ?BTREE_KV_CHUNK_THRESHOLD},
{kp_chunk_threshold, ?BTREE_KP_CHUNK_THRESHOLD},
{binary_mode, true}
],
{ok, IdBtree} = couch_btree:open(
Expand Down
21 changes: 12 additions & 9 deletions src/couchdb/couch_btree.erl
Expand Up @@ -86,8 +86,10 @@ set_options(Bt, [{less, Less}|Rest]) ->
set_options(Bt#btree{less=Less}, Rest);
set_options(Bt, [{reduce, Reduce}|Rest]) ->
set_options(Bt#btree{reduce=Reduce}, Rest);
set_options(Bt, [{chunk_threshold, Threshold}|Rest]) ->
set_options(Bt#btree{chunk_threshold = Threshold}, Rest);
set_options(Bt, [{kv_chunk_threshold, Threshold}|Rest]) ->
set_options(Bt#btree{kv_chunk_threshold = Threshold}, Rest);
set_options(Bt, [{kp_chunk_threshold, Threshold}|Rest]) ->
set_options(Bt#btree{kp_chunk_threshold = Threshold}, Rest);
set_options(#btree{root = Root} = Bt, [{binary_mode, true}|Rest]) when is_binary(Root) ->
<<Pointer:?POINTER_BITS, Size:?TREE_SIZE_BITS, Red0/binary>> = Root,
Red = binary:copy(Red0),
Expand Down Expand Up @@ -362,12 +364,12 @@ complete_root(Bt, KPs) ->
{ok, ResultKeyPointers, Bt2} = write_node(Bt, kp_node, KPs),
complete_root(Bt2, ResultKeyPointers).

%%%%%%%%%%%%% The chunkify function sucks! %%%%%%%%%%%%%
% It is inaccurate as it does not account for compression when blocks are
% written. Plus with the "case byte_size(term_to_binary(InList)) of" code
% it's probably really inefficient.
%% Select the chunk threshold appropriate for the node type (kp nodes and
%% kv nodes are tuned separately) and delegate the actual splitting to
%% chunkify/2.
chunkify(#btree{kp_chunk_threshold = Threshold}, kp_node, Items) ->
    chunkify(Threshold, Items);
chunkify(#btree{kv_chunk_threshold = Threshold}, kv_node, Items) ->
    chunkify(Threshold, Items).

chunkify(#btree{chunk_threshold = ChunkThreshold0}, InList) ->
chunkify(ChunkThreshold0, InList) ->
case ?term_size(InList) of
Size when Size > ChunkThreshold0 ->
NumberOfChunksLikely = ((Size div ChunkThreshold0) + 1),
Expand Down Expand Up @@ -499,7 +501,7 @@ encode_node_iolist([{K, V}|RestKvs], Acc) ->

write_node(#btree{fd = Fd, binary_mode = BinMode} = Bt, NodeType, NodeList) ->
% split up nodes into smaller sizes
NodeListList = chunkify(Bt, NodeList),
NodeListList = chunkify(Bt, NodeType, NodeList),
% now write out each chunk and return the KeyPointer pairs for those nodes
ResultList = [
begin
Expand Down Expand Up @@ -1104,7 +1106,8 @@ stats(#btree{root = Root, fd = Fd} = Bt) ->
{btree_size, TreeSize},
{file_size, FileSize},
{fragmentation, Frag},
{chunk_threshold, Bt#btree.chunk_threshold} | Stats
{kv_chunk_threshold, Bt#btree.kv_chunk_threshold},
{kp_chunk_threshold, Bt#btree.kp_chunk_threshold} | Stats
].

collect_stats(nil, _Bt, 0, 0, 0, 0, 0, 0) ->
Expand Down
22 changes: 14 additions & 8 deletions src/couchdb/couch_btree_copy.erl
Expand Up @@ -24,7 +24,8 @@
before_kv_write = nil,
user_acc = [],
filter = fun(_) -> true end,
chunk_threshold,
kv_chunk_threshold,
kp_chunk_threshold,
nodes = array:new(),
cur_level = 1,
max_level = 1,
Expand All @@ -44,7 +45,8 @@ copy(Btree, Fd, Options) ->
Acc0 = #acc{
btree = Btree,
fd = Fd,
chunk_threshold = Btree#btree.chunk_threshold
kv_chunk_threshold = Btree#btree.kv_chunk_threshold,
kp_chunk_threshold = Btree#btree.kp_chunk_threshold
},
Acc = apply_options(Options, Acc0),
{ok, _, #acc{cur_level = 1} = FinalAcc0} = couch_btree:fold(
Expand All @@ -58,7 +60,8 @@ from_sorted_file(EmptyBtree, SortedFileName, DestFd, BinToKvFun) ->
Acc = #acc{
btree = EmptyBtree,
fd = DestFd,
chunk_threshold = EmptyBtree#btree.chunk_threshold
kv_chunk_threshold = EmptyBtree#btree.kv_chunk_threshold,
kp_chunk_threshold = EmptyBtree#btree.kp_chunk_threshold
},
{ok, SourceFd} = file:open(SortedFileName, [read, raw, binary, read_ahead]),
{ok, Acc2} = try
Expand Down Expand Up @@ -105,7 +108,8 @@ file_sort_output_fun(OrigBtree, Fd, Options) ->
Acc0 = #acc{
btree = OrigBtree,
fd = Fd,
chunk_threshold = OrigBtree#btree.chunk_threshold
kv_chunk_threshold = OrigBtree#btree.kv_chunk_threshold,
kp_chunk_threshold = OrigBtree#btree.kp_chunk_threshold
},
Acc = apply_options(Options, Acc0),
fun(Item) -> file_sort_loop(Item, Acc) end.
Expand All @@ -131,8 +135,10 @@ apply_options([{filter, Fun} | Rest], Acc) ->
apply_options(Rest, Acc#acc{filter = Fun});
apply_options([override | Rest], Acc) ->
apply_options(Rest, Acc);
apply_options([{chunk_threshold, Threshold} | Rest], Acc) ->
apply_options(Rest, Acc#acc{chunk_threshold = Threshold}).
apply_options([{kv_chunk_threshold, Threshold} | Rest], Acc) ->
apply_options(Rest, Acc#acc{kv_chunk_threshold = Threshold});
apply_options([{kp_chunk_threshold, Threshold} | Rest], Acc) ->
apply_options(Rest, Acc#acc{kp_chunk_threshold = Threshold}).


extract(#acc{btree = #btree{extract_kv = Extract}}, Value) ->
Expand Down Expand Up @@ -207,7 +213,7 @@ fold_copy(Item, ItemSize, #acc{cur_level = 1} = Acc) ->
Kv = extract(Acc, Item),
LeafSize2 = LeafSize + ItemSize,
Values2 = [Kv | Values],
NextAcc = case LeafSize2 >= Acc#acc.chunk_threshold of
NextAcc = case LeafSize2 >= Acc#acc.kv_chunk_threshold of
true ->
{LeafState, Acc2} = flush_leaf(Values2, Acc),
{K, _V} = Kv,
Expand Down Expand Up @@ -241,7 +247,7 @@ bubble_up({Key, NodeState}, Level, Acc) ->
{Size, NextLevelNodes} = array:get(Level + 1, Acc2#acc.nodes),
NextLevelNodes2 = [Kp | NextLevelNodes],
Size2 = Size + KpSize,
case Size2 >= Acc#acc.chunk_threshold of
case Size2 >= Acc#acc.kp_chunk_threshold of
true ->
{ok, NewNodeState} = write_kp_node(
Acc2, lists:reverse(NextLevelNodes2)),
Expand Down
3 changes: 2 additions & 1 deletion src/couchdb/couch_db.hrl
Expand Up @@ -286,6 +286,7 @@
assemble_kv = fun(Key, Value) -> {Key, Value} end,
less = fun(A, B) -> A < B end,
reduce = nil,
chunk_threshold = 16#4ff,
kv_chunk_threshold = 16#4ff,
kp_chunk_threshold = 2 * 16#4ff,
binary_mode = false
}).
3 changes: 2 additions & 1 deletion test/etap/020-btree-basics.t
Expand Up @@ -24,7 +24,8 @@ rows() -> 250.
assemble_kv,
less,
reduce,
chunk_threshold = 16#4ff,
kv_chunk_threshold = 16#4ff,
kp_chunk_threshold = 2 * 16#4ff,
binary_mode = false
}).

Expand Down
3 changes: 2 additions & 1 deletion test/etap/022-btree-copy.t
Expand Up @@ -21,7 +21,8 @@
assemble_kv,
less,
reduce,
chunk_threshold = 16#4ff,
kv_chunk_threshold = 16#4ff,
kp_chunk_threshold = 2 * 16#4ff,
binary_mode = false
}).

Expand Down
36 changes: 30 additions & 6 deletions test/etap/023-btree-guided-purge.t
Expand Up @@ -48,7 +48,11 @@ no_purged_items_test() ->
end,

{ok, Fd} = couch_file:open(filename(), [create, overwrite]),
{ok, Btree} = couch_btree:open(nil, Fd, [{reduce, ReduceFun}, {chunk_threshold, 6 * 1024}]),
{ok, Btree} = couch_btree:open(nil, Fd, [
{reduce, ReduceFun},
{kv_chunk_threshold, 6 * 1024},
{kp_chunk_threshold, 6 * 1024}
]),

N = 211341,
KVs = [{I, I} || I <- lists:seq(1, N)],
Expand Down Expand Up @@ -86,7 +90,11 @@ all_purged_items_test() ->
end,

{ok, Fd} = couch_file:open(filename(), [create, overwrite]),
{ok, Btree} = couch_btree:open(nil, Fd, [{reduce, ReduceFun}, {chunk_threshold, 6 * 1024}]),
{ok, Btree} = couch_btree:open(nil, Fd, [
{reduce, ReduceFun},
{kv_chunk_threshold, 6 * 1024},
{kp_chunk_threshold, 6 * 1024}
]),

N = 211341,
KVs = [{I, I} || I <- lists:seq(1, N)],
Expand Down Expand Up @@ -128,7 +136,11 @@ partial_purges_test() ->
end,

{ok, Fd} = couch_file:open(filename(), [create, overwrite]),
{ok, Btree} = couch_btree:open(nil, Fd, [{reduce, ReduceFun}, {chunk_threshold, 6 * 1024}]),
{ok, Btree} = couch_btree:open(nil, Fd, [
{reduce, ReduceFun},
{kv_chunk_threshold, 6 * 1024},
{kp_chunk_threshold, 6 * 1024}
]),

N = 211341,
KVs = [{I, I} || I <- lists:seq(1, N)],
Expand Down Expand Up @@ -190,7 +202,11 @@ partial_purges_test_2() ->
end,

{ok, Fd} = couch_file:open(filename(), [create, overwrite]),
{ok, Btree} = couch_btree:open(nil, Fd, [{reduce, ReduceFun}, {chunk_threshold, 6 * 1024}]),
{ok, Btree} = couch_btree:open(nil, Fd, [
{reduce, ReduceFun},
{kv_chunk_threshold, 6 * 1024},
{kp_chunk_threshold, 6 * 1024}
]),

N = 320000,
KVs = [{iolist_to_binary(io_lib:format("doc_~6..0b", [I])), {I rem 64, I}} || I <- lists:seq(1, N)],
Expand Down Expand Up @@ -257,7 +273,11 @@ partial_purges_test_with_stop() ->
end,

{ok, Fd} = couch_file:open(filename(), [create, overwrite]),
{ok, Btree} = couch_btree:open(nil, Fd, [{reduce, ReduceFun}, {chunk_threshold, 6 * 1024}]),
{ok, Btree} = couch_btree:open(nil, Fd, [
{reduce, ReduceFun},
{kv_chunk_threshold, 6 * 1024},
{kp_chunk_threshold, 6 * 1024}
]),

N = 211341,
KVs = [{I, I} || I <- lists:seq(1, N)],
Expand Down Expand Up @@ -323,7 +343,11 @@ add_remove_and_purge_test() ->
end,

{ok, Fd} = couch_file:open(filename(), [create, overwrite]),
{ok, Btree} = couch_btree:open(nil, Fd, [{reduce, ReduceFun}, {chunk_threshold, 6 * 1024}]),
{ok, Btree} = couch_btree:open(nil, Fd, [
{reduce, ReduceFun},
{kv_chunk_threshold, 6 * 1024},
{kp_chunk_threshold, 6 * 1024}
]),

N = 211341,
KVs = [{I, I} || I <- lists:seq(1, N)],
Expand Down

0 comments on commit c62a4b2

Please sign in to comment.