More efficient term size calculation
Unlike byte_size(term_to_binary(Term)), the BIF erlang:external_size/1 does
not perform the serialization step: it only calculates the maximum external
size of a term, which is more efficient (it is faster and avoids creating an
intermediate binary that immediately becomes garbage).
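
As an illustrative aside (not part of the commit; the sample term is made
up), the BIF's documented contract is that it never reports less than the
actual encoded size:

    %% Erlang shell sketch; the sample term is invented for illustration.
    1> Term = [{<<"_id">>, <<"doc1">>}, {<<"value">>, lists:seq(1, 100)}].
    2> byte_size(term_to_binary(Term)) =< erlang:external_size(Term).
    true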

With the test couch_http_bulk_writes.sh at [1], using 20 writers and batches
of 100 documents of 1 KB each, this patch raises the number of documents
written in the test run from about 1,300,000 to about 1,400,000.

[1] https://github.com/fdmanana/basho_bench_couch

git-svn-id: https://svn.apache.org/repos/asf/couchdb/trunk@1129597 13f79535-47bb-0310-9956-ffa450edef68
fdmanana committed May 31, 2011
1 parent: ba730af · commit: 2cd5139
Showing 5 changed files with 20 additions and 14 deletions.
20 changes: 10 additions & 10 deletions src/couchdb/couch_btree.erl
@@ -276,26 +276,26 @@ complete_root(Bt, KPs) ->
 % written. Plus with the "case byte_size(term_to_binary(InList)) of" code
 % it's probably really inefficient.

-chunkify(#btree{compression = Comp} = Bt, InList) ->
-    case byte_size(couch_compress:compress(InList, Comp)) of
+chunkify(InList) ->
+    case ?term_size(InList) of
     Size when Size > ?CHUNK_THRESHOLD ->
         NumberOfChunksLikely = ((Size div ?CHUNK_THRESHOLD) + 1),
         ChunkThreshold = Size div NumberOfChunksLikely,
-        chunkify(Bt, InList, ChunkThreshold, [], 0, []);
+        chunkify(InList, ChunkThreshold, [], 0, []);
     _Else ->
         [InList]
     end.

-chunkify(_Bt, [], _ChunkThreshold, [], 0, OutputChunks) ->
+chunkify([], _ChunkThreshold, [], 0, OutputChunks) ->
     lists:reverse(OutputChunks);
-chunkify(_Bt, [], _ChunkThreshold, OutList, _OutListSize, OutputChunks) ->
+chunkify([], _ChunkThreshold, OutList, _OutListSize, OutputChunks) ->
     lists:reverse([lists:reverse(OutList) | OutputChunks]);
-chunkify(Bt, [InElement | RestInList], ChunkThreshold, OutList, OutListSize, OutputChunks) ->
-    case byte_size(couch_compress:compress(InElement, Bt#btree.compression)) of
+chunkify([InElement | RestInList], ChunkThreshold, OutList, OutListSize, OutputChunks) ->
+    case ?term_size(InElement) of
     Size when (Size + OutListSize) > ChunkThreshold andalso OutList /= [] ->
-        chunkify(Bt, RestInList, ChunkThreshold, [], 0, [lists:reverse([InElement | OutList]) | OutputChunks]);
+        chunkify(RestInList, ChunkThreshold, [], 0, [lists:reverse([InElement | OutList]) | OutputChunks]);
     Size ->
-        chunkify(Bt, RestInList, ChunkThreshold, [InElement | OutList], OutListSize + Size, OutputChunks)
+        chunkify(RestInList, ChunkThreshold, [InElement | OutList], OutListSize + Size, OutputChunks)
     end.

 modify_node(Bt, RootPointerInfo, Actions, QueryOutput) ->

@@ -350,7 +350,7 @@ get_node(#btree{fd = Fd}, NodePos) ->

 write_node(#btree{fd = Fd, compression = Comp} = Bt, NodeType, NodeList) ->
     % split up nodes into smaller sizes
-    NodeListList = chunkify(Bt, NodeList),
+    NodeListList = chunkify(NodeList),
     % now write out each chunk and return the KeyPointer pairs for those nodes
     ResultList = [
         begin
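
As a hedged aside on the arithmetic in chunkify/1 above: it first sizes the
whole node, then derives a per-chunk threshold so the chunks come out roughly
equal. The numbers below are invented, and the 1279-byte value assumes
?CHUNK_THRESHOLD is 16#4ff as defined in couch_btree.erl:

    %% Worked example with invented numbers; 1279 assumes
    %% ?CHUNK_THRESHOLD = 16#4ff in couch_btree.erl.
    Size = 5000,                                     % ?term_size(InList)
    NumberOfChunksLikely = (Size div 1279) + 1,      % 3 + 1 = 4 chunks
    ChunkThreshold = Size div NumberOfChunksLikely.  % 1250 bytes per chunk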
6 changes: 6 additions & 0 deletions src/couchdb/couch_db.hrl
@@ -27,6 +27,12 @@
 -define(b2l(V), binary_to_list(V)).
 -define(l2b(V), list_to_binary(V)).
 -define(term_to_bin(T), term_to_binary(T, [{minor_version, 1}])).
+-define(term_size(T),
+    try
+        erlang:external_size(T)
+    catch _:_ ->
+        byte_size(?term_to_bin(T))
+    end).

 -define(DEFAULT_ATTACHMENT_CONTENT_TYPE, <<"application/octet-stream">>).

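For readability, the same macro written as a plain function; this is an
illustrative rewrite, not code from the commit. The catch-all presumably
covers emulators where the erlang:external_size/1 BIF does not exist, in
which case the macro falls back to the old serialize-and-measure approach:

    %% Function form of ?term_size, for illustration only.
    term_size(T) ->
        try
            %% Fast path: size bound computed without building a binary.
            erlang:external_size(T)
        catch _:_ ->
            %% Fallback: serialize and measure, as the pre-patch code did.
            byte_size(term_to_binary(T, [{minor_version, 1}]))
        end.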
2 changes: 1 addition & 1 deletion src/couchdb/couch_db_updater.erl
@@ -888,7 +888,7 @@ copy_compact(Db, NewDb0, Retry) ->
     fun(#doc_info{high_seq=Seq}=DocInfo, _Offset,
         {AccNewDb, AccUncopied, AccUncopiedSize, AccCopiedSize, TotalCopied}) ->

-        AccUncopiedSize2 = AccUncopiedSize + byte_size(?term_to_bin(DocInfo)),
+        AccUncopiedSize2 = AccUncopiedSize + ?term_size(DocInfo),
         if AccUncopiedSize2 >= BufferSize ->
             NewDb2 = copy_docs(
                 Db, AccNewDb, lists:reverse([DocInfo | AccUncopied]), Retry),
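
The change above sits inside a buffer-and-flush pattern that also appears in
couch_view_compactor.erl below: estimated term sizes accumulate until they
reach BufferSize, then the batch is written out. A self-contained sketch of
that pattern; the module name, flush/1, and the fold shape are invented, and
erlang:external_size/1 stands in for ?term_size:

    %% Sketch of the buffer-and-flush pattern; all names are invented.
    -module(buffer_sketch).
    -export([run/2]).

    run(Items, BufferSize) ->
        lists:foldl(fun(Item, {Acc, AccSize}) ->
            Size = AccSize + erlang:external_size(Item),
            if Size >= BufferSize ->
                   %% Threshold reached: flush the batch, reset the buffer.
                   flush(lists:reverse([Item | Acc])),
                   {[], 0};
               true ->
                   {[Item | Acc], Size}
            end
        end, {[], 0}, Items).
        %% Note: the real code also flushes the final partial batch.

    flush(Batch) ->
        %% Stand-in for copy_docs/4 or couch_btree:add/2 in the real code.
        io:format("flushing ~p items~n", [length(Batch)]).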
4 changes: 2 additions & 2 deletions src/couchdb/couch_view_compactor.erl
@@ -57,7 +57,7 @@ compact_group(Group, EmptyGroup) ->
             Msg = "Duplicates of ~s detected in ~s ~s - rebuild required",
             exit(io_lib:format(Msg, [DocId, DbName, GroupId]));
         true -> ok end,
-        AccSize2 = AccSize + byte_size(?term_to_bin(KV)),
+        AccSize2 = AccSize + ?term_size(KV),
         if AccSize2 >= BufferSize ->
             {ok, Bt2} = couch_btree:add(Bt, lists:reverse([KV|Acc])),
             couch_task_status:update("Copied ~p of ~p Ids (~p%)",
@@ -90,7 +90,7 @@ compact_view(View, EmptyView, BufferSize) ->

     %% Key is {Key,DocId}
     Fun = fun(KV, {Bt, Acc, AccSize, TotalCopied}) ->
-        AccSize2 = AccSize + byte_size(?term_to_bin(KV)),
+        AccSize2 = AccSize + ?term_size(KV),
         if AccSize2 >= BufferSize ->
             {ok, Bt2} = couch_btree:add(Bt, lists:reverse([KV|Acc])),
             couch_task_status:update("View #~p: copied ~p of ~p KVs (~p%)",
2 changes: 1 addition & 1 deletion src/couchdb/couch_work_queue.erl
@@ -42,7 +42,7 @@ new(Options) ->
 queue(Wq, Item) when is_binary(Item) ->
     gen_server:call(Wq, {queue, Item, byte_size(Item)}, infinity);
 queue(Wq, Item) ->
-    gen_server:call(Wq, {queue, Item, byte_size(?term_to_bin(Item))}, infinity).
+    gen_server:call(Wq, {queue, Item, ?term_size(Item)}, infinity).


 dequeue(Wq) ->
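
A hedged usage sketch of the queue API touched above; only new/1, queue/2,
and dequeue/1 appear in this diff, so the option names and return shapes
below are assumptions:

    %% Assumed usage; option names and return values are guesses.
    {ok, Wq} = couch_work_queue:new([{max_size, 1024}, {max_items, 100}]),
    ok = couch_work_queue:queue(Wq, {<<"key">>, <<"value">>}), % sized via ?term_size
    ok = couch_work_queue:queue(Wq, <<"raw binary">>),         % sized via byte_size/1
    {ok, Batch} = couch_work_queue:dequeue(Wq).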
