Track used space for database and view index files

The database and view group info URIs now expose a new field, "data_size",
which reports the number of bytes used by the current data snapshot.
Users can compare this value against the "disk_size" value (total file size)
and decide whether or not to trigger a compaction based on that comparison.
This new value is an approximation and therefore not 100% accurate, but it is
close enough for this purpose.
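
For illustration, a minimal sketch of such a compaction heuristic against the
internal Erlang API. The module name, the maybe_compact/1 function, and the
25% fragmentation threshold are assumptions for this example, not part of the
commit; couch_db:start_compact/1 is assumed to be the usual compaction entry point.

    %% A sketch, not CouchDB code: trigger compaction when more than ~25%
    %% of the file no longer holds live data.
    -module(compact_heuristic_demo).
    -export([maybe_compact/1]).

    maybe_compact(Db) ->
        {ok, Info} = couch_db:get_db_info(Db),
        DiskSize = proplists:get_value(disk_size, Info),
        case proplists:get_value(data_size, Info) of
            null ->
                skipped;  % pre-1.2 file: no size info until it is compacted
            DataSize when DataSize * 4 < DiskSize * 3 ->
                couch_db:start_compact(Db);  % data_size < 75% of disk_size
            _ ->
                ok
        end.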

Closes COUCHDB-1132.

Special thanks to Adam Kocoloski and Robert Dionne for their good work on this
feature as well.



git-svn-id: https://svn.apache.org/repos/asf/couchdb/trunk@1095477 13f79535-47bb-0310-9956-ffa450edef68
1 parent f396e32 · commit f5bc870480daca6efd5b18b382fcda0f39f42742 · fdmanana committed Apr 20, 2011
@@ -31,8 +31,12 @@ couchTests.compact = function(debug) {
T(db.save(binAttDoc).ok);
var originalsize = db.info().disk_size;
+ var originaldatasize = db.info().data_size;
var start_time = db.info().instance_start_time;
+ TEquals("number", typeof originaldatasize, "data_size is a number");
+ T(originaldatasize < originalsize, "data size is less than db file size");
+
for(var i in docs) {
db.deleteDoc(docs[i]);
}
@@ -55,5 +59,7 @@ couchTests.compact = function(debug) {
T(xhr.getResponseHeader("Content-Type") == "text/plain");
T(db.info().doc_count == 1);
T(db.info().disk_size < deletesize);
+ TEquals("number", typeof db.info().data_size, "data_size is a number");
+ T(db.info().data_size < db.info().disk_size, "data size is less than db file size");
};
@@ -81,6 +81,10 @@ couchTests.view_compaction = function(debug) {
T(resp.view_index.update_seq === 3001);
var disk_size_before_compact = resp.view_index.disk_size;
+ var data_size_before_compact = resp.view_index.data_size;
+
+ TEquals("number", typeof data_size_before_compact, "data size is a number");
+ T(data_size_before_compact < disk_size_before_compact, "data size < file size");
// compact view group
var xhr = CouchDB.request("POST", "/" + db.name + "/_compact" + "/foo");
@@ -101,4 +105,6 @@ couchTests.view_compaction = function(debug) {
resp = db.designInfo("_design/foo");
T(resp.view_index.update_seq === 3001);
T(resp.view_index.disk_size < disk_size_before_compact);
+ TEquals("number", typeof resp.view_index.data_size, "data size is a number");
+ T(resp.view_index.data_size < resp.view_index.disk_size, "data size < file size");
};
@@ -13,7 +13,7 @@
-module(couch_btree).
-export([open/2, open/3, query_modify/4, add/2, add_remove/3]).
--export([fold/4, full_reduce/1, final_reduce/2, foldl/3, foldl/4]).
+-export([fold/4, full_reduce/1, final_reduce/2, size/1, foldl/3, foldl/4]).
-export([fold_reduce/4, lookup/2, get_state/1, set_options/2]).
-include("couch_db.hrl").
@@ -92,8 +92,16 @@ fold_reduce(#btree{root=Root}=Bt, Fun, Acc, Options) ->
full_reduce(#btree{root=nil,reduce=Reduce}) ->
{ok, Reduce(reduce, [])};
-full_reduce(#btree{root={_P, Red}}) ->
- {ok, Red}.
+full_reduce(#btree{root=Root}) ->
+ {ok, element(2, Root)}.
+
+size(#btree{root = nil}) ->
+ 0;
+size(#btree{root = {_P, _Red}}) ->
+ % pre 1.2 format
+ nil;
+size(#btree{root = {_P, _Red, Size}}) ->
+ Size.
% wraps a 2 arity function with the proper 3 arity function
convert_fun_arity(Fun) when is_function(Fun, 2) ->
@@ -150,7 +158,7 @@ fold(#btree{root=Root}=Bt, Fun, Acc, Options) ->
end,
case Result of
{ok, Acc2}->
- {_P, FullReduction} = Root,
+ FullReduction = element(2, Root),
{ok, {[], [FullReduction]}, Acc2};
{stop, LastReduction, Acc2} ->
{ok, LastReduction, Acc2}
@@ -202,7 +210,8 @@ lookup(#btree{root=Root, less=Less}=Bt, Keys) ->
lookup(_Bt, nil, Keys) ->
{ok, [{Key, not_found} || Key <- Keys]};
-lookup(Bt, {Pointer, _Reds}, Keys) ->
+lookup(Bt, Node, Keys) ->
+ Pointer = element(1, Node),
{NodeType, NodeList} = get_node(Bt, Pointer),
case NodeType of
kp_node ->
@@ -292,7 +301,8 @@ modify_node(Bt, RootPointerInfo, Actions, QueryOutput) ->
nil ->
NodeType = kv_node,
NodeList = [];
- {Pointer, _Reds} ->
+ _Tuple ->
+ Pointer = element(1, RootPointerInfo),
{NodeType, NodeList} = get_node(Bt, Pointer)
end,
NodeTuple = list_to_tuple(NodeList),
@@ -316,10 +326,21 @@ modify_node(Bt, RootPointerInfo, Actions, QueryOutput) ->
reduce_node(#btree{reduce=nil}, _NodeType, _NodeList) ->
[];
reduce_node(#btree{reduce=R}, kp_node, NodeList) ->
- R(rereduce, [Red || {_K, {_P, Red}} <- NodeList]);
+ R(rereduce, [element(2, Node) || {_K, Node} <- NodeList]);
reduce_node(#btree{reduce=R}=Bt, kv_node, NodeList) ->
R(reduce, [assemble(Bt, K, V) || {K, V} <- NodeList]).
+reduce_tree_size(kv_node, NodeSize, _KvList) ->
+ NodeSize;
+reduce_tree_size(kp_node, NodeSize, []) ->
+ NodeSize;
+reduce_tree_size(kp_node, _NodeSize, [{_K, {_P, _Red}} | _]) ->
+ % pre 1.2 format
+ nil;
+reduce_tree_size(kp_node, _NodeSize, [{_K, {_P, _Red, nil}} | _]) ->
+ nil;
+reduce_tree_size(kp_node, NodeSize, [{_K, {_P, _Red, Sz}} | NodeList]) ->
+ reduce_tree_size(kp_node, NodeSize + Sz, NodeList).
get_node(#btree{fd = Fd}, NodePos) ->
{ok, {NodeType, NodeList}} = couch_file:pread_term(Fd, NodePos),
@@ -331,9 +352,10 @@ write_node(Bt, NodeType, NodeList) ->
% now write out each chunk and return the KeyPointer pairs for those nodes
ResultList = [
begin
- {ok, Pointer} = couch_file:append_term(Bt#btree.fd, {NodeType, ANodeList}),
+ {ok, Pointer, Size} = couch_file:append_term(Bt#btree.fd, {NodeType, ANodeList}),
{LastKey, _} = lists:last(ANodeList),
- {LastKey, {Pointer, reduce_node(Bt, NodeType, ANodeList)}}
+ SubTreeSize = reduce_tree_size(NodeType, Size, ANodeList),
+ {LastKey, {Pointer, reduce_node(Bt, NodeType, ANodeList), SubTreeSize}}
end
||
ANodeList <- NodeListList
@@ -449,8 +471,9 @@ modify_kvnode(Bt, NodeTuple, LowerBound, [{ActionType, ActionKey, ActionValue} |
reduce_stream_node(_Bt, _Dir, nil, _KeyStart, _KeyEnd, GroupedKey, GroupedKVsAcc,
GroupedRedsAcc, _KeyGroupFun, _Fun, Acc) ->
{ok, Acc, GroupedRedsAcc, GroupedKVsAcc, GroupedKey};
-reduce_stream_node(Bt, Dir, {P, _R}, KeyStart, KeyEnd, GroupedKey, GroupedKVsAcc,
+reduce_stream_node(Bt, Dir, Node, KeyStart, KeyEnd, GroupedKey, GroupedKVsAcc,
GroupedRedsAcc, KeyGroupFun, Fun, Acc) ->
+ P = element(1, Node),
case get_node(Bt, P) of
{kp_node, NodeList} ->
reduce_stream_kp_node(Bt, Dir, NodeList, KeyStart, KeyEnd, GroupedKey,
@@ -559,7 +582,7 @@ reduce_stream_kp_node2(Bt, Dir, NodeList, KeyStart, KeyEnd,
[FirstGrouped | RestGrouped] = lists:reverse(Grouped0),
{RestGrouped, [FirstGrouped | Ungrouped0]}
end,
- GroupedReds = [R || {_, {_,R}} <- GroupedNodes],
+ GroupedReds = [element(2, Node) || {_, Node} <- GroupedNodes],
case UngroupedNodes of
[{_Key, NodeInfo}|RestNodes] ->
{ok, Acc2, GroupedRedsAcc2, GroupedKVsAcc2, GroupedKey2} =
@@ -576,7 +599,8 @@ adjust_dir(fwd, List) ->
adjust_dir(rev, List) ->
lists:reverse(List).
-stream_node(Bt, Reds, {Pointer, _Reds}, StartKey, InRange, Dir, Fun, Acc) ->
+stream_node(Bt, Reds, Node, StartKey, InRange, Dir, Fun, Acc) ->
+ Pointer = element(1, Node),
{NodeType, NodeList} = get_node(Bt, Pointer),
case NodeType of
kp_node ->
@@ -585,7 +609,8 @@ stream_node(Bt, Reds, {Pointer, _Reds}, StartKey, InRange, Dir, Fun, Acc) ->
stream_kv_node(Bt, Reds, adjust_dir(Dir, NodeList), StartKey, InRange, Dir, Fun, Acc)
end.
-stream_node(Bt, Reds, {Pointer, _Reds}, InRange, Dir, Fun, Acc) ->
+stream_node(Bt, Reds, Node, InRange, Dir, Fun, Acc) ->
+ Pointer = element(1, Node),
{NodeType, NodeList} = get_node(Bt, Pointer),
case NodeType of
kp_node ->
@@ -596,8 +621,9 @@ stream_node(Bt, Reds, {Pointer, _Reds}, InRange, Dir, Fun, Acc) ->
stream_kp_node(_Bt, _Reds, [], _InRange, _Dir, _Fun, Acc) ->
{ok, Acc};
-stream_kp_node(Bt, Reds, [{_Key, {Pointer, Red}} | Rest], InRange, Dir, Fun, Acc) ->
- case stream_node(Bt, Reds, {Pointer, Red}, InRange, Dir, Fun, Acc) of
+stream_kp_node(Bt, Reds, [{_Key, Node} | Rest], InRange, Dir, Fun, Acc) ->
+ Red = element(2, Node),
+ case stream_node(Bt, Reds, Node, InRange, Dir, Fun, Acc) of
{ok, Acc2} ->
stream_kp_node(Bt, [Red | Reds], Rest, InRange, Dir, Fun, Acc2);
{stop, LastReds, Acc2} ->
@@ -606,10 +632,12 @@ stream_kp_node(Bt, Reds, [{_Key, {Pointer, Red}} | Rest], InRange, Dir, Fun, Acc
drop_nodes(_Bt, Reds, _StartKey, []) ->
{Reds, []};
-drop_nodes(Bt, Reds, StartKey, [{NodeKey, {Pointer, Red}} | RestKPs]) ->
+drop_nodes(Bt, Reds, StartKey, [{NodeKey, Node} | RestKPs]) ->
case less(Bt, NodeKey, StartKey) of
- true -> drop_nodes(Bt, [Red | Reds], StartKey, RestKPs);
- false -> {Reds, [{NodeKey, {Pointer, Red}} | RestKPs]}
+ true ->
+ drop_nodes(Bt, [element(2, Node) | Reds], StartKey, RestKPs);
+ false ->
+ {Reds, [{NodeKey, Node} | RestKPs]}
end.
stream_kp_node(Bt, Reds, KPs, StartKey, InRange, Dir, Fun, Acc) ->
@@ -626,16 +654,17 @@ stream_kp_node(Bt, Reds, KPs, StartKey, InRange, Dir, Fun, Acc) ->
% everything sorts before it
{Reds, KPs};
{RevBefore, [FirstAfter | Drop]} ->
- {[Red || {_K,{_P,Red}} <- Drop] ++ Reds,
+ {[element(2, Node) || {_K, Node} <- Drop] ++ Reds,
[FirstAfter | lists:reverse(RevBefore)]}
end
end,
case NodesToStream of
[] ->
{ok, Acc};
- [{_Key, {Pointer, Red}} | Rest] ->
- case stream_node(Bt, NewReds, {Pointer, Red}, StartKey, InRange, Dir, Fun, Acc) of
+ [{_Key, Node} | Rest] ->
+ case stream_node(Bt, NewReds, Node, StartKey, InRange, Dir, Fun, Acc) of
{ok, Acc2} ->
+ Red = element(2, Node),
stream_kp_node(Bt, [Red | NewReds], Rest, InRange, Dir, Fun, Acc2);
{stop, LastReds, Acc2} ->
{stop, LastReds, Acc2}
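
To summarize the couch_btree change above: node pointers grow from two-tuples
{Pointer, Reduction} into three-tuples {Pointer, Reduction, SubTreeSize}, and
every reader is rewritten to use element/2 so that both formats keep working
side by side. The following standalone sketch (module and function names are
illustrative only, not CouchDB code) shows the compatibility pattern:

    %% Illustrative only: the element/2-based accessors tolerate both the
    %% pre-1.2 {P, Red} nodes and the new {P, Red, Size} nodes.
    -module(btree_node_demo).
    -export([pointer/1, reduction/1, subtree_size/1]).

    pointer(Node)   -> element(1, Node).    % works on 2- and 3-tuples alike
    reduction(Node) -> element(2, Node).

    subtree_size({_P, _Red})       -> nil;  % pre 1.2 node: size unknown
    subtree_size({_P, _Red, Size}) -> Size.

A single old-format child makes the size of the whole subtree unknown, which is
why reduce_tree_size/3 above returns nil as soon as it meets a two-tuple (or a
nil-sized) entry; compaction rewrites every node and restores exact sizes.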
@@ -252,23 +252,38 @@ get_db_info(Db) ->
update_seq=SeqNum,
name=Name,
instance_start_time=StartTime,
- committed_update_seq=CommittedUpdateSeq} = Db,
+ committed_update_seq=CommittedUpdateSeq,
+ fulldocinfo_by_id_btree = IdBtree,
+ docinfo_by_seq_btree = SeqBtree
+ } = Db,
{ok, Size} = couch_file:bytes(Fd),
- {ok, {Count, DelCount}} = couch_btree:full_reduce(by_id_btree(Db)),
+ {ok, DbReduction} = couch_btree:full_reduce(by_id_btree(Db)),
InfoList = [
{db_name, Name},
- {doc_count, Count},
- {doc_del_count, DelCount},
+ {doc_count, element(1, DbReduction)},
+ {doc_del_count, element(2, DbReduction)},
{update_seq, SeqNum},
{purge_seq, couch_db:get_purge_seq(Db)},
{compact_running, Compactor/=nil},
{disk_size, Size},
+ {data_size, db_data_size(
+ couch_btree:size(SeqBtree), couch_btree:size(IdBtree), DbReduction)},
{instance_start_time, StartTime},
{disk_format_version, DiskVersion},
{committed_update_seq, CommittedUpdateSeq}
],
{ok, InfoList}.
+db_data_size(nil, _, _) ->
+ null;
+db_data_size(_, nil, _) ->
+ null;
+db_data_size(_, _, {_Count, _DelCount}) ->
+ % pre 1.2 format, upgraded on compaction
+ null;
+db_data_size(SeqBtreeSize, IdBtreeSize, {_Count, _DelCount, DocAndAttsSize}) ->
+ SeqBtreeSize + IdBtreeSize + DocAndAttsSize.
+
get_design_docs(Db) ->
{ok,_, Docs} = couch_btree:fold(by_id_btree(Db),
fun(#full_doc_info{id= <<"_design/",_/binary>>}=FullDocInfo, _Reds, AccDocs) ->
@@ -510,8 +525,14 @@ prep_and_validate_updates(Db, [DocBucket|RestBuckets],
[{ok, #full_doc_info{rev_tree=OldRevTree}=OldFullDocInfo}|RestLookups],
AllowConflict, AccPrepped, AccErrors) ->
Leafs = couch_key_tree:get_all_leafs(OldRevTree),
- LeafRevsDict = dict:from_list([{{Start, RevId}, {Deleted, Sp, Revs}} ||
- {{Deleted, Sp, _Seq}, {Start, [RevId|_]}=Revs} <- Leafs]),
+ LeafRevsDict = dict:from_list([
+ begin
+ Deleted = element(1, LeafVal),
+ Sp = element(2, LeafVal),
+ {{Start, RevId}, {Deleted, Sp, Revs}}
+ end ||
+ {LeafVal, {Start, [RevId | _]} = Revs} <- Leafs
+ ]),
{PreppedBucket, AccErrors3} = lists:foldl(
fun(Doc, {Docs2Acc, AccErrors2}) ->
case prep_and_validate_update(Db, Doc, OldFullDocInfo,
@@ -738,7 +759,9 @@ make_first_doc_on_disk(_Db, _Id, _Pos, []) ->
nil;
make_first_doc_on_disk(Db, Id, Pos, [{_Rev, ?REV_MISSING}|RestPath]) ->
make_first_doc_on_disk(Db, Id, Pos - 1, RestPath);
-make_first_doc_on_disk(Db, Id, Pos, [{_Rev, {IsDel, Sp, _Seq}} |_]=DocPath) ->
+make_first_doc_on_disk(Db, Id, Pos, [{_Rev, RevValue} |_]=DocPath) ->
+ IsDel = element(1, RevValue),
+ Sp = element(2, RevValue),
Revs = [Rev || {Rev, _} <- DocPath],
make_doc(Db, Id, IsDel, Sp, {Pos, Revs}).
@@ -954,9 +977,9 @@ enum_docs_since_reduce_to_count(Reds) ->
fun couch_db_updater:btree_by_seq_reduce/2, Reds).
enum_docs_reduce_to_count(Reds) ->
- {Count, _DelCount} = couch_btree:final_reduce(
+ FinalRed = couch_btree:final_reduce(
fun couch_db_updater:btree_by_id_reduce/2, Reds),
- Count.
+ element(1, FinalRed).
changes_since(Db, Style, StartSeq, Fun, Acc) ->
changes_since(Db, Style, StartSeq, Fun, [], Acc).
@@ -1081,7 +1104,9 @@ open_doc_revs_int(Db, IdRevs, Options) ->
?REV_MISSING ->
% we have the rev in our list but know nothing about it
{{not_found, missing}, {Pos, Rev}};
- {IsDeleted, SummaryPtr, _UpdateSeq} ->
+ RevValue ->
+ IsDeleted = element(1, RevValue),
+ SummaryPtr = element(2, RevValue),
{ok, make_doc(Db, Id, IsDeleted, SummaryPtr, FoundRevPath)}
end
end, FoundRevs),
@@ -1128,12 +1153,15 @@ doc_meta_info(#doc_info{high_seq=Seq,revs=[#rev_info{rev=Rev}|RestInfo]}, RevTre
couch_key_tree:get_full_key_paths(RevTree, [Rev]),
[{revs_info, Pos, lists:map(
- fun({Rev1, {true, _Sp, _UpdateSeq}}) ->
- {Rev1, deleted};
- ({Rev1, {false, _Sp, _UpdateSeq}}) ->
- {Rev1, available};
- ({Rev1, ?REV_MISSING}) ->
- {Rev1, missing}
+ fun({Rev1, ?REV_MISSING}) ->
+ {Rev1, missing};
+ ({Rev1, RevValue}) ->
+ case element(1, RevValue) of
+ true ->
+ {Rev1, deleted};
+ false ->
+ {Rev1, available}
+ end
end, RevPath)}]
end ++
case lists:member(conflicts, Options) of
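
In short, the database data_size reported above is the sum of the live by-seq
tree, the live by-id tree, and the external document/attachment bytes carried in
the by-id reduction, degrading to null whenever any component is still in the
pre-1.2 format. A standalone mirror of db_data_size/3, with made-up figures:

    %% A sketch with hypothetical numbers; mirrors db_data_size/3 above.
    -module(data_size_demo).
    -export([demo/0]).

    db_data_size(nil, _, _) -> null;
    db_data_size(_, nil, _) -> null;
    db_data_size(_, _, {_Count, _DelCount}) -> null;  % pre 1.2 reduction
    db_data_size(SeqSize, IdSize, {_Count, _DelCount, DocAndAttsSize}) ->
        SeqSize + IdSize + DocAndAttsSize.

    demo() ->
        52460000 = db_data_size(120000, 340000, {10, 2, 52000000}),
        null     = db_data_size(nil,    340000, {10, 2, 52000000}),
        ok.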
@@ -56,7 +56,8 @@
{id = <<"">>,
update_seq = 0,
deleted = false,
- rev_tree = []
+ rev_tree = [],
+ leafs_size = 0
}).
-record(httpd,
@@ -128,7 +129,7 @@
% if the disk revision is incremented, then new upgrade logic will need to be
% added to couch_db_updater:init_db.
--define(LATEST_DISK_VERSION, 5).
+-define(LATEST_DISK_VERSION, 6).
-record(db_header,
{disk_version = ?LATEST_DISK_VERSION,
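
The header changes gate everything above: the new leafs_size field tracks the
on-disk size of a document's leaf revisions, and bumping LATEST_DISK_VERSION
from 5 to 6 marks files written in the new node and reduction formats. A hedged
sketch of the guard this implies (the helper is hypothetical, not in this
commit); files older than version 6 report null sizes until compacted:

    %% Hypothetical helper: size info is only exact once the file has been
    %% written in the version 6 format introduced by this commit.
    -module(disk_version_demo).
    -export([sizes_available/1]).

    -define(LATEST_DISK_VERSION, 6).

    sizes_available(DiskVersion) -> DiskVersion >= ?LATEST_DISK_VERSION.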