Skip to content

Track "user data" size #42

Closed
wants to merge 10 commits into from
View
9 apps/couch/include/couch_db.hrl
@@ -65,6 +65,7 @@
{id = <<"">>,
update_seq = 0,
deleted = false,
+ data_size = 0,
rev_tree = []
}).
@@ -297,3 +298,11 @@
set_timeout_fun,
stop_fun
}).
+
+-record(leaf, { % value kept at each revision-tree leaf
+ deleted, % boolean; decoded from the on-disk 1/0 flag
+ ptr, % file position of the stored doc summary
+ seq, % update sequence recorded for this revision
+ size = 0, % byte size of the term_to_binary-encoded body; stays 0 when read from the old 3-tuple disk format
+ atts = [] % {Pointer, AttLen} pairs, one per attachment
+}).
View
19 apps/couch/src/couch_db.erl
@@ -252,7 +252,7 @@ get_last_purged(#db{fd=Fd, header=#db_header{purged_docs=PurgedPointer}}) ->
couch_file:pread_term(Fd, PurgedPointer).
get_doc_count(Db) ->
- {ok, {Count, _DelCount}} = couch_btree:full_reduce(Db#db.id_tree),
+ {ok, {Count, _, _}} = couch_btree:full_reduce(Db#db.id_tree),
{ok, Count}.
get_db_info(Db) ->
@@ -265,7 +265,7 @@ get_db_info(Db) ->
instance_start_time=StartTime,
committed_update_seq=CommittedUpdateSeq} = Db,
{ok, Size} = couch_file:bytes(Fd),
- {ok, {Count, DelCount}} = couch_btree:full_reduce(FullDocBtree),
+ {ok, {Count, DelCount, DataSize}} = couch_btree:full_reduce(FullDocBtree),
InfoList = [
{db_name, Name},
{doc_count, Count},
@@ -274,6 +274,7 @@ get_db_info(Db) ->
{purge_seq, couch_db:get_purge_seq(Db)},
{compact_running, Compactor/=nil},
{disk_size, Size},
+ {other, {[{data_size, DataSize}]}},
{instance_start_time, StartTime},
{disk_format_version, DiskVersion},
{committed_update_seq, CommittedUpdateSeq}
@@ -548,8 +549,8 @@ prep_and_validate_updates(Db, [DocBucket|RestBuckets],
[{ok, #full_doc_info{rev_tree=OldRevTree}=OldFullDocInfo}|RestLookups],
AllowConflict, AccPrepped, AccErrors) ->
Leafs = couch_key_tree:get_all_leafs(OldRevTree),
- LeafRevsDict = dict:from_list([{{Start, RevId}, {Deleted, Sp, Revs}} ||
- {{Deleted, Sp, _Seq}, {Start, [RevId|_]}=Revs} <- Leafs]),
+ LeafRevsDict = dict:from_list([{{Start, RevId}, {Del, Ptr, Revs}} ||
+ {#leaf{deleted=Del, ptr=Ptr}, {Start, [RevId|_]}=Revs} <- Leafs]),
{PreppedBucket, AccErrors3} = lists:foldl(
fun(Doc, {Docs2Acc, AccErrors2}) ->
case prep_and_validate_update(Db, Doc, OldFullDocInfo,
@@ -776,7 +777,7 @@ make_first_doc_on_disk(_Db, _Id, _Pos, []) ->
nil;
make_first_doc_on_disk(Db, Id, Pos, [{_Rev, ?REV_MISSING}|RestPath]) ->
make_first_doc_on_disk(Db, Id, Pos - 1, RestPath);
-make_first_doc_on_disk(Db, Id, Pos, [{_Rev, {IsDel, Sp, _Seq}} |_]=DocPath) ->
+make_first_doc_on_disk(Db, Id, Pos, [{_, #leaf{deleted=IsDel, ptr=Sp}} |_]=DocPath) ->
Revs = [Rev || {Rev, _} <- DocPath],
make_doc(Db, Id, IsDel, Sp, {Pos, Revs}).
@@ -971,7 +972,7 @@ enum_docs_since_reduce_to_count(Reds) ->
fun couch_db_updater:btree_by_seq_reduce/2, Reds).
enum_docs_reduce_to_count(Reds) ->
- {Count, _DelCount} = couch_btree:final_reduce(
+ {Count, _, _} = couch_btree:final_reduce(
fun couch_db_updater:btree_by_id_reduce/2, Reds),
Count.
@@ -1045,7 +1046,7 @@ open_doc_revs_int(Db, IdRevs, Options) ->
?REV_MISSING ->
% we have the rev in our list but know nothing about it
{{not_found, missing}, {Pos, Rev}};
- {IsDeleted, SummaryPtr, _UpdateSeq} ->
+ #leaf{deleted=IsDeleted, ptr=SummaryPtr} ->
{ok, make_doc(Db, Id, IsDeleted, SummaryPtr, FoundRevPath)}
end
end, FoundRevs),
@@ -1092,9 +1093,9 @@ doc_meta_info(#doc_info{high_seq=Seq,revs=[#rev_info{rev=Rev}|RestInfo]}, RevTre
couch_key_tree:get_full_key_paths(RevTree, [Rev]),
[{revs_info, Pos, lists:map(
- fun({Rev1, {true, _Sp, _UpdateSeq}}) ->
+ fun({Rev1, #leaf{deleted=true}}) ->
{Rev1, deleted};
- ({Rev1, {false, _Sp, _UpdateSeq}}) ->
+ ({Rev1, #leaf{deleted=false}}) ->
{Rev1, available};
({Rev1, ?REV_MISSING}) ->
{Rev1, missing}
View
116 apps/couch/src/couch_db_updater.erl
@@ -303,17 +303,19 @@ collect_updates(GroupedDocsAcc, ClientsAcc, MergeConflicts, FullCommit) ->
rev_tree(DiskTree) ->
- couch_key_tree:map(fun(_RevId, {IsDeleted, BodyPointer, UpdateSeq}) ->
- {IsDeleted == 1, BodyPointer, UpdateSeq};
+ couch_key_tree:map(fun(_RevId, {Del, Ptr, Seq}) ->
+ #leaf{deleted=(Del==1), ptr=Ptr, seq=Seq};
+ (_RevId, {Del, Ptr, Seq, Size, Atts}) ->
+ #leaf{deleted=(Del==1), ptr=Ptr, seq=Seq, size=Size, atts=Atts};
(_RevId, ?REV_MISSING) ->
?REV_MISSING
end, DiskTree).
disk_tree(RevTree) ->
- couch_key_tree:map(fun(_RevId, {IsDeleted, BodyPointer, UpdateSeq}) ->
- {if IsDeleted -> 1; true -> 0 end, BodyPointer, UpdateSeq};
- (_RevId, ?REV_MISSING) ->
- ?REV_MISSING
+ couch_key_tree:map(fun(_RevId, ?REV_MISSING) ->
+ ?REV_MISSING;
+ (_RevId, #leaf{deleted=Del, ptr=Ptr, seq=Seq, size=Size, atts=Atts}) ->
+ {if Del -> 1; true -> 0 end, Ptr, Seq, Size, Atts}
end, RevTree).
btree_by_seq_split(#full_doc_info{id=Id, update_seq=Seq, deleted=Del, rev_tree=T}) ->
@@ -345,34 +347,37 @@ btree_by_seq_join(KeySeq,{Id, Rev, Bp, Conflicts, DelConflicts, Deleted}) ->
[#rev_info{rev=Rev2,seq=KeySeq,deleted=true} || Rev2 <- DelConflicts]}.
btree_by_id_split(#full_doc_info{id=Id, update_seq=Seq,
- deleted=Deleted, rev_tree=Tree}) ->
- {Id, {Seq, if Deleted -> 1; true -> 0 end, disk_tree(Tree)}}.
+ data_size=Size, deleted=Deleted, rev_tree=Tree}) ->
+ {Id, {Seq, if Deleted -> 1; true -> 0 end, Size, disk_tree(Tree)}}.
+%% Handle the older on-disk format, written before `data_size` was added.
btree_by_id_join(Id, {HighSeq, Deleted, DiskTree}) ->
- Tree =
- couch_key_tree:map(
- fun(_RevId, {IsDeleted, BodyPointer, UpdateSeq}) ->
- {IsDeleted == 1, BodyPointer, UpdateSeq};
- (_RevId, ?REV_MISSING) ->
- ?REV_MISSING;
- (_RevId, {IsDeleted, BodyPointer}) ->
- % 09 UPGRADE CODE
- % this is the 0.9.0 and earlier rev info record. It's missing the seq
- % nums, which means couchdb will sometimes reexamine unchanged
- % documents with the _changes API.
- % This is fixed by compacting the database.
- {IsDeleted == 1, BodyPointer, HighSeq}
- end, DiskTree),
+ btree_by_id_join(Id, {HighSeq, Deleted, 0, DiskTree});
- #full_doc_info{id=Id, update_seq=HighSeq, deleted=Deleted==1, rev_tree=Tree}.
+btree_by_id_join(Id, {HighSeq, Deleted, Size, DiskTree}) ->
+ #full_doc_info{id=Id, update_seq=HighSeq,
+ deleted=Deleted==1, data_size=Size,
+ rev_tree=rev_tree(DiskTree)}.
btree_by_id_reduce(reduce, FullDocInfos) ->
- % count the number of not deleted documents
- {length([1 || #full_doc_info{deleted=false} <- FullDocInfos]),
- length([1 || #full_doc_info{deleted=true} <- FullDocInfos])};
-btree_by_id_reduce(rereduce, Reds) ->
- {lists:sum([Count || {Count,_} <- Reds]),
- lists:sum([DelCount || {_, DelCount} <- Reds])}.
+ lists:foldl(
+ fun(#full_doc_info{deleted = false, data_size=Size},
+ {NotDeleted, Deleted, DocSize}) ->
+ {NotDeleted + 1, Deleted, DocSize + Size};
+ (#full_doc_info{deleted = true, data_size=Size},
+ {NotDeleted, Deleted, DocSize}) ->
+ {NotDeleted, Deleted + 1, DocSize + Size}
+ end,
+ {0, 0, 0}, FullDocInfos);
+
+btree_by_id_reduce(rereduce, Reductions) ->
+ lists:foldl(
+ fun({NotDeleted, Deleted}, {AccNotDeleted, AccDeleted, AccDocSizes}) ->
+ {AccNotDeleted + NotDeleted, AccDeleted + Deleted, AccDocSizes};
+ ({NotDeleted, Deleted, DocSizes}, {AccNotDeleted, AccDeleted, AccDocSizes}) ->
+ {AccNotDeleted + NotDeleted, AccDeleted + Deleted, DocSizes + AccDocSizes}
+ end,
+ {0, 0, 0}, Reductions).
btree_by_seq_reduce(reduce, DocInfos) ->
% count the number of documents
@@ -486,14 +491,17 @@ flush_trees(#db{fd=Fd,header=Header}=Db,
% make sure the Fd in the written bins is the same Fd we are
% and convert bins, removing the FD.
% All bins should have been written to disk already.
- DiskAtts =
+ {DiskAtts, SizeInfo} =
case Atts of
- [] -> [];
+ [] -> {[],[]};
[#att{data={BinFd, _Sp}} | _ ] when BinFd == Fd ->
- [{N,T,P,AL,DL,R,M,E}
+ {[{N,T,P,AL,DL,R,M,E}
|| #att{name=N,type=T,data={_,P},md5=M,revpos=R,
att_len=AL,disk_len=DL,encoding=E}
- <- Atts];
+ <- Atts],
+ [{P1,AL1}
+ || #att{data={_,P1},att_len=AL1}
+ <- Atts]};
_ ->
% BinFd must not equal our Fd. This can happen when a database
% is being switched out during a compaction
@@ -508,7 +516,13 @@ flush_trees(#db{fd=Fd,header=Header}=Db,
false ->
couch_file:append_term_md5(Fd, {Doc#doc.body, DiskAtts})
end,
- {IsDeleted, NewSummaryPointer, UpdateSeq};
+ #leaf{
+ deleted = IsDeleted,
+ ptr = NewSummaryPointer,
+ seq = UpdateSeq,
+ size = size(term_to_binary(Doc#doc.body)),
+ atts = SizeInfo
+ };
_ ->
Value
end
@@ -636,9 +650,9 @@ update_docs_int(Db, DocsList, NonRepDocs, MergeConflicts, FullCommit) ->
% Write out the document summaries (the bodies are stored in the nodes of
% the trees, the attachments are already written to disk)
{ok, FlushedFullDocInfos} = flush_trees(Db2, NewFullDocInfos, []),
-
- IndexInfos = new_index_entries(FlushedFullDocInfos, []),
-
+ IndexInfos =
+ new_index_entries(compute_data_sizes(FlushedFullDocInfos, []),
+ []),
% and the indexes
{ok, DocInfoByIdBTree2} = couch_btree:add_remove(DocInfoByIdBTree,
IndexInfos, []),
@@ -661,6 +675,18 @@ update_docs_int(Db, DocsList, NonRepDocs, MergeConflicts, FullCommit) ->
{ok, commit_data(Db4, not FullCommit)}.
+%% Stamp every #full_doc_info{} in the list with a data_size freshly
+%% computed from its revision tree. Input order is preserved; the results
+%% follow the reversed contents of Acc (the callers visible here pass []).
+compute_data_sizes(FullDocInfos, Acc) ->
+    WithSize =
+        fun(Info) ->
+            #full_doc_info{rev_tree = RevTree} = Info,
+            Info#full_doc_info{
+                data_size = couch_key_tree:compute_data_size(RevTree)
+            }
+        end,
+    lists:reverse(Acc, lists:map(WithSize, FullDocInfos)).
+
+
+
update_local_docs(#db{local_tree=Btree}=Db, Docs) ->
Ids = [Id || {_Client, #doc{id=Id}} <- Docs],
@@ -815,15 +841,21 @@ copy_docs(Db, #db{fd=DestFd}=NewDb, MixedInfos, Retry) ->
end, merge_lookups(MixedInfos, LookupResults)),
NewInfos1 = [Info#full_doc_info{rev_tree=couch_key_tree:map(
- fun(Rev, {IsDel, Sp, Seq}, leaf) ->
- DocBody = copy_doc_attachments(Db, Rev, Sp, DestFd),
- {ok, Pos} = couch_file:append_term_md5(DestFd, DocBody),
- {IsDel, Pos, Seq};
+ fun(Rev, #leaf{ptr=Sp, size=Size0}=Leaf, leaf) ->
+ {Body, AttInfos} = copy_doc_attachments(Db, Rev, Sp, DestFd),
+ {ok, Pos} = couch_file:append_term_md5(DestFd, {Body, AttInfos}),
+ if Size0 > 0 ->
+ Leaf#leaf{ptr=Pos};
+ true ->
+ DocSize = byte_size(term_to_binary(Body)),
+ AttSizes = [{element(3,A), element(4,A)} || A <- AttInfos],
+ Leaf#leaf{ptr=Pos, size=DocSize, atts=AttSizes}
+ end;
(_, _, branch) ->
?REV_MISSING
end, RevTree)} || #full_doc_info{rev_tree=RevTree}=Info <- Infos],
- NewInfos = stem_full_doc_infos(Db, NewInfos1),
+ NewInfos = stem_full_doc_infos(Db, compute_data_sizes(NewInfos1, [])),
RemoveSeqs =
case Retry of
false ->
View
2 apps/couch/src/couch_doc.erl
@@ -282,7 +282,7 @@ max_seq([#rev_info{seq=Seq}|Rest], Max) ->
to_doc_info_path(#full_doc_info{id=Id,rev_tree=Tree}) ->
RevInfosAndPath =
[{#rev_info{deleted=Del,body_sp=Bp,seq=Seq,rev={Pos,RevId}}, Path} ||
- {{Del, Bp, Seq},{Pos, [RevId|_]}=Path} <-
+ {#leaf{deleted=Del, ptr=Bp, seq=Seq},{Pos, [RevId|_]}=Path} <-
couch_key_tree:get_all_leafs(Tree)],
SortedRevInfosAndPath = lists:sort(
fun({#rev_info{deleted=DeletedA,rev=RevA}, _PathA},
View
52 apps/couch/src/couch_key_tree.erl
@@ -12,10 +12,13 @@
-module(couch_key_tree).
--export([merge/3, find_missing/2, get_key_leafs/2, get_full_key_paths/2, get/2]).
+-export([merge/3, find_missing/2, get_key_leafs/2,
+ get_full_key_paths/2, get/2, compute_data_size/1]).
-export([map/2, get_all_leafs/1, count_leafs/1, remove_leafs/2,
get_all_leafs_full/1,stem/2,map_leafs/2]).
+-include("couch_db.hrl").
+
% Tree::term() is really a tree(), but we don't want to require R13B04 yet
-type branch() :: {Key::term(), Value::term(), Tree::term()}.
-type path() :: {Start::pos_integer(), branch()}.
@@ -277,6 +280,53 @@ count_leafs_simple([{_Key, _Value, []} | RestTree]) ->
count_leafs_simple([{_Key, _Value, SubTree} | RestTree]) ->
count_leafs_simple(SubTree) + count_leafs_simple(RestTree).
+%% Total data size of a revision tree: the summed body sizes of its leaves
+%% plus the summed lengths of the attachment entries collected from them.
+%% Branch nodes contribute nothing; what each leaf contributes is decided
+%% by sum_up_sizes/2.
+compute_data_size(Tree) ->
+    CollectLeaf =
+        fun({_Pos, _Key, LeafValue}, leaf, Sizes) ->
+                {ok, sum_up_sizes(LeafValue, Sizes)};
+           ({_Pos, _Key, _Value}, branch, Sizes) ->
+                {ok, Sizes}
+        end,
+    {BodyTotal, AttEntries} = tree_fold(CollectLeaf, {0, []}, Tree),
+    % each attachment entry is a {Pointer, Length} pair; only Length is summed
+    AttTotal = lists:foldl(fun({_Ptr, Len}, Sum) -> Len + Sum end,
+                           0, AttEntries),
+    BodyTotal + AttTotal.
+
+%% Fold one leaf into the {BodyBytes, AttEntries} accumulator. A live leaf
+%% adds its body size and merges in its attachment entries; a deleted leaf
+%% leaves the accumulator untouched.
+sum_up_sizes(#leaf{deleted=false, size=BodyBytes, atts=LeafAtts}, Sizes) ->
+    {BodyAcc, AttAcc} = Sizes,
+    {BodyAcc + BodyBytes, add_att_sizes(AttAcc, LeafAtts)};
+sum_up_sizes(#leaf{deleted=true}, Sizes) ->
+    Sizes.
+
+%% Merge one leaf's attachment entries into the running, duplicate-free
+%% set, so an entry referenced by several leaves is only kept once.
+%%
+%% TotalAttSizes starts as [] and is only ever the result of this function,
+%% so it is already sorted and unique. lists:umerge/2 demands the same of
+%% its second argument, hence lists:usort/1: a plain lists:sort/1 would let
+%% repeated entries inside AttSizes through and break that precondition.
+add_att_sizes(TotalAttSizes, AttSizes) ->
+    lists:umerge(TotalAttSizes, lists:usort(AttSizes)).
+
+%% Fold Fun over a revision tree given as a list of {StartPos, Branch}
+%% paths, threading the accumulator from one path to the next.
+%% A {stop, Acc} reply from Fun is not observed at this level: the
+%% remaining paths are still folded.
+tree_fold(Fun, Acc, Tree) ->
+    lists:foldl(
+        fun({StartPos, Branch}, AccIn) ->
+            tree_fold_simple(Fun, StartPos, [Branch], AccIn)
+        end,
+        Acc, Tree).
+
+%% Fold Fun over a list of sibling {Key, Value, SubTree} nodes.
+%%
+%% Fun is called as Fun({Pos, Key, Value}, leaf | branch, Acc) and answers
+%% {ok, Acc} or {stop, Acc}. A node with an empty subtree is a leaf. For a
+%% branch the children are folded first (at Pos + 1) and Fun then sees the
+%% branch itself. {stop, Acc} skips only the siblings still pending at the
+%% current level.
+tree_fold_simple(_Fun, _Pos, [], Acc) ->
+    Acc;
+tree_fold_simple(Fun, Pos, [{Key, Value, Children} | Siblings], Acc0) ->
+    {Kind, Acc1} =
+        case Children of
+            [] -> {leaf, Acc0};
+            _ -> {branch, tree_fold_simple(Fun, Pos + 1, Children, Acc0)}
+        end,
+    case Fun({Pos, Key, Value}, Kind, Acc1) of
+        {ok, Acc2} ->
+            tree_fold_simple(Fun, Pos, Siblings, Acc2);
+        {stop, Acc2} ->
+            Acc2
+    end.
map(_Fun, []) ->
[];
View
11 apps/couch/src/couch_view.erl
@@ -18,7 +18,7 @@
code_change/3,get_reduce_view/4,get_temp_reduce_view/5,get_temp_map_view/4,
get_map_view/4,get_row_count/1,reduce_to_count/1,fold_reduce/4,
extract_map_view/1,get_group_server/2,get_group_info/2,
- cleanup_index_files/1,config_change/2]).
+ cleanup_index_files/1,config_change/2, data_size/2]).
-include("couch_db.hrl").
@@ -102,7 +102,7 @@ list_index_files(Db) ->
get_row_count(#view{btree=Bt}) ->
- {ok, {Count, _Reds}} = couch_btree:full_reduce(Bt),
+ {ok, {Count, _, _}} = couch_btree:full_reduce(Bt),
{ok, Count}.
get_temp_reduce_view(Db, Language, DesignOptions, MapSrc, RedSrc) ->
@@ -150,6 +150,13 @@ expand_dups([{Key, {dups, Vals}} | Rest], Acc) ->
expand_dups([KV | Rest], Acc) ->
expand_dups(Rest, [KV | Acc]).
+%% Size estimate for a batch of view rows: the external-term-format byte
+%% size of every row's key and value, added on top of the encoded size of
+%% Reduction. Rows are expected in the [[Key, _], Value] shape.
+data_size(KVList, Reduction) ->
+    ExtSize = fun(Term) -> byte_size(term_to_binary(Term)) end,
+    lists:foldl(
+        fun([[Key, _Ignored], Value], Total) ->
+            ExtSize(Key) + ExtSize(Value) + Total
+        end,
+        ExtSize(Reduction), KVList).
+
fold_reduce({temp_reduce, #view{btree=Bt}}, Fun, Acc, Options) ->
WrapperFun = fun({GroupedKey, _}, PartialReds, Acc0) ->
{_, [Red]} = couch_btree:final_reduce(Bt, PartialReds),
View
27 apps/couch/src/couch_view_group.erl
@@ -461,6 +461,7 @@ get_group_info(State) ->
fd = Fd,
sig = GroupSig,
def_lang = Lang,
+ views = Views,
current_seq=CurrentSeq,
purge_seq=PurgeSeq
} = Group,
@@ -469,6 +470,7 @@ get_group_info(State) ->
{signature, ?l2b(hex_sig(GroupSig))},
{language, Lang},
{disk_size, Size},
+ {data_size, compute_data_size(Views)},
{updater_running, UpdaterPid /= nil},
{compact_running, CompactorPid /= nil},
{waiting_commit, WaitingCommit},
@@ -477,6 +479,13 @@ get_group_info(State) ->
{purge_seq, PurgeSeq}
].
+%% Sum the data size carried by the full reduction of every view btree in
+%% the group.
+%%
+%% A reduction is normally {Count, DataSize, UserReds}. The older
+%% {Count, UserReds} shape, which extract/2 in this module also tolerates,
+%% carries no size and is counted as 0 rather than crashing get_group_info
+%% with a badmatch.
+compute_data_size(ViewList) ->
+    lists:foldl(fun(#view{btree=Btree}, Acc) ->
+        case couch_btree:full_reduce(Btree) of
+            {ok, {_Count, Size, _UserReds}} ->
+                Size + Acc;
+            {ok, {_Count, _UserReds}} ->
+                % reduction written before data sizes were tracked
+                Acc
+        end
+    end, 0, ViewList).
+
+
% maybe move to another module
design_doc_to_view_group(#doc{id=Id,body={Fields}}) ->
Language = couch_util:get_value(<<"language">>, Fields, <<"javascript">>),
@@ -558,13 +567,14 @@ init_group(Fd, #group{def_lang=Lang,views=Views}=Group, IndexHeader) ->
KVs3 = couch_view:detuple_kvs(KVs2,[]),
{ok, Reduced} = couch_query_servers:reduce(Lang, FunSrcs,
KVs3),
- {length(KVs3), Reduced};
+ {length(KVs3), couch_view:data_size(KVs3, Reduced), Reduced};
(rereduce, Reds) ->
- Count = lists:sum([Count0 || {Count0, _} <- Reds]),
- UserReds = [UserRedsList || {_, UserRedsList} <- Reds],
+ Count = lists:sum(extract(Reds, counts)),
+ DataSize = lists:sum(extract(Reds, data_size)),
+ UserReds = extract(Reds, user_reds),
{ok, Reduced} = couch_query_servers:rereduce(Lang, FunSrcs,
UserReds),
- {Count, Reduced}
+ {Count, DataSize, Reduced}
end,
case couch_util:get_value(<<"collation">>, Options, <<"default">>) of
@@ -580,3 +590,12 @@ init_group(Fd, #group{def_lang=Lang,views=Views}=Group, IndexHeader) ->
ViewStates, Views),
Group#group{fd=Fd, current_seq=Seq, purge_seq=PurgeSeq, id_btree=IdBtree,
views=Views2}.
+
+%% Pull one component out of a list of view reductions. Both the current
+%% {Count, DataSize, UserReds} shape and the older {Count, UserReds} shape
+%% are accepted.
+extract(Reds, counts) ->
+    % counts are always the first element
+    lists:map(fun(Red) -> element(1, Red) end, Reds);
+extract(Reds, data_size) ->
+    % old-style reductions carry no size, so they contribute 0
+    SizeOf = fun({_Count, Size, _UserReds}) -> Size;
+                ({_Count, _UserReds}) -> 0
+             end,
+    lists:map(SizeOf, Reds);
+extract(Reds, user_reds) ->
+    % user reductions always come last
+    lists:map(fun(Red) -> element(tuple_size(Red), Red) end, Reds).
Something went wrong with that request. Please try again.