Skip to content

Commit

Permalink
Fix builtin _sum reduce function
Browse files Browse the repository at this point in the history
The builting _sum reduce function has no protection against overflowing
reduce values. Users can emit objects with enough unique keys to cause
the builtin _sum to create objects that are exceedingly large in the
inner nodes of the view B+Tree.

This change adds the same logic that applies to JavaScript reduce
functions to check if a reduce function is properly reducing its input.
  • Loading branch information
davisp committed Aug 23, 2018
1 parent f82b156 commit c97121d
Show file tree
Hide file tree
Showing 2 changed files with 124 additions and 1 deletion.
27 changes: 26 additions & 1 deletion src/couch/src/couch_query_servers.erl
Expand Up @@ -171,7 +171,8 @@ builtin_reduce(_Re, [], _KVs, Acc) ->
{ok, lists:reverse(Acc)};
builtin_reduce(Re, [<<"_sum",_/binary>>|BuiltinReds], KVs, Acc) ->
Sum = builtin_sum_rows(KVs, 0),
builtin_reduce(Re, BuiltinReds, KVs, [Sum|Acc]);
Red = check_sum_overflow(?term_size(KVs), ?term_size(Sum), Sum),
builtin_reduce(Re, BuiltinReds, KVs, [Red|Acc]);
builtin_reduce(reduce, [<<"_count",_/binary>>|BuiltinReds], KVs, Acc) ->
Count = length(KVs),
builtin_reduce(reduce, BuiltinReds, KVs, [Count|Acc]);
Expand Down Expand Up @@ -247,6 +248,30 @@ sum_arrays([X|Xs], [Y|Ys]) when is_number(X), is_number(Y) ->
sum_arrays(Else, _) ->
throw_sum_error(Else).

check_sum_overflow(InSize, OutSize, Sum) ->
Overflowed = OutSize > 4906 andalso OutSize * 2 > InSize,
case config:get("query_server_config", "reduce_limit", "true") of
"true" when Overflowed ->
Msg = log_sum_overflow(InSize, OutSize),
{[
{<<"error">>, <<"builtin_reduce_error">>},
{<<"reason">>, Msg}
]};
"log" when Overflowed ->
log_sum_overflow(InSize, OutSize),
Sum;
_ ->
Sum
end.

log_sum_overflow(InSize, OutSize) ->
Fmt = "Reduce output must shrink more rapidly: "
"input size: ~b "
"output size: ~b",
Msg = iolist_to_binary(io_lib:format(Fmt, [InSize, OutSize])),
couch_log:error(Msg, []),
Msg.

builtin_stats(_, []) ->
{0, 0, 0, 0, 0};
builtin_stats(_, [[_,First]|Rest]) ->
Expand Down
98 changes: 98 additions & 0 deletions src/couch/test/couch_query_servers_tests.erl
@@ -0,0 +1,98 @@
% Licensed under the Apache License, Version 2.0 (the "License"); you may not
% use this file except in compliance with the License. You may obtain a copy of
% the License at
%
% http://www.apache.org/licenses/LICENSE-2.0
%
% Unless required by applicable law or agreed to in writing, software
% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
% License for the specific language governing permissions and limitations under
% the License.

-module(couch_query_servers_tests).

-include_lib("couch/include/couch_eunit.hrl").


setup() ->
meck:new([config, couch_log]).


teardown(_) ->
meck:unload().


sum_overflow_test_() ->
{
"Test overflow detection in the _sum reduce function",
{
setup,
fun setup/0,
fun teardown/1,
[
fun should_return_error_on_overflow/0,
fun should_return_object_on_log/0,
fun should_return_object_on_false/0
]
}
}.


should_return_error_on_overflow() ->
meck:reset([config, couch_log]),
meck:expect(
config, get, ["query_server_config", "reduce_limit", "true"],
"true"
),
meck:expect(couch_log, error, ['_', '_'], ok),
KVs = gen_sum_kvs(),
{ok, [Result]} = couch_query_servers:reduce(<<"foo">>, [<<"_sum">>], KVs),
?assertMatch(
{[{<<"error">>, <<"builtin_reduce_error">>} | _]},
Result
),
?assert(meck:called(config, get, '_')),
?assert(meck:called(couch_log, error, '_')).


should_return_object_on_log() ->
meck:reset([config, couch_log]),
meck:expect(
config, get, ["query_server_config", "reduce_limit", "true"],
"log"
),
meck:expect(couch_log, error, ['_', '_'], ok),
KVs = gen_sum_kvs(),
{ok, [Result]} = couch_query_servers:reduce(<<"foo">>, [<<"_sum">>], KVs),
?assertMatch({[_ | _]}, Result),
Keys = [K || {K, _} <- element(1, Result)],
?assert(not lists:member(<<"error">>, Keys)),
?assert(meck:called(config, get, '_')),
?assert(meck:called(couch_log, error, '_')).


should_return_object_on_false() ->
meck:reset([config, couch_log]),
meck:expect(
config, get, ["query_server_config", "reduce_limit", "true"],
"false"
),
meck:expect(couch_log, error, ['_', '_'], ok),
KVs = gen_sum_kvs(),
{ok, [Result]} = couch_query_servers:reduce(<<"foo">>, [<<"_sum">>], KVs),
?assertMatch({[_ | _]}, Result),
Keys = [K || {K, _} <- element(1, Result)],
?assert(not lists:member(<<"error">>, Keys)),
?assert(meck:called(config, get, '_')),
?assertNot(meck:called(couch_log, error, '_')).


gen_sum_kvs() ->
lists:map(fun(I) ->
Props = lists:map(fun(_) ->
K = couch_util:encodeBase64Url(crypto:strong_rand_bytes(16)),
{K, 1}
end, lists:seq(1, 20)),
[I, {Props}]
end, lists:seq(1, 10)).

0 comments on commit c97121d

Please sign in to comment.