Permalink
Browse files

MB-6591: Use raw collation for _all_docs query params

_all_docs are always sorted in raw order, not Unicode. Hence
the query parameters (startkey and endkey) also need to
take this raw collation into account. The query validation checks
whether startkey is smaller than endkey, hence we need to take
the collation type (Unicode or raw) into account.

An example for raw collation is:

    Foo < bar < foo < zoo

With Unicode collation it would be:

    bar < foo < Foo < zoo

This patch is based on a patch for Apache CouchDB [1].

I've kept the original commit message below.

[1] apache/couchdb@46d2dc6

Fix empty range check for raw collation.

The check for empty ranges was not taking into account the
view option for raw collation. This fixes that by passing
the couch_btree:less/2 function into the check.

Patch by: Jason Smith
Re: COUCHDB-1228 4/4

Change-Id: I4569683ff0253221f2f265e287b7a52d2ad32ac1
git-svn-id: https://svn.apache.org/repos/asf/couchdb/trunk@1156509 13f79535-47bb-0310-9956-ffa450edef68
Reviewed-on: http://review.couchbase.org/21359
Reviewed-by: Filipe David Borba Manana <fdmanana@gmail.com>
Reviewed-by: Volker Mische <volker.mische@gmail.com>
Tested-by: Volker Mische <volker.mische@gmail.com>
  • Loading branch information...
1 parent 1b21587 commit fd03c3a583904440d34bbc8033a826cbec446357 @davisp davisp committed with Farshid Ghods Aug 11, 2011
@@ -41,6 +41,12 @@ couchTests.all_docs = function(debug) {
var all = db.allDocs({startkey:"2"});
T(all.offset == 2);
+ // Confirm that queries may assume raw collation.
+ var raw = db.allDocs({ startkey: "org.couchdb.user:"
+ , endkey : "org.couchdb.user;"
+ });
+ TEquals(0, raw.rows.length);
+
// check that the docs show up in the seq view in the order they were created
var changes = db.changes();
var ids = ["0","3","1","2"];
@@ -81,7 +81,12 @@ couchTests.view_collation_raw = function(debug) {
T(equals(rows[i].key, values[i]));
}
- // everything has collated correctly. Now to check the descending output
+ // Confirm that couch allows raw semantics in key ranges.
+ rows = db.view("test/test", {startkey:"Z", endkey:"a"}).rows;
+ TEquals(1, rows.length);
+ TEquals("a", rows[0].key);
+
+ // Check the descending output.
rows = db.view("test/test", {descending: true}).rows;
for (i=0; i<values.length; i++) {
T(equals(rows[i].key, values[values.length - 1 -i]));
@@ -47,7 +47,13 @@ parse_http_params(Req, DDoc, ViewName, #view_merge{keys = Keys}) ->
end,
StaleDefined = couch_httpd:qs_value(Req, "stale") =/= undefined,
- QueryArgs = couch_httpd_view:parse_view_params(Req, Keys, ViewType),
+ QueryArgs = case ViewName of
+ <<"_all_docs">> ->
+ couch_httpd_view:parse_view_params(Req, Keys, ViewType,
+ fun(A, B) -> A < B end);
+ _ ->
+ couch_httpd_view:parse_view_params(Req, Keys, ViewType)
+ end,
QueryArgs1 = QueryArgs#view_query_args{view_name = ViewName},
case StaleDefined of
@@ -1526,6 +1532,8 @@ simple_set_view_query(Params, DDoc, Req) ->
throw({error, set_view_outdated})
end,
+ % This code path is never triggered for _all_docs, hence we don't need
+ % to handle the special case to do raw collation for the query parameters
QueryArgs = couch_httpd_view:parse_view_params(Req, Keys, ViewType),
QueryArgs2 = QueryArgs#view_query_args{
view_name = ViewName,
@@ -17,7 +17,7 @@
-export([modify/3, fold_reduce/4, lookup/2, get_state/1, set_options/2]).
-export([add_remove/5, query_modify/6]).
-export([guided_purge/3]).
--export([stats/1]).
+-export([stats/1, less/3]).
-export([encode_node/2]).
@@ -364,6 +364,7 @@ db_req(#httpd{path_parts=[_, DocId]}=Req, Db) ->
db_doc_req(Req, Db, DocId).
all_docs_view(Req, Db, Keys) ->
+ RawCollator = fun(A, B) -> A < B end,
#view_query_args{
start_key = StartKey,
start_docid = StartDocId,
@@ -373,7 +374,8 @@ all_docs_view(Req, Db, Keys) ->
skip = SkipCount,
direction = Dir,
inclusive_end = Inclusive
- } = QueryArgs = couch_httpd_view:parse_view_params(Req, Keys, map),
+ } = QueryArgs
+ = couch_httpd_view:parse_view_params(Req, Keys, map, RawCollator),
{ok, Info} = couch_db:get_db_info(Db),
CurrentEtag = couch_httpd:make_etag(Info),
couch_httpd:etag_respond(Req, CurrentEtag, fun() ->
@@ -15,7 +15,7 @@
-export([handle_view_req/3,handle_temp_view_req/2]).
--export([parse_view_params/3]).
+-export([parse_view_params/3, parse_view_params/4]).
-export([make_view_fold_fun/7, finish_view_fold/4, finish_view_fold/5, view_row_obj/4]).
-export([view_etag/3, view_etag/4, make_reduce_fold_funs/6]).
-export([design_doc_view/5, parse_bool_param/1, doc_member/3]).
@@ -32,18 +32,19 @@ design_doc_view(Req, Db, DName, ViewName, Keys) ->
Reduce = get_reduce_type(Req),
case couch_view:get_map_view(Db, DesignId, ViewName, Stale) of
{ok, View, Group} ->
- QueryArgs = parse_view_params(Req, Keys, map),
+ QueryArgs = parse_view_params(Req, Keys, map, view_collator(View)),
output_map_view(Req, View, Group, Db, QueryArgs, Keys);
{not_found, Reason} ->
case couch_view:get_reduce_view(Db, DesignId, ViewName, Stale) of
{ok, ReduceView, Group} ->
+ Collator = view_collator(ReduceView),
case Reduce of
false ->
- QueryArgs = parse_view_params(Req, Keys, red_map),
+ QueryArgs = parse_view_params(Req, Keys, red_map, Collator),
MapView = couch_view:extract_map_view(ReduceView),
output_map_view(Req, MapView, Group, Db, QueryArgs, Keys);
_ ->
- QueryArgs = parse_view_params(Req, Keys, reduce),
+ QueryArgs = parse_view_params(Req, Keys, reduce, Collator),
output_reduce_view(Req, Db, ReduceView, Group, QueryArgs, Keys)
end;
_ ->
@@ -85,19 +86,19 @@ handle_temp_view_req(#httpd{method='POST'}=Req, Db) ->
Reduce = get_reduce_type(Req),
case couch_util:get_value(<<"reduce">>, Props, null) of
null ->
- QueryArgs = parse_view_params(Req, Keys, map),
{ok, View, Group} = couch_view:get_temp_map_view(Db, Language,
DesignOptions, MapSrc),
+ QueryArgs = parse_view_params(Req, Keys, map, view_collator(View)),
output_map_view(Req, View, Group, Db, QueryArgs, Keys);
_ when Reduce =:= false ->
- QueryArgs = parse_view_params(Req, Keys, red_map),
{ok, View, Group} = couch_view:get_temp_map_view(Db, Language,
DesignOptions, MapSrc),
+ QueryArgs = parse_view_params(Req, Keys, red_map, view_collator(View)),
output_map_view(Req, View, Group, Db, QueryArgs, Keys);
RedSrc ->
- QueryArgs = parse_view_params(Req, Keys, reduce),
{ok, View, Group} = couch_view:get_temp_reduce_view(Db, Language,
DesignOptions, MapSrc, RedSrc),
+ QueryArgs = parse_view_params(Req, Keys, reduce, view_collator(View)),
output_reduce_view(Req, Db, View, Group, QueryArgs, Keys)
end;
@@ -204,31 +205,53 @@ load_view(Req, Db, {ViewDesignId, ViewName}, Keys) ->
Reduce = get_reduce_type(Req),
case couch_view:get_map_view(Db, ViewDesignId, ViewName, Stale) of
{ok, View, Group} ->
- QueryArgs = parse_view_params(Req, Keys, map),
+ QueryArgs = parse_view_params(Req, Keys, map, view_collator(View)),
{map, View, Group, QueryArgs};
{not_found, _Reason} ->
case couch_view:get_reduce_view(Db, ViewDesignId, ViewName, Stale) of
{ok, ReduceView, Group} ->
+ Collator = view_collator(ReduceView),
case Reduce of
false ->
- QueryArgs = parse_view_params(Req, Keys, map_red),
+ QueryArgs = parse_view_params(Req, Keys, map_red, Collator),
MapView = couch_view:extract_map_view(ReduceView),
{map, MapView, Group, QueryArgs};
_ ->
- QueryArgs = parse_view_params(Req, Keys, reduce),
+ QueryArgs = parse_view_params(Req, Keys, reduce, Collator),
{reduce, ReduceView, Group, QueryArgs}
end;
{not_found, Reason} ->
throw({not_found, Reason})
end
end.
+view_collator({reduce, _N, _Lang, View}) ->
+ view_collator(View);
+
+view_collator({temp_reduce, View}) ->
+ view_collator(View);
+
+view_collator(#view{btree=Btree}) ->
+ % Return an "is-less-than" predicate by calling into the btree's
+ % collator. For raw collation, couch_btree compares arbitrary
+ % Erlang terms, but for normal (ICU) collation, it expects
+ % {Json, Id} tuples.
+ fun
+ ({_JsonA, _IdA}=A, {_JsonB, _IdB}=B) ->
+ couch_btree:less(Btree, A, B);
+ (JsonA, JsonB) ->
+ couch_btree:less(Btree, {JsonA, null}, {JsonB, null})
+ end.
+
% query_parse_error could be removed
% we wouldn't need to pass the view type, it'd just parse params.
% I'm not sure what to do about the error handling, but
% it might simplify things to have a parse_view_params function
% that doesn't throw().
parse_view_params(Req, Keys, ViewType) ->
+ % If not collation is given use the Unicode collation as default
+ parse_view_params(Req, Keys, ViewType, fun couch_view:less_json/2).
+parse_view_params(Req, Keys, ViewType, LessThan) ->
QueryList = couch_httpd:qs(Req),
QueryParams =
lists:foldl(fun({K, V}, Acc) ->
@@ -242,7 +265,7 @@ parse_view_params(Req, Keys, ViewType) ->
QueryArgs = lists:foldl(fun({K, V}, Args2) ->
validate_view_query(K, V, Args2)
end, Args, lists:reverse(QueryParams)), % Reverse to match QS order.
- warn_on_empty_key_range(QueryArgs),
+ warn_on_empty_key_range(QueryArgs, LessThan),
GroupLevel = QueryArgs#view_query_args.group_level,
case {ViewType, GroupLevel, IsMultiGet} of
{reduce, exact, true} ->
@@ -340,15 +363,15 @@ parse_view_param("_type", Value) ->
parse_view_param(Key, Value) ->
[{extra, {Key, Value}}].
-warn_on_empty_key_range(#view_query_args{start_key=undefined}) ->
+warn_on_empty_key_range(#view_query_args{start_key=undefined}, _Lt) ->
ok;
-warn_on_empty_key_range(#view_query_args{end_key=undefined}) ->
+warn_on_empty_key_range(#view_query_args{end_key=undefined}, _Lt) ->
ok;
-warn_on_empty_key_range(#view_query_args{start_key=A, end_key=A}) ->
+warn_on_empty_key_range(#view_query_args{start_key=A, end_key=A}, _Lt) ->
ok;
warn_on_empty_key_range(#view_query_args{
- start_key=StartKey, end_key=EndKey, direction=Dir}) ->
- case {Dir, couch_view:less_json(StartKey, EndKey)} of
+ start_key=StartKey, end_key=EndKey, direction=Dir}, LessThan) ->
+ case {Dir, LessThan(StartKey, EndKey)} of
{fwd, false} ->
throw({query_parse_error,
<<"No rows can match your key range, reverse your ",

0 comments on commit fd03c3a

Please sign in to comment.