Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

CBD-32 Faster view merging and parsing

Change-Id: Ic80456651dc25baead3b24c74bd731a15f245548
Reviewed-on: http://review.couchbase.org/17464
Reviewed-by: Volker Mische <volker.mische@gmail.com>
Reviewed-by: Bin Cui <bin.cui@gmail.com>
Reviewed-by: Damien Katz <damien@couchbase.com>
Tested-by: Filipe David Borba Manana <fdmanana@gmail.com>
Tested-by: Damien Katz <damien@couchbase.com>
  • Loading branch information...
commit 767de84e1d4c1b6838608e2f6c52a20996af9df9 1 parent 481fb99
@fdmanana fdmanana authored Damienkatz committed
Showing with 7,443 additions and 57 deletions.
  1. +5 −0 .gitignore
  2. +3 −0  Makefile.am
  3. +1 −0  configure.ac
  4. +1 −0  license.skip
  5. +7 −1 src/Makefile.am
  6. +2 −0  src/couch_index_merger/Makefile.am
  7. +113 −0 src/couch_index_merger/src/couch_http_view_streamer.erl
  8. +16 −17 src/couch_index_merger/src/couch_httpd_view_merger.erl
  9. +21 −3 src/couch_index_merger/src/couch_index_merger.erl
  10. +104 −33 src/couch_index_merger/src/couch_view_merger.erl
  11. +1 −1  src/couch_set_view/src/couch_set_view_http.erl
  12. +135 −0 src/couch_view_parser/Makefile.am
  13. +12 −0 src/couch_view_parser/couch_view_parser.app.in
  14. +950 −0 src/couch_view_parser/couch_view_parser.cc
  15. +70 −0 src/couch_view_parser/couch_view_parser.erl
  16. +137 −0 src/couch_view_parser/couch_view_parser.h
  17. +601 −0 src/couch_view_parser/couch_view_parser_nif.cc
  18. +129 −0 src/couch_view_parser/erl_nif_compat.h
  19. +958 −0 src/couch_view_parser/test/01-map-view.t
  20. +287 −0 src/couch_view_parser/test/02-reduce-view.t
  21. +22 −0 src/couch_view_parser/test/run.tpl
  22. +114 −0 src/couch_view_parser/win32/couch_view_parser.vcxproj.tpl.in
  23. +1 −0  src/couch_view_parser/win32/msbuild.bat.tpl.in
  24. +175 −0 src/couch_view_parser/yajl/yajl.c
  25. +49 −0 src/couch_view_parser/yajl/yajl_alloc.c
  26. +34 −0 src/couch_view_parser/yajl/yajl_alloc.h
  27. +103 −0 src/couch_view_parser/yajl/yajl_buf.c
  28. +57 −0 src/couch_view_parser/yajl/yajl_buf.h
  29. +69 −0 src/couch_view_parser/yajl/yajl_bytestack.h
  30. +75 −0 src/couch_view_parser/yajl/yajl_common.h
  31. +242 −0 src/couch_view_parser/yajl/yajl_encode.c
  32. +34 −0 src/couch_view_parser/yajl/yajl_encode.h
  33. +354 −0 src/couch_view_parser/yajl/yajl_gen.c
  34. +157 −0 src/couch_view_parser/yajl/yajl_gen.h
  35. +763 −0 src/couch_view_parser/yajl/yajl_lex.c
  36. +117 −0 src/couch_view_parser/yajl/yajl_lex.h
  37. +226 −0 src/couch_view_parser/yajl/yajl_parse.h
  38. +498 −0 src/couch_view_parser/yajl/yajl_parser.c
  39. +78 −0 src/couch_view_parser/yajl/yajl_parser.h
  40. +503 −0 src/couch_view_parser/yajl/yajl_tree.c
  41. +185 −0 src/couch_view_parser/yajl/yajl_tree.h
  42. +7 −0 src/couch_view_parser/yajl/yajl_version.c
  43. +23 −0 src/couch_view_parser/yajl/yajl_version.h
  44. +2 −1  src/couchdb/couch_app.erl
  45. +1 −1  test/etap/run.tpl
  46. +1 −0  utils/Makefile.am
View
5 .gitignore
@@ -74,6 +74,10 @@ src/mapreduce/mapreduce.app
src/mapreduce/.deps/
src/mapreduce/.libs/
src/mapreduce/priv
+src/couch_view_parser/couch_view_parser.app
+src/couch_view_parser/.deps/
+src/couch_view_parser/.libs/
+src/couch_view_parser/priv
src/mochiweb/mochiweb.app
src/snappy/.deps/
src/snappy/.libs/
@@ -85,6 +89,7 @@ src/snappy/*/.deps/
src/couch_set_view/ebin/
src/couch_set_view/test/run
src/couch_index_merger/ebin/
+src/couch_view_parser/test/run
src/mapreduce/test/run
test/local.ini
test/etap/.deps/
View
3  Makefile.am
@@ -166,6 +166,7 @@ $(COUCHDB_PLT):
-pa src/couchdb \
-pa src/couch_set_view \
-pa src/couch_index_merger \
+ -pa src/couch_view_parser \
-pa src/mapreduce \
--apps \
compiler \
@@ -200,6 +201,7 @@ dialyzer: all $(COUCHDB_PLT)
-pa src/couchdb \
-pa src/couch_set_view \
-pa src/couch_index_merger \
+ -pa src/couch_view_parser \
-pa src/mapreduce \
-pa src/lhttpc \
-pa src/snappy \
@@ -207,6 +209,7 @@ dialyzer: all $(COUCHDB_PLT)
src/couchdb \
src/couch_set_view/ebin \
src/couch_index_merger/ebin \
+ src/couch_view_parser \
src/snappy \
src/lhttpc \
src/mapreduce
View
1  configure.ac
@@ -512,6 +512,7 @@ AC_CONFIG_FILES([src/snappy/snappy-$SNAPPY_MAJOR.$SNAPPY_MINOR.$SNAPPY_PATCHLEVE
AC_CONFIG_FILES([src/ejson/Makefile])
AC_CONFIG_FILES([src/couch_set_view/Makefile])
AC_CONFIG_FILES([src/couch_index_merger/Makefile])
+AC_CONFIG_FILES([src/couch_view_parser/Makefile])
AS_IF([ test "x${ac_enable_v8}" = "xyes" ], [
AC_CONFIG_FILES([src/mapreduce/Makefile])
])
View
1  license.skip
@@ -97,6 +97,7 @@
^src/couch_index_merger/Makefile
^src/couch_index_merger/Makefile.in
^src/couch_index_merger/ebin/.*beam
+^src/couch_view_parser/*
^stamp-h1
^test/Makefile
^test/Makefile.in
View
8 src/Makefile.am
@@ -24,6 +24,12 @@ SUBDIRS += \
etap \
lhttpc \
mochiweb \
- snappy \
+ snappy
+
+if !WINDOWS
+SUBDIRS += couch_view_parser
+endif
+
+SUBDIRS += \
couch_set_view \
couch_index_merger
View
2  src/couch_index_merger/Makefile.am
@@ -26,6 +26,7 @@ source_files = \
src/couch_index_merger.erl \
src/couch_view_merger.erl \
src/couch_view_merger_queue.erl \
+ src/couch_http_view_streamer.erl \
src/couch_httpd_view_merger.erl \
src/couch_skew.erl
@@ -34,6 +35,7 @@ compiled_files = \
ebin/couch_index_merger.beam \
ebin/couch_view_merger.beam \
ebin/couch_view_merger_queue.beam \
+ ebin/couch_http_view_streamer.beam \
ebin/couch_httpd_view_merger.beam \
ebin/couch_skew.beam
View
113 src/couch_index_merger/src/couch_http_view_streamer.erl
@@ -0,0 +1,113 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+% http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+-module(couch_http_view_streamer).
+
+-include("couch_db.hrl").
+
+-export([parse/3]).
+
+
+% Parses a streamed view response and pushes its contents onto Queue.
+% DataFun is called with no arguments whenever the parser needs more input
+% (see next_streamer_state/2 for the values it may return). FromUrl is JSON
+% encoded once here, since the encoded form is spliced into the rows and
+% error items built further down.
+parse(DataFun, Queue, FromUrl) ->
+    {ok, ParserCtx} = couch_view_parser:start_context(),
+    ok = stream_loop(ParserCtx, Queue, DataFun, ?JSON_ENCODE(FromUrl)).
+
+
+% Repeatedly asks the parser for its next state and forwards whatever it
+% produced (row count, rows, errors) to the merger queue, until the parser
+% state is `done`. Every queue operation must succeed (`ok`).
+stream_loop(Ctx, Queue, DataFun, Url) ->
+    Enqueue = fun(Item) ->
+        ok = couch_view_merger_queue:queue(Queue, Item)
+    end,
+    case next_streamer_state(Ctx, DataFun) of
+    {ok, done} ->
+        ok;
+    {ok, debug_infos, _Ignored} ->
+        % TODO, currently broken for reduce views.
+        % View response only gets the debug_info for the merger (local) node.
+        stream_loop(Ctx, Queue, DataFun, Url);
+    {ok, row_count, CountString} ->
+        ok = Enqueue({row_count, list_to_integer(CountString)}),
+        stream_loop(Ctx, Queue, DataFun, Url);
+    {ok, rows, ParsedRows} ->
+        lists:foreach(
+            fun(ParsedRow) -> Enqueue(transform_row(ParsedRow, Url)) end,
+            ParsedRows),
+        stream_loop(Ctx, Queue, DataFun, Url);
+    {ok, errors, ParsedErrors} ->
+        lists:foreach(
+            fun(ParsedError) -> Enqueue(make_error_item(ParsedError, Url)) end,
+            ParsedErrors),
+        stream_loop(Ctx, Queue, DataFun, Url)
+    end.
+
+
+% Returns the next parser state that carries something to act upon.
+% While the parser reports `need_more_data`, DataFun is called and the chunk
+% it returns is fed to the parser. Parser errors are thrown as
+% `{error, Reason}`. When DataFun reports `eof` the stream is treated as
+% finished.
+% NOTE(review): `eof` while the parser still wants data is reported as
+% `{ok, done}`, not as an error - confirm a truncated body is meant to be
+% accepted silently.
+next_streamer_state(Ctx, DataFun) ->
+    case couch_view_parser:next_state(Ctx) of
+    {ok, need_more_data} ->
+        case DataFun() of
+        eof ->
+            {ok, done};
+        {ok, Chunk} ->
+            case couch_view_parser:parse_chunk(Ctx, Chunk) of
+            {error, _} = ChunkError ->
+                throw(ChunkError);
+            ok ->
+                next_streamer_state(Ctx, DataFun)
+            end
+        end;
+    {error, _Reason} = StateError ->
+        throw(StateError);
+    State ->
+        State
+    end.
+
+
+% transform_row(ParsedRow, Url) -> QueueItem
+%
+% Converts one row produced by couch_view_parser into the item that gets
+% queued for the merger. The row fields are spliced as binaries into the
+% row's JSON text (RowJson); only the fields that make up the first element
+% of the queued tuple (key, and doc id for map rows) are JSON decoded.
+% Url is the JSON encoded URL built in parse/3.
+% Clause order matters: the more specific shapes must be tried first.
+
+% _all_docs error row
+transform_row({{Key, error}, Reason}, _Url) ->
+ RowJson = <<"{\"key\":", Key/binary, ",\"error\":", Reason/binary, "}">>,
+ {{?JSON_DECODE(Key), error}, {row_json, RowJson}};
+
+% map view rows
+% The is_binary/1 guard separates plain rows from the rows below, whose
+% value is a {PartId, Node, Value} tuple.
+transform_row({{Key, DocId}, Value}, _Url) when is_binary(Value) ->
+ RowJson = <<"{\"key\":", Key/binary, ",\"id\":", DocId/binary,
+ ",\"value\":", Value/binary, "}">>,
+ {{?JSON_DECODE(Key), ?JSON_DECODE(DocId)}, {row_json, RowJson}};
+
+% map view row that also carries a doc
+transform_row({{Key, DocId}, Value, Doc}, _Url) when is_binary(Value) ->
+ RowJson = <<"{\"key\":", Key/binary, ",\"id\":", DocId/binary,
+ ",\"value\":", Value/binary, ",\"doc\":", Doc/binary, "}">>,
+ {{?JSON_DECODE(Key), ?JSON_DECODE(DocId)}, {row_json, RowJson}};
+
+% map view row carrying partition and node; the node value found in the row
+% is discarded and Url is emitted as the "node" member instead
+transform_row({{Key, DocId}, {PartId, _Node, Value}}, Url) ->
+ RowJson = <<"{\"key\":", Key/binary, ",\"id\":", DocId/binary,
+ ",\"partition\":", PartId/binary, ",\"node\":", Url/binary,
+ ",\"value\":", Value/binary, "}">>,
+ {{?JSON_DECODE(Key), ?JSON_DECODE(DocId)}, {row_json, RowJson}};
+
+% same as the previous clause, for a row that also carries a doc
+transform_row({{Key, DocId}, {PartId, _Node, Value}, Doc}, Url) ->
+ RowJson = <<"{\"key\":", Key/binary, ",\"id\":", DocId/binary,
+ ",\"partition\":", PartId/binary, ",\"node\":", Url/binary,
+ ",\"value\":", Value/binary, ",\"doc\":", Doc/binary, "}">>,
+ {{?JSON_DECODE(Key), ?JSON_DECODE(DocId)}, {row_json, RowJson}};
+
+% reduce view rows
+transform_row({Key, Value}, _Url) when is_binary(Key) ->
+ RowJson = <<"{\"key\":", Key/binary, ",\"value\":", Value/binary, "}">>,
+ % value_json, in case rereduce needs to be done by the merger
+ {?JSON_DECODE(Key), {row_json, RowJson}, {value_json, Value}}.
+
+
+% Builds the queue item for one entry of the parsed "errors" list. The node
+% found in the entry is ignored; Url (already JSON encoded) is emitted as the
+% "from" member and Reason is spliced in as is.
+make_error_item({_Node, Reason}, Url) ->
+    {error, row_json,
+     <<"{\"from\":", Url/binary, ",\"reason\":", Reason/binary, "}">>}.
View
33 src/couch_index_merger/src/couch_httpd_view_merger.erl
@@ -129,7 +129,7 @@ http_sender({start, RowCount}, #sender_acc{req = Req} = SAcc) ->
Start = [<<"{\"total_rows\":">>, integer_to_list(RowCount), <<",\"rows\":[">>],
{ok, SAcc#sender_acc{resp = Resp, rows_acc = [Start], acc = <<"\r\n">>}};
-http_sender({row, Row}, SAcc) ->
+http_sender({row, Row}, SAcc) when is_binary(Row) ->
SAcc2 = maybe_flush_rows(Row, SAcc),
{ok, SAcc2#sender_acc{acc = <<",\r\n">>}};
@@ -154,20 +154,13 @@ http_sender(stop, SAcc) ->
couch_httpd:send_chunk(Resp, Buffer2),
{ok, couch_httpd:end_json_response(Resp)};
-http_sender({error, Url, Reason}, #sender_acc{on_error = continue, error_acc = ErrorAcc} = SAcc) ->
- Row = {[
- {<<"from">>, couch_index_merger:rem_passwd(Url)},
- {<<"reason">>, to_binary(Reason)}
- ]},
- ErrorAcc2 = [?JSON_ENCODE(Row) | ErrorAcc],
+http_sender({error, _, _} = Error, #sender_acc{on_error = continue, error_acc = ErrorAcc} = SAcc) ->
+ ErrorAcc2 = [make_error_row(Error) | ErrorAcc],
{ok, SAcc#sender_acc{error_acc = ErrorAcc2}};
-http_sender({error, Url, Reason}, #sender_acc{on_error = stop} = SAcc) ->
+http_sender({error, _, _} = Error, #sender_acc{on_error = stop} = SAcc) ->
#sender_acc{rows_acc = RowsAcc, req = Req, resp = Resp} = SAcc,
- Row = {[
- {<<"from">>, couch_index_merger:rem_passwd(Url)},
- {<<"reason">>, to_binary(Reason)}
- ]},
+ ErrorRow = make_error_row(Error),
case Resp of
nil ->
% we haven't started the response yet
@@ -177,26 +170,25 @@ http_sender({error, Url, Reason}, #sender_acc{on_error = stop} = SAcc) ->
Buffer1 = [
<<"{\"total_rows\":0,\"rows\":[]\r\n">>,
<<",\r\n\"errors\":[">>,
- ?JSON_ENCODE(Row),
+ ErrorRow,
<<"]">>
];
_ ->
Resp2 = Resp,
- Buffer1 = [<<"\r\n],\"errors\":[">>, ?JSON_ENCODE(Row), <<"]">>]
+ Buffer1 = [<<"\r\n],\"errors\":[">>, ErrorRow, <<"]">>]
end,
Buffer2 = [lists:reverse(RowsAcc), Buffer1, debug_info_buffer(SAcc), <<"\r\n}">>],
couch_httpd:send_chunk(Resp2, Buffer2),
couch_httpd:end_json_response(Resp2),
{stop, Resp2}.
-maybe_flush_rows(NewRow, SAcc) ->
+maybe_flush_rows(JsonRow, SAcc) ->
#sender_acc{
acc = Acc,
rows_acc = RowsAcc,
rows_acc_size = RowsAccSize
} = SAcc,
- JsonRow = ?JSON_ENCODE(NewRow),
- RowsAccSize2 = RowsAccSize + iolist_size(JsonRow),
+ RowsAccSize2 = RowsAccSize + byte_size(JsonRow),
SAcc2 = SAcc#sender_acc{
rows_acc = [[Acc, JsonRow] | RowsAcc],
rows_acc_size = RowsAccSize2
@@ -384,3 +376,10 @@ validate_on_error_param(Value) ->
Msg = io_lib:format("Invalid value (`~s`) for the parameter `on_error`."
" It must be `continue` (default) or `stop`.", [to_binary(Value)]),
throw({bad_request, Msg}).
+
+
+% Builds the JSON text for one entry of the response's "errors" array.
+% Items tagged `row_json` already carry the encoded JSON and are passed
+% through untouched. For any other error the source URL goes through
+% couch_index_merger:rem_passwd/1 before being encoded, so that credentials
+% embedded in a remote URL are not echoed back to the client.
+make_error_row({error, row_json, Json}) ->
+    Json;
+make_error_row({error, Url, Reason}) ->
+    From = iolist_to_binary(couch_index_merger:rem_passwd(Url)),
+    ?JSON_ENCODE({[{<<"from">>, From},
+                   {<<"reason">>, to_binary(Reason)}]}).
View
24 src/couch_index_merger/src/couch_index_merger.erl
@@ -635,7 +635,6 @@ http_index_folder(Mod, IndexSpec, MergeParams, DDoc, Queue) ->
end.
run_http_index_folder(Mod, IndexSpec, MergeParams, DDoc, Queue) ->
- EventFun = Mod:make_event_fun(MergeParams#index_merge.http_params, Queue),
{Url, Method, Headers, Body, BaseOptions} =
Mod:http_index_folder_req_details(IndexSpec, MergeParams, DDoc),
#index_merge{
@@ -646,9 +645,17 @@ run_http_index_folder(Mod, IndexSpec, MergeParams, DDoc, Queue) ->
case lhttpc:request(Url, Method, Headers, Body, Timeout, LhttpcOptions) of
{ok, {{200, _}, _RespHeaders, Pid}} when is_pid(Pid) ->
put(streamer_pid, Pid),
- DataFun = fun() -> stream_data(Pid, Timeout) end,
try
- json_stream_parse:events(DataFun, EventFun)
+ case os:type() of
+ {win32, _} ->
+ % TODO: make couch_view_parser build and run on Windows
+ EventFun = Mod:make_event_fun(MergeParams#index_merge.http_params, Queue),
+ DataFun = fun() -> stream_data(Pid, Timeout) end,
+ json_stream_parse:events(DataFun, EventFun);
+ _ ->
+ DataFun = fun() -> next_chunk(Pid, Timeout) end,
+ ok = couch_http_view_streamer:parse(DataFun, Queue, get(from_url))
+ end
catch throw:{error, Error} ->
ok = couch_view_merger_queue:queue(Queue, {error, Url, Error})
after
@@ -696,6 +703,17 @@ stream_data(Pid, Timeout) ->
end.
+% Fetches the next part of the response body from the lhttpc process Pid.
+% Returns `{ok, Data}` for a body part and `eof` at the end of the body;
+% an lhttpc error is thrown as `{error, Reason}`.
+next_chunk(Pid, Timeout) ->
+    case lhttpc:get_body_part(Pid, Timeout) of
+    {error, _} = Failure ->
+        throw(Failure);
+    {ok, {http_eob, _Trailers}} ->
+        eof;
+    {ok, _Data} = BodyPart ->
+        BodyPart
+    end.
+
+
stream_all(Pid, Timeout, Acc) ->
case stream_data(Pid, Timeout) of
{<<>>, _} ->
View
137 src/couch_index_merger/src/couch_view_merger.erl
@@ -259,7 +259,9 @@ view_less_fun(Collation, Dir, ViewType) ->
couch_view:less_json_ids(element(1, RowA), element(1, RowB))
end;
reduce ->
- fun({KeyA, _}, {KeyB, _}) -> couch_view:less_json(KeyA, KeyB) end
+ fun(RowA, RowB) ->
+ couch_view:less_json(element(1, RowA), element(1, RowB))
+ end
end;
<<"raw">> ->
fun(A, B) -> A < B end
@@ -271,33 +273,91 @@ view_less_fun(Collation, Dir, ViewType) ->
fun(A, B) -> not LessFun(A, B) end
end.
-view_row_obj_map({{Key, error}, Value}, _DebugMode) ->
- {[{key, Key}, {error, Value}]};
+% Optimized path, row assembled by couch_http_view_streamer
+view_row_obj_map({_KeyDocId, {row_json, RowJson}}, _Debug) ->
+ RowJson;
+
+% Row from local _all_docs, old couchdb
+% The reason is converted with couch_util:to_binary/1 and then JSON encoded,
+% like the key: splicing the converted text in raw would emit an unquoted
+% token (e.g. "error":not_found) and therefore an invalid JSON row.
+view_row_obj_map({{Key, error}, Reason}, _DebugMode) ->
+    <<"{\"key\":", (?JSON_ENCODE(Key))/binary,
+      ",\"error\":", (?JSON_ENCODE(couch_util:to_binary(Reason)))/binary, "}">>;
-% set view
+% Row from local node, query with ?debug=true
view_row_obj_map({{Key, DocId}, {PartId, Value}}, true) when is_integer(PartId) ->
- {[{id, DocId}, {key, Key}, {partition, PartId}, {node, ?LOCAL}, {value, Value}]};
+ {json, RawValue} = Value,
+ <<"{\"id\":", (?JSON_ENCODE(DocId))/binary,
+ ",\"key\":", (?JSON_ENCODE(Key))/binary,
+ ",\"partition\":", (?l2b(integer_to_list(PartId)))/binary,
+ ",\"node\":\"", (?LOCAL)/binary, "\"",
+ ",\"value\":", RawValue/binary, "}">>;
+
+% Row from remote node, using Erlang based stream JSON parser, query with ?debug=true
view_row_obj_map({{Key, DocId}, {PartId, Node, Value}}, true) when is_integer(PartId) ->
- {[{id, DocId}, {key, Key}, {partition, PartId}, {node, Node}, {value, Value}]};
+ {json, RawValue} = Value,
+ <<"{\"id\":", (?JSON_ENCODE(DocId))/binary,
+ ",\"key\":", (?JSON_ENCODE(Key))/binary,
+ ",\"partition\":", (?l2b(integer_to_list(PartId)))/binary,
+ ",\"node\":", (?JSON_ENCODE(Node))/binary,
+ ",\"value\":", RawValue/binary, "}">>;
+
+% Row from local node, query with ?debug=false
view_row_obj_map({{Key, DocId}, {PartId, Value}}, false) when is_integer(PartId) ->
- {[{id, DocId}, {key, Key}, {value, Value}]};
+ {json, RawValue} = Value,
+ <<"{\"id\":", (?JSON_ENCODE(DocId))/binary,
+ ",\"key\":", (?JSON_ENCODE(Key))/binary,
+ ",\"value\":", RawValue/binary, "}">>;
+% Row from local node, old couchdb views
view_row_obj_map({{Key, DocId}, Value}, _DebugMode) ->
- {[{id, DocId}, {key, Key}, {value, Value}]};
+ <<"{\"id\":", (?JSON_ENCODE(DocId))/binary,
+ ",\"key\":", (?JSON_ENCODE(Key))/binary,
+ ",\"value\":", (?JSON_ENCODE(Value))/binary, "}">>;
-% set view
+% Row from local node with ?include_docs=true
view_row_obj_map({{Key, DocId}, {PartId, Value}, Doc}, true) when is_integer(PartId) ->
- {[{id, DocId}, {key, Key}, {partition, PartId}, {node, ?LOCAL}, {value, Value}, Doc]};
+ {json, RawValue} = Value,
+ {json, RawDoc} = Doc,
+ <<"{\"id\":", (?JSON_ENCODE(DocId))/binary,
+ ",\"key\":", (?JSON_ENCODE(Key))/binary,
+ ",\"partition\":", (?l2b(integer_to_list(PartId)))/binary,
+ ",\"node\":\"", (?LOCAL)/binary, "\"",
+ ",\"value\":", RawValue/binary,
+ ",\"doc\":", RawDoc/binary, "}">>;
+
+% Row from remote node queried with ?debug=true and ?include_docs=true
view_row_obj_map({{Key, DocId}, {PartId, Node, Value}, Doc}, true) when is_integer(PartId) ->
- {[{id, DocId}, {key, Key}, {partition, PartId}, {node, Node}, {value, Value}, Doc]};
+ {json, RawValue} = Value,
+ {json, RawDoc} = Doc,
+ <<"{\"id\":", (?JSON_ENCODE(DocId))/binary,
+ ",\"key\":", (?JSON_ENCODE(Key))/binary,
+ ",\"partition\":", (?l2b(integer_to_list(PartId)))/binary,
+ ",\"node\":", (?JSON_ENCODE(Node))/binary,
+ ",\"value\":", RawValue/binary,
+ ",\"doc\":", RawDoc/binary, "}">>;
+
+% Row from local node with ?include_docs=true and ?debug=false
view_row_obj_map({{Key, DocId}, {PartId, Value}, Doc}, false) when is_integer(PartId) ->
- {[{id, DocId}, {key, Key}, {value, Value}, Doc]};
-
+ {json, RawValue} = Value,
+ {json, RawDoc} = Doc,
+ <<"{\"id\":", (?JSON_ENCODE(DocId))/binary,
+ ",\"key\":", (?JSON_ENCODE(Key))/binary,
+ ",\"value\":", RawValue/binary,
+ ",\"doc\":", RawDoc/binary, "}">>;
+
+% Row from local node, old couchdb views (no partition id)
view_row_obj_map({{Key, DocId}, Value, Doc}, _DebugMode) ->
- {[{id, DocId}, {key, Key}, {value, Value}, Doc]}.
-
+ <<"{\"id\":", (?JSON_ENCODE(DocId))/binary,
+ ",\"key\":", (?JSON_ENCODE(Key))/binary,
+ ",\"value\":", (?JSON_ENCODE(Value))/binary,
+ ",\"doc\":", (?JSON_ENCODE(Doc))/binary, "}">>.
+
+% Optimized path, reduce row assembled by couch_http_view_streamer
+view_row_obj_reduce({_Key, {row_json, RowJson}, _ValueJson}, _DebugMode) ->
+ RowJson;
+% Reduce row from local node
view_row_obj_reduce({Key, Value}, _DebugMode) ->
- {[{key, Key}, {value, Value}]}.
+ <<"{\"key\":", (?JSON_ENCODE(Key))/binary,
+ ",\"value\":", (?JSON_ENCODE(Value))/binary, "}">>.
merge_map_views(#merge_params{limit = 0} = Params) ->
@@ -344,7 +404,7 @@ handle_all_docs_row(MinRow, Queue) ->
{ValueRows, ErrorRows} = case Id0 of
error ->
pop_similar_rows(Key0, Queue, [], [MinRow]);
- _ when is_binary(Id0) ->
+ _ ->
pop_similar_rows(Key0, Queue, [MinRow], [])
end,
case {ValueRows, ErrorRows} of
@@ -403,10 +463,10 @@ merge_reduce_min_row(Params, MinRow) ->
{Row, Col2} = case RowGroup of
[R] ->
{{row, R}, Col};
- [{K, _}, _ | _] ->
+ [FirstRow, _ | _] ->
try
RedVal = rereduce(RowGroup, Params),
- {{row, {K, RedVal}}, Col}
+ {{row, {element(1, FirstRow), RedVal}}, Col}
catch
_Tag:Error ->
Stack = erlang:get_stacktrace(),
@@ -454,13 +514,14 @@ reduce_error(Error) ->
{error, ?LOCAL, to_binary(Error)}.
-group_keys_for_rereduce(Queue, [{K, _} | _] = Acc) ->
+group_keys_for_rereduce(Queue, [Row | _] = Acc) ->
+ K = element(1, Row),
case couch_view_merger_queue:peek(Queue) of
empty ->
Acc;
- {ok, {K, _} = Row} ->
- {ok, Row} = couch_view_merger_queue:pop_next(Queue),
- group_keys_for_rereduce(Queue, [Row | Acc]);
+ {ok, Row2} when element(1, Row2) == K ->
+ {ok, Row2} = couch_view_merger_queue:pop_next(Queue),
+ group_keys_for_rereduce(Queue, [Row2 | Acc]);
{ok, revision_mismatch} ->
revision_mismatch;
{ok, _} ->
@@ -468,12 +529,23 @@ group_keys_for_rereduce(Queue, [{K, _} | _] = Acc) ->
end.
-rereduce(Reds, #merge_params{extra = #view_merge{rereduce_fun = <<"_", _/binary>> = FunSrc}}) ->
+rereduce(Reds0, #merge_params{extra = #view_merge{rereduce_fun = <<"_", _/binary>> = FunSrc}}) ->
+ Reds = lists:map(
+ fun({Key, _RowJson, {value_json, ValueJson}}) ->
+ {Key, ?JSON_DECODE(ValueJson)};
+ (Ejson) ->
+ Ejson
+ end, Reds0),
{ok, [Value]} = couch_set_view_mapreduce:builtin_reduce(rereduce, [FunSrc], Reds),
Value;
rereduce(Rows, #merge_params{extra = #view_merge{rereduce_fun = FunSrc}}) ->
- Reds = [?JSON_ENCODE(Val) || {_Key, Val} <- Rows],
+ Reds = lists:map(
+ fun({_Key, _RowJson, {value_json, ValueJson}}) ->
+ ValueJson;
+ ({_Key, Val}) ->
+ ?JSON_ENCODE(Val)
+ end, Rows),
case get(reduce_context) of
undefined ->
{ok, Ctx} = mapreduce:start_reduce_context([FunSrc]),
@@ -763,11 +835,10 @@ all_docs_row(DocInfo, Db, IncludeDoc, Conflicts) ->
true ->
case Del of
true ->
- DocVal = {<<"doc">>, null};
+ DocVal = null;
false ->
DocOptions = if Conflicts -> [conflicts]; true -> [] end,
- [DocVal] = couch_httpd_view:doc_member(Db, DocInfo, DocOptions),
- DocVal
+ [{doc, DocVal}] = couch_httpd_view:doc_member(Db, DocInfo, DocOptions)
end,
{{Id, Id}, Value, DocVal};
false ->
@@ -876,7 +947,7 @@ http_view_fold_queue_row({Props}, Queue) ->
nil ->
{{Key, Id}, Value};
Doc ->
- {{Key, Id}, Value, {doc, Doc}}
+ {{Key, Id}, Value, Doc}
end
end;
Error ->
@@ -1061,7 +1132,7 @@ make_map_set_fold_fun(true, Conflicts, SetName, UserCtx, Queue) ->
fun({{Key, DocId}, {PartId, Value}} = Kv, _, Acc) ->
JsonDoc = couch_set_view_http:get_row_doc(
Kv, SetName, true, UserCtx, DocOpenOpts),
- Row = {{Key, DocId}, {PartId, Value}, {doc, JsonDoc}},
+ Row = {{Key, DocId}, {PartId, Value}, JsonDoc},
ok = couch_view_merger_queue:queue(Queue, Row),
{ok, Acc}
end.
@@ -1079,11 +1150,11 @@ make_map_fold_fun(true, Conflicts, Db, Queue) ->
{ok, Acc};
({{_Key, DocId} = Kd, {Props} = Value}, _, Acc) ->
IncludeId = get_value(<<"_id">>, Props, DocId),
- [Doc] = couch_httpd_view:doc_member(Db, IncludeId, DocOpenOpts),
+ [{doc, Doc}] = couch_httpd_view:doc_member(Db, IncludeId, DocOpenOpts),
ok = couch_view_merger_queue:queue(Queue, {Kd, Value, Doc}),
{ok, Acc};
({{_Key, DocId} = Kd, Value}, _, Acc) ->
- [Doc] = couch_httpd_view:doc_member(Db, DocId, DocOpenOpts),
+ [{doc, Doc}] = couch_httpd_view:doc_member(Db, DocId, DocOpenOpts),
ok = couch_view_merger_queue:queue(Queue, {Kd, Value, Doc}),
{ok, Acc}
end.
@@ -1449,7 +1520,7 @@ simple_set_view_map_query(Params, Group, View, ViewArgs) ->
nil ->
RowDetails = Kv;
JsonDoc ->
- RowDetails = {{Key, DocId}, {PartId, Value}, {doc, JsonDoc}}
+ RowDetails = {{Key, DocId}, {PartId, Value}, JsonDoc}
end,
Row = view_row_obj_map(RowDetails, DebugMode),
{ok, UAcc2} = Callback({row, Row}, UAcc),
View
2  src/couch_set_view/src/couch_set_view_http.erl
@@ -292,7 +292,7 @@ open_row_doc(SetName, PartId, Id, UserCtx, DocOptions) ->
{ok, #doc{} = Doc} ->
{json, couch_doc:to_raw_json_binary(Doc)};
_ ->
- null
+ {json, <<"null">>}
end,
ok = couch_db:close(Db),
JsonDoc.
View
135 src/couch_view_parser/Makefile.am
@@ -0,0 +1,135 @@
+## Licensed under the Apache License, Version 2.0 (the "License"); you may not
+## use this file except in compliance with the License. You may obtain a copy of
+## the License at
+##
+## http://www.apache.org/licenses/LICENSE-2.0
+##
+## Unless required by applicable law or agreed to in writing, software
+## distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+## WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+## License for the specific language governing permissions and limitations under
+## the License.
+
+COUCH_VIEW_PARSER_VERSION = 1.0
+couch_view_parserebindir = $(localerlanglibdir)/couch_view_parser-$(COUCH_VIEW_PARSER_VERSION)/ebin
+couch_view_parserprivdir = $(localerlanglibdir)/couch_view_parser-$(COUCH_VIEW_PARSER_VERSION)/priv
+
+if WINDOWS
+COUCH_VIEW_PARSER_OUTPUT_DIR = win32
+couch_view_parser_msbuild = $(COUCH_VIEW_PARSER_OUTPUT_DIR)/msbuild.bat
+couch_view_parser_vcproj = $(COUCH_VIEW_PARSER_OUTPUT_DIR)/couch_view_parser.vcxproj
+WIN_ERL_INCLUDE = $(COUCH_VIEW_PARSER_OUTPUT_DIR)/win_erl_include.tmp
+COUCH_VIEW_PARSER_NIF = $(COUCH_VIEW_PARSER_OUTPUT_DIR)/couch_view_parser_nif.dll
+else
+couch_view_parser_cxx_srcs = \
+ couch_view_parser.cc \
+ couch_view_parser_nif.cc \
+ yajl/yajl.c \
+ yajl/yajl_alloc.c \
+ yajl/yajl_buf.c \
+ yajl/yajl_encode.c \
+ yajl/yajl_gen.c \
+ yajl/yajl_lex.c \
+ yajl/yajl_parser.c \
+ yajl/yajl_tree.c \
+ yajl/yajl_version.c
+
+couch_view_parser_cxx_hdrs = \
+ couch_view_parser.h \
+ erl_nif_compat.h \
+ yajl/yajl_alloc.h \
+ yajl/yajl_buf.h \
+ yajl/yajl_bytestack.h \
+ yajl/yajl_encode.h \
+ yajl/yajl_lex.h \
+ yajl/yajl_parser.h \
+ yajl/yajl_common.h \
+ yajl/yajl_gen.h \
+ yajl/yajl_parse.h \
+ yajl/yajl_tree.h \
+ yajl/yajl_version.h
+endif
+couch_view_parser_file_collection = \
+ couch_view_parser.app.in \
+ couch_view_parser.erl
+
+couch_view_parserebin_make_generated_file_list = \
+ couch_view_parser.app \
+ couch_view_parser.beam
+
+test_files = \
+ test/01-map-view.t \
+ test/02-reduce-view.t
+
+EXTRA_DIST = \
+ $(couch_view_parser_cxx_hdrs) \
+ $(couch_view_parser_file_collection) \
+ $(test_files) \
+ test/run.tpl
+
+CLEANFILES = \
+ $(couch_view_parserebin_make_generated_file_list) \
+ priv/couch_view_parser_nif.so
+
+noinst_SCRIPTS = test/run
+
+if WINDOWS
+couch_view_parserebin_DATA = $(COUCH_VIEW_PARSER_NIF) \
+ $(couch_view_parserebin_make_generated_file_list)
+else
+couch_view_parserebin_DATA = \
+ $(couch_view_parserebin_make_generated_file_list)
+endif
+
+if !WINDOWS
+couch_view_parserpriv_LTLIBRARIES = couch_view_parser_nif.la
+## The NIF mixes C (yajl) and C++ sources. Preprocessor options go in the
+## per-target _CPPFLAGS, which automake uses in place of AM_CPPFLAGS, and
+## language options go in the per-language variables: -std=c99 is only valid
+## for the C compiler and must not reach the C++ compiler.
+couch_view_parser_nif_la_CPPFLAGS = $(AM_CPPFLAGS) -Icouch_view_parser-$(COUCH_VIEW_PARSER_VERSION) -Iyajl
+couch_view_parser_nif_la_CFLAGS = $(AM_CFLAGS) -std=c99 -Wall
+couch_view_parser_nif_la_CXXFLAGS = $(AM_CXXFLAGS) -Wall
+couch_view_parser_nif_la_SOURCES = $(couch_view_parser_cxx_srcs)
+couch_view_parser_nif_la_LDFLAGS = -module -avoid-version
+
+priv/couch_view_parser_nif.so: couch_view_parser_nif.la
+ @mkdir -p ./priv
+ cp .libs/couch_view_parser_nif.so $@
+
+all: priv/couch_view_parser_nif.so
+endif
+
+if WINDOWS
+$(COUCH_VIEW_PARSER_NIF) : $(couch_view_parser_msbuild) $(couch_view_parser_vcproj)
+ $(MKDIR_P) "$(couch_view_parserprivdir)" || true
+ (cd win32 && cmd /c msbuild.bat)
+ cp $(COUCH_VIEW_PARSER_NIF) $(couch_view_parserprivdir)
+
+all: $(COUCH_VIEW_PARSER_NIF)
+
+$(couch_view_parser_msbuild): $(couch_view_parser_msbuild).tpl
+ sed -e "s|%msbuild_dir%|$(msbuild_dir)|" \
+ -e "s|%msbuild_name%|$(msbuild_name)|" \
+ -e "s|^/cygdrive/\([a-zA-Z]\)|\1:|" \
+ < $< > $@
+
+$(couch_view_parser_vcproj): $(couch_view_parser_vcproj).tpl $(WIN_ERL_INCLUDE)
+ sed -e "s|%ERLANG_INCLUDE%|`cat $(WIN_ERL_INCLUDE)`|" \
+ -e "s|%COUCH_VIEW_PARSER_VERSION%|$(COUCH_VIEW_PARSER_VERSION)|" \
+ < $< > $@
+
+$(WIN_ERL_INCLUDE):$(ERLANG_INCLUDE)
+ echo $< | sed -e "s|^/cygdrive/\([a-zA-Z]\)|\1:|" > $@
+
+endif
+
+check:
+ $(abs_top_builddir)/src/couch_view_parser/test/run $(abs_top_srcdir)/src/couch_view_parser/test
+
+test/run: test/run.tpl
+ sed -e "s|%abs_top_srcdir%|@abs_top_srcdir@|g" \
+ -e "s|%abs_top_builddir%|@abs_top_builddir@|g" > \
+ $@ < $<
+ chmod +x $@
+
+%.app: %.app.in
+ cp $< $@
+
+%.beam: %.erl
+ $(ERLC) $(ERLC_FLAGS) $<
View
12 src/couch_view_parser/couch_view_parser.app.in
@@ -0,0 +1,12 @@
+{application, couch_view_parser,
+ [
+ {description, "Couch view parser"},
+ {vsn, "1.0.0"},
+ {registered, []},
+ {applications, [
+ kernel,
+ stdlib
+ ]},
+ {env, []},
+ {modules, [couch_view_parser]}
+ ]}.
View
950 src/couch_view_parser/couch_view_parser.cc
@@ -0,0 +1,950 @@
+/**
+ * @copyright 2012 Couchbase, Inc.
+ *
+ * @author Filipe Manana <filipe@couchbase.com>
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ **/
+
+#include <iostream>
+#include <string.h>
+#include <assert.h>
+
+#include "couch_view_parser.h"
+
+// Initial capacities reserved (via std::string::reserve) for the buffers that
+// collect a row's "key", "value" and "doc" JSON values.
+#define ROW_KEY_RESERVE_BYTES 40
+#define ROW_VALUE_RESERVE_BYTES 128
+#define ROW_DOC_RESERVE_BYTES 512
+
+
+// Evaluates to true when the first len bytes of key (which is not required to
+// be NUL terminated) are exactly the string desiredKey. desiredKey is expected
+// to be a string literal (or char array), since its length is taken with sizeof.
+#define KEY_IS(key, len, desiredKey) \
+ ((len == (sizeof(desiredKey) - 1)) && \
+ (strncmp(reinterpret_cast<const char *>(key), desiredKey, (sizeof(desiredKey) - 1)) == 0))
+
+
+/**
+ * Top level fields in a view response:
+ *
+ * "debug_info", optional, must come before "total_rows" and its value is an object;
+ * each key in the "debug_info" object can have any JSON value, which corresponds
+ * to the debug information from one node
+ * "total_rows" must come before any other field except "debug_info", and it's absent for reduce views
+ * "rows" must come after "debug_info" (if present) and after "total_rows" (if present)
+ * each row has the following fields:
+ * "id" - must be a string, mandatory only for map views
+ * "key" - can be any JSON value, mandatory
+ * "value" - can be any JSON value, mandatory
+ * "partition" - optional, if present must be an integer
+ * "node" - optional, if present must be a string
+ * "doc" - optional, if present must be an object or null
+ * "error" - optional, if present must be a string
+ * "errors" must come after "rows", its value is an array and it's an optional field
+ * each entry in "errors" is an object with the following fields:
+ * "from" - must be a string, mandatory
+ * "reason" - must be a string, mandatory
+ **/
+
+
+// Error messages stored in the context's error string by the parser callbacks
+// when a field has a value of an unexpected type. BAD_ROW_FIELD and
+// BAD_ERROR_FIELD are prefixes: the name of the offending field is appended to them.
+static const char *BAD_DEBUG_INFO_VALUE = "'debug_info' value is not an object";
+static const char *BAD_TOTAL_ROWS_VALUE = "'total_rows' value is not a number";
+static const char *BAD_ROWS_VALUE = "'rows' value is not an array";
+static const char *BAD_ROW_FIELD = "unsupported field in row object: ";
+static const char *BAD_ROW_ID_VALUE = "row 'id' value is not a string";
+static const char *BAD_ROW_PARTITION_VALUE = "row 'partition' value is not an integer";
+static const char *BAD_ROW_NODE_VALUE = "row 'node' value is not a string";
+static const char *BAD_ROW_DOC_VALUE = "row 'doc' value is not an object nor null";
+static const char *BAD_ROW_ERROR_VALUE = "row 'error' value is not a string";
+static const char *BAD_ERRORS_VALUE = "'errors' value is not an array";
+static const char *BAD_ERROR_FROM_VALUE = "error 'from' value is not a string";
+static const char *BAD_ERROR_REASON_VALUE = "error 'reason' value is not a string";
+static const char *BAD_ERROR_FIELD = "unsupported field in error object: ";
+
+
+// YAJL parser callbacks
+static int null_callback(void *ctx);
+static int boolean_callback(void *ctx, int boolean);
+static int number_callback(void *ctx, const char *number, size_t len);
+static int string_callback(void *ctx, const unsigned char *str, size_t len);
+static int start_object_callback(void *ctx);
+static int end_object_callback(void *ctx);
+static int object_key_callback(void *ctx, const unsigned char *key, size_t len);
+static int start_array_callback(void *ctx);
+static int end_array_callback(void *ctx);
+
+
+// Helpers to collect a raw JSON value into a string buffer
+static inline void add_null(ctx_t *context, std::string &buffer);
+static inline void add_boolean(ctx_t *context, std::string &buffer, int boolean);
+static inline void add_number(ctx_t *context, std::string &buffer, const char *number, size_t len);
+static inline void add_string(ctx_t *context, std::string &buffer, const unsigned char *str, size_t len);
+static inline void add_object_start(ctx_t *context, std::string &buffer);
+static inline void add_object_end(ctx_t *context, std::string &buffer);
+static inline void add_object_key(ctx_t *context, std::string &buffer, const unsigned char *key, size_t len);
+static inline void add_array_start(ctx_t *context, std::string &buffer);
+static inline void add_array_end(ctx_t *context, std::string &buffer);
+
+// Helpers
+static inline void maybe_debug_entry_end(ctx_t *context);
+static inline void maybe_expand_buffer(std::string &buffer, size_t appendLen);
+
+
+// Callback table handed to yajl_alloc() in initContext().
+// The two NULL slots are presumably yajl's integer and double callbacks (per the
+// field order of yajl_callbacks - confirm against yajl_parse.h); number_callback
+// is installed instead and receives every number as its raw text.
+static yajl_callbacks callbacks = {
+ null_callback,
+ boolean_callback,
+ NULL,
+ NULL,
+ number_callback,
+ string_callback,
+ start_object_callback,
+ object_key_callback,
+ end_object_callback,
+ start_array_callback,
+ end_array_callback
+};
+
+
+/**
+ * Prepares a context for parsing a view response.
+ *
+ * Allocates the YAJL handle (with string validation disabled) and the empty
+ * result lists, and resets the state machine to its starting state. Everything
+ * allocated here is released by destroyContext().
+ *
+ * @param context the context to initialize
+ **/
+void initContext(ctx_t *context)
+{
+    context->handle = yajl_alloc(&callbacks, NULL, static_cast<void *>(context));
+    yajl_config(context->handle, yajl_dont_validate_strings, 1);
+
+    // state machine: nothing parsed yet, caller has consumed nothing yet
+    context->parser_state = parser_starting;
+    context->parser_sub_state = parser_find_row_count_key;
+    context->caller_state = debug_infos;
+    context->level = 0;
+
+    // completed results
+    context->debug_infos = new std::list<debug_info_t *>();
+    context->rows = new std::list<row_t *>();
+    context->error_entries = new std::list<error_entry_t *>();
+    context->row_count = NULL;
+    context->error = NULL;
+
+    // results still being collected
+    context->tmp_debug_info = NULL;
+    context->tmp_row = NULL;
+    context->tmp_error_entry = NULL;
+}
+
+
+/**
+ * Releases everything owned by a context: the YAJL handle, the row count
+ * string, the error message, any partially collected row / error entry /
+ * debug info, and all the completed entries together with their lists.
+ *
+ * @param context a context previously set up with initContext()
+ **/
+void destroyContext(ctx_t *context)
+{
+    yajl_complete_parse(context->handle);
+    yajl_free(context->handle);
+
+    delete [] context->row_count;
+    delete context->error;
+    delete context->tmp_row;
+    delete context->tmp_error_entry;
+    delete context->tmp_debug_info;
+
+    std::list<row_t *> *rowList = context->rows;
+    while (!rowList->empty()) {
+        delete rowList->front();
+        rowList->pop_front();
+    }
+    delete rowList;
+
+    std::list<error_entry_t *> *errorList = context->error_entries;
+    while (!errorList->empty()) {
+        delete errorList->front();
+        errorList->pop_front();
+    }
+    delete errorList;
+
+    std::list<debug_info_t *> *debugList = context->debug_infos;
+    while (!debugList->empty()) {
+        delete debugList->front();
+        debugList->pop_front();
+    }
+    delete debugList;
+}
+
+
+/**
+ * Feeds one chunk of the JSON response to the YAJL parser.
+ *
+ * @param context the parsing context
+ * @param data    the chunk's bytes
+ * @param len     number of bytes in data
+ * @throws JsonParseException if YAJL does not report success; the message is
+ *         the one stored by a callback in context->error or, when there is
+ *         none, the error description produced by YAJL itself
+ **/
+void parseJsonChunk(ctx_t *context, unsigned char *data, size_t len)
+{
+    if (yajl_parse(context->handle, data, len) == yajl_status_ok) {
+        return;
+    }
+
+    yajl_complete_parse(context->handle);
+
+    // no callback recorded an error, so ask YAJL for its own description
+    if (context->error == NULL) {
+        unsigned char *message = yajl_get_error(context->handle, 0, data, len);
+        context->error = new std::string(reinterpret_cast<char *>(message));
+        yajl_free_error(context->handle, message);
+    }
+
+    throw JsonParseException(*context->error);
+}
+
+
+// YAJL callback invoked for a JSON null.
+// Depending on the current sub state the null is either appended to the value
+// being collected (debug info entry, row key, row value, row doc) or rejected
+// because the field being parsed does not accept null.
+// Returns 1 to continue parsing, or 0 (after storing a message in
+// context->error) to make the parse fail.
+static int null_callback(void *ctx)
+{
+ ctx_t *context = static_cast<ctx_t *>(ctx);
+
+ switch (context->parser_sub_state) {
+ case parser_found_debug_info_key:
+ context->error = new std::string(BAD_DEBUG_INFO_VALUE);
+ return 0;
+ case parser_get_debug_entry:
+ add_null(context, context->tmp_debug_info->value);
+ maybe_debug_entry_end(context);
+ return 1;
+ case parser_found_row_count_key:
+ context->error = new std::string(BAD_TOTAL_ROWS_VALUE);
+ return 0;
+ case parser_found_rows_key:
+ context->error = new std::string(BAD_ROWS_VALUE);
+ return 0;
+ case parser_get_row_id:
+ context->error = new std::string(BAD_ROW_ID_VALUE);
+ return 0;
+ case parser_get_row_key:
+ add_null(context, context->tmp_row->key);
+ // value_nesting == 0 means the null was the whole value, not a nested element
+ if (context->value_nesting == 0) {
+ context->parser_sub_state = parser_get_row;
+ }
+ return 1;
+ case parser_get_row_value:
+ add_null(context, context->tmp_row->value);
+ if (context->value_nesting == 0) {
+ context->parser_sub_state = parser_get_row;
+ }
+ return 1;
+ case parser_get_row_doc:
+ // unlike other scalars, null is accepted as the whole "doc" value
+ add_null(context, context->tmp_row->doc);
+ if (context->value_nesting == 0) {
+ context->parser_sub_state = parser_get_row;
+ }
+ return 1;
+ case parser_get_row_partition:
+ context->error = new std::string(BAD_ROW_PARTITION_VALUE);
+ return 0;
+ case parser_get_row_node:
+ context->error = new std::string(BAD_ROW_NODE_VALUE);
+ return 0;
+ case parser_get_row_error:
+ context->error = new std::string(BAD_ROW_ERROR_VALUE);
+ return 0;
+ case parser_found_errors_key:
+ context->error = new std::string(BAD_ERRORS_VALUE);
+ return 0;
+ case parser_get_error_from:
+ context->error = new std::string(BAD_ERROR_FROM_VALUE);
+ return 0;
+ case parser_get_error_reason:
+ context->error = new std::string(BAD_ERROR_REASON_VALUE);
+ return 0;
+ default:
+ break;
+ }
+
+ return 1;
+}
+
+
+// YAJL callback invoked for a JSON boolean.
+// Depending on the current sub state the boolean is either appended to the
+// value being collected (debug info entry, row key, row value, or a nested
+// element of a row doc) or rejected because the field being parsed does not
+// accept a boolean.
+// Returns 1 to continue parsing, or 0 (after storing a message in
+// context->error) to make the parse fail.
+static int boolean_callback(void *ctx, int boolean)
+{
+    ctx_t *context = static_cast<ctx_t *>(ctx);
+
+    switch (context->parser_sub_state) {
+    case parser_found_debug_info_key:
+        context->error = new std::string(BAD_DEBUG_INFO_VALUE);
+        return 0;
+    case parser_get_debug_entry:
+        add_boolean(context, context->tmp_debug_info->value, boolean);
+        maybe_debug_entry_end(context);
+        return 1;
+    case parser_found_row_count_key:
+        context->error = new std::string(BAD_TOTAL_ROWS_VALUE);
+        return 0;
+    case parser_found_rows_key:
+        context->error = new std::string(BAD_ROWS_VALUE);
+        return 0;
+    case parser_get_row_id:
+        context->error = new std::string(BAD_ROW_ID_VALUE);
+        return 0;
+    case parser_get_row_key:
+        add_boolean(context, context->tmp_row->key, boolean);
+        // value_nesting == 0 means the boolean was the whole value
+        if (context->value_nesting == 0) {
+            context->parser_sub_state = parser_get_row;
+        }
+        return 1;
+    case parser_get_row_value:
+        add_boolean(context, context->tmp_row->value, boolean);
+        if (context->value_nesting == 0) {
+            context->parser_sub_state = parser_get_row;
+        }
+        return 1;
+    case parser_get_row_doc:
+        // a doc must be an object or null, so a top level boolean is an error
+        if (context->value_nesting == 0) {
+            context->error = new std::string(BAD_ROW_DOC_VALUE);
+            return 0;
+        }
+        add_boolean(context, context->tmp_row->doc, boolean);
+        // Return here: without it control fell through into the "partition"
+        // case below and every doc containing a boolean failed to parse.
+        return 1;
+    case parser_get_row_partition:
+        context->error = new std::string(BAD_ROW_PARTITION_VALUE);
+        return 0;
+    case parser_get_row_node:
+        context->error = new std::string(BAD_ROW_NODE_VALUE);
+        return 0;
+    case parser_get_row_error:
+        context->error = new std::string(BAD_ROW_ERROR_VALUE);
+        return 0;
+    case parser_found_errors_key:
+        context->error = new std::string(BAD_ERRORS_VALUE);
+        return 0;
+    case parser_get_error_from:
+        context->error = new std::string(BAD_ERROR_FROM_VALUE);
+        return 0;
+    case parser_get_error_reason:
+        context->error = new std::string(BAD_ERROR_REASON_VALUE);
+        return 0;
+    default:
+        break;
+    }
+
+    return 1;
+}
+
+
+// YAJL callback invoked for a JSON number, received as its raw text
+// (number points to len characters, not NUL terminated).
+// Depending on the current sub state the number is stored as the row count,
+// appended to the value being collected (debug info entry, row key, row value,
+// row partition, or a nested element of a row doc), or rejected because the
+// field being parsed does not accept a number.
+// Returns 1 to continue parsing, or 0 (after storing a message in
+// context->error) to make the parse fail.
+static int number_callback(void *ctx, const char *number, size_t len)
+{
+ ctx_t *context = static_cast<ctx_t *>(ctx);
+
+ switch (context->parser_sub_state) {
+ case parser_found_debug_info_key:
+ context->error = new std::string(BAD_DEBUG_INFO_VALUE);
+ return 0;
+ case parser_get_debug_entry:
+ add_number(context, context->tmp_debug_info->value, number, len);
+ maybe_debug_entry_end(context);
+ return 1;
+ case parser_found_row_count_key:
+ // keep a NUL terminated copy of the "total_rows" text, then look for "rows"
+ context->row_count = new char[len + 1];
+ memcpy(context->row_count, number, len);
+ context->row_count[len] = '\0';
+ context->parser_state = parser_rows;
+ context->parser_sub_state = parser_find_rows_key;
+ return 1;
+ case parser_found_rows_key:
+ context->error = new std::string(BAD_ROWS_VALUE);
+ return 0;
+ case parser_get_row_id:
+ context->error = new std::string(BAD_ROW_ID_VALUE);
+ return 0;
+ case parser_get_row_key:
+ add_number(context, context->tmp_row->key, number, len);
+ if (context->value_nesting == 0) {
+ context->parser_sub_state = parser_get_row;
+ }
+ return 1;
+ case parser_get_row_value:
+ add_number(context, context->tmp_row->value, number, len);
+ if (context->value_nesting == 0) {
+ context->parser_sub_state = parser_get_row;
+ }
+ return 1;
+ case parser_get_row_doc:
+ // a doc must be an object or null, so a top level number is an error
+ if (context->value_nesting == 0) {
+ context->error = new std::string(BAD_ROW_DOC_VALUE);
+ return 0;
+ } else {
+ add_number(context, context->tmp_row->doc, number, len);
+ }
+ return 1;
+ case parser_get_row_partition:
+ // NOTE(review): any JSON number is accepted here, not only integers
+ add_number(context, context->tmp_row->partition, number, len);
+ context->parser_sub_state = parser_get_row;
+ return 1;
+ case parser_get_row_node:
+ context->error = new std::string(BAD_ROW_NODE_VALUE);
+ return 0;
+ case parser_get_row_error:
+ context->error = new std::string(BAD_ROW_ERROR_VALUE);
+ return 0;
+ case parser_found_errors_key:
+ context->error = new std::string(BAD_ERRORS_VALUE);
+ return 0;
+ case parser_get_error_from:
+ context->error = new std::string(BAD_ERROR_FROM_VALUE);
+ return 0;
+ case parser_get_error_reason:
+ context->error = new std::string(BAD_ERROR_REASON_VALUE);
+ return 0;
+ default:
+ break;
+ }
+
+ return 1;
+}
+
+
+// YAJL callback invoked for a JSON string (str points to len bytes, not NUL
+// terminated).
+// Depending on the current sub state the string is appended, surrounded by
+// double quotes, to the field being collected (debug info entry, row id, key,
+// value, node, error, a nested element of a row doc, or an error entry's
+// from / reason), or rejected because the field does not accept a string.
+// Returns 1 to continue parsing, or 0 (after storing a message in
+// context->error) to make the parse fail.
+static int string_callback(void *ctx, const unsigned char *str, size_t len)
+{
+ ctx_t *context = static_cast<ctx_t *>(ctx);
+
+ switch (context->parser_sub_state) {
+ case parser_found_debug_info_key:
+ context->error = new std::string(BAD_DEBUG_INFO_VALUE);
+ return 0;
+ case parser_get_debug_entry:
+ add_string(context, context->tmp_debug_info->value, str, len);
+ maybe_debug_entry_end(context);
+ return 1;
+ case parser_found_row_count_key:
+ context->error = new std::string(BAD_TOTAL_ROWS_VALUE);
+ return 0;
+ case parser_found_rows_key:
+ context->error = new std::string(BAD_ROWS_VALUE);
+ return 0;
+ case parser_get_row_id:
+ add_string(context, context->tmp_row->id, str, len);
+ context->parser_sub_state = parser_get_row;
+ return 1;
+ case parser_get_row_key:
+ add_string(context, context->tmp_row->key, str, len);
+ if (context->value_nesting == 0) {
+ context->parser_sub_state = parser_get_row;
+ }
+ return 1;
+ case parser_get_row_value:
+ add_string(context, context->tmp_row->value, str, len);
+ if (context->value_nesting == 0) {
+ context->parser_sub_state = parser_get_row;
+ }
+ return 1;
+ case parser_get_row_doc:
+ // a doc must be an object or null, so a top level string is an error
+ if (context->value_nesting == 0) {
+ context->error = new std::string(BAD_ROW_DOC_VALUE);
+ return 0;
+ } else {
+ add_string(context, context->tmp_row->doc, str, len);
+ return 1;
+ }
+ case parser_get_row_partition:
+ context->error = new std::string(BAD_ROW_PARTITION_VALUE);
+ return 0;
+ case parser_get_row_node:
+ add_string(context, context->tmp_row->node, str, len);
+ context->parser_sub_state = parser_get_row;
+ return 1;
+ case parser_get_row_error:
+ add_string(context, context->tmp_row->error, str, len);
+ context->parser_sub_state = parser_get_row;
+ return 1;
+ case parser_found_errors_key:
+ context->error = new std::string(BAD_ERRORS_VALUE);
+ return 0;
+ case parser_get_error_from:
+ add_string(context, context->tmp_error_entry->from, str, len);
+ context->parser_sub_state = parser_get_error_entry;
+ return 1;
+ case parser_get_error_reason:
+ add_string(context, context->tmp_error_entry->reason, str, len);
+ context->parser_sub_state = parser_get_error_entry;
+ return 1;
+ default:
+ break;
+ }
+
+ return 1;
+}
+
+
+// YAJL callback invoked when an object starts ('{').
+// While a value is being collected (debug info entry, row key, value or doc)
+// the object start is appended to that value and the structural level is left
+// untouched. Otherwise the structural nesting level is incremented: level 1 is
+// the top level response object, and at level 2 a new row or error entry is
+// allocated when the parser is inside the "rows" or "errors" array.
+// Returns 1 to continue parsing, or 0 (after storing a message in
+// context->error) when the field being parsed does not accept an object.
+static int start_object_callback(void *ctx)
+{
+ ctx_t *context = static_cast<ctx_t *>(ctx);
+
+ switch (context->parser_sub_state) {
+ case parser_found_debug_info_key:
+ // the "debug_info" object itself; break so that the level is incremented
+ context->parser_sub_state = parser_get_debug_infos;
+ break;
+ case parser_get_debug_entry:
+ add_object_start(context, context->tmp_debug_info->value);
+ return 1;
+ case parser_found_row_count_key:
+ context->error = new std::string(BAD_TOTAL_ROWS_VALUE);
+ return 0;
+ case parser_found_rows_key:
+ context->error = new std::string(BAD_ROWS_VALUE);
+ return 0;
+ case parser_get_row_id:
+ context->error = new std::string(BAD_ROW_ID_VALUE);
+ return 0;
+ case parser_get_row_key:
+ add_object_start(context, context->tmp_row->key);
+ return 1;
+ case parser_get_row_value:
+ add_object_start(context, context->tmp_row->value);
+ return 1;
+ case parser_get_row_doc:
+ add_object_start(context, context->tmp_row->doc);
+ return 1;
+ case parser_get_row_partition:
+ context->error = new std::string(BAD_ROW_PARTITION_VALUE);
+ return 0;
+ case parser_get_row_node:
+ context->error = new std::string(BAD_ROW_NODE_VALUE);
+ return 0;
+ case parser_get_row_error:
+ context->error = new std::string(BAD_ROW_ERROR_VALUE);
+ return 0;
+ case parser_found_errors_key:
+ context->error = new std::string(BAD_ERRORS_VALUE);
+ return 0;
+ case parser_get_error_from:
+ context->error = new std::string(BAD_ERROR_FROM_VALUE);
+ return 0;
+ case parser_get_error_reason:
+ context->error = new std::string(BAD_ERROR_REASON_VALUE);
+ return 0;
+ default:
+ break;
+ }
+
+ ++context->level;
+
+ if (context->level == 1) {
+ assert(context->parser_state == parser_starting);
+ context->parser_state = parser_debug_info;
+ context->parser_sub_state = parser_find_debug_info_key;
+ } else if (context->level == 2) {
+ if (context->parser_state == parser_rows &&
+ context->parser_sub_state == parser_get_row) {
+ // starting to parse a row
+ context->tmp_row = new row_t();
+ } else if (context->parser_state == parser_errors &&
+ context->parser_sub_state == parser_get_error_entry) {
+ // starting to parse an error entry
+ context->tmp_error_entry = new error_entry_t();
+ }
+ }
+
+ return 1;
+}
+
+
+// YAJL callback invoked when an object ends ('}').
+// While a value is being collected (row key, value, doc or a debug info entry)
+// the object end is appended to that value, and collection finishes once the
+// value's own nesting drops back to 0. Otherwise the structural level is
+// decremented: reaching level 0 ends the response, and reaching level 1
+// completes the "debug_info" object, a row, or an error entry (completed rows
+// and error entries are moved into the context's lists).
+// Always returns 1.
+static int end_object_callback(void *ctx)
+{
+ ctx_t *context = static_cast<ctx_t *>(ctx);
+
+ switch (context->parser_sub_state) {
+ case parser_get_row_key:
+ add_object_end(context, context->tmp_row->key);
+ if (context->value_nesting == 0) {
+ context->parser_sub_state = parser_get_row;
+ }
+ return 1;
+ case parser_get_debug_entry:
+ add_object_end(context, context->tmp_debug_info->value);
+ maybe_debug_entry_end(context);
+ return 1;
+ case parser_get_row_value:
+ add_object_end(context, context->tmp_row->value);
+ if (context->value_nesting == 0) {
+ context->parser_sub_state = parser_get_row;
+ }
+ return 1;
+ case parser_get_row_doc:
+ add_object_end(context, context->tmp_row->doc);
+ if (context->value_nesting == 0) {
+ context->parser_sub_state = parser_get_row;
+ }
+ return 1;
+ default:
+ break;
+ }
+
+ --context->level;
+
+ if (context->level == 0) {
+ context->parser_state = parser_ending;
+ } else if (context->level == 1) {
+ if (context->parser_state == parser_debug_info &&
+ context->parser_sub_state == parser_get_debug_infos) {
+ // finished parsing the debug_info object
+ context->parser_state = parser_row_count;
+ context->parser_sub_state = parser_find_row_count_key;
+ } else if (context->parser_state == parser_rows &&
+ context->parser_sub_state == parser_get_row) {
+ // finished parsing a row
+ assert(context->tmp_row != NULL);
+ context->rows->push_back(context->tmp_row);
+ context->tmp_row = NULL;
+ context->parser_sub_state = parser_get_row;
+ } else if (context->parser_state == parser_errors &&
+ context->parser_sub_state == parser_get_error_entry) {
+ // finished parsing an error entry
+ assert(context->tmp_error_entry != NULL);
+ context->error_entries->push_back(context->tmp_error_entry);
+ context->tmp_error_entry = NULL;
+ context->parser_sub_state = parser_get_error_entry;
+ }
+ }
+
+ return 1;
+}
+
+
+// YAJL callback invoked for an object key (key points to len bytes, not NUL
+// terminated).
+// While a value is being collected (debug info entry, row key, value or doc)
+// the key belongs to a nested object and is appended to that value. Otherwise
+// the key drives the state machine:
+//   - at level 1 it selects the top level field ("debug_info", "total_rows",
+//     "rows", "errors") allowed in the current main state; keys that are not
+//     recognized in the current state are ignored;
+//   - at level 2 it either starts a new debug info entry (the key being the
+//     entry's origin) or selects which field of a row / error entry comes next.
+// Returns 1 to continue parsing, or 0 (after storing a message in
+// context->error) when a row or error entry has an unsupported field.
+static int object_key_callback(void *ctx, const unsigned char *key, size_t len)
+{
+ ctx_t *context = static_cast<ctx_t *>(ctx);
+
+ switch (context->parser_sub_state) {
+ case parser_get_debug_entry:
+ add_object_key(context, context->tmp_debug_info->value, key, len);
+ return 1;
+ case parser_get_row_key:
+ add_object_key(context, context->tmp_row->key, key, len);
+ return 1;
+ case parser_get_row_value:
+ add_object_key(context, context->tmp_row->value, key, len);
+ return 1;
+ case parser_get_row_doc:
+ add_object_key(context, context->tmp_row->doc, key, len);
+ return 1;
+ default:
+ break;
+ }
+
+ if (context->level == 1) {
+ if (context->parser_state == parser_debug_info &&
+ context->parser_sub_state == parser_find_debug_info_key) {
+
+ if (KEY_IS(key, len, "debug_info")) {
+ context->parser_sub_state = parser_found_debug_info_key;
+ } else if (KEY_IS(key, len, "total_rows")) {
+ context->parser_state = parser_row_count;
+ context->parser_sub_state = parser_found_row_count_key;
+ } else if (KEY_IS(key, len, "rows")) {
+ context->parser_state = parser_rows;
+ context->parser_sub_state = parser_found_rows_key;
+ } else if (KEY_IS(key, len, "errors")) {
+ context->parser_state = parser_errors;
+ context->parser_sub_state = parser_found_errors_key;
+ }
+ } else if (context->parser_state == parser_row_count &&
+ context->parser_sub_state == parser_find_row_count_key) {
+
+ if (KEY_IS(key, len, "total_rows")) {
+ context->parser_sub_state = parser_found_row_count_key;
+ } else if (KEY_IS(key, len, "rows")) {
+ // reduce view, no "total_rows" field
+ context->parser_state = parser_rows;
+ context->parser_sub_state = parser_found_rows_key;
+ }
+ } else if (context->parser_state == parser_rows &&
+ context->parser_sub_state == parser_find_rows_key) {
+
+ if (KEY_IS(key, len, "rows")) {
+ context->parser_sub_state = parser_found_rows_key;
+ } else if (KEY_IS(key, len, "errors")) {
+ context->parser_state = parser_errors;
+ context->parser_sub_state = parser_found_errors_key;
+ }
+ } else if (context->parser_state == parser_errors &&
+ context->parser_sub_state == parser_find_errors_key) {
+
+ if (KEY_IS(key, len, "errors")) {
+ context->parser_sub_state = parser_found_errors_key;
+ }
+ }
+ } else if (context->level == 2) {
+ if (context->parser_state == parser_debug_info &&
+ context->parser_sub_state == parser_get_debug_infos) {
+
+ // starting to parse a debug info entry (relative to one node)
+ assert(context->tmp_debug_info == NULL);
+ context->tmp_debug_info = new debug_info_t();
+ context->tmp_debug_info->from += "\"";
+ context->tmp_debug_info->from.append(reinterpret_cast<const char *>(key), len);
+ context->tmp_debug_info->from += "\"";
+ context->value_nesting = 0;
+ context->parser_sub_state = parser_get_debug_entry;
+ }
+ else if (context->parser_state == parser_rows &&
+ context->parser_sub_state == parser_get_row) {
+
+ // the value of the field that follows starts at nesting depth 0
+ context->value_nesting = 0;
+
+ if (KEY_IS(key, len, "id")) {
+ context->parser_sub_state = parser_get_row_id;
+ } else if (KEY_IS(key, len, "key")) {
+ context->tmp_row->key.reserve(ROW_KEY_RESERVE_BYTES);
+ context->parser_sub_state = parser_get_row_key;
+ } else if (KEY_IS(key, len, "value")) {
+ context->tmp_row->value.reserve(ROW_VALUE_RESERVE_BYTES);
+ context->parser_sub_state = parser_get_row_value;
+ } else if (KEY_IS(key, len, "doc")) {
+ context->tmp_row->doc.reserve(ROW_DOC_RESERVE_BYTES);
+ context->parser_sub_state = parser_get_row_doc;
+ } else if (KEY_IS(key, len, "partition")) {
+ context->parser_sub_state = parser_get_row_partition;
+ } else if (KEY_IS(key, len, "node")) {
+ context->parser_sub_state = parser_get_row_node;
+ } else if (KEY_IS(key, len, "error")) {
+ context->parser_sub_state = parser_get_row_error;
+ } else {
+ context->error = new std::string(BAD_ROW_FIELD);
+ context->error->append(reinterpret_cast<const char *>(key), len);
+ return 0;
+ }
+ } else if (context->parser_state == parser_errors &&
+ context->parser_sub_state == parser_get_error_entry) {
+
+ context->value_nesting = 0;
+
+ if (KEY_IS(key, len, "from")) {
+ context->parser_sub_state = parser_get_error_from;
+ } else if (KEY_IS(key, len, "reason")) {
+ context->parser_sub_state = parser_get_error_reason;
+ } else {
+ context->error = new std::string(BAD_ERROR_FIELD);
+ context->error->append(reinterpret_cast<const char *>(key), len);
+ return 0;
+ }
+ }
+ }
+
+ return 1;
+}
+
+
+// YAJL callback invoked when an array starts ('[').
+// An array is the expected value of the "rows" and "errors" fields, in which
+// case the parser moves on to collecting rows / error entries. While a value
+// is being collected (debug info entry, row key, row value, or a nested
+// element of a row doc) the array start is appended to that value. For the
+// other fields an array is rejected.
+// Returns 1 to continue parsing, or 0 (after storing a message in
+// context->error) to make the parse fail.
+static int start_array_callback(void *ctx)
+{
+ ctx_t *context = static_cast<ctx_t *>(ctx);
+
+ switch (context->parser_sub_state) {
+ case parser_found_debug_info_key:
+ context->error = new std::string(BAD_DEBUG_INFO_VALUE);
+ return 0;
+ case parser_get_debug_entry:
+ add_array_start(context, context->tmp_debug_info->value);
+ return 1;
+ case parser_found_row_count_key:
+ context->error = new std::string(BAD_TOTAL_ROWS_VALUE);
+ return 0;
+ case parser_found_rows_key:
+ context->parser_sub_state = parser_get_row;
+ return 1;
+ case parser_get_row_id:
+ context->error = new std::string(BAD_ROW_ID_VALUE);
+ return 0;
+ case parser_get_row_key:
+ add_array_start(context, context->tmp_row->key);
+ return 1;
+ case parser_get_row_value:
+ add_array_start(context, context->tmp_row->value);
+ return 1;
+ case parser_get_row_doc:
+ // a doc must be an object or null, so a top level array is an error
+ if (context->value_nesting == 0) {
+ context->error = new std::string(BAD_ROW_DOC_VALUE);
+ return 0;
+ } else {
+ add_array_start(context, context->tmp_row->doc);
+ return 1;
+ }
+ case parser_get_row_partition:
+ context->error = new std::string(BAD_ROW_PARTITION_VALUE);
+ return 0;
+ case parser_get_row_node:
+ context->error = new std::string(BAD_ROW_NODE_VALUE);
+ return 0;
+ case parser_get_row_error:
+ context->error = new std::string(BAD_ROW_ERROR_VALUE);
+ return 0;
+ case parser_found_errors_key:
+ context->parser_sub_state = parser_get_error_entry;
+ return 1;
+ case parser_get_error_from:
+ context->error = new std::string(BAD_ERROR_FROM_VALUE);
+ return 0;
+ case parser_get_error_reason:
+ context->error = new std::string(BAD_ERROR_REASON_VALUE);
+ return 0;
+ default:
+ break;
+ }
+
+ return 1;
+}
+
+
+// YAJL callback invoked when an array ends (']').
+// While a value is being collected (debug info entry, row key, value or doc)
+// the array end is appended to that value, and collection finishes once the
+// value's own nesting drops back to 0. Otherwise, at level 1, the end of the
+// "rows" array moves the parser on to looking for the "errors" key.
+// Always returns 1.
+//
+// NOTE(review): the branch commented "finished parsing the errors array" tests
+// for sub state parser_find_errors_key, but while inside the "errors" array the
+// sub state is parser_get_error_entry (set by start_array_callback and
+// end_object_callback). As written the branch looks like it can only match the
+// end of some other top level array seen after "rows" - confirm whether
+// parser_get_error_entry was intended.
+static int end_array_callback(void *ctx)
+{
+ ctx_t *context = static_cast<ctx_t *>(ctx);
+
+ switch (context->parser_sub_state) {
+ case parser_get_debug_entry:
+ add_array_end(context, context->tmp_debug_info->value);
+ maybe_debug_entry_end(context);
+ return 1;
+ case parser_get_row_key:
+ add_array_end(context, context->tmp_row->key);
+ if (context->value_nesting == 0) {
+ context->parser_sub_state = parser_get_row;
+ }
+ return 1;
+ case parser_get_row_value:
+ add_array_end(context, context->tmp_row->value);
+ if (context->value_nesting == 0) {
+ context->parser_sub_state = parser_get_row;
+ }
+ return 1;
+ case parser_get_row_doc:
+ add_array_end(context, context->tmp_row->doc);
+ // a doc is never an array itself, so an array can only end inside the doc
+ assert(context->value_nesting > 0);
+ return 1;
+ default:
+ break;
+ }
+
+ if (context->level == 1) {
+ if (context->parser_state == parser_rows &&
+ context->parser_sub_state == parser_get_row) {
+ // finished parsing the "rows" array
+ context->parser_state = parser_errors;
+ context->parser_sub_state = parser_find_errors_key;
+ } else if (context->parser_state == parser_errors &&
+ context->parser_sub_state == parser_find_errors_key) {
+ // finished parsing the "errors" array
+ context->parser_state = parser_ending;
+ }
+ }
+
+ return 1;
+}
+
+
+// Appends a JSON null to buffer. When the null is an element nested inside the
+// value being collected, a trailing comma separator is appended as well.
+static inline void add_null(ctx_t *context, std::string &buffer)
+{
+    const bool nested = (context->value_nesting > 0);
+
+    maybe_expand_buffer(buffer, 5);
+    buffer += (nested ? "null," : "null");
+}
+
+
+// Appends "true" or "false" to buffer (boolean != 0 meaning true). When the
+// boolean is an element nested inside the value being collected, a trailing
+// comma separator is appended as well.
+static inline void add_boolean(ctx_t *context, std::string &buffer, int boolean)
+{
+    maybe_expand_buffer(buffer, 6);
+
+    if (boolean) {
+        buffer += "true";
+    } else {
+        buffer += "false";
+    }
+
+    if (context->value_nesting > 0) {
+        buffer += ',';
+    }
+}
+
+
+// Appends the raw text of a number (len characters starting at number) to
+// buffer, followed by a comma separator when the number is an element nested
+// inside the value being collected.
+static inline void add_number(ctx_t *context, std::string &buffer, const char *number, size_t len)
+{
+    const bool nested = (context->value_nesting > 0);
+
+    maybe_expand_buffer(buffer, len + 1);
+    buffer.append(number, len);
+
+    if (nested) {
+        buffer += ',';
+    }
+}
+
+
+// Appends a string (len bytes starting at str) to buffer, surrounded by double
+// quotes. The bytes are copied as received, no escaping is applied here. When
+// the string is an element nested inside the value being collected, a trailing
+// comma separator is appended as well.
+static inline void add_string(ctx_t *context, std::string &buffer, const unsigned char *str, size_t len)
+{
+    const char *bytes = reinterpret_cast<const char *>(str);
+
+    maybe_expand_buffer(buffer, len + 3);
+    buffer += '"';
+    buffer.append(bytes, len);
+    buffer += '"';
+
+    if (context->value_nesting > 0) {
+        buffer += ',';
+    }
+}
+
+
+// Opens an object inside the value being collected: increases the value's
+// nesting depth and appends '{' to buffer.
+static inline void add_object_start(ctx_t *context, std::string &buffer)
+{
+    context->value_nesting += 1;
+    maybe_expand_buffer(buffer, 3);
+    buffer.push_back('{');
+}
+
+
+// Closes an object inside the value being collected: decreases the value's
+// nesting depth and terminates the object in buffer with '}'. The separator
+// left by the object's last member, if any, is overwritten by the '}'. When
+// the object is itself nested, a comma separator is appended after it.
+// buffer must not be empty.
+static inline void add_object_end(ctx_t *context, std::string &buffer)
+{
+    const size_t tail = buffer.length() - 1;
+
+    context->value_nesting -= 1;
+
+    if (buffer[tail] != ',') {
+        buffer.push_back('}');
+    } else {
+        // reuse the slot of the trailing separator
+        buffer[tail] = '}';
+    }
+
+    if (context->value_nesting > 0) {
+        buffer.push_back(',');
+    }
+}
+
+
+// Appends an object key (len bytes starting at key) to buffer, surrounded by
+// double quotes and followed by a colon. context is not used.
+static inline void add_object_key(ctx_t *context, std::string &buffer, const unsigned char *key, size_t len)
+{
+    const char *bytes = reinterpret_cast<const char *>(key);
+
+    maybe_expand_buffer(buffer, len + 3);
+    buffer.push_back('"');
+    buffer.append(bytes, len);
+    buffer.append("\":");
+}
+
+
+// Opens an array inside the value being collected: increases the value's
+// nesting depth and appends '[' to buffer.
+static inline void add_array_start(ctx_t *context, std::string &buffer)
+{
+    context->value_nesting += 1;
+    maybe_expand_buffer(buffer, 3);
+    buffer.push_back('[');
+}
+
+
+// Closes an array inside the value being collected: decreases the value's
+// nesting depth and terminates the array in buffer with ']'. The separator
+// left by the array's last element, if any, is overwritten by the ']'. When
+// the array is itself nested, a comma separator is appended after it.
+// buffer must not be empty.
+static inline void add_array_end(ctx_t *context, std::string &buffer)
+{
+    const size_t tail = buffer.length() - 1;
+
+    context->value_nesting -= 1;
+
+    if (buffer[tail] != ',') {
+        buffer.push_back(']');
+    } else {
+        // reuse the slot of the trailing separator
+        buffer[tail] = ']';
+    }
+
+    if (context->value_nesting > 0) {
+        buffer.push_back(',');
+    }
+}
+
+
+// Called after something was appended to the debug info entry being collected.
+// Once the entry's value is complete (its nesting depth is back to 0), the
+// entry is moved into the context's list of debug infos and the parser goes
+// back to waiting for the next entry of the "debug_info" object.
+static inline void maybe_debug_entry_end(ctx_t *context)
+{
+    if (context->value_nesting != 0) {
+        // still inside a nested array/object of the entry's value
+        return;
+    }
+
+    assert(context->tmp_debug_info != NULL);
+    context->debug_infos->push_back(context->tmp_debug_info);
+    context->tmp_debug_info = NULL;
+    context->parser_sub_state = parser_get_debug_infos;
+}
+
+
+// Makes sure buffer can take appendLen more characters without reallocating.
+// When the current capacity is not enough, it is grown by the larger of the
+// current capacity and appendLen (that is, it at least doubles).
+static inline void maybe_expand_buffer(std::string &buffer, size_t appendLen)
+{
+    const size_t capacity = buffer.capacity();
+
+    if (buffer.length() + appendLen <= capacity) {
+        return;
+    }
+
+    const size_t growth = (capacity < appendLen) ? appendLen : capacity;
+    buffer.reserve(capacity + growth);
+}
View
70 src/couch_view_parser/couch_view_parser.erl
@@ -0,0 +1,70 @@
+%% @copyright 2012 Couchbase, Inc.
+%%
+%% @author Filipe Manana <filipe@couchbase.com>
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+%% use this file except in compliance with the License. You may obtain a copy of
+%% the License at
+%%
+%% http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+%% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+%% License for the specific language governing permissions and limitations under
+%% the License.
+
+-module(couch_view_parser).
+
+-export([start_context/0, parse_chunk/2, next_state/1]).
+
+-on_load(init/0).
+
+%% Shapes of the rows handed back by next_state/1. All members are binaries:
+%% a reduce view row is {Key, Value}; a map view row is keyed by {Key, DocId},
+%% its value is either a plain binary or a {PartId, Node, Value} tuple, and it
+%% may carry a third {doc, Doc} element.
+-type key_docid() :: {Key::binary(), DocId::binary()}.
+-type map_value() :: binary() |
+ {PartId::binary(), Node::binary(), Value::binary()}.
+-type reduce_value() :: binary().
+-type doc_member() :: {'doc', Doc::binary()}.
+-type view_row() :: {Key::binary(), reduce_value()} |
+ {key_docid(), map_value()} |
+ {key_docid(), map_value(), doc_member()}.
+
+%% on_load hook: locates and loads the couch_view_parser_nif shared library.
+%% The library is looked up in the application's priv directory; when
+%% code:priv_dir/1 does not know the application, "../priv" is used if that
+%% directory exists and "priv" otherwise (both relative to the current directory).
+%% The result of erlang:load_nif/2 is deliberately caught and ignored, so a
+%% failed load only shows up later, when a NIF stub raises
+%% couch_view_parser_nif_not_loaded.
+%% Returns 'true' on OTP R13B03 and 'ok' on any other release (presumably the
+%% on_load return value expected by each release - confirm).
+init() ->
+ SoName = case code:priv_dir(?MODULE) of
+ {error, bad_name} ->
+ case filelib:is_dir(filename:join(["..", "priv"])) of
+ true ->
+ filename:join(["..", "priv", "couch_view_parser_nif"]);
+ false ->
+ filename:join(["priv", "couch_view_parser_nif"])
+ end;
+ Dir ->
+ filename:join(Dir, "couch_view_parser_nif")
+ end,
+ (catch erlang:load_nif(SoName, 0)),
+ case erlang:system_info(otp_release) of
+ "R13B03" -> true;
+ _ -> ok
+ end.
+
+
+%% Creates a new parsing context.
+%% This Erlang body is only a stub that runs when the NIF library is not
+%% loaded, in which case it raises couch_view_parser_nif_not_loaded.
+-spec start_context() -> {ok, Context::term()}.
+start_context() ->
+ erlang:nif_error(couch_view_parser_nif_not_loaded).
+
+
+%% Feeds one chunk (an iolist) of a view response to the given context.
+%% This Erlang body is only a stub that runs when the NIF library is not
+%% loaded, in which case it raises couch_view_parser_nif_not_loaded.
+-spec parse_chunk(Context::term(), iolist()) -> 'ok' | {'error', term()}.
+parse_chunk(_Ctx, _Chunk) ->
+ erlang:nif_error(couch_view_parser_nif_not_loaded).
+
+
+%% Asks the context for the next available piece of the parsed response:
+%% debug infos, the row count, a batch of rows, the errors, 'done', or
+%% 'need_more_data' when nothing is available yet.
+%% This Erlang body is only a stub that runs when the NIF library is not
+%% loaded, in which case it raises couch_view_parser_nif_not_loaded.
+-spec next_state(Context::term()) ->
+ {'ok', 'need_more_data'} |
+ {'ok', 'debug_infos', [{From::binary(), Value::binary()}]} |
+ {'ok', 'row_count', string()} |
+ {'ok', 'rows', [view_row()]} |
+ {'ok', 'errors', [{From::binary(), Reason::binary()}]} |
+ {'ok', 'done'} |
+ {'error', term()}.
+next_state(_Ctx) ->
+ erlang:nif_error(couch_view_parser_nif_not_loaded).
View
137 src/couch_view_parser/couch_view_parser.h
@@ -0,0 +1,137 @@
+/**
+ * @copyright 2012 Couchbase, Inc.
+ *
+ * @author Filipe Manana <filipe@couchbase.com>
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ **/
+
+#ifndef COUCH_VIEW_PARSER_H_
+#define COUCH_VIEW_PARSER_H_
+
+#include <string>
+#include <list>
+
+#include "yajl/yajl_parse.h"
+#include "yajl/yajl_gen.h"
+
+#include "erl_nif_compat.h"
+
+
+// Tracks which kind of output nextState() (couch_view_parser_nif.cc) is
+// currently handing back to the caller. Stored in ctx_t::caller_state.
+typedef enum {
+ debug_infos,
+ row_count,
+ rows,
+ error_entries,
+ ending
+} caller_state_t;
+
+// Main state of the parser, stored in ctx_t::parser_state.
+// The declaration order matters: nextState() compares these values
+// numerically (e.g. "parser_state > parser_row_count").
+typedef enum {
+ parser_starting,
+ parser_debug_info,
+ parser_row_count,
+ parser_rows,
+ parser_errors,
+ parser_ending
+} parse_state_t;
+
+// Fine-grained parser position within a main state (see parse_state_t).
+// Stored in ctx_t::parser_sub_state; the enumerators are grouped below by the
+// main state they belong to.
+typedef enum {
+ // main state "parser_debug_info"
+ parser_find_debug_info_key, // looking for "debug_info" key
+ parser_found_debug_info_key, // found "debug_info" key, looking for its value
+ parser_get_debug_infos, // parser "inside" the "debug_info" object
+ parser_get_debug_entry, // parser collecting a value in the "debug_info" object
+ // main state "parser_row_count"
+ parser_find_row_count_key, // looking for "total_rows" key
+ parser_found_row_count_key, // found "total_rows" key, looking for its value
+ // main state "parser_rows"
+ parser_find_rows_key, // looking for "rows" key
+ parser_found_rows_key, // found "rows" key, looking for its value
+ parser_get_row, // parsing a row object (element of the array value of "rows")
+ parser_get_row_id, // parsing value of the "id" field of a row
+ parser_get_row_key, // parsing value of the "key" field of a row
+ parser_get_row_value, // parsing value of the "value" field of a row
+ parser_get_row_doc, // parsing value of the "doc" field of a row
+ parser_get_row_partition, // parsing value of the "partition" field of a row
+ parser_get_row_node, // parsing value of the "node" field of a row
+ parser_get_row_error, // parsing value of the "error" field of a row
+ // main state "parser_errors"
+ parser_find_errors_key, // looking for "errors" key
+ parser_found_errors_key, // found "errors" key, looking for its value
+ parser_get_error_entry, // parsing an error entry from the "errors" array
+ parser_get_error_from, // parsing value of the "from" field of an error entry
+ parser_get_error_reason // parsing value of the "reason" field of an error entry
+} parse_sub_state_t;
+
+// One parsed view row. Each member holds the raw text collected for the row
+// field of the same name; make_rows_list() treats an empty doc, partition,
+// node or error string as "field absent".
+typedef struct {
+ std::string key;
+ std::string id;
+ std::string value;
+ std::string doc;
+ std::string partition;
+ std::string node;
+ std::string error;
+} row_t;
+
+// One element of the "errors" array: the values of its "from" and "reason"
+// fields.
+typedef struct {
+ std::string from;
+ std::string reason;
+} error_entry_t;
+
+// One entry collected from the "debug_info" object.
+// NOTE(review): presumably "from" is the entry's key and "value" its raw
+// value - confirm against couch_view_parser.cc.
+typedef struct {
+ std::string from;
+ std::string value;
+} debug_info_t;
+
+// Parser context. It lives inside a NIF resource allocated by startContext()
+// and is set up / torn down with initContext() / destroyContext().
+typedef struct {
+ // yajl parser handle
+ yajl_handle handle;
+ // current object depth level
+ int level;
+ // what nextState() is currently handing back to the caller
+ caller_state_t caller_state;
+ // main and fine-grained position of the parser
+ parse_state_t parser_state;
+ parse_sub_state_t parser_sub_state;
+ // parse error message
+ // (NULL while no error is recorded; the NIF functions check this first)
+ std::string *error;
+ // row count as text; the NIF treats NULL as "reduce view"
+ char *row_count;
+ // rows parsed so far and not yet returned to the caller
+ std::list<row_t *> *rows;
+ // NOTE(review): presumably the row currently being assembled - confirm
+ row_t *tmp_row;
+ // error entries parsed so far and not yet returned to the caller
+ std::list<error_entry_t *> *error_entries;
+ // NOTE(review): presumably the error entry being assembled - confirm
+ error_entry_t *tmp_error_entry;
+ // debug info entries parsed so far and not yet returned to the caller
+ std::list<debug_info_t *> *debug_infos;
+ // NOTE(review): presumably the debug entry being assembled - confirm
+ debug_info_t *tmp_debug_info;
+ // nesting level of the object we're raw collecting
+ // when > 0, we're inside an object or an array
+ int value_nesting;
+} ctx_t;
+
+
+// Exception carrying a textual description of a parse failure.
+// parseChunk() in couch_view_parser_nif.cc catches it around its call to
+// parseJsonChunk() and converts the message into an error term.
+class JsonParseException {
+    // Message captured at construction time; never modified afterwards.
+    const std::string _msg;
+
+public:
+    // Stores a copy of msg.
+    JsonParseException(const std::string &msg)
+        : _msg(msg)
+    {
+    }
+
+    // Returns the message given to the constructor.
+    const std::string& getMsg() const
+    {
+        return _msg;
+    }
+};
+
+
+// Prepares a freshly allocated context for use (called by startContext()).
+void initContext(ctx_t *context);
+// NOTE(review): presumably releases everything owned by the context - confirm
+// against couch_view_parser.cc.
+void destroyContext(ctx_t *context);
+// Feeds len bytes of data to the parser. The NIF caller handles
+// JsonParseException and std::bad_alloc thrown from this call.
+void parseJsonChunk(ctx_t *context, unsigned char *data, size_t len);
+
+
+#endif
View
601 src/couch_view_parser/couch_view_parser_nif.cc
@@ -0,0 +1,601 @@
+/**
+ * @copyright 2012 Couchbase, Inc.
+ *
+ * @author Filipe Manana <filipe@couchbase.com>
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ **/
+
+#include <iostream>
+#include <string.h>
+#include <assert.h>
+
+#include "couch_view_parser.h"
+
+// Atom terms used when building replies for the Erlang side.
+// NOTE(review): presumably initialised once in onLoad() - confirm.
+static ERL_NIF_TERM ATOM_OK;
+static ERL_NIF_TERM ATOM_ERROR;
+static ERL_NIF_TERM ATOM_NEED_MORE_DATA;
+static ERL_NIF_TERM ATOM_ROW_COUNT;
+static ERL_NIF_TERM ATOM_ROWS;
+static ERL_NIF_TERM ATOM_ERRORS;
+static ERL_NIF_TERM ATOM_DEBUG_INFOS;
+static ERL_NIF_TERM ATOM_DONE;
+
+// Resource type under which ctx_t instances are allocated and looked up.
+static ErlNifResourceType *CTX_RES;
+
+// NIF API functions
+static ERL_NIF_TERM startContext(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]);
+static ERL_NIF_TERM parseChunk(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]);
+static ERL_NIF_TERM nextState(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]);
+
+// NIF API callbacks
+static int onLoad(ErlNifEnv *env, void **priv, ERL_NIF_TERM info);
+
+// Utilities
+// The return_* helpers wrap the matching make_* list builder and turn a
+// std::bad_alloc thrown by it into an error term.
+static inline ERL_NIF_TERM return_rows(ErlNifEnv *env, ctx_t *ctx);
+static inline ERL_NIF_TERM make_rows_list(ErlNifEnv *env, ctx_t *ctx);
+static inline ERL_NIF_TERM return_error_entries(ErlNifEnv *env, ctx_t *ctx);
+static inline ERL_NIF_TERM make_errors_list(ErlNifEnv *env, ctx_t *ctx);
+static inline ERL_NIF_TERM return_debug_entries(ErlNifEnv *env, ctx_t *ctx);
+static inline ERL_NIF_TERM make_debug_entries_list(ErlNifEnv *env, ctx_t *ctx);
+static inline ERL_NIF_TERM makeError(ErlNifEnv *env, const std::string &msg);
+
+// NIF resource functions
+static void freeContext(ErlNifEnv *env, void *res);
+
+
+
+/**
+ * NIF entry point behind couch_view_parser:start_context/0.
+ *
+ * Allocates a ctx_t as a CTX_RES resource, wraps it in a resource term and
+ * initialises it with initContext().
+ *
+ * @return {ok, Context} on success, or an error term built by makeError()
+ *         when the resource can't be allocated.
+ */
+static ERL_NIF_TERM startContext(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[])
+{
+    ctx_t *ctx = static_cast<ctx_t *>(enif_alloc_resource(CTX_RES, sizeof(ctx_t)));
+
+    // Never hand a NULL context to enif_make_resource()/initContext().
+    if (ctx == NULL) {
+        return makeError(env, "memory allocation failure");
+    }
+
+    // The term now keeps the resource alive, so our own reference can be
+    // dropped right away.
+    ERL_NIF_TERM res = enif_make_resource(env, ctx);
+    enif_release_resource(ctx);
+
+    initContext(ctx);
+
+    return enif_make_tuple2(env, ATOM_OK, res);
+}
+
+
+/**
+ * NIF entry point behind couch_view_parser:parse_chunk/2.
+ *
+ * argv[0] must be a context resource and argv[1] an iolist; anything else
+ * yields badarg. If the context already records an error, that error is
+ * returned and nothing is parsed.
+ *
+ * @return the atom ok, or an error term built by makeError().
+ */
+static ERL_NIF_TERM parseChunk(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[])
+{
+    ctx_t *context = NULL;
+    void **resource = reinterpret_cast<void **>(&context);
+
+    if (!enif_get_resource(env, argv[0], CTX_RES, resource)) {
+        return enif_make_badarg(env);
+    }
+    if (context->error != NULL) {
+        return makeError(env, *context->error);
+    }
+
+    ErlNifBinary input;
+    if (!enif_inspect_iolist_as_binary(env, argv[1], &input)) {
+        return enif_make_badarg(env);
+    }
+
+    try {
+        parseJsonChunk(context, input.data, input.size);
+    } catch (const JsonParseException &parseFailure) {
+        return makeError(env, parseFailure.getMsg());
+    } catch (const std::bad_alloc &) {
+        return makeError(env, "memory allocation failure");
+    }
+
+    return ATOM_OK;
+}
+
+
+// NIF entry point behind couch_view_parser:next_state/1.
+//
+// argv[0] must be a context resource (badarg otherwise). If the context
+// already records an error, that error is returned. Otherwise the reply is
+// chosen from two pieces of state:
+//   - ctx->caller_state: which kind of output was last offered to the caller
+//   - ctx->parser_state: how far the parser has progressed
+// Output is offered in the order debug infos, row count, rows, error entries,
+// done. {ok, need_more_data} is returned whenever nothing is ready to hand
+// out yet, and caller_state is advanced as each stage is passed.
+// A NULL ctx->row_count is treated as "reduce view" (no row count reply).
+static ERL_NIF_TERM nextState(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[])
+{
+ ctx_t *ctx;
+
+ if (!enif_get_resource(env, argv[0], CTX_RES, reinterpret_cast<void **>(&ctx))) {
+ return enif_make_badarg(env);
+ }
+
+ if (ctx->error != NULL) {
+ return makeError(env, *ctx->error);
+ }
+
+ switch (ctx->caller_state) {
+ // Caller has not moved past the debug info stage yet: pending debug
+ // entries are always flushed first, whatever the parser state is.
+ case debug_infos:
+ switch (ctx->parser_state) {
+ case parser_starting:
+ return enif_make_tuple2(env, ATOM_OK, ATOM_NEED_MORE_DATA);
+ case parser_debug_info:
+ if (ctx->debug_infos->empty()) {
+ return enif_make_tuple2(env, ATOM_OK, ATOM_NEED_MORE_DATA);
+ } else {
+ return return_debug_entries(env, ctx);
+ }
+ case parser_row_count:
+ {
+ ctx->caller_state = row_count;
+ if (ctx->debug_infos->empty()) {
+ return enif_make_tuple2(env, ATOM_OK, ATOM_NEED_MORE_DATA);
+ } else {
+ return return_debug_entries(env, ctx);
+ }
+ }
+ case parser_rows:
+ if (ctx->debug_infos->empty()) {
+ ctx->caller_state = rows;
+ if (ctx->row_count != NULL) {
+ // map view
+ ERL_NIF_TERM row_count = enif_make_string(env, ctx->row_count, ERL_NIF_LATIN1);
+ return enif_make_tuple3(env, ATOM_OK, ATOM_ROW_COUNT, row_count);
+ } else {
+ // reduce view
+ if (ctx->rows->empty()) {
+ return enif_make_tuple2(env, ATOM_OK, ATOM_NEED_MORE_DATA);
+ } else {
+ return return_rows(env, ctx);
+ }
+ }
+ } else {
+ ctx->caller_state = row_count;
+ return return_debug_entries(env, ctx);
+ }
+ case parser_errors:
+ if (ctx->debug_infos->empty()) {
+ if (ctx->row_count != NULL) {
+ // map view
+ ERL_NIF_TERM row_count = enif_make_string(env, ctx->row_count, ERL_NIF_LATIN1);
+ ctx->caller_state = rows;
+ return enif_make_tuple3(env, ATOM_OK, ATOM_ROW_COUNT, row_count);
+ } else {
+ // reduce view
+ ctx->caller_state = error_entries;
+ if (ctx->rows->empty()) {
+ if (ctx->error_entries->empty()) {
+ return enif_make_tuple2(env, ATOM_OK, ATOM_NEED_MORE_DATA);
+ } else {
+ return return_error_entries(env, ctx);
+ }
+ } else {
+ return return_rows(env, ctx);
+ }
+ }
+ } else {
+ ctx->caller_state = row_count;
+ return return_debug_entries(env, ctx);
+ }
+ case parser_ending:
+ if (ctx->debug_infos->empty()) {
+ if (ctx->row_count != NULL) {
+ // map view
+ ERL_NIF_TERM row_count = enif_make_string(env, ctx->row_count, ERL_NIF_LATIN1);
+ ctx->caller_state = rows;
+ return enif_make_tuple3(env, ATOM_OK, ATOM_ROW_COUNT, row_count);
+ } else {
+ // reduce view
+ if (ctx->rows->empty()) {
+ ctx->caller_state = ending;
+ if (ctx->error_entries->empty()) {
+ return enif_make_tuple2(env, ATOM_OK, ATOM_DONE);
+ } else {
+ return return_error_entries(env, ctx);
+ }
+ } else {
+ ctx->caller_state = error_entries;
+ return return_rows(env, ctx);
+ }
+ }
+ } else {
+ ctx->caller_state = row_count;
+ return return_debug_entries(env, ctx);
+ }
+ }
+ // Debug infos are done; the row count (map views only) comes next, once
+ // the parser has moved past the row count state.
+ case row_count:
+ if (ctx->parser_state > parser_row_count) {
+ if (ctx->row_count != NULL) {
+ // map view
+ ERL_NIF_TERM row_count = enif_make_string(env, ctx->row_count, ERL_NIF_LATIN1);
+ ctx->caller_state = rows;
+ return enif_make_tuple3(env, ATOM_OK, ATOM_ROW_COUNT, row_count);
+ } else {
+ // reduce view
+ switch (ctx->parser_state) {
+ case parser_rows:
+ {
+ ctx->caller_state = rows;
+ if (ctx->rows->empty()) {
+ return enif_make_tuple2(env, ATOM_OK, ATOM_NEED_MORE_DATA);
+ } else {
+ return return_rows(env, ctx);
+ }
+ }
+ case parser_errors:
+ {
+ ctx->caller_state = error_entries;
+ if (ctx->rows->empty()) {
+ if (ctx->error_entries->empty()) {
+ return enif_make_tuple2(env, ATOM_OK, ATOM_NEED_MORE_DATA);
+ } else {
+ return return_error_entries(env, ctx);
+ }
+ } else {
+ return return_rows(env, ctx);
+ }
+ }
+ case parser_ending:
+ {
+ ctx->caller_state = ending;
+ if (ctx->rows->empty()) {
+ if (ctx->error_entries->empty()) {
+ return enif_make_tuple2(env, ATOM_OK, ATOM_DONE);
+ } else {
+ return return_error_entries(env, ctx);
+ }
+ } else {
+ // rows go out now; step back so pending errors follow
+ if (!ctx->error_entries->empty()) {
+ ctx->caller_state = error_entries;
+ }
+ return return_rows(env, ctx);
+ }
+ }
+ default:
+ return makeError(env, "unexpected state");
+ }
+ }
+ } else {
+ return enif_make_tuple2(env, ATOM_OK, ATOM_NEED_MORE_DATA);
+ }
+ // Handing out rows; pending rows are always returned before error entries.
+ case rows:
+ switch (ctx->parser_state) {
+ case parser_rows:
+ if (ctx->rows->empty()) {
+ return enif_make_tuple2(env, ATOM_OK, ATOM_NEED_MORE_DATA);
+ } else {
+ return return_rows(env, ctx);
+ }
+ case parser_errors:
+ {
+ ctx->caller_state = error_entries;
+ if (ctx->rows->empty()) {
+ if (ctx->error_entries->empty()) {
+ return enif_make_tuple2(env, ATOM_OK, ATOM_NEED_MORE_DATA);
+ } else {
+ return return_error_entries(env, ctx);
+ }
+ } else {
+ return return_rows(env, ctx);
+ }
+ }
+ case parser_ending:
+ {
+ ctx->caller_state = ending;
+ if (ctx->rows->empty()) {
+ if (ctx->error_entries->empty()) {
+ return enif_make_tuple2(env, ATOM_OK, ATOM_DONE);
+ } else {
+ return return_error_entries(env, ctx);
+ }
+ } else {
+ // rows go out now; step back so pending errors follow
+ if (!ctx->error_entries->empty()) {
+ ctx->caller_state = error_entries;
+ }
+ return return_rows(env, ctx);
+ }
+ }
+ default:
+ return makeError(env, "unexpected state");
+ }
+ // Handing out error entries.
+ case error_entries:
+ switch (ctx->parser_state) {
+ case parser_errors:
+ if (ctx->error_entries->empty()) {
+ return enif_make_tuple2(env, ATOM_OK, ATOM_NEED_MORE_DATA);
+ } else {
+ return return_error_entries(env, ctx);
+ }
+ case parser_ending:
+ {
+ ctx->caller_state = ending;
+ if (ctx->error_entries->empty()) {
+ return enif_make_tuple2(env, ATOM_OK, ATOM_DONE);
+ } else {
+ return return_error_entries(env, ctx);
+ }
+ }
+ default:
+ return makeError(env, "unexpected state");
+ }
+ // Everything has been handed out.
+ case ending:
+ assert(ctx->parser_state == parser_ending);
+ return enif_make_tuple2(env, ATOM_OK, ATOM_DONE);
+ }
+
+ return makeError(env, "unexpected state");
+}
+
+
+// Builds the {ok, rows, Rows} reply from the rows queued in the context.
+// An allocation failure while building the list is reported as an error term.
+static inline ERL_NIF_TERM return_rows(ErlNifEnv *env, ctx_t *ctx)
+{
+    ERL_NIF_TERM reply;
+
+    try {
+        ERL_NIF_TERM rowList = make_rows_list(env, ctx);
+        reply = enif_make_tuple3(env, ATOM_OK, ATOM_ROWS, rowList);
+    } catch (const std::bad_alloc &) {
+        reply = makeError(env, "memory allocation failure");
+    }
+
+    return reply;
+}
+
+
+// Copies the bytes of str into a newly allocated Erlang binary and returns
+// the binary term. Ownership of the binary moves to the term as soon as it is
+// made, so callers never need to release anything, even when a later
+// allocation fails.
+// Throws std::bad_alloc if the binary can't be allocated.
+static inline ERL_NIF_TERM make_row_binary(ErlNifEnv *env, const std::string &str)
+{
+    ErlNifBinary bin;
+
+    if (!enif_alloc_binary_compat(env, str.length(), &bin)) {
+        throw std::bad_alloc();
+    }
+    if (!str.empty()) {
+        memcpy(bin.data, str.data(), str.length());
+    }
+
+    return enif_make_binary(env, &bin);
+}
+
+
+// Converts every row queued in ctx->rows into an Erlang term and returns them
+// as a list in their original order. Converted rows are removed from the
+// queue and freed.
+//
+// Row shapes:
+//   error row:    { {Key, error}, Reason }
+//   reduce view:  { Key, Value }                       (ctx->row_count == NULL)
+//   map view:     { {Key, DocId}, Value } or { {Key, DocId}, Value, Doc }
+//                 where Value becomes {PartId, Node, Value} when the row has
+//                 a partition or a node.
+//
+// Throws std::bad_alloc if a binary can't be allocated. Each row is unlinked
+// and freed only after its term has been built, so ctx->rows never holds
+// pointers to freed rows, not even when an allocation fails half way.
+static inline ERL_NIF_TERM make_rows_list(ErlNifEnv *env, ctx_t *ctx)
+{
+    ERL_NIF_TERM result = enif_make_list(env, 0);
+    bool isReduceView = (ctx->row_count == NULL);
+
+    // Consume from the back so that prepending yields the original order.
+    while (!ctx->rows->empty()) {
+        row_t *r = ctx->rows->back();
+        ERL_NIF_TERM keyTerm = make_row_binary(env, r->key);
+        ERL_NIF_TERM row;
+
+        if (!r->error.empty()) {
+            // { {Key, error}, Reason }
+            ERL_NIF_TERM key_error = enif_make_tuple2(env, keyTerm, ATOM_ERROR);
+            row = enif_make_tuple2(env, key_error, make_row_binary(env, r->error));
+        } else if (isReduceView) {
+            // reduce rows never carry a doc
+            assert(r->doc.empty());
+            // { Key, Value }
+            row = enif_make_tuple2(env, keyTerm, make_row_binary(env, r->value));
+        } else {
+            bool hasDoc = (r->doc.length() > 0);
+            bool hasPart = (r->partition.length() > 0) || (r->node.length() > 0);
+            // {Key, DocId}
+            ERL_NIF_TERM key_docid = enif_make_tuple2(env, keyTerm, make_row_binary(env, r->id));
+            ERL_NIF_TERM value = make_row_binary(env, r->value);
+
+            if (hasPart) {
+                // {PartId, Node, Value}
+                ERL_NIF_TERM partTerm = make_row_binary(env, r->partition);
+                ERL_NIF_TERM nodeTerm = make_row_binary(env, r->node);
+                value = enif_make_tuple3(env, partTerm, nodeTerm, value);
+            }
+
+            if (hasDoc) {
+                // { {Key, DocId}, Value, Doc }
+                row = enif_make_tuple3(env, key_docid, value, make_row_binary(env, r->doc));
+            } else {
+                // { {Key, DocId}, Value }
+                row = enif_make_tuple2(env, key_docid, value);
+            }
+        }
+
+        result = enif_make_list_cell(env, row, result);
+
+        // Only now is it safe to drop the row: nothing above can throw anymore.
+        ctx->rows->pop_back();
+        delete r;
+    }
+
+    return result;
+}
+
+
+static inline ERL_NIF_TERM return_error_entries(ErlNifEnv *env, ctx_t *ctx)
+{
+ try {
+ return enif_make_tuple3(env, ATOM_OK, ATOM_ERRORS, make_errors_list(env, ctx));
+ } catch(std::bad_alloc &e) {
+ return makeError(env, "memory allocation failure");
+ }
+}
+
+
+// Converts every entry queued in ctx->error_entries into a {From, Reason}
+// tuple of binaries and returns them as a list in their original order.
+// Converted entries are removed from the queue and freed.
+//
+// Throws std::bad_alloc if a binary can't be allocated. Each entry is unlinked
+// and freed only after its term has been built, so the queue never holds
+// pointers to freed entries, not even when an allocation fails half way.
+static inline ERL_NIF_TERM make_errors_list(ErlNifEnv *env, ctx_t *ctx)
+{
+    ERL_NIF_TERM result = enif_make_list(env, 0);
+
+    // Consume from the back so that prepending yields the original order.
+    while (!ctx->error_entries->empty()) {
+        error_entry_t *entry = ctx->error_entries->back();
+        ErlNifBinary fromBin;
+        ErlNifBinary reasonBin;
+
+        if (!enif_alloc_binary_compat(env, entry->from.length(), &fromBin)) {
+            throw std::bad_alloc();
+        }
+        if (!enif_alloc_binary_compat(env, entry->reason.length(), &reasonBin)) {
+            // fromBin is still owned by us at this point
+            enif_release_binary(&fromBin);
+            throw std::bad_alloc();
+        }
+        if (!entry->from.empty()) {
+            memcpy(fromBin.data, entry->from.data(), entry->from.length());
+        }
+        if (!entry->reason.empty()) {
+            memcpy(reasonBin.data, entry->reason.data(), entry->reason.length());
+        }
+
+        ERL_NIF_TERM fromTerm = enif_make_binary(env, &fromBin);
+        ERL_NIF_TERM reasonTerm = enif_make_binary(env, &reasonBin);
+
+        result = enif_make_list_cell(env, enif_make_tuple2(env, fromTerm, reasonTerm), result);
+
+        // Only now is it safe to drop the entry: nothing above can throw anymore.
+        ctx->error_entries->pop_back();
+        delete entry;
+    }
+
+    return result;
+}
+
+
+// Builds the {ok, debug_infos, Entries} reply from the debug entries queued
+// in the context. An allocation failure while building the list is reported
+// as an error term.
+static inline ERL_NIF_TERM return_debug_entries(ErlNifEnv *env, ctx_t *ctx)
+{
+    ERL_NIF_TERM reply;
+
+    try {
+        ERL_NIF_TERM debugList = make_debug_entries_list(env, ctx);
+        reply = enif_make_tuple3(env, ATOM_OK, ATOM_DEBUG_INFOS, debugList);
+    } catch (const std::bad_alloc &) {
+        reply = makeError(env, "memory allocation failure");
+    }
+
+    return reply;
+}
+