From d8d54dbf1de38e99164dcbcead08ca91ab0cbba8 Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Fri, 8 Nov 2024 21:32:17 -0500 Subject: [PATCH] Add more features to QuickJS scanner Expand scanning for update handlers and text indexes. * Even though Nouveau is experimental, users seem to be excited to try it out, so ensure we also check compatibility for it. Once we check Nouveau, checking Clouseau is not a big deal then, so let's do that as well. * Deliberately use `RegExp.$1` incompatibility to exercise warning logs. * We still have `lists` and `shows` left. `lists` are quite a bit trickier to test as they involve a streaming sub-protocol. So maybe leave them for later. --- .../src/couch_quickjs_scanner_plugin.erl | 198 +++++++++++++++++- .../couch_quickjs_scanner_plugin_tests.erl | 136 +++++++++++- src/couch_scanner/src/couch_scanner_util.erl | 3 +- 3 files changed, 325 insertions(+), 12 deletions(-) diff --git a/src/couch_quickjs/src/couch_quickjs_scanner_plugin.erl b/src/couch_quickjs/src/couch_quickjs_scanner_plugin.erl index c65ae28b42b..2f3a005a4c5 100644 --- a/src/couch_quickjs/src/couch_quickjs_scanner_plugin.erl +++ b/src/couch_quickjs/src/couch_quickjs_scanner_plugin.erl @@ -50,7 +50,11 @@ % DDoc fields -define(FILTERS, <<"filters">>). +-define(UPDATES, <<"updates">>). -define(VIEWS, <<"views">>). +-define(CLOUSEAU, <<"indexes">>). +-define(NOUVEAU, <<"nouveau">>). +-define(INDEX, <<"index">>). -define(MAP, <<"map">>). -define(REDUCE, <<"reduce">>). -define(LIB, <<"lib">>). 
@@ -138,7 +142,7 @@ doc(#st{} = St, Db, #doc{id = DocId} = Doc) -> JsonDoc = couch_query_servers:json_doc(Doc), try St1 = maybe_reset_and_teach_ddocs(St), - process_doc_filter_and_vdu(St1, Db, DocId, JsonDoc), + process_ddoc_functions(St1, Db, DocId, JsonDoc), process_doc_views(St1, Db, JsonDoc) catch Tag:Err:Stack -> @@ -150,8 +154,10 @@ doc(#st{} = St, Db, #doc{id = DocId} = Doc) -> db_closing(#st{docs = []} = St, _Db) -> {ok, St#st{doc_cnt = 0, doc_step = 0}}; db_closing(#st{ddocs = DDocs} = St, Db) -> - {_Db, St1} = maps:fold(fun views_validate/3, {Db, St}, DDocs), - {ok, St1#st{doc_cnt = 0, doc_step = 0, docs = []}}. + {_, St1} = maps:fold(fun views_validate/3, {Db, St}, DDocs), + {_, St2} = maps:fold(fun clouseau_validate/3, {Db, St1}, DDocs), + {_, St3} = maps:fold(fun nouveau_validate/3, {Db, St2}, DDocs), + {ok, St3#st{doc_cnt = 0, doc_step = 0, docs = []}}. % Private @@ -177,9 +183,14 @@ process_ddoc(#st{} = St, DbName, #doc{} = DDoc0) -> St1 = start_or_reset_procs(St), try Views = maps:get(?VIEWS, DDoc, undefined), + Clouseau = maps:get(?CLOUSEAU, DDoc, undefined), + Nouveau = maps:get(?NOUVEAU, DDoc, undefined), lib_load(St1, Views), views_load(St1, valid_views(Views)), + clouseau_load(St1, indexes(Clouseau)), + nouveau_load(St1, indexes(Nouveau)), filters_load(St1, maps:get(?FILTERS, DDoc, undefined)), + updates_load(St1, maps:get(?UPDATES, DDoc, undefined)), vdu_load(St1, maps:get(?VDU, DDoc, undefined)), St2 = start_or_reset_procs(St1), teach_ddoc_validate(St2, DDocId, DDoc), @@ -198,14 +209,16 @@ process_ddoc(#st{} = St, DbName, #doc{} = DDoc0) -> St end. 
-process_doc_filter_and_vdu(#st{} = St, Db, DocId, JsonDoc) -> +process_ddoc_functions(#st{} = St, Db, DocId, JsonDoc) -> #st{sid = SId, ddocs = DDocs} = St, DDocFun = fun(DDocId, #{} = DDoc) -> try Filters = maps:get(?FILTERS, DDoc, undefined), filter_doc_validate(St, DDocId, Filters, JsonDoc), VDU = maps:get(?VDU, DDoc, undefined), - vdu_doc_validate(St, DDocId, VDU, JsonDoc) + vdu_doc_validate(St, DDocId, VDU, JsonDoc), + Updates = maps:get(?UPDATES, DDoc, undefined), + update_doc_validate(St, DDocId, Updates, JsonDoc) catch throw:{validate, Error} -> Meta = #{sid => SId, db => Db, ddoc => DDocId, doc => DocId}, @@ -234,9 +247,11 @@ process_doc_views(#st{} = St, Db, JsonDoc) -> St1 = St#st{docs = [JsonDoc | Docs], docs_size = DocsSize1}, {ok, St1}; false -> - {_Db, St1} = maps:fold(fun views_validate/3, {Db, St}, DDocs), - St2 = St1#st{docs = [], docs_size = 0}, - {ok, St2} + {_, St1} = maps:fold(fun views_validate/3, {Db, St}, DDocs), + {_, St2} = maps:fold(fun clouseau_validate/3, {Db, St1}, DDocs), + {_, St3} = maps:fold(fun nouveau_validate/3, {Db, St2}, DDocs), + St4 = St3#st{docs = [], docs_size = 0}, + {ok, St4} end. views_validate(DDocId, #{?VIEWS := Views}, {Db, #st{} = St0}) when @@ -251,7 +266,7 @@ views_validate(DDocId, #{?VIEWS := Views}, {Db, #st{} = St0}) when [_ | _] -> Fun = fun({Name, #{?MAP := Src}}) -> add_fun_load(St, Name, Src) end, lists:foreach(Fun, ViewList), - {[_ | _], St1 = #st{}} = lists:foldl(fun mapred_fold/2, {ViewList, St}, Docs), + {[_ | _], St1 = #st{}} = lists:foldl(fun view_mapred_fold/2, {ViewList, St}, Docs), {Db, St1}; [] -> % There may be no valid views left @@ -276,7 +291,7 @@ views_validate(_DDocId, #{} = _DDoc, {Db, #st{} = St}) -> % No views {Db, St}. 
-mapred_fold({Props = [_ | _]} = Doc, {ViewList = [_ | _], #st{} = St}) -> +view_mapred_fold({Props = [_ | _]} = Doc, {ViewList = [_ | _], #st{} = St}) -> #st{qjs_proc = Qjs, sm_proc = Sm} = St, DocId = couch_util:get_value(<<"_id">>, Props), SmMapRes = map_doc(Sm, Doc), @@ -306,6 +321,80 @@ mapred_fold({Props = [_ | _]} = Doc, {ViewList = [_ | _], #st{} = St}) -> throw(restart_procs) end. +clouseau_validate(DDocId, #{?CLOUSEAU := Indexes0}, {Db, #st{} = St}) when map_size(Indexes0) > 0 -> + Indexes = indexes(Indexes0), + {Db1, _, St1} = maps:fold(fun clouseau_validate_mapfold/3, {Db, DDocId, St}, Indexes), + {Db1, St1}; +clouseau_validate(_DDocId, #{} = _DDoc, {Db, #st{} = St}) -> + % No clouseau indexes + {Db, St}. + +nouveau_validate(DDocId, #{?NOUVEAU := Indexes0}, {Db, #st{} = St}) when map_size(Indexes0) > 0 -> + Indexes = indexes(Indexes0), + {Db1, _, St1} = maps:fold(fun nouveau_validate_mapfold/3, {Db, DDocId, St}, Indexes), + {Db1, St1}; +nouveau_validate(_DDocId, #{}, {Db, #st{} = St}) -> + % No nouveau indexes + {Db, St}. + +clouseau_validate_mapfold(IndexName, IndexSrc, {Db, DDocId, #st{} = St0}) -> + St = start_or_reset_procs(St0), + #st{sid = SId, docs = Docs, qjs_proc = Qjs, sm_proc = Sm} = St, + try + add_fun(Sm, IndexSrc), + add_fun(Qjs, IndexSrc), + St1 = #st{} = lists:foldl(fun clouseau_foldl/2, St, Docs), + {Db, DDocId, St1} + catch + throw:{validate, Error} -> + Meta = #{sid => SId, db => Db, ddoc => DDocId, index => IndexName}, + validation_warning("clouseau validation failed ~p", Error, Meta), + {Db, DDocId, St}; + Tag:Err:Stack -> + Meta = #{sid => SId, db => Db, ddoc => DDocId, index => IndexName}, + ?ERR("clouseau validation exception ~p:~p:~p", [Tag, Err, Stack], Meta), + {Db, DDocId, St} + end. 
+ +nouveau_validate_mapfold(IndexName, IndexSrc, {Db, DDocId, #st{} = St0}) -> + St = start_or_reset_procs(St0), + #st{sid = SId, docs = Docs, qjs_proc = Qjs, sm_proc = Sm} = St, + try + nouveau_add_fun(Sm, IndexSrc), + nouveau_add_fun(Qjs, IndexSrc), + St1 = #st{} = lists:foldl(fun nouveau_foldl/2, St, Docs), + {Db, DDocId, St1} + catch + throw:{validate, Error} -> + Meta = #{sid => SId, db => Db, ddoc => DDocId, index => IndexName}, + validation_warning("nouveau validation failed ~p", Error, Meta), + {Db, DDocId, St}; + Tag:Err:Stack -> + Meta = #{sid => SId, db => Db, ddoc => DDocId, index => IndexName}, + ?ERR("nouveau validation exception ~p:~p:~p", [Tag, Err, Stack], Meta), + {Db, DDocId, St} + end. + +clouseau_foldl({Props = [_ | _]} = Doc, #st{} = St) -> + #st{qjs_proc = Qjs, sm_proc = Sm} = St, + DocId = couch_util:get_value(<<"_id">>, Props), + SmMapRes = clouseau_index_doc(Sm, Doc), + QjsMapRes = clouseau_index_doc(Qjs, Doc), + case QjsMapRes == SmMapRes of + true -> St; + false -> throw({validate, {clouseau_index, DocId, QjsMapRes, SmMapRes}}) + end. + +nouveau_foldl({Props = [_ | _]} = Doc, #st{} = St) -> + #st{qjs_proc = Qjs, sm_proc = Sm} = St, + DocId = couch_util:get_value(<<"_id">>, Props), + SmMapRes = nouveau_index_doc(Sm, Doc), + QjsMapRes = nouveau_index_doc(Qjs, Doc), + case QjsMapRes == SmMapRes of + true -> St; + false -> throw({validate, {nouveau_index, DocId, QjsMapRes, SmMapRes}}) + end. + reset_per_db_state(#st{qjs_proc = QjsProc, sm_proc = SmProc} = St) -> proc_stop(SmProc), proc_stop(QjsProc), @@ -376,6 +465,20 @@ valid_views(#{} = Views) -> valid_views(_) -> #{}. +indexes(#{} = Indexes) -> + Fun = fun + (<<_/binary>> = IndexName, #{?INDEX := <<IndexFun/binary>>}, #{} = Acc) -> + case no_indeterminism(IndexFun) of + true -> Acc#{IndexName => IndexFun}; + false -> Acc + end; + (_, _, #{} = Acc) -> + Acc + end, + maps:fold(Fun, #{}, Indexes); +indexes(_) -> + #{}. 
+ % Math.random(), Date.now() or new Date() will always show as false postives % no_indeterminism(<>) -> @@ -408,6 +511,29 @@ view_load(#st{} = St, Name, View) -> RedSrc = maps:get(?REDUCE, View, undefined), add_fun_load(St, Name, RedSrc). +clouseau_load(#st{} = St, #{} = Indexes) -> + % Note: we can re-use views add_fun_load here + Fun = fun(Name, <<FunSrc/binary>>) -> add_fun_load(St, Name, FunSrc) end, + maps:foreach(Fun, Indexes); +clouseau_load(#st{}, _) -> + ok. + +nouveau_load(#st{} = St, #{} = Indexes) -> + Fun = fun(Name, <<FunSrc/binary>>) -> nouveau_add_fun_load(St, Name, FunSrc) end, + maps:foreach(Fun, Indexes); +nouveau_load(#st{}, _) -> + ok. + +nouveau_add_fun_load(#st{qjs_proc = Qjs, sm_proc = Sm}, Name, <<_/binary>> = Src) -> + SmRes = nouveau_add_fun(Sm, Src), + QjsRes = nouveau_add_fun(Qjs, Src), + case QjsRes == SmRes of + true -> ok; + false -> throw({validate, {nouveau_add_fun, Name, QjsRes, SmRes}}) + end; +nouveau_add_fun_load(#st{}, _, _) -> + ok. + add_fun_load(#st{qjs_proc = Qjs, sm_proc = Sm}, Name, <<_/binary>> = Src) -> SmRes = add_fun(Sm, Src), QjsRes = add_fun(Qjs, Src), @@ -477,6 +603,34 @@ filter_doc_validate(#st{} = St, DDocId, #{} = Filters, Doc) -> filter_doc_validate(#st{}, _, _, _) -> ok. +updates_load(#st{} = St, #{} = Updates) -> + Fun = fun(Name, Update) -> update_load(St, Name, Update) end, + maps:foreach(Fun, Updates); +updates_load(#st{}, _) -> + ok. + +update_load(#st{qjs_proc = Qjs, sm_proc = Sm}, Name, Update) -> + SmRes = add_fun(Sm, Update), + QjsRes = add_fun(Qjs, Update), + case QjsRes == SmRes of + true -> ok; + false -> throw({validate, {update, Name, QjsRes, SmRes}}) + end. 
+ +update_doc_validate(#st{} = St, DDocId, #{} = Updates, Doc) -> + #st{qjs_proc = Qjs, sm_proc = Sm} = St, + Fun = fun(UName, _) -> + SmRes = update_doc(Sm, DDocId, UName, Doc), + QjsRes = update_doc(Qjs, DDocId, UName, Doc), + case QjsRes == SmRes of + true -> ok; + false -> throw({validate, {update_doc, UName, QjsRes, SmRes}}) + end + end, + maps:foreach(Fun, Updates); +update_doc_validate(#st{}, _, _, _) -> + ok. + vdu_load(#st{qjs_proc = Qjs, sm_proc = Sm}, <<_/binary>> = VDU) -> SmRes = add_fun(Sm, VDU), QjsRes = add_fun(Qjs, VDU), @@ -598,11 +752,35 @@ add_fun(#proc{} = Proc, <<_/binary>> = FunSrc) -> add_fun(#proc{}, _) -> ok. +nouveau_add_fun(#proc{} = Proc, <<_/binary>> = FunSrc) -> + prompt(Proc, [<<"add_fun">>, FunSrc, <<"nouveau">>]); +nouveau_add_fun(#proc{}, _) -> + ok. + +clouseau_index_doc(#proc{} = Proc, {[_ | _]} = Doc) -> + [Fields | _] = prompt(Proc, [<<"index_doc">>, Doc]), + lists:sort(Fields). + +nouveau_index_doc(#proc{} = Proc, {[_ | _]} = Doc) -> + [Fields | _] = prompt(Proc, [<<"nouveau_index_doc">>, Doc]), + lists:sort(Fields). + filter_doc(#proc{} = Proc, DDocId, FName, {[_ | _]} = Doc) -> % Add a mock request object so param access doesn't throw a TypeError MockReq = #{<<"query">> => #{}}, prompt(Proc, [<<"ddoc">>, DDocId, [<<"filters">>, FName], [[Doc], MockReq]]). +update_doc(#proc{} = Proc, DDocId, UName, {[_ | _] = Props} = Doc) -> + % Use a mock object. It's better than nothing at least. We don't know + % what the user might post. + MockReq = #{<<"body">> => #{}, <<"method">> => <<"POST">>, <<"headers">> => #{}}, + MockReq1 = + case couch_util:get_value(<<"_id">>, Props) of + Id when is_binary(Id) -> MockReq#{<<"id">> => Id}; + _ -> MockReq + end, + prompt(Proc, [<<"ddoc">>, DDocId, [<<"updates">>, UName], [Doc, MockReq1]]). + vdu_doc(#proc{} = Proc, DDocId, {[_ | _]} = Doc) -> prompt(Proc, [<<"ddoc">>, DDocId, [<<"validate_doc_update">>], [Doc, Doc]]). 
diff --git a/src/couch_quickjs/test/couch_quickjs_scanner_plugin_tests.erl b/src/couch_quickjs/test/couch_quickjs_scanner_plugin_tests.erl index 03a6b701aaa..24c41e13cbe 100644 --- a/src/couch_quickjs/test/couch_quickjs_scanner_plugin_tests.erl +++ b/src/couch_quickjs/test/couch_quickjs_scanner_plugin_tests.erl @@ -30,7 +30,10 @@ couch_quickjs_scanner_plugin_test_() -> ?TDEF_FE(t_empty_ddoc, 10), ?TDEF_FE(t_multi_emit_map, 10), ?TDEF_FE(t_non_deterministic_views, 10), - ?TDEF_FE(t_handle_list_functions_in_maps, 10) + ?TDEF_FE(t_handle_list_functions_in_maps, 10), + ?TDEF_FE(t_doc_updates, 10), + ?TDEF_FE(t_clouseau, 10), + ?TDEF_FE(t_nouveau, 10) ] }. @@ -350,6 +353,66 @@ t_handle_list_functions_in_maps({_, DbName}) -> ok end. +t_doc_updates({_, DbName}) -> + ok = add_doc(DbName, ?DDOC1, ddoc_update(#{})), + meck:reset(couch_scanner_server), + meck:reset(?PLUGIN), + config:set("couch_scanner_plugins", atom_to_list(?PLUGIN), "true", false), + wait_exit(10000), + ?assertEqual(1, num_calls(start, 2)), + case couch_server:with_spidermonkey() of + true -> + ?assertEqual(1, num_calls(complete, 1)), + ?assert(num_calls(doc, 3) >= 5), + ?assertEqual(0, couch_stats:sample([couchdb, query_server, process_error_exits])), + ?assertEqual(0, couch_stats:sample([couchdb, query_server, process_errors])), + ?assertEqual(0, couch_stats:sample([couchdb, query_server, process_exits])), + % start and complete = 2 + 5 warnings = 7 + ?assertEqual(7, log_calls(warning)); + false -> + ok + end. 
+ +t_clouseau({_, DbName}) -> + ok = add_doc(DbName, ?DDOC1, ddoc_clouseau(#{})), + meck:reset(couch_scanner_server), + meck:reset(?PLUGIN), + config:set("couch_scanner_plugins", atom_to_list(?PLUGIN), "true", false), + wait_exit(10000), + ?assertEqual(1, num_calls(start, 2)), + case couch_server:with_spidermonkey() of + true -> + ?assertEqual(1, num_calls(complete, 1)), + ?assert(num_calls(doc, 3) >= 5), + ?assertEqual(0, couch_stats:sample([couchdb, query_server, process_error_exits])), + ?assertEqual(0, couch_stats:sample([couchdb, query_server, process_errors])), + ?assertEqual(0, couch_stats:sample([couchdb, query_server, process_exits])), + % start and complete = 2 + 3 warnings = 5 + ?assertEqual(5, log_calls(warning)); + false -> + ok + end. + +t_nouveau({_, DbName}) -> + ok = add_doc(DbName, ?DDOC1, ddoc_nouveau(#{})), + meck:reset(couch_scanner_server), + meck:reset(?PLUGIN), + config:set("couch_scanner_plugins", atom_to_list(?PLUGIN), "true", false), + wait_exit(10000), + ?assertEqual(1, num_calls(start, 2)), + case {couch_server:with_spidermonkey(), nouveau:enabled()} of + {true, true} -> + ?assertEqual(1, num_calls(complete, 1)), + ?assert(num_calls(doc, 3) >= 5), + ?assertEqual(0, couch_stats:sample([couchdb, query_server, process_error_exits])), + ?assertEqual(0, couch_stats:sample([couchdb, query_server, process_errors])), + ?assertEqual(0, couch_stats:sample([couchdb, query_server, process_exits])), + % start and complete = 2 + 3 warnings = 5 + ?assertEqual(5, log_calls(warning)); + {_, _} -> + ok + end. + reset_stats() -> Counters = [ [couchdb, query_server, process_error_exits], @@ -573,3 +636,74 @@ ddoc_use_list_funs_in_maps(Doc) -> } } }. + +ddoc_update(Doc) -> + % Deliberately rely on RegExp.$1, which the two JS engines handle 
+ % differently, so the scanner logs update handler warnings. + Doc#{ + updates => #{ + u1 => << + "function(doc, req) {\n" + " doc.a.search(/(x+)/); \n" + " if (RegExp.$1 === undefined) {\n" + " return [null, 'no_dollar_one']; \n" + " } else { \n" + " return [null, 'got_dollar_one'];\n" + " }\n" + "}" + >> + } + }. + +ddoc_clouseau(Doc) -> + Doc#{ + indexes => #{ + idx1 => #{ + <<"default_analyzer">> => <<"english">>, + <<"index">> => << + "function(doc) {\n" + " index('a', doc.a, {'store': true}); \n" + " index('fourtytwo', 42, {'store': false}); \n" + "}" + >> + }, + idx2 => #{ + <<"index">> => << + "function(doc) {\n" + " doc.a.search(/(x+)/); \n" + " if (RegExp.$1 === undefined) {\n" + " index('dollar_one', 'nope') \n" + " } else { \n" + " index('dollar_one', 'yup') \n" + " }\n" + "}" + >> + } + } + }. + +ddoc_nouveau(Doc) -> + Doc#{ + nouveau => #{ + idx1 => #{ + <<"index">> => << + "function(doc) {\n" + " index('string', 'a', doc.a, {'store': true}); \n" + " index('double', 'fourtytwo', 42, {'store': false}); \n" + "}" + >> + }, + idx2 => #{ + <<"index">> => << + "function(doc) {\n" + " doc.a.search(/(x+)/); \n" + " if (RegExp.$1 === undefined) {\n" + " index('string', 'dollar_one', 'nope') \n" + " } else { \n" + " index('string', 'dollar_one', 'yup') \n" + " }\n" + "}" + >> + } + } + }. diff --git a/src/couch_scanner/src/couch_scanner_util.erl b/src/couch_scanner/src/couch_scanner_util.erl index e67316eb12f..2caafaa5bc6 100644 --- a/src/couch_scanner/src/couch_scanner_util.erl +++ b/src/couch_scanner/src/couch_scanner_util.erl @@ -259,7 +259,8 @@ log_format_meta(Mod, #{} = Meta) -> Db = {"db:~s ", format_db(maps:get(db, Meta, undefined))}, DDocId = {"ddoc:~s ", maps:get(ddoc, Meta, undefined)}, DocId = {"doc:~s ", maps:get(doc, Meta, undefined)}, - FmtArgs = [{"~s ", Mod}, SId, Fun, Db, DDocId, DocId], + Index = {"index:~s ", maps:get(index, Meta, undefined)}, + FmtArgs = [{"~s ", Mod}, SId, Fun, Db, DDocId, Index, DocId], lists:unzip([{Fmt, Arg} || {Fmt, Arg} <- FmtArgs, Arg /= undefined]). format_db(undefined) ->