From 442c302972bd7559b074bdb91d27f4bf6150f064 Mon Sep 17 00:00:00 2001 From: Garren Smith Date: Thu, 21 Jun 2018 16:30:35 +0200 Subject: [PATCH 01/14] Add PSE API to store opaque properties This allows us to implement features outside of the PSE API without requiring changes to the API for each bit of data we may want to end up storing. The use of this opaque object should only be used for features that don't require a beahvior change from the storage engine API. Co-authored-by: Garren Smith Co-authored-by: Robert Newson --- src/couch/src/couch_bt_engine.erl | 55 +++++++++++- src/couch/src/couch_bt_engine_compactor.erl | 8 +- src/couch/src/couch_bt_engine_header.erl | 3 +- src/couch/src/couch_db.erl | 5 ++ src/couch/src/couch_db_engine.erl | 26 ++++++ src/couch/src/couch_db_updater.erl | 9 +- .../test/couch_db_props_upgrade_tests.erl | 83 +++++++++++++++++++ 7 files changed, 180 insertions(+), 9 deletions(-) create mode 100644 src/couch/test/couch_db_props_upgrade_tests.erl diff --git a/src/couch/src/couch_bt_engine.erl b/src/couch/src/couch_bt_engine.erl index f856bde8f01..946b74d0c75 100644 --- a/src/couch/src/couch_bt_engine.erl +++ b/src/couch/src/couch_bt_engine.erl @@ -40,6 +40,7 @@ get_purge_infos_limit/1, get_revs_limit/1, get_security/1, + get_props/1, get_size_info/1, get_update_seq/1, get_uuid/1, @@ -47,6 +48,7 @@ set_revs_limit/2, set_purge_infos_limit/2, set_security/2, + set_props/2, open_docs/2, open_local_docs/2, @@ -104,7 +106,8 @@ -export([ set_update_seq/2, update_header/2, - copy_security/2 + copy_security/2, + copy_props/2 ]). @@ -143,8 +146,9 @@ init(FilePath, Options) -> true -> delete_compaction_files(FilePath), Header0 = couch_bt_engine_header:new(), - ok = couch_file:write_header(Fd, Header0), - Header0; + Header1 = init_set_props(Fd, Header0, Options), + ok = couch_file:write_header(Fd, Header1), + Header1; false -> case couch_file:read_header(Fd) of {ok, Header0} -> @@ -283,6 +287,16 @@ get_security(#st{header = Header} = St) -> end. 
+get_props(#st{header = Header} = St) -> + case couch_bt_engine_header:get(Header, props_ptr) of + undefined -> + []; + Pointer -> + {ok, Props} = couch_file:pread_term(St#st.fd, Pointer), + Props + end. + + get_update_seq(#st{header = Header}) -> couch_bt_engine_header:get(Header, update_seq). @@ -323,6 +337,18 @@ set_security(#st{header = Header} = St, NewSecurity) -> {ok, increment_update_seq(NewSt)}. +set_props(#st{header = Header} = St, Props) -> + Options = [{compression, St#st.compression}], + {ok, Ptr, _} = couch_file:append_term(St#st.fd, Props, Options), + NewSt = St#st{ + header = couch_bt_engine_header:set(Header, [ + {props_ptr, Ptr} + ]), + needs_commit = true + }, + {ok, increment_update_seq(NewSt)}. + + open_docs(#st{} = St, DocIds) -> Results = couch_btree:lookup(St#st.id_tree, DocIds), lists:map(fun @@ -753,6 +779,17 @@ copy_security(#st{header = Header} = St, SecProps) -> }}. +copy_props(#st{header = Header} = St, Props) -> + Options = [{compression, St#st.compression}], + {ok, Ptr, _} = couch_file:append_term(St#st.fd, Props, Options), + {ok, St#st{ + header = couch_bt_engine_header:set(Header, [ + {props_ptr, Ptr} + ]), + needs_commit = true + }}. + + open_db_file(FilePath, Options) -> case couch_file:open(FilePath, Options) of {ok, Fd} -> @@ -939,6 +976,18 @@ upgrade_purge_info(Fd, Header) -> end. +init_set_props(Fd, Header, Options) -> + case couch_util:get_value(props, Options) of + undefined -> + Header; + InitialProps -> + Compression = couch_compress:get_compression_method(), + AppendOpts = [{compression, Compression}], + {ok, Ptr, _} = couch_file:append_term(Fd, InitialProps, AppendOpts), + couch_bt_engine_header:set(Header, props_ptr, Ptr) + end. 
+ + delete_compaction_files(FilePath) -> RootDir = config:get("couchdb", "database_dir", "."), DelOpts = [{context, compaction}], diff --git a/src/couch/src/couch_bt_engine_compactor.erl b/src/couch/src/couch_bt_engine_compactor.erl index 10de686878e..737f7724558 100644 --- a/src/couch/src/couch_bt_engine_compactor.erl +++ b/src/couch/src/couch_bt_engine_compactor.erl @@ -276,9 +276,13 @@ copy_compact(DbName, St, NewSt0, Retry) -> SecProps = couch_bt_engine:get_security(St), {ok, NewSt4} = couch_bt_engine:copy_security(NewSt3, SecProps), + % Copy general properties over + Props = couch_bt_engine:get_props(St), + {ok, NewSt5} = couch_bt_engine:set_props(NewSt4, Props), + FinalUpdateSeq = couch_bt_engine:get_update_seq(St), - {ok, NewSt5} = couch_bt_engine:set_update_seq(NewSt4, FinalUpdateSeq), - commit_compaction_data(NewSt5). + {ok, NewSt6} = couch_bt_engine:set_update_seq(NewSt5, FinalUpdateSeq), + commit_compaction_data(NewSt6). copy_docs(St, #st{} = NewSt, MixedInfos, Retry) -> diff --git a/src/couch/src/couch_bt_engine_header.erl b/src/couch/src/couch_bt_engine_header.erl index 619264a0d4a..2dafb3e47c7 100644 --- a/src/couch/src/couch_bt_engine_header.erl +++ b/src/couch/src/couch_bt_engine_header.erl @@ -69,7 +69,8 @@ uuid, epochs, compacted_seq, - purge_infos_limit = 1000 + purge_infos_limit = 1000, + props_ptr }). diff --git a/src/couch/src/couch_db.erl b/src/couch/src/couch_db.erl index 2f63fcfe8f6..9bc0f9263a0 100644 --- a/src/couch/src/couch_db.erl +++ b/src/couch/src/couch_db.erl @@ -584,6 +584,10 @@ get_db_info(Db) -> undefined -> null; Else1 -> Else1 end, + Props = case couch_db_engine:get_props(Db) of + undefined -> null; + Else2 -> {Else2} + end, InfoList = [ {db_name, Name}, {engine, couch_db_engine:get_engine(Db)}, @@ -605,6 +609,7 @@ get_db_info(Db) -> {disk_format_version, DiskVersion}, {committed_update_seq, CommittedUpdateSeq}, {compacted_seq, CompactedSeq}, + {props, Props}, {uuid, Uuid} ], {ok, InfoList}. 
diff --git a/src/couch/src/couch_db_engine.erl b/src/couch/src/couch_db_engine.erl index ea30dbc77ac..806d352cb60 100644 --- a/src/couch/src/couch_db_engine.erl +++ b/src/couch/src/couch_db_engine.erl @@ -243,6 +243,10 @@ -callback get_security(DbHandle::db_handle()) -> SecProps::any(). +% Get the current properties. +-callback get_props(DbHandle::db_handle()) -> Props::[any()]. + + % This information is displayed in the database info poperties. It % should just be a list of {Name::atom(), Size::non_neg_integer()} % tuples that will then be combined across shards. Currently, @@ -288,6 +292,15 @@ {ok, NewDbHandle::db_handle()}. +% This function is only called by couch_db_updater and +% as such is guaranteed to be single threaded calls. The +% database should simply store provided property list +% unaltered. + +-callback set_props(DbHandle::db_handle(), Props::any()) -> + {ok, NewDbHandle::db_handle()}. + + % This function will be called by many processes concurrently. % It should return a #full_doc_info{} record or not_found for % every provided DocId in the order those DocId's appear in @@ -670,6 +683,7 @@ get_purge_infos_limit/1, get_revs_limit/1, get_security/1, + get_props/1, get_size_info/1, get_update_seq/1, get_uuid/1, @@ -677,6 +691,7 @@ set_revs_limit/2, set_security/2, set_purge_infos_limit/2, + set_props/2, open_docs/2, open_local_docs/2, @@ -836,6 +851,11 @@ get_security(#db{} = Db) -> Engine:get_security(EngineState). +get_props(#db{} = Db) -> + #db{engine = {Engine, EngineState}} = Db, + Engine:get_props(EngineState). + + get_size_info(#db{} = Db) -> #db{engine = {Engine, EngineState}} = Db, Engine:get_size_info(EngineState). @@ -868,6 +888,12 @@ set_security(#db{} = Db, SecProps) -> {ok, Db#db{engine = {Engine, NewSt}}}. +set_props(#db{} = Db, Props) -> + #db{engine = {Engine, EngineState}} = Db, + {ok, NewSt} = Engine:set_props(EngineState, Props), + {ok, Db#db{engine = {Engine, NewSt}}}. 
+ + open_docs(#db{} = Db, DocIds) -> #db{engine = {Engine, EngineState}} = Db, Engine:open_docs(EngineState, DocIds). diff --git a/src/couch/src/couch_db_updater.erl b/src/couch/src/couch_db_updater.erl index 87301d2d8b6..c0974aa94ec 100644 --- a/src/couch/src/couch_db_updater.erl +++ b/src/couch/src/couch_db_updater.erl @@ -310,21 +310,24 @@ init_db(DbName, FilePath, EngineState, Options) -> BDU = couch_util:get_value(before_doc_update, Options, nil), ADR = couch_util:get_value(after_doc_read, Options, nil), - CleanedOpts = [Opt || Opt <- Options, Opt /= create], + NonCreateOpts = [Opt || Opt <- Options, Opt /= create], InitDb = #db{ name = DbName, filepath = FilePath, engine = EngineState, instance_start_time = StartTime, - options = CleanedOpts, + options = NonCreateOpts, before_doc_update = BDU, after_doc_read = ADR }, + DbProps = couch_db_engine:get_props(InitDb), + InitDb#db{ committed_update_seq = couch_db_engine:get_update_seq(InitDb), - security = couch_db_engine:get_security(InitDb) + security = couch_db_engine:get_security(InitDb), + options = lists:keystore(props, 1, NonCreateOpts, {props, DbProps}) }. diff --git a/src/couch/test/couch_db_props_upgrade_tests.erl b/src/couch/test/couch_db_props_upgrade_tests.erl new file mode 100644 index 00000000000..40ad283cf31 --- /dev/null +++ b/src/couch/test/couch_db_props_upgrade_tests.erl @@ -0,0 +1,83 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_db_props_upgrade_tests). 
+ +-include_lib("couch/include/couch_eunit.hrl"). +-include_lib("couch/include/couch_db.hrl"). +-include_lib("couch_mrview/include/couch_mrview.hrl"). + + +setup() -> + DbName = <<"test">>, + DbFileName = "test.couch", + OldDbFilePath = filename:join([?FIXTURESDIR, DbFileName]), + + DbDir = config:get("couchdb", "database_dir"), + NewDbFilePath = filename:join([DbDir, DbFileName]), + + file:delete(NewDbFilePath), + {ok, _} = file:copy(OldDbFilePath, NewDbFilePath), + + DbName. + + +teardown(DbName) when is_binary(DbName) -> + couch_server:delete(DbName, [?ADMIN_CTX]), + ok. + + +old_db_info_test_() -> + { + "Old database versions work", + { + setup, + fun test_util:start_couch/0, + fun test_util:stop_couch/1, + { + foreach, + fun setup/0, + fun teardown/1, + [ + fun can_get_props/1, + fun can_get_db_info/1, + fun can_compact_db/1 + ] + } + } + }. + + +can_get_props(DbName) -> + ?_test(begin + {ok, Db} = couch_db:open_int(DbName, []), + Props = couch_db_engine:get_props(Db), + ?assert(is_list(Props)) + end). + + +can_get_db_info(DbName) -> + ?_test(begin + {ok, Db} = couch_db:open_int(DbName, []), + {ok, Info} = couch_db:get_db_info(Db), + Props = couch_util:get_value(props, Info), + ?assertEqual({[]}, Props) + end). + + +can_compact_db(DbName) -> + ?_test(begin + couch_util:with_db(DbName, fun(Db) -> + couch_db:start_compact(Db), + couch_db:wait_for_compaction(Db) + end) + end). From 62a294f9ce36c1e570bf9035b287dc5432b8b95e Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Thu, 25 Oct 2018 16:09:38 -0500 Subject: [PATCH 02/14] Improve `couch_db:clustered_db` flexibility This allows for setting any combintaion of supported settings using a proplist appraoch. 
--- src/couch/src/couch_db.erl | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/src/couch/src/couch_db.erl b/src/couch/src/couch_db.erl index 9bc0f9263a0..2b4f21aec72 100644 --- a/src/couch/src/couch_db.erl +++ b/src/couch/src/couch_db.erl @@ -185,11 +185,22 @@ reopen(#db{} = Db) -> incref(#db{} = Db) -> couch_db_engine:incref(Db). -clustered_db(DbName, UserCtx) -> - clustered_db(DbName, UserCtx, []). +clustered_db(DbName, Options) when is_list(Options) -> + UserCtx = couch_util:get_value(user_ctx, Options, #user_ctx{}), + SecProps = couch_util:get_value(security, Options, []), + Props = couch_util:get_value(props, Options, []), + {ok, #db{ + name = DbName, + user_ctx = UserCtx, + security = SecProps, + options = [{props, Props}] + }}; + +clustered_db(DbName, #user_ctx{} = UserCtx) -> + clustered_db(DbName, [{user_ctx, UserCtx}]). clustered_db(DbName, UserCtx, SecProps) -> - {ok, #db{name = DbName, user_ctx = UserCtx, security = SecProps}}. + clustered_db(DbName, [{user_ctx, UserCtx}, {security, SecProps}]). is_db(#db{}) -> true; From cea5ea0a7734fd867c80dba1b7e08a52fb6626f5 Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Thu, 25 Oct 2018 16:13:48 -0500 Subject: [PATCH 03/14] Implement `fabric_util:open_cluster_db` This allows for more fine grained use of couch_db:clustered_db as well as chagnes the name to something more appropriate than `fake_db`. 
--- src/fabric/src/fabric.erl | 2 +- src/fabric/src/fabric_doc_update.erl | 8 ++++---- src/fabric/src/fabric_util.erl | 15 +++++++++++++-- 3 files changed, 18 insertions(+), 7 deletions(-) diff --git a/src/fabric/src/fabric.erl b/src/fabric/src/fabric.erl index 9bc99c26533..092553f2bcf 100644 --- a/src/fabric/src/fabric.erl +++ b/src/fabric/src/fabric.erl @@ -383,7 +383,7 @@ query_view(DbName, Options, DDoc, ViewName, Callback, Acc0, QueryArgs0) -> Db = dbname(DbName), View = name(ViewName), case fabric_util:is_users_db(Db) of true -> - FakeDb = fabric_util:fake_db(Db, Options), + FakeDb = fabric_util:open_cluster_db(DbName, Options), couch_users_db:after_doc_read(DDoc, FakeDb); false -> ok diff --git a/src/fabric/src/fabric_doc_update.erl b/src/fabric/src/fabric_doc_update.erl index b7a27bb4ad4..c108c9a3285 100644 --- a/src/fabric/src/fabric_doc_update.erl +++ b/src/fabric/src/fabric_doc_update.erl @@ -104,13 +104,13 @@ handle_message({request_entity_too_large, Entity}, _, _) -> before_doc_update(DbName, Docs, Opts) -> case {fabric_util:is_replicator_db(DbName), fabric_util:is_users_db(DbName)} of {true, _} -> - %% fake db is expensive to create so we only do it if we have to - Db = fabric_util:fake_db(DbName, Opts), + %% cluster db is expensive to create so we only do it if we have to + Db = fabric_util:open_cluster_db(DbName, Opts), [couch_replicator_docs:before_doc_update(Doc, Db, replicated_changes) || Doc <- Docs]; {_, true} -> - %% fake db is expensive to create so we only do it if we have to - Db = fabric_util:fake_db(DbName, Opts), + %% cluster db is expensive to create so we only do it if we have to + Db = fabric_util:open_cluster_db(DbName, Opts), [couch_users_db:before_doc_update(Doc, Db, interactive_edit) || Doc <- Docs]; _ -> diff --git a/src/fabric/src/fabric_util.erl b/src/fabric/src/fabric_util.erl index cc1f1b62203..f1bc23ad0ea 100644 --- a/src/fabric/src/fabric_util.erl +++ b/src/fabric/src/fabric_util.erl @@ -17,7 +17,8 @@ remove_down_workers/2, 
doc_id_and_rev/1]). -export([request_timeout/0, attachments_timeout/0, all_docs_timeout/0]). -export([log_timeout/2, remove_done_workers/2]). --export([is_users_db/1, is_replicator_db/1, fake_db/2]). +-export([is_users_db/1, is_replicator_db/1]). +-export([open_cluster_db/1, open_cluster_db/2]). -export([upgrade_mrargs/1]). -compile({inline, [{doc_id_and_rev,1}]}). @@ -214,7 +215,17 @@ is_users_db(DbName) -> path_ends_with(Path, Suffix) -> Suffix =:= couch_db:dbname_suffix(Path). -fake_db(DbName, Opts) -> +open_cluster_db(#shard{dbname = DbName, opts = Options}) -> + case couch_util:get_value(props, Options) of + Props when is_list(Props) -> + {ok, Db} = couch_db:clustered_db(DbName, [{props, Props}]), + Db; + _ -> + {ok, Db} = couch_db:clustered_db(DbName, []), + Db + end. + +open_cluster_db(DbName, Opts) -> {SecProps} = fabric:get_security(DbName), % as admin UserCtx = couch_util:get_value(user_ctx, Opts, #user_ctx{}), {ok, Db} = couch_db:clustered_db(DbName, UserCtx, SecProps), From 993a85f6aaeaa354cc1a8476a3d8327d7659b428 Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Wed, 24 Oct 2018 11:12:27 -0500 Subject: [PATCH 04/14] Pass the DB record to index validation functions Allow index validation to be parameterized by the database without having to reopen its own copy. 
--- src/couch/src/couch_db.erl | 6 +++--- src/couch_index/src/couch_index_server.erl | 4 ++-- src/couch_mrview/src/couch_mrview.erl | 9 ++++++--- 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/src/couch/src/couch_db.erl b/src/couch/src/couch_db.erl index 2b4f21aec72..8ff73e4d297 100644 --- a/src/couch/src/couch_db.erl +++ b/src/couch/src/couch_db.erl @@ -847,7 +847,7 @@ group_alike_docs([Doc|Rest], [Bucket|RestBuckets]) -> validate_doc_update(#db{}=Db, #doc{id= <<"_design/",_/binary>>}=Doc, _GetDiskDocFun) -> case catch check_is_admin(Db) of - ok -> validate_ddoc(Db#db.name, Doc); + ok -> validate_ddoc(Db, Doc); Error -> Error end; validate_doc_update(#db{validate_doc_funs = undefined} = Db, Doc, Fun) -> @@ -865,9 +865,9 @@ validate_doc_update(Db, Doc, GetDiskDocFun) -> validate_doc_update_int(Db, Doc, GetDiskDocFun) end. -validate_ddoc(DbName, DDoc) -> +validate_ddoc(Db, DDoc) -> try - ok = couch_index_server:validate(DbName, couch_doc:with_ejson_body(DDoc)) + ok = couch_index_server:validate(Db, couch_doc:with_ejson_body(DDoc)) catch throw:{invalid_design_doc, Reason} -> {bad_request, invalid_design_doc, Reason}; diff --git a/src/couch_index/src/couch_index_server.erl b/src/couch_index/src/couch_index_server.erl index a33c1e4909a..49d1e61b741 100644 --- a/src/couch_index/src/couch_index_server.erl +++ b/src/couch_index/src/couch_index_server.erl @@ -41,7 +41,7 @@ start_link() -> gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). -validate(DbName, DDoc) -> +validate(Db, DDoc) -> LoadModFun = fun ({ModNameList, "true"}) -> try @@ -54,7 +54,7 @@ validate(DbName, DDoc) -> end, ValidateFun = fun (ModName) -> - ModName:validate(DbName, DDoc) + ModName:validate(Db, DDoc) end, EnabledIndexers = lists:flatmap(LoadModFun, config:get("indexers")), lists:foreach(ValidateFun, EnabledIndexers). 
diff --git a/src/couch_mrview/src/couch_mrview.erl b/src/couch_mrview/src/couch_mrview.erl index f6462e15670..d5b01ed7c18 100644 --- a/src/couch_mrview/src/couch_mrview.erl +++ b/src/couch_mrview/src/couch_mrview.erl @@ -169,7 +169,7 @@ join([H|T], Sep, Acc) -> join(T, Sep, [Sep, H | Acc]). -validate(DbName, DDoc) -> +validate(Db, DDoc) -> ok = validate_ddoc_fields(DDoc#doc.body), GetName = fun (#mrview{map_names = [Name | _]}) -> Name; @@ -194,8 +194,11 @@ validate(DbName, DDoc) -> couch_query_servers:try_compile(Proc, reduce, RedName, RedSrc) end, Reds) end, - {ok, #mrst{language=Lang, views=Views}} - = couch_mrview_util:ddoc_to_mrst(DbName, DDoc), + {ok, #mrst{ + language = Lang, + views = Views + }} = couch_mrview_util:ddoc_to_mrst(couch_db:name(Db), DDoc), + try Views =/= [] andalso couch_query_servers:get_os_process(Lang) of false -> ok; From c5569edcf9a76500574f1aa26cfe552449f47384 Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Wed, 24 Oct 2018 11:13:29 -0500 Subject: [PATCH 05/14] Validate design document options more strictly This adds specific datatype requirements to the list of allowable design document options. 
Co-authored-by: Garren Smith Co-authored-by: Robert Newson --- src/couch_mrview/src/couch_mrview.erl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/couch_mrview/src/couch_mrview.erl b/src/couch_mrview/src/couch_mrview.erl index d5b01ed7c18..391acf41200 100644 --- a/src/couch_mrview/src/couch_mrview.erl +++ b/src/couch_mrview/src/couch_mrview.erl @@ -57,6 +57,8 @@ validate_ddoc_fields(DDoc) -> [{<<"language">>, string}], [{<<"lists">>, object}, {any, [object, string]}], [{<<"options">>, object}], + [{<<"options">>, object}, {<<"include_design">>, boolean}], + [{<<"options">>, object}, {<<"local_seq">>, boolean}], [{<<"rewrites">>, [string, array]}], [{<<"shows">>, object}, {any, [object, string]}], [{<<"updates">>, object}, {any, [object, string]}], @@ -133,6 +135,8 @@ validate_ddoc_field(Value, array) when is_list(Value) -> ok; validate_ddoc_field({Value}, object) when is_list(Value) -> ok; +validate_ddoc_field(Value, boolean) when is_boolean(Value) -> + ok; validate_ddoc_field({Props}, {any, Type}) -> validate_ddoc_field1(Props, Type); validate_ddoc_field({Props}, {Key, Type}) -> From 6adcb184c9df6bade064453d4d6151e8e90a8a04 Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Thu, 25 Oct 2018 16:58:48 -0500 Subject: [PATCH 06/14] Implement configurable hash functions This provides the capability for features to specify alternative hash functions for placing documents in a given shard range. While the functionality exists with this implementation it is not yet actually used. 
--- src/mem3/src/mem3.erl | 8 ++-- src/mem3/src/mem3_hash.erl | 73 ++++++++++++++++++++++++++++++++ src/mem3/src/mem3_shards.erl | 4 +- src/mem3/src/mem3_util.erl | 21 ++++++--- src/mem3/test/mem3_hash_test.erl | 23 ++++++++++ src/mem3/test/mem3_util_test.erl | 8 ---- 6 files changed, 116 insertions(+), 21 deletions(-) create mode 100644 src/mem3/src/mem3_hash.erl create mode 100644 src/mem3/test/mem3_hash_test.erl diff --git a/src/mem3/src/mem3.erl b/src/mem3/src/mem3.erl index f1af0f796c2..832c88d5432 100644 --- a/src/mem3/src/mem3.erl +++ b/src/mem3/src/mem3.erl @@ -237,15 +237,15 @@ dbname(_) -> %% @doc Determine if DocId belongs in shard (identified by record or filename) belongs(#shard{}=Shard, DocId) when is_binary(DocId) -> [Begin, End] = range(Shard), - belongs(Begin, End, DocId); + belongs(Begin, End, Shard, DocId); belongs(<<"shards/", _/binary>> = ShardName, DocId) when is_binary(DocId) -> [Begin, End] = range(ShardName), - belongs(Begin, End, DocId); + belongs(Begin, End, ShardName, DocId); belongs(DbName, DocId) when is_binary(DbName), is_binary(DocId) -> true. -belongs(Begin, End, DocId) -> - HashKey = mem3_util:hash(DocId), +belongs(Begin, End, Shard, DocId) -> + HashKey = mem3_hash:calculate(Shard, DocId), Begin =< HashKey andalso HashKey =< End. range(#shard{range = Range}) -> diff --git a/src/mem3/src/mem3_hash.erl b/src/mem3/src/mem3_hash.erl new file mode 100644 index 00000000000..665c61cb143 --- /dev/null +++ b/src/mem3/src/mem3_hash.erl @@ -0,0 +1,73 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(mem3_hash). + +-export([ + calculate/2, + + get_hash_fun/1, + + crc32/1 +]). + + +-include_lib("mem3/include/mem3.hrl"). + + +calculate(#shard{opts = Opts}, DocId) -> + Props = couch_util:get_value(props, Opts, []), + MFA = get_hash_fun_int(Props), + calculate(MFA, DocId); + +calculate(#ordered_shard{opts = Opts}, DocId) -> + Props = couch_util:get_value(props, Opts, []), + MFA = get_hash_fun_int(Props), + calculate(MFA, DocId); + +calculate(DbName, DocId) when is_binary(DbName) -> + MFA = get_hash_fun(DbName), + calculate(MFA, DocId); + +calculate({Mod, Fun, Args}, DocId) -> + erlang:apply(Mod, Fun, [DocId | Args]). + + +get_hash_fun(#shard{opts = Opts}) -> + get_hash_fun_int(Opts); + +get_hash_fun(#ordered_shard{opts = Opts}) -> + get_hash_fun_int(Opts); + +get_hash_fun(DbName0) when is_binary(DbName0) -> + DbName = mem3:dbname(DbName0), + try + [#shard{opts=Opts} | _] = mem3_shards:for_db(DbName), + get_hash_fun_int(couch_util:get_value(props, Opts, [])) + catch error:database_does_not_exist -> + {?MODULE, crc32, []} + end. + + +crc32(Item) when is_binary(Item) -> + erlang:crc32(Item); +crc32(Item) -> + erlang:crc32(term_to_binary(Item)). + + +get_hash_fun_int(Opts) when is_list(Opts) -> + case lists:keyfind(hash, 1, Opts) of + {hash, [Mod, Fun, Args]} -> + {Mod, Fun, Args}; + _ -> + {?MODULE, crc32, []} + end. diff --git a/src/mem3/src/mem3_shards.erl b/src/mem3/src/mem3_shards.erl index 183f28fef06..6afc22f57bd 100644 --- a/src/mem3/src/mem3_shards.erl +++ b/src/mem3/src/mem3_shards.erl @@ -67,7 +67,7 @@ for_docid(DbName, DocId) -> for_docid(DbName, DocId, []). 
for_docid(DbName, DocId, Options) -> - HashKey = mem3_util:hash(DocId), + HashKey = mem3_hash:calculate(DbName, DocId), ShardHead = #shard{ dbname = DbName, range = ['$1', '$2'], @@ -397,7 +397,7 @@ load_shards_from_db(ShardDb, DbName) -> load_shards_from_disk(DbName, DocId)-> Shards = load_shards_from_disk(DbName), - HashKey = mem3_util:hash(DocId), + HashKey = mem3_hash:calculate(hd(Shards), DocId), [S || S <- Shards, in_range(S, HashKey)]. in_range(Shard, HashKey) -> diff --git a/src/mem3/src/mem3_util.erl b/src/mem3/src/mem3_util.erl index 927607aff6c..b44ca2332c1 100644 --- a/src/mem3/src/mem3_util.erl +++ b/src/mem3/src/mem3_util.erl @@ -12,7 +12,7 @@ -module(mem3_util). --export([hash/1, name_shard/2, create_partition_map/5, build_shards/2, +-export([name_shard/2, create_partition_map/5, build_shards/2, n_val/2, q_val/1, to_atom/1, to_integer/1, write_db_doc/1, delete_db_doc/1, shard_info/1, ensure_exists/1, open_db_doc/1]). -export([is_deleted/1, rotate_list/2]). @@ -32,10 +32,6 @@ -include_lib("mem3/include/mem3.hrl"). -include_lib("couch/include/couch_db.hrl"). -hash(Item) when is_binary(Item) -> - erlang:crc32(Item); -hash(Item) -> - erlang:crc32(term_to_binary(Item)). name_shard(Shard) -> name_shard(Shard, ""). @@ -165,7 +161,7 @@ build_shards_by_node(DbName, DocProps) -> dbname = DbName, node = to_atom(Node), range = [Beg, End], - opts = get_engine_opt(DocProps) + opts = get_shard_opts(DocProps) }, Suffix) end, Ranges) end, ByNode). @@ -183,7 +179,7 @@ build_shards_by_range(DbName, DocProps) -> node = to_atom(Node), range = [Beg, End], order = Order, - opts = get_engine_opt(DocProps) + opts = get_shard_opts(DocProps) }, Suffix) end, lists:zip(Nodes, lists:seq(1, length(Nodes)))) end, ByRange). @@ -200,6 +196,9 @@ to_integer(N) when is_binary(N) -> to_integer(N) when is_list(N) -> list_to_integer(N). +get_shard_opts(DocProps) -> + get_engine_opt(DocProps) ++ get_props_opt(DocProps). 
+ get_engine_opt(DocProps) -> case couch_util:get_value(<<"engine">>, DocProps) of Engine when is_binary(Engine) -> @@ -208,6 +207,14 @@ get_engine_opt(DocProps) -> [] end. +get_props_opt(DocProps) -> + case couch_util:get_value(<<"props">>, DocProps) of + {Props} when is_list(Props) -> + [{props, Props}]; + _ -> + [] + end. + n_val(undefined, NodeCount) -> n_val(config:get("cluster", "n", "3"), NodeCount); n_val(N, NodeCount) when is_list(N) -> diff --git a/src/mem3/test/mem3_hash_test.erl b/src/mem3/test/mem3_hash_test.erl new file mode 100644 index 00000000000..7a40c5366e9 --- /dev/null +++ b/src/mem3/test/mem3_hash_test.erl @@ -0,0 +1,23 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(mem3_hash_test). + +-include_lib("eunit/include/eunit.hrl"). + +hash_test() -> + ?assertEqual(1624516141,mem3_hash:crc32(0)), + ?assertEqual(3816901808,mem3_hash:crc32("0")), + ?assertEqual(3523407757,mem3_hash:crc32(<<0>>)), + ?assertEqual(4108050209,mem3_hash:crc32(<<"0">>)), + ?assertEqual(3094724072,mem3_hash:crc32(zero)), + ok. diff --git a/src/mem3/test/mem3_util_test.erl b/src/mem3/test/mem3_util_test.erl index 214217ec4ce..8b74c4b2b74 100644 --- a/src/mem3/test/mem3_util_test.erl +++ b/src/mem3/test/mem3_util_test.erl @@ -15,14 +15,6 @@ -include("mem3.hrl"). -include_lib("eunit/include/eunit.hrl"). 
-hash_test() -> - ?assertEqual(1624516141,mem3_util:hash(0)), - ?assertEqual(3816901808,mem3_util:hash("0")), - ?assertEqual(3523407757,mem3_util:hash(<<0>>)), - ?assertEqual(4108050209,mem3_util:hash(<<"0">>)), - ?assertEqual(3094724072,mem3_util:hash(zero)), - ok. - name_shard_test() -> Shard1 = #shard{}, ?assertError(function_clause, mem3_util:name_shard(Shard1, ".1234")), From d964ab5f7d68d5b05a1ab11655eb1653085452bb Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Thu, 25 Oct 2018 17:03:00 -0500 Subject: [PATCH 07/14] Implement partitioned dbs This change introduces the ability for users to place a group of documents in a single shard range by specifying a "partition key" in the document id. A partition key is denoted by everything preceding a colon ':' in the document id. Every document id (except for design documents) in a partitioned database is required to have a partition key. Co-authored-by: Garren Smith Co-authored-by: Robert Newson --- src/chttpd/src/chttpd.erl | 2 + src/chttpd/src/chttpd_db.erl | 87 +++++++----- src/chttpd/src/chttpd_show.erl | 2 +- src/couch/src/couch_db.erl | 32 +++++ src/couch/src/couch_httpd.erl | 2 + src/couch/src/couch_httpd_db.erl | 44 +++--- src/couch/src/couch_partition.erl | 148 +++++++++++++++++++++ src/couch/src/couch_server.erl | 4 + src/couch_mrview/src/couch_mrview_show.erl | 3 +- src/fabric/src/fabric.erl | 30 +++-- src/fabric/src/fabric_db_create.erl | 7 +- src/fabric/src/fabric_db_info.erl | 15 ++- src/fabric/src/fabric_util.erl | 9 ++ src/mem3/src/mem3.erl | 12 +- 14 files changed, 320 insertions(+), 77 deletions(-) create mode 100644 src/couch/src/couch_partition.erl diff --git a/src/chttpd/src/chttpd.erl b/src/chttpd/src/chttpd.erl index b606ad4142a..2f241cdadd4 100644 --- a/src/chttpd/src/chttpd.erl +++ b/src/chttpd/src/chttpd.erl @@ -905,6 +905,8 @@ error_info({error, {illegal_database_name, Name}}) -> {400, <<"illegal_database_name">>, Message}; error_info({illegal_docid, Reason}) -> {400, 
<<"illegal_docid">>, Reason}; +error_info({illegal_partition, Reason}) -> + {400, <<"illegal_partition">>, Reason}; error_info({_DocID,{illegal_docid,DocID}}) -> {400, <<"illegal_docid">>,DocID}; error_info({error, {database_name_too_long, DbName}}) -> diff --git a/src/chttpd/src/chttpd_db.erl b/src/chttpd/src/chttpd_db.erl index 75904672b95..c4f3686fbfc 100644 --- a/src/chttpd/src/chttpd_db.erl +++ b/src/chttpd/src/chttpd_db.erl @@ -16,6 +16,7 @@ -include_lib("couch/include/couch_db.hrl"). -include_lib("couch_mrview/include/couch_mrview.hrl"). +-include_lib("mem3/include/mem3.hrl"). -export([handle_request/1, handle_compact_req/2, handle_design_req/2, db_req/2, couch_doc_open/4,handle_changes_req/2, @@ -288,10 +289,12 @@ create_db_req(#httpd{}=Req, DbName) -> Q = chttpd:qs_value(Req, "q", config:get("cluster", "q", "8")), P = chttpd:qs_value(Req, "placement", config:get("cluster", "placement")), EngineOpt = parse_engine_opt(Req), + DbProps = parse_partitioned_opt(Req), Options = [ {n, N}, {q, Q}, - {placement, P} + {placement, P}, + {props, DbProps} ] ++ EngineOpt, DocUrl = absolute_uri(Req, "/" ++ couch_util:url_encode(DbName)), case fabric:create_db(DbName, Options) of @@ -317,7 +320,15 @@ delete_db_req(#httpd{}=Req, DbName) -> end. do_db_req(#httpd{path_parts=[DbName|_], user_ctx=Ctx}=Req, Fun) -> - {ok, Db} = couch_db:clustered_db(DbName, Ctx), + Shard = hd(mem3:shards(DbName)), + Props = couch_util:get_value(props, Shard#shard.opts, []), + Opts = case Ctx of + undefined -> + [{props, Props}]; + #user_ctx{} -> + [{user_ctx, Ctx}, {props, Props}] + end, + {ok, Db} = couch_db:clustered_db(DbName, Opts), Fun(Req, Db). 
db_req(#httpd{method='GET',path_parts=[DbName]}=Req, _Db) -> @@ -334,7 +345,7 @@ db_req(#httpd{method='POST', path_parts=[DbName], user_ctx=Ctx}=Req, Db) -> W = chttpd:qs_value(Req, "w", integer_to_list(mem3:quorum(Db))), Options = [{user_ctx,Ctx}, {w,W}], - Doc = couch_doc:from_json_obj_validate(chttpd:json_body(Req)), + Doc = couch_db:doc_from_json_obj_validate(Db, chttpd:json_body(Req)), Doc2 = case Doc#doc.id of <<"">> -> Doc#doc{id=couch_uuids:new(), revs={0, []}}; @@ -421,7 +432,7 @@ db_req(#httpd{method='POST',path_parts=[_,<<"_bulk_docs">>], user_ctx=Ctx}=Req, Options = [{user_ctx,Ctx}, {w,W}] end, Docs = lists:map(fun(JsonObj) -> - Doc = couch_doc:from_json_obj_validate(JsonObj), + Doc = couch_db:doc_from_json_obj_validate(Db, JsonObj), validate_attachment_names(Doc), case Doc#doc.id of <<>> -> Doc#doc{id = couch_uuids:new()}; @@ -778,7 +789,8 @@ db_doc_req(#httpd{method='DELETE'}=Req, Db, DocId) -> Rev -> Body = {[{<<"_rev">>, ?l2b(Rev)},{<<"_deleted">>,true}]} end, - send_updated_doc(Req, Db, DocId, couch_doc_from_req(Req, DocId, Body)); + Doc = couch_doc_from_req(Req, Db, DocId, Body), + send_updated_doc(Req, Db, DocId, Doc); db_doc_req(#httpd{method='GET', mochi_req=MochiReq}=Req, Db, DocId) -> #doc_query_args{ @@ -835,7 +847,7 @@ db_doc_req(#httpd{method='GET', mochi_req=MochiReq}=Req, Db, DocId) -> db_doc_req(#httpd{method='POST', user_ctx=Ctx}=Req, Db, DocId) -> couch_httpd:validate_referer(Req), - couch_doc:validate_docid(DocId, couch_db:name(Db)), + couch_db:validate_docid(Db, DocId), chttpd:validate_ctype(Req, "multipart/form-data"), W = chttpd:qs_value(Req, "w", integer_to_list(mem3:quorum(Db))), @@ -845,7 +857,7 @@ db_doc_req(#httpd{method='POST', user_ctx=Ctx}=Req, Db, DocId) -> case proplists:is_defined("_doc", Form) of true -> Json = ?JSON_DECODE(couch_util:get_value("_doc", Form)), - Doc = couch_doc_from_req(Req, DocId, Json); + Doc = couch_doc_from_req(Req, Db, DocId, Json); false -> Rev = 
couch_doc:parse_rev(list_to_binary(couch_util:get_value("_rev", Form))), Doc = case fabric:open_revs(Db, DocId, [Rev], []) of @@ -891,7 +903,7 @@ db_doc_req(#httpd{method='PUT', user_ctx=Ctx}=Req, Db, DocId) -> update_type = UpdateType } = parse_doc_query(Req), DbName = couch_db:name(Db), - couch_doc:validate_docid(DocId, DbName), + couch_db:validate_docid(Db, DocId), W = chttpd:qs_value(Req, "w", integer_to_list(mem3:quorum(Db))), Options = [{user_ctx,Ctx}, {w,W}], @@ -905,7 +917,7 @@ db_doc_req(#httpd{method='PUT', user_ctx=Ctx}=Req, Db, DocId) -> couch_httpd_multipart:num_mp_writers(mem3:n(mem3:dbname(DbName), DocId)), {ok, Doc0, WaitFun, Parser} = couch_doc:doc_from_multi_part_stream(ContentType, fun() -> receive_request_data(Req) end), - Doc = couch_doc_from_req(Req, DocId, Doc0), + Doc = couch_doc_from_req(Req, Db, DocId, Doc0), try Result = send_updated_doc(Req, Db, DocId, Doc, RespHeaders, UpdateType), WaitFun(), @@ -919,7 +931,7 @@ db_doc_req(#httpd{method='PUT', user_ctx=Ctx}=Req, Db, DocId) -> case chttpd:qs_value(Req, "batch") of "ok" -> % batch - Doc = couch_doc_from_req(Req, DocId, chttpd:json_body(Req)), + Doc = couch_doc_from_req(Req, Db, DocId, chttpd:json_body(Req)), spawn(fun() -> case catch(fabric:update_doc(Db, Doc, Options)) of @@ -936,7 +948,7 @@ db_doc_req(#httpd{method='PUT', user_ctx=Ctx}=Req, Db, DocId) -> _Normal -> % normal Body = chttpd:json_body(Req), - Doc = couch_doc_from_req(Req, DocId, Body), + Doc = couch_doc_from_req(Req, Db, DocId, Body), send_updated_doc(Req, Db, DocId, Doc, RespHeaders, UpdateType) end end; @@ -1217,7 +1229,7 @@ update_doc(Db, DocId, #doc{deleted=Deleted, body=DocBody}=Doc, Options) -> Body = {[{ok, true}, {id, DocId}, {rev, NewRevStr}]}, {Status, {etag, Etag}, Body}. 
-couch_doc_from_req(Req, DocId, #doc{revs=Revs} = Doc) -> +couch_doc_from_req(Req, _Db, DocId, #doc{revs=Revs} = Doc) -> validate_attachment_names(Doc), Rev = case chttpd:qs_value(Req, "rev") of undefined -> @@ -1244,8 +1256,9 @@ couch_doc_from_req(Req, DocId, #doc{revs=Revs} = Doc) -> end end, Doc#doc{id=DocId, revs=Revs2}; -couch_doc_from_req(Req, DocId, Json) -> - couch_doc_from_req(Req, DocId, couch_doc:from_json_obj_validate(Json)). +couch_doc_from_req(Req, Db, DocId, Json) -> + Doc = couch_db:doc_from_json_obj_validate(Db, Json), + couch_doc_from_req(Req, Db, DocId, Doc). % Useful for debugging @@ -1435,7 +1448,7 @@ db_attachment_req(#httpd{method=Method, user_ctx=Ctx}=Req, Db, DocId, FileNamePa % check for the existence of the doc to handle the 404 case. couch_doc_open(Db, DocId, nil, []) end, - couch_doc:validate_docid(DocId, couch_db:name(Db)), + couch_db:validate_docid(Db, DocId), #doc{id=DocId}; Rev -> case fabric:open_revs(Db, DocId, [Rev], [{user_ctx,Ctx}]) of @@ -1558,6 +1571,23 @@ parse_engine_opt(Req) -> end end. + +parse_partitioned_opt(Req) -> + case chttpd:qs_value(Req, "partitioned") of + undefined -> + []; + "false" -> + []; + "true" -> + [ + {partitioned, true}, + {hash, [couch_partition, hash, []]} + ]; + _ -> + throw({bad_request, <<"Invalid `partitioned` parameter">>}) + end. 
+ + parse_doc_query({Key, Value}, Args) -> case {Key, Value} of {"attachments", "true"} -> @@ -1777,16 +1807,17 @@ bulk_get_open_doc_revs(Db, {Props}, Options) -> bulk_get_open_doc_revs1(Db, Props, Options, {}) -> - case parse_field(<<"id">>, couch_util:get_value(<<"id">>, Props)) of - {error, {DocId, Error, Reason}} -> - {DocId, {error, {null, Error, Reason}}, Options}; - - {ok, undefined} -> + case couch_util:get_value(<<"id">>, Props) of + undefined -> Error = {null, bad_request, <<"document id missed">>}, {null, {error, Error}, Options}; - - {ok, DocId} -> - bulk_get_open_doc_revs1(Db, Props, Options, {DocId}) + DocId -> + try + couch_db:validate_docid(Db, DocId), + bulk_get_open_doc_revs1(Db, Props, Options, {DocId}) + catch throw:{Error, Reason} -> + {DocId, {error, {null, Error, Reason}}, Options} + end end; bulk_get_open_doc_revs1(Db, Props, Options, {DocId}) -> RevStr = couch_util:get_value(<<"rev">>, Props), @@ -1826,16 +1857,6 @@ bulk_get_open_doc_revs1(Db, Props, _, {DocId, Revs, Options}) -> end. 
-parse_field(<<"id">>, undefined) -> - {ok, undefined}; -parse_field(<<"id">>, Value) -> - try - ok = couch_doc:validate_docid(Value), - {ok, Value} - catch - throw:{Error, Reason} -> - {error, {Value, Error, Reason}} - end; parse_field(<<"rev">>, undefined) -> {ok, undefined}; parse_field(<<"rev">>, Value) -> diff --git a/src/chttpd/src/chttpd_show.erl b/src/chttpd/src/chttpd_show.erl index c6d232c969f..a724189cff7 100644 --- a/src/chttpd/src/chttpd_show.erl +++ b/src/chttpd/src/chttpd_show.erl @@ -132,7 +132,7 @@ send_doc_update_response(Req, Db, DDoc, UpdateName, Doc, DocId) -> _ -> Options = [{user_ctx, Req#httpd.user_ctx}, {w, W}] end, - NewDoc = couch_doc:from_json_obj_validate({NewJsonDoc}), + NewDoc = couch_db:doc_from_json_obj_validate(Db, {NewJsonDoc}), couch_doc:validate_docid(NewDoc#doc.id), {UpdateResult, NewRev} = fabric:update_doc(Db, NewDoc, Options), NewRevStr = couch_doc:rev_to_str(NewRev), diff --git a/src/couch/src/couch_db.erl b/src/couch/src/couch_db.erl index 8ff73e4d297..2c6f41bf77a 100644 --- a/src/couch/src/couch_db.erl +++ b/src/couch/src/couch_db.erl @@ -57,6 +57,7 @@ is_system_db/1, is_clustered/1, is_system_db_name/1, + is_partitioned/1, set_revs_limit/2, set_purge_infos_limit/2, @@ -85,6 +86,9 @@ get_minimum_purge_seq/1, purge_client_exists/3, + validate_docid/2, + doc_from_json_obj_validate/2, + update_doc/3, update_doc/4, update_docs/4, @@ -217,6 +221,10 @@ is_clustered(#db{}) -> is_clustered(?OLD_DB_REC = Db) -> ?OLD_DB_MAIN_PID(Db) == undefined. +is_partitioned(#db{options = Options}) -> + Props = couch_util:get_value(props, Options, []), + couch_util:get_value(partitioned, Props, false). + ensure_full_commit(#db{main_pid=Pid, instance_start_time=StartTime}) -> ok = gen_server:call(Pid, full_commit, infinity), {ok, StartTime}. @@ -798,6 +806,30 @@ name(#db{name=Name}) -> name(?OLD_DB_REC = Db) -> ?OLD_DB_NAME(Db). 
+ +validate_docid(#db{} = Db, DocId) when is_binary(DocId) -> + couch_doc:validate_docid(DocId, name(Db)), + case is_partitioned(Db) of + true -> + couch_partition:validate_docid(DocId); + false -> + ok + end. + + +doc_from_json_obj_validate(#db{} = Db, DocJson) -> + Doc = couch_doc:from_json_obj_validate(DocJson, name(Db)), + {Props} = DocJson, + case couch_util:get_value(<<"_id">>, Props) of + DocId when is_binary(DocId) -> + % Only validate the docid if it was provided + validate_docid(Db, DocId); + _ -> + ok + end, + Doc. + + update_doc(Db, Doc, Options) -> update_doc(Db, Doc, Options, interactive_edit). diff --git a/src/couch/src/couch_httpd.erl b/src/couch/src/couch_httpd.erl index 861fd58c426..3cdfc0ca31a 100644 --- a/src/couch/src/couch_httpd.erl +++ b/src/couch/src/couch_httpd.erl @@ -878,6 +878,8 @@ error_info(md5_mismatch) -> {400, <<"content_md5_mismatch">>, <<"Possible message corruption.">>}; error_info({illegal_docid, Reason}) -> {400, <<"illegal_docid">>, Reason}; +error_info({illegal_partition, Reason}) -> + {400, <<"illegal_partition">>, Reason}; error_info(not_found) -> {404, <<"not_found">>, <<"missing">>}; error_info({not_found, Reason}) -> diff --git a/src/couch/src/couch_httpd_db.erl b/src/couch/src/couch_httpd_db.erl index ced146e396f..6cfae9610b9 100644 --- a/src/couch/src/couch_httpd_db.erl +++ b/src/couch/src/couch_httpd_db.erl @@ -266,8 +266,7 @@ db_req(#httpd{method='GET',path_parts=[_DbName]}=Req, Db) -> db_req(#httpd{method='POST',path_parts=[_DbName]}=Req, Db) -> couch_httpd:validate_ctype(Req, "application/json"), - DbName = couch_db:name(Db), - Doc = couch_doc:from_json_obj_validate(couch_httpd:json_body(Req), DbName), + Doc = couch_db:doc_from_json_obj_validate(Db, couch_httpd:json_body(Req)), validate_attachment_names(Doc), Doc2 = case Doc#doc.id of <<"">> -> @@ -313,7 +312,6 @@ db_req(#httpd{method='POST',path_parts=[_,<<"_bulk_docs">>]}=Req, Db) -> couch_stats:increment_counter([couchdb, httpd, bulk_requests]), 
couch_httpd:validate_ctype(Req, "application/json"), {JsonProps} = couch_httpd:json_body_obj(Req), - DbName = couch_db:name(Db), case couch_util:get_value(<<"docs">>, JsonProps) of undefined -> send_error(Req, 400, <<"bad_request">>, <<"Missing JSON list of 'docs'">>); @@ -331,7 +329,7 @@ db_req(#httpd{method='POST',path_parts=[_,<<"_bulk_docs">>]}=Req, Db) -> true -> Docs = lists:map( fun({ObjProps} = JsonObj) -> - Doc = couch_doc:from_json_obj_validate(JsonObj, DbName), + Doc = couch_db:doc_from_json_obj_validate(Db, JsonObj), validate_attachment_names(Doc), Id = case Doc#doc.id of <<>> -> couch_uuids:new(); @@ -365,7 +363,7 @@ db_req(#httpd{method='POST',path_parts=[_,<<"_bulk_docs">>]}=Req, Db) -> end; false -> Docs = lists:map(fun(JsonObj) -> - Doc = couch_doc:from_json_obj_validate(JsonObj, DbName), + Doc = couch_db:doc_from_json_obj_validate(Db, JsonObj), validate_attachment_names(Doc), Doc end, DocsArray), @@ -502,17 +500,15 @@ db_req(#httpd{path_parts=[_, DocId | FileNameParts]}=Req, Db) -> db_doc_req(#httpd{method='DELETE'}=Req, Db, DocId) -> % check for the existence of the doc to handle the 404 case. 
couch_doc_open(Db, DocId, nil, []), - DbName = couch_db:name(Db), case couch_httpd:qs_value(Req, "rev") of undefined -> - update_doc(Req, Db, DocId, - couch_doc_from_req(Req, DocId, {[{<<"_deleted">>,true}]}, - DbName)); + JsonObj = {[{<<"_deleted">>,true}]}, + Doc = couch_doc_from_req(Req, Db, DocId, JsonObj), + update_doc(Req, Db, DocId, Doc); Rev -> - update_doc(Req, Db, DocId, - couch_doc_from_req(Req, DocId, - {[{<<"_rev">>, ?l2b(Rev)},{<<"_deleted">>,true}]}, - DbName)) + JsonObj = {[{<<"_rev">>, ?l2b(Rev)},{<<"_deleted">>,true}]}, + Doc = couch_doc_from_req(Req, Db, DocId, JsonObj), + update_doc(Req, Db, DocId, Doc) end; db_doc_req(#httpd{method = 'GET', mochi_req = MochiReq} = Req, Db, DocId) -> @@ -565,8 +561,7 @@ db_doc_req(#httpd{method = 'GET', mochi_req = MochiReq} = Req, Db, DocId) -> db_doc_req(#httpd{method='POST'}=Req, Db, DocId) -> couch_httpd:validate_referer(Req), - DbName = couch_db:name(Db), - couch_doc:validate_docid(DocId, DbName), + couch_db:validate_docid(Db, DocId), couch_httpd:validate_ctype(Req, "multipart/form-data"), Form = couch_httpd:parse_form(Req), case couch_util:get_value("_doc", Form) of @@ -574,7 +569,7 @@ db_doc_req(#httpd{method='POST'}=Req, Db, DocId) -> Rev = couch_doc:parse_rev(couch_util:get_value("_rev", Form)), {ok, [{ok, Doc}]} = couch_db:open_doc_revs(Db, DocId, [Rev], []); Json -> - Doc = couch_doc_from_req(Req, DocId, ?JSON_DECODE(Json), DbName) + Doc = couch_doc_from_req(Req, Db, DocId, ?JSON_DECODE(Json)) end, UpdatedAtts = [ couch_att:new([ @@ -600,15 +595,14 @@ db_doc_req(#httpd{method='POST'}=Req, Db, DocId) -> update_doc(Req, Db, DocId, NewDoc); db_doc_req(#httpd{method='PUT'}=Req, Db, DocId) -> - DbName = couch_db:name(Db), - couch_doc:validate_docid(DocId, DbName), + couch_db:validate_docid(Db, DocId), case couch_util:to_list(couch_httpd:header_value(Req, "Content-Type")) of ("multipart/related;" ++ _) = ContentType -> couch_httpd:check_max_request_length(Req), {ok, Doc0, WaitFun, Parser} = 
couch_doc:doc_from_multi_part_stream( ContentType, fun() -> receive_request_data(Req) end), - Doc = couch_doc_from_req(Req, DocId, Doc0, DbName), + Doc = couch_doc_from_req(Req, Db, DocId, Doc0), try Result = update_doc(Req, Db, DocId, Doc), WaitFun(), @@ -620,7 +614,7 @@ db_doc_req(#httpd{method='PUT'}=Req, Db, DocId) -> end; _Else -> Body = couch_httpd:json_body(Req), - Doc = couch_doc_from_req(Req, DocId, Body, DbName), + Doc = couch_doc_from_req(Req, Db, DocId, Body), update_doc(Req, Db, DocId, Doc) end; @@ -805,7 +799,7 @@ update_doc(Req, Db, DocId, #doc{deleted=Deleted}=Doc, Headers, UpdateType) -> {rev, NewRevStr}]}) end. -couch_doc_from_req(Req, DocId, #doc{revs=Revs}=Doc, _) -> +couch_doc_from_req(Req, _Db, DocId, #doc{revs=Revs}=Doc) -> validate_attachment_names(Doc), Rev = case couch_httpd:qs_value(Req, "rev") of undefined -> @@ -832,9 +826,9 @@ couch_doc_from_req(Req, DocId, #doc{revs=Revs}=Doc, _) -> end end, Doc#doc{id=DocId, revs=Revs2}; -couch_doc_from_req(Req, DocId, Json, DbName) -> - couch_doc_from_req(Req, DocId, - couch_doc:from_json_obj_validate(Json, DbName), DbName). +couch_doc_from_req(Req, Db, DocId, Json) -> + Doc = couch_db:doc_from_json_obj_validate(Db, Json), + couch_doc_from_req(Req, Db, DocId, Doc). % Useful for debugging % couch_doc_open(Db, DocId) -> @@ -1042,7 +1036,7 @@ db_attachment_req(#httpd{method=Method,mochi_req=MochiReq}=Req, Db, DocId, FileN % check for the existence of the doc to handle the 404 case. 
couch_doc_open(Db, DocId, nil, []) end, - couch_doc:validate_docid(DocId, couch_db:name(Db)), + couch_db:validate_docid(Db, DocId), #doc{id=DocId}; Rev -> case couch_db:open_doc_revs(Db, DocId, [Rev], []) of diff --git a/src/couch/src/couch_partition.erl b/src/couch/src/couch_partition.erl new file mode 100644 index 00000000000..783921f0aad --- /dev/null +++ b/src/couch/src/couch_partition.erl @@ -0,0 +1,148 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_partition). + + +-export([ + extract/1, + from_docid/1, + is_member/2, + + validate_dbname/2, + validate_docid/1, + validate_partition/1, + + hash/1 +]). + + +-include_lib("couch/include/couch_db.hrl"). + + +extract(Value) when is_binary(Value) -> + case binary:split(Value, <<":">>) of + [Partition, Rest] -> + {Partition, Rest}; + _ -> + undefined + end; + +extract(_) -> + undefined. + + +from_docid(DocId) -> + case extract(DocId) of + undefined -> + throw({illegal_docid, <<"Doc id must be of form partition:id">>}); + {Partition, _} -> + Partition + end. + + +is_member(DocId, Partition) -> + case extract(DocId) of + {Partition, _} -> + true; + _ -> + false + end. 
+ + +validate_dbname(DbName, Options) when is_list(DbName) -> + validate_dbname(?l2b(DbName), Options); +validate_dbname(DbName, Options) when is_binary(DbName) -> + Props = couch_util:get_value(props, Options, []), + IsPartitioned = couch_util:get_value(partitioned, Props, false), + + if not IsPartitioned -> ok; true -> + + DbsDbName = config:get("mem3", "shards_db", "_dbs"), + NodesDbName = config:get("mem3", "nodes_db", "_nodes"), + UsersDbSuffix = config:get("couchdb", "users_db_suffix", "_users"), + Suffix = couch_db:dbname_suffix(DbName), + + SysDbNames = [ + iolist_to_binary(DbsDbName), + iolist_to_binary(NodesDbName) + | ?SYSTEM_DATABASES + ], + + Suffices = [ + <<"_replicator">>, + <<"_users">>, + iolist_to_binary(UsersDbSuffix) + ], + + IsSysDb = lists:member(DbName, SysDbNames) + orelse lists:member(Suffix, Suffices), + + if not IsSysDb -> ok; true -> + throw({bad_request, <<"Cannot partition a system database">>}) + end + end. + + +validate_docid(<<"_design/", _/binary>>) -> + ok; +validate_docid(<<"_local/", _/binary>>) -> + ok; +validate_docid(DocId) when is_binary(DocId) -> + % When this function is called we already know that + % DocId is already valid thus we only need to + % ensure that the partition exists and is not empty. + case extract(DocId) of + undefined -> + throw({illegal_docid, <<"Doc id must be of form partition:id">>}); + {Partition, PartitionedDocId} -> + validate_partition(Partition), + couch_doc:validate_docid(PartitionedDocId) + end. 
+ + +validate_partition(<<>>) -> + throw({illegal_partition, <<"Partition must not be empty">>}); +validate_partition(Partition) when is_binary(Partition) -> + case Partition of + <<"_", _/binary>> -> + Msg1 = <<"Partition must not start with an underscore">>, + throw({illegal_partition, Msg1}); + _ -> + ok + end, + case couch_util:validate_utf8(Partition) of + true -> + ok; + false -> + Msg2 = <<"Partition must be valid UTF-8">>, + throw({illegal_partition, Msg2}) + end, + case extract(Partition) of + {_, _} -> + Msg3 = <<"Partition must not contain a colon">>, + throw({illegal_partition, Msg3}); + undefined -> + ok + end; +validate_partition(_) -> + throw({illegal_partition, <<"Partition must be a string">>}). + + +% Document ids that start with an underscore +% (i.e., _local and _design) do not contain a +% partition and thus do not use the partition +% hashing. +hash(<<"_", _/binary>> = DocId) -> + erlang:crc32(DocId); +hash(DocId) when is_binary(DocId) -> + erlang:crc32(from_docid(DocId)). diff --git a/src/couch/src/couch_server.erl b/src/couch/src/couch_server.erl index df447d1c706..395ec31a903 100644 --- a/src/couch/src/couch_server.erl +++ b/src/couch/src/couch_server.erl @@ -116,6 +116,7 @@ close_lru() -> create(DbName, Options0) -> Options = maybe_add_sys_db_callbacks(DbName, Options0), + couch_partition:validate_dbname(DbName, Options), case gen_server:call(couch_server, {create, DbName, Options}, infinity) of {ok, Db0} -> Ctx = couch_util:get_value(user_ctx, Options, #user_ctx{}), @@ -221,6 +222,9 @@ init([]) -> % Mark pluggable storage engines as a supported feature config:enable_feature('pluggable-storage-engines'), + % Mark partitioned databases as a supported feature + config:enable_feature(partitions), + % read config and register for configuration changes % just stop if one of the config settings change. 
couch_server_sup diff --git a/src/couch_mrview/src/couch_mrview_show.erl b/src/couch_mrview/src/couch_mrview_show.erl index e2c94bac3f4..c9be5b0630d 100644 --- a/src/couch_mrview/src/couch_mrview_show.erl +++ b/src/couch_mrview/src/couch_mrview_show.erl @@ -132,8 +132,7 @@ send_doc_update_response(Req, Db, DDoc, UpdateName, Doc, DocId) -> _ -> Options = [{user_ctx, Req#httpd.user_ctx}] end, - NewDoc = couch_doc:from_json_obj_validate({NewJsonDoc}), - couch_doc:validate_docid(NewDoc#doc.id), + NewDoc = couch_db:doc_from_json_obj_validate(Db, {NewJsonDoc}), {ok, NewRev} = couch_db:update_doc(Db, NewDoc, Options), NewRevStr = couch_doc:rev_to_str(NewRev), {JsonResp1} = apply_headers(JsonResp0, [ diff --git a/src/fabric/src/fabric.erl b/src/fabric/src/fabric.erl index 092553f2bcf..70d37679a11 100644 --- a/src/fabric/src/fabric.erl +++ b/src/fabric/src/fabric.erl @@ -270,7 +270,7 @@ update_doc(DbName, Doc, Options) -> throw(Error); {ok, []} -> % replication success - #doc{revs = {Pos, [RevId | _]}} = doc(Doc), + #doc{revs = {Pos, [RevId | _]}} = doc(DbName, Doc), {ok, {Pos, RevId}}; {error, [Error]} -> throw(Error) @@ -279,9 +279,10 @@ update_doc(DbName, Doc, Options) -> %% @doc update a list of docs -spec update_docs(dbname(), [#doc{} | json_obj()], [option()]) -> {ok, any()} | any(). -update_docs(DbName, Docs, Options) -> +update_docs(DbName, Docs0, Options) -> try - fabric_doc_update:go(dbname(DbName), docs(Docs), opts(Options)) of + Docs1 = docs(DbName, Docs0), + fabric_doc_update:go(dbname(DbName), Docs1, opts(Options)) of {ok, Results} -> {ok, Results}; {accepted, Results} -> @@ -536,16 +537,25 @@ docid(DocId) when is_list(DocId) -> docid(DocId) -> DocId. -docs(Docs) when is_list(Docs) -> - [doc(D) || D <- Docs]; -docs(Docs) -> +docs(Db, Docs) when is_list(Docs) -> + [doc(Db, D) || D <- Docs]; +docs(_Db, Docs) -> erlang:error({illegal_docs_list, Docs}). 
-doc(#doc{} = Doc) -> +doc(_Db, #doc{} = Doc) -> Doc; -doc({_} = Doc) -> - couch_doc:from_json_obj_validate(Doc); -doc(Doc) -> +doc(Db0, {_} = Doc) -> + Db = case couch_db:is_db(Db0) of + true -> + Db0; + false -> + Shard = hd(mem3:shards(Db0)), + Props = couch_util:get_value(props, Shard#shard.opts, []), + {ok, Db1} = couch_db:clustered_db(Db0, [{props, Props}]), + Db1 + end, + couch_db:doc_from_json_obj_validate(Db, Doc); +doc(_Db, Doc) -> erlang:error({illegal_doc_format, Doc}). design_doc(#doc{} = DDoc) -> diff --git a/src/fabric/src/fabric_db_create.erl b/src/fabric/src/fabric_db_create.erl index 94ffd5643a9..2edc6dc64c8 100644 --- a/src/fabric/src/fabric_db_create.erl +++ b/src/fabric/src/fabric_db_create.erl @@ -23,6 +23,7 @@ go(DbName, Options) -> case validate_dbname(DbName, Options) of ok -> + couch_partition:validate_dbname(DbName, Options), case db_exists(DbName) of true -> {error, file_exists}; @@ -168,6 +169,10 @@ make_document([#shard{dbname=DbName}|_] = Shards, Suffix, Options) -> E when is_binary(E) -> [{<<"engine">>, E}]; _ -> [] end, + DbProps = case couch_util:get_value(props, Options) of + Props when is_list(Props) -> [{<<"props">>, {Props}}]; + _ -> [] + end, #doc{ id = DbName, body = {[ @@ -175,7 +180,7 @@ make_document([#shard{dbname=DbName}|_] = Shards, Suffix, Options) -> {<<"changelog">>, lists:sort(RawOut)}, {<<"by_node">>, {[{K,lists:sort(V)} || {K,V} <- ByNodeOut]}}, {<<"by_range">>, {[{K,lists:sort(V)} || {K,V} <- ByRangeOut]}} - ] ++ EngineProp} + ] ++ EngineProp ++ DbProps} }. db_exists(DbName) -> is_list(catch mem3:shards(DbName)). 
diff --git a/src/fabric/src/fabric_db_info.erl b/src/fabric/src/fabric_db_info.erl index 97a31c23741..fe93878b56d 100644 --- a/src/fabric/src/fabric_db_info.erl +++ b/src/fabric/src/fabric_db_info.erl @@ -112,7 +112,9 @@ merge_results(Info) -> [{disk_format_version, lists:max(X)} | Acc]; (cluster, [X], Acc) -> [{cluster, {X}} | Acc]; - (_, _, Acc) -> + (props, Xs, Acc) -> + [{props, {merge_object(Xs)}} | Acc]; + (_K, _V, Acc) -> Acc end, [{instance_start_time, <<"0">>}], Dict). @@ -132,10 +134,17 @@ merge_object(Objects) -> lists:foldl(fun({K,V},D0) -> orddict:append(K,V,D0) end, D, Props) end, orddict:new(), Objects), orddict:fold(fun - (Key, X, Acc) -> - [{Key, lists:sum(X)} | Acc] + (Key, [X | _] = Xs, Acc) when is_integer(X) -> + [{Key, lists:sum(Xs)} | Acc]; + (Key, [X | _] = Xs, Acc) when is_boolean(X) -> + [{Key, lists:all(fun all_true/1, Xs)} | Acc]; + (_Key, _Xs, Acc) -> + Acc end, [], Dict). +all_true(true) -> true; +all_true(_) -> false. + get_cluster_info(Shards) -> Dict = lists:foldl(fun(#shard{range = R}, Acc) -> dict:update_counter(R, 1, Acc) diff --git a/src/fabric/src/fabric_util.erl b/src/fabric/src/fabric_util.erl index f1bc23ad0ea..5a1585fbc74 100644 --- a/src/fabric/src/fabric_util.erl +++ b/src/fabric/src/fabric_util.erl @@ -19,6 +19,7 @@ -export([log_timeout/2, remove_done_workers/2]). -export([is_users_db/1, is_replicator_db/1]). -export([open_cluster_db/1, open_cluster_db/2]). +-export([is_partitioned/1]). -export([upgrade_mrargs/1]). -compile({inline, [{doc_id_and_rev,1}]}). @@ -239,6 +240,14 @@ doc_id_and_rev(#doc{id=DocId, revs={RevNum, [RevHash|_]}}) -> {DocId, {RevNum, RevHash}}. +is_partitioned(DbName0) when is_binary(DbName0) -> + Shards = mem3:shards(fabric:dbname(DbName0)), + is_partitioned(open_cluster_db(hd(Shards))); + +is_partitioned(Db) -> + couch_db:is_partitioned(Db). 
+ + upgrade_mrargs(#mrargs{} = Args) -> Args; diff --git a/src/mem3/src/mem3.erl b/src/mem3/src/mem3.erl index 832c88d5432..dea0c7a5b9f 100644 --- a/src/mem3/src/mem3.erl +++ b/src/mem3/src/mem3.erl @@ -13,7 +13,7 @@ -module(mem3). -export([start/0, stop/0, restart/0, nodes/0, node_info/2, shards/1, shards/2, - choose_shards/2, n/1, n/2, dbname/1, ushards/1]). + choose_shards/2, n/1, n/2, dbname/1, ushards/1, ushards/2]). -export([get_shard/3, local_shards/1, shard_suffix/1, fold_shards/2]). -export([sync_security/0, sync_security/1]). -export([compare_nodelists/0, compare_shards/1]). @@ -71,7 +71,9 @@ compare_shards(DbName) -> -spec n(DbName::iodata()) -> integer(). n(DbName) -> - n(DbName, <<"foo">>). + % Use _design to avoid issues with + % partition validation + n(DbName, <<"_design/foo">>). n(DbName, DocId) -> length(mem3:shards(DbName, DocId)). @@ -136,6 +138,12 @@ ushards(DbName) -> Shards = ushards(DbName, live_shards(DbName, Nodes, [ordered]), ZoneMap), mem3_util:downcast(Shards). +-spec ushards(DbName::iodata(), DocId::binary()) -> [#shard{}]. +ushards(DbName, DocId) -> + Shards = shards_int(DbName, DocId, [ordered]), + Shard = hd(Shards), + mem3_util:downcast([Shard]). + ushards(DbName, Shards0, ZoneMap) -> {L,S,D} = group_by_proximity(Shards0, ZoneMap), % Prefer shards in the local zone over shards in a different zone, From a5cd9621ab85cf0736f942c2176669c738d5d1be Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Tue, 23 Oct 2018 14:18:35 -0500 Subject: [PATCH 08/14] Implement `couch_db:get_partition_info/2` This feature allows us to fetch statistics for a given partition key which will allow for users to find bloated partitions and such forth. 
Co-authored-by: Garren Smith Co-authored-by: Robert Newson --- src/couch/src/couch_bt_engine.erl | 43 +++++++++ src/couch/src/couch_db.erl | 8 ++ src/couch/src/couch_db_engine.erl | 24 +++++ src/couch/src/couch_partition.erl | 11 +++ src/fabric/src/fabric.erl | 15 +++- src/fabric/src/fabric_db_partition_info.erl | 99 +++++++++++++++++++++ src/fabric/src/fabric_rpc.erl | 5 +- 7 files changed, 203 insertions(+), 2 deletions(-) create mode 100644 src/fabric/src/fabric_db_partition_info.erl diff --git a/src/couch/src/couch_bt_engine.erl b/src/couch/src/couch_bt_engine.erl index 946b74d0c75..7b33c420357 100644 --- a/src/couch/src/couch_bt_engine.erl +++ b/src/couch/src/couch_bt_engine.erl @@ -42,6 +42,7 @@ get_security/1, get_props/1, get_size_info/1, + get_partition_info/2, get_update_seq/1, get_uuid/1, @@ -277,6 +278,48 @@ get_size_info(#st{} = St) -> ]. +partition_size_cb(traverse, Key, {DC, DDC, Sizes}, {Partition, DCAcc, DDCAcc, SizesAcc}) -> + case couch_partition:is_member(Key, Partition) of + true -> + {skip, {Partition, DC + DCAcc, DDC + DDCAcc, reduce_sizes(Sizes, SizesAcc)}}; + false -> + {ok, {Partition, DCAcc, DDCAcc, SizesAcc}} + end; + +partition_size_cb(visit, FDI, _PrevReds, {Partition, DCAcc, DDCAcc, Acc}) -> + InPartition = couch_partition:is_member(FDI#full_doc_info.id, Partition), + Deleted = FDI#full_doc_info.deleted, + case {InPartition, Deleted} of + {true, true} -> + {ok, {Partition, DCAcc, DDCAcc + 1, + reduce_sizes(FDI#full_doc_info.sizes, Acc)}}; + {true, false} -> + {ok, {Partition, DCAcc + 1, DDCAcc, + reduce_sizes(FDI#full_doc_info.sizes, Acc)}}; + {false, _} -> + {ok, {Partition, DCAcc, DDCAcc, Acc}} + end. 
+ + +get_partition_info(#st{} = St, Partition) -> + StartKey = couch_partition:start_key(Partition), + EndKey = couch_partition:end_key(Partition), + Fun = fun partition_size_cb/4, + InitAcc = {Partition, 0, 0, #size_info{}}, + Options = [{start_key, StartKey}, {end_key, EndKey}], + {ok, _, OutAcc} = couch_btree:fold(St#st.id_tree, Fun, InitAcc, Options), + {Partition, DocCount, DocDelCount, SizeInfo} = OutAcc, + [ + {partition, Partition}, + {doc_count, DocCount}, + {doc_del_count, DocDelCount}, + {sizes, [ + {active, SizeInfo#size_info.active}, + {external, SizeInfo#size_info.external} + ]} + ]. + + get_security(#st{header = Header} = St) -> case couch_bt_engine_header:get(Header, security_ptr) of undefined -> diff --git a/src/couch/src/couch_db.erl b/src/couch/src/couch_db.erl index 2c6f41bf77a..74f4a099db6 100644 --- a/src/couch/src/couch_db.erl +++ b/src/couch/src/couch_db.erl @@ -38,6 +38,7 @@ get_compacted_seq/1, get_compactor_pid/1, get_db_info/1, + get_partition_info/2, get_del_doc_count/1, get_doc_count/1, get_epochs/1, @@ -633,6 +634,13 @@ get_db_info(Db) -> ], {ok, InfoList}. +get_partition_info(#db{} = Db, Partition) when is_binary(Partition) -> + Info = couch_db_engine:get_partition_info(Db, Partition), + {ok, Info}; +get_partition_info(_Db, _Partition) -> + throw({bad_request, <<"`partition` is not valid">>}). + + get_design_doc(#db{name = <<"shards/", _/binary>> = ShardDbName}, DDocId0) -> DDocId = couch_util:normalize_ddoc_id(DDocId0), DbName = mem3:dbname(ShardDbName), diff --git a/src/couch/src/couch_db_engine.erl b/src/couch/src/couch_db_engine.erl index 806d352cb60..91d35b0c777 100644 --- a/src/couch/src/couch_db_engine.erl +++ b/src/couch/src/couch_db_engine.erl @@ -44,6 +44,12 @@ -type purge_info() :: {purge_seq(), uuid(), docid(), revs()}. -type epochs() :: [{Node::atom(), UpdateSeq::non_neg_integer()}]. -type size_info() :: [{Name::atom(), Size::non_neg_integer()}]. 
+-type partition_info() :: [
+    {partition, Partition::binary()} |
+    {doc_count, DocCount::non_neg_integer()} |
+    {doc_del_count, DocDelCount::non_neg_integer()} |
+    {sizes, size_info()}
+].
 
 -type write_stream_options() :: [
     {buffer_size, Size::pos_integer()} |
@@ -263,6 +269,18 @@
 -callback get_size_info(DbHandle::db_handle()) -> SizeInfo::size_info().
 
 
+% This returns the information for the given partition.
+% It should just be a list of {Name::atom(), Size::non_neg_integer()}
+% It returns the partition name, doc count, deleted doc count and two sizes:
+%
+%     active - Theoretical minimum number of bytes to store this partition on disk
+%
+%     external - Number of bytes that would be required to represent the
+%         contents of this partition outside of the database
+-callback get_partition_info(DbHandle::db_handle(), Partition::binary()) ->
+    partition_info().
+
+
 % The current update sequence of the database. The update
 % sequence should be incrememnted for every revision added to
 % the database.
@@ -685,6 +703,7 @@
     get_security/1,
     get_props/1,
     get_size_info/1,
+    get_partition_info/2,
     get_update_seq/1,
     get_uuid/1,
@@ -861,6 +880,11 @@ get_size_info(#db{} = Db) ->
     Engine:get_size_info(EngineState).
 
 
+get_partition_info(#db{} = Db, Partition) ->
+    #db{engine = {Engine, EngineState}} = Db,
+    Engine:get_partition_info(EngineState, Partition).
+
+
 get_update_seq(#db{} = Db) ->
     #db{engine = {Engine, EngineState}} = Db,
     Engine:get_update_seq(EngineState).
diff --git a/src/couch/src/couch_partition.erl b/src/couch/src/couch_partition.erl
index 783921f0aad..9ff77a0ff79 100644
--- a/src/couch/src/couch_partition.erl
+++ b/src/couch/src/couch_partition.erl
@@ -18,6 +18,9 @@
     from_docid/1,
     is_member/2,
 
+    start_key/1,
+    end_key/1,
+
     validate_dbname/2,
     validate_docid/1,
     validate_partition/1,
@@ -59,6 +62,14 @@ is_member(DocId, Partition) ->
     end.
 
 
+start_key(Partition) ->
+    <<Partition/binary, ":">>.
+
+
+end_key(Partition) ->
+    <<Partition/binary, ";">>.
+ + validate_dbname(DbName, Options) when is_list(DbName) -> validate_dbname(?l2b(DbName), Options); validate_dbname(DbName, Options) when is_binary(DbName) -> diff --git a/src/fabric/src/fabric.erl b/src/fabric/src/fabric.erl index 70d37679a11..7476ff7b225 100644 --- a/src/fabric/src/fabric.erl +++ b/src/fabric/src/fabric.erl @@ -23,7 +23,7 @@ get_revs_limit/1, get_security/1, get_security/2, get_all_security/1, get_all_security/2, get_purge_infos_limit/1, set_purge_infos_limit/3, - compact/1, compact/2]). + compact/1, compact/2, get_partition_info/2]). % Documents -export([open_doc/3, open_revs/4, get_doc_info/3, get_full_doc_info/3, @@ -86,6 +86,19 @@ all_dbs(Prefix) when is_list(Prefix) -> get_db_info(DbName) -> fabric_db_info:go(dbname(DbName)). +%% @doc returns the size of a given partition +-spec get_partition_info(dbname(), Partition::binary()) -> + {ok, [ + {db_name, binary()} | + {partition, binary()} | + {doc_count, non_neg_integer()} | + {doc_del_count, non_neg_integer()} | + {sizes, json_obj()} + ]}. +get_partition_info(DbName, Partition) -> + fabric_db_partition_info:go(dbname(DbName), Partition). + + %% @doc the number of docs in a database %% @equiv get_doc_count(DbName, <<"_all_docs">>) get_doc_count(DbName) -> diff --git a/src/fabric/src/fabric_db_partition_info.erl b/src/fabric/src/fabric_db_partition_info.erl new file mode 100644 index 00000000000..97e669a5240 --- /dev/null +++ b/src/fabric/src/fabric_db_partition_info.erl @@ -0,0 +1,99 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(fabric_db_partition_info). + +-export([go/2]). + +-include_lib("fabric/include/fabric.hrl"). +-include_lib("mem3/include/mem3.hrl"). + +go(DbName, Partition) -> + Shards = mem3:shards(DbName, <>), + Workers = fabric_util:submit_jobs(Shards, get_partition_info, [Partition]), + RexiMon = fabric_util:create_monitors(Shards), + Fun = fun handle_message/3, + Acc0 = {fabric_dict:init(Workers, nil), []}, + try + case fabric_util:recv(Workers, #shard.ref, Fun, Acc0) of + {ok, Acc} -> {ok, Acc}; + {timeout, {WorkersDict, _}} -> + DefunctWorkers = fabric_util:remove_done_workers( + WorkersDict, + nil + ), + fabric_util:log_timeout( + DefunctWorkers, + "get_partition_info" + ), + {error, timeout}; + {error, Error} -> throw(Error) + end + after + rexi_monitor:stop(RexiMon) + end. + +handle_message({rexi_DOWN, _, {_,NodeRef},_}, _Shard, {Counters, Acc}) -> + case fabric_util:remove_down_workers(Counters, NodeRef) of + {ok, NewCounters} -> + {ok, {NewCounters, Acc}}; + error -> + {error, {nodedown, <<"progress not possible">>}} + end; + +handle_message({rexi_EXIT, Reason}, Shard, {Counters, Acc}) -> + NewCounters = fabric_dict:erase(Shard, Counters), + case fabric_view:is_progress_possible(NewCounters) of + true -> + {ok, {NewCounters, Acc}}; + false -> + {error, Reason} + end; + +handle_message({ok, Info}, #shard{dbname=Name} = Shard, {Counters, Acc}) -> + Acc2 = [Info | Acc], + Counters1 = fabric_dict:erase(Shard, Counters), + case fabric_dict:size(Counters1) =:= 0 of + true -> + [FirstInfo | RestInfos] = Acc2, + PartitionInfo = get_max_partition_size(FirstInfo, RestInfos), + {stop, [{db_name, Name} | format_partition(PartitionInfo)]}; + false -> + {ok, {Counters1, Acc2}} + end; + +handle_message(_, _, Acc) -> + {ok, Acc}. 
+ + +get_max_partition_size(Max, []) -> + Max; +get_max_partition_size(MaxInfo, [NextInfo | Rest]) -> + {sizes, MaxSize} = lists:keyfind(sizes, 1, MaxInfo), + {sizes, NextSize} = lists:keyfind(sizes, 1, NextInfo), + + {external, MaxExtSize} = lists:keyfind(external, 1, MaxSize), + {external, NextExtSize} = lists:keyfind(external, 1, NextSize), + case NextExtSize > MaxExtSize of + true -> + get_max_partition_size(NextInfo, Rest); + false -> + get_max_partition_size(MaxInfo, Rest) + end. + + +% for JS to work nicely we need to convert the size list +% to a jiffy object +format_partition(PartitionInfo) -> + {value, {sizes, Size}, PartitionInfo1} = lists:keytake(sizes, 1, PartitionInfo), + [{sizes, {Size}} | PartitionInfo1]. + diff --git a/src/fabric/src/fabric_rpc.erl b/src/fabric/src/fabric_rpc.erl index 2b00a3668cf..b80cc792ecc 100644 --- a/src/fabric/src/fabric_rpc.erl +++ b/src/fabric/src/fabric_rpc.erl @@ -19,7 +19,7 @@ -export([all_docs/3, changes/3, map_view/4, reduce_view/4, group_info/2]). -export([create_db/1, create_db/2, delete_db/1, reset_validation_funs/1, set_security/3, set_revs_limit/3, create_shard_db_doc/2, - delete_shard_db_doc/2]). + delete_shard_db_doc/2, get_partition_info/2]). -export([get_all_security/2, open_shard/2]). -export([compact/1, compact/2]). -export([get_purge_seq/2, purge_docs/3, set_purge_infos_limit/3]). @@ -195,6 +195,9 @@ get_db_info(DbName) -> get_db_info(DbName, DbOptions) -> with_db(DbName, DbOptions, {couch_db, get_db_info, []}). +get_partition_info(DbName, Partition) -> + with_db(DbName, [], {couch_db, get_partition_info, [Partition]}). + %% equiv get_doc_count(DbName, []) get_doc_count(DbName) -> get_doc_count(DbName, []). From cada5c5928d4141ecb7db9cb38882866edf91744 Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Thu, 25 Oct 2018 14:19:07 -0500 Subject: [PATCH 09/14] Implement partitioned views The benefit of using partitioned databases is that views can then be scoped to a single shard range. 
This allows for views to scale nearly as linearly as document lookups. Co-authored-by: Garren Smith Co-authored-by: Robert Newson --- src/chttpd/src/chttpd_db.erl | 61 +++++++- src/chttpd/src/chttpd_httpd_handlers.erl | 1 + src/chttpd/src/chttpd_view.erl | 8 +- .../chttpd_db_bulk_get_multipart_test.erl | 29 ++-- src/chttpd/test/chttpd_db_bulk_get_test.erl | 14 +- src/couch/src/couch_btree.erl | 6 +- src/couch/src/couch_ejson_compare.erl | 4 + src/couch/src/couch_partition.erl | 5 + src/couch_mrview/include/couch_mrview.hrl | 1 + src/couch_mrview/src/couch_mrview.erl | 17 ++- src/couch_mrview/src/couch_mrview_http.erl | 6 +- src/couch_mrview/src/couch_mrview_index.erl | 31 +++- src/couch_mrview/src/couch_mrview_updater.erl | 38 ++++- src/couch_mrview/src/couch_mrview_util.erl | 139 +++++++++++++++++- src/fabric/src/fabric.erl | 13 +- src/fabric/src/fabric_streams.erl | 1 - src/fabric/src/fabric_util.erl | 21 +++ src/fabric/src/fabric_view.erl | 35 ++++- src/fabric/src/fabric_view_all_docs.erl | 5 +- src/fabric/src/fabric_view_map.erl | 5 +- src/fabric/src/fabric_view_reduce.erl | 5 +- 21 files changed, 384 insertions(+), 61 deletions(-) diff --git a/src/chttpd/src/chttpd_db.erl b/src/chttpd/src/chttpd_db.erl index c4f3686fbfc..bcd082448b9 100644 --- a/src/chttpd/src/chttpd_db.erl +++ b/src/chttpd/src/chttpd_db.erl @@ -22,7 +22,8 @@ db_req/2, couch_doc_open/4,handle_changes_req/2, update_doc_result_to_json/1, update_doc_result_to_json/2, handle_design_info_req/3, handle_view_cleanup_req/2, - update_doc/4, http_code_from_status/1]). + update_doc/4, http_code_from_status/1, + handle_partition_req/2]). -import(chttpd, [send_json/2,send_json/3,send_json/4,send_method_not_allowed/2, @@ -55,6 +56,11 @@ orelse T == <<"_local_docs">> orelse T == <<"_design_docs">>)). +-define(IS_MANGO(T), ( + T == <<"_index">> + orelse T == <<"_find">> + orelse T == <<"_explain">>)). 
+ % Database request handlers handle_request(#httpd{path_parts=[DbName|RestParts],method=Method}=Req)-> case {Method, RestParts} of @@ -254,6 +260,51 @@ handle_view_cleanup_req(Req, Db) -> ok = fabric:cleanup_index_files_all_nodes(Db), send_json(Req, 202, {[{ok, true}]}). + +handle_partition_req(#httpd{method='GET', path_parts=[_,_,PartId]}=Req, Db) -> + couch_partition:validate_partition(PartId), + case couch_db:is_partitioned(Db) of + true -> + {ok, PartitionInfo} = fabric:get_partition_info(Db, PartId), + send_json(Req, {PartitionInfo}); + false -> + throw({bad_request, <<"database is not partitioned">>}) + end; + +handle_partition_req(#httpd{path_parts = [_, _, _]}=Req, _Db) -> + send_method_not_allowed(Req, "GET"); + +handle_partition_req(#httpd{path_parts=[DbName, _, PartId | Rest]}=Req, Db) -> + case couch_db:is_partitioned(Db) of + true -> + couch_partition:validate_partition(PartId), + QS = chttpd:qs(Req), + NewQS = lists:ukeysort(1, [{"partition", ?b2l(PartId)} | QS]), + NewReq = Req#httpd{ + path_parts = [DbName | Rest], + qs = NewQS + }, + case Rest of + [OP | _] when OP == <<"_all_docs">> orelse ?IS_MANGO(OP) -> + case chttpd_handlers:db_handler(OP, fun db_req/2) of + Handler when is_function(Handler, 2) -> + Handler(NewReq, Db); + _ -> + chttpd:send_error(Req, not_found) + end; + [<<"_design">>, _Name, <<"_", _/binary>> | _] -> + handle_design_req(NewReq, Db); + _ -> + chttpd:send_error(Req, not_found) + end; + false -> + throw({bad_request, <<"database is not partitioned">>}) + end; + +handle_partition_req(Req, _Db) -> + chttpd:send_error(Req, not_found). 
+ + handle_design_req(#httpd{ path_parts=[_DbName, _Design, Name, <<"_",_/binary>> = Action | _Rest] }=Req, Db) -> @@ -752,7 +803,7 @@ multi_all_docs_view(Req, Db, OP, Queries) -> ArgQueries = lists:map(fun({Query}) -> QueryArg1 = couch_mrview_http:parse_params(Query, undefined, Args1, [decoded]), - QueryArgs2 = couch_mrview_util:validate_args(QueryArg1), + QueryArgs2 = fabric_util:validate_all_docs_args(Db, QueryArg1), set_namespace(OP, QueryArgs2) end, Queries), Options = [{user_ctx, Req#httpd.user_ctx}], @@ -772,7 +823,7 @@ multi_all_docs_view(Req, Db, OP, Queries) -> all_docs_view(Req, Db, Keys, OP) -> Args0 = couch_mrview_http:parse_params(Req, Keys), Args1 = Args0#mrargs{view_type=map}, - Args2 = couch_mrview_util:validate_args(Args1), + Args2 = fabric_util:validate_all_docs_args(Db, Args1), Args3 = set_namespace(OP, Args2), Options = [{user_ctx, Req#httpd.user_ctx}], Max = chttpd:chunked_response_buffer_size(), @@ -1769,8 +1820,8 @@ set_namespace(<<"_local_docs">>, Args) -> set_namespace(<<"_local">>, Args); set_namespace(<<"_design_docs">>, Args) -> set_namespace(<<"_design">>, Args); -set_namespace(NS, #mrargs{extra = Extra} = Args) -> - Args#mrargs{extra = [{namespace, NS} | Extra]}. +set_namespace(NS, #mrargs{} = Args) -> + couch_mrview_util:set_extra(Args, namespace, NS). %% /db/_bulk_get stuff diff --git a/src/chttpd/src/chttpd_httpd_handlers.erl b/src/chttpd/src/chttpd_httpd_handlers.erl index cb52e2c40dc..000f29b2fef 100644 --- a/src/chttpd/src/chttpd_httpd_handlers.erl +++ b/src/chttpd/src/chttpd_httpd_handlers.erl @@ -32,6 +32,7 @@ url_handler(_) -> no_match. 
db_handler(<<"_view_cleanup">>) -> fun chttpd_db:handle_view_cleanup_req/2; db_handler(<<"_compact">>) -> fun chttpd_db:handle_compact_req/2; db_handler(<<"_design">>) -> fun chttpd_db:handle_design_req/2; +db_handler(<<"_partition">>) -> fun chttpd_db:handle_partition_req/2; db_handler(<<"_temp_view">>) -> fun chttpd_view:handle_temp_view_req/2; db_handler(<<"_changes">>) -> fun chttpd_db:handle_changes_req/2; db_handler(_) -> no_match. diff --git a/src/chttpd/src/chttpd_view.erl b/src/chttpd/src/chttpd_view.erl index 3c05c64ca74..1fce165f91c 100644 --- a/src/chttpd/src/chttpd_view.erl +++ b/src/chttpd/src/chttpd_view.erl @@ -24,7 +24,7 @@ multi_query_view(Req, Db, DDoc, ViewName, Queries) -> QueryArg = couch_mrview_http:parse_params(Query, undefined, Args1, [decoded]), QueryArg1 = couch_mrview_util:set_view_type(QueryArg, ViewName, Views), - couch_mrview_util:validate_args(QueryArg1) + fabric_util:validate_args(Db, DDoc, QueryArg1) end, Queries), Options = [{user_ctx, Req#httpd.user_ctx}], VAcc0 = #vacc{db=Db, req=Req, prepend="\r\n"}, @@ -122,17 +122,19 @@ check_multi_query_reduce_view_overrides_test_() -> t_check_include_docs_throw_validation_error() -> ?_test(begin Req = #httpd{qs = []}, + Db = test_util:fake_db([{name, <<"foo">>}]), Query = {[{<<"include_docs">>, true}]}, Throw = {query_parse_error, <<"`include_docs` is invalid for reduce">>}, - ?assertThrow(Throw, multi_query_view(Req, db, ddoc, <<"v">>, [Query])) + ?assertThrow(Throw, multi_query_view(Req, Db, ddoc, <<"v">>, [Query])) end). t_check_user_can_override_individual_query_type() -> ?_test(begin Req = #httpd{qs = []}, + Db = test_util:fake_db([{name, <<"foo">>}]), Query = {[{<<"include_docs">>, true}, {<<"reduce">>, false}]}, - multi_query_view(Req, db, ddoc, <<"v">>, [Query]), + multi_query_view(Req, Db, ddoc, <<"v">>, [Query]), ?assertEqual(1, meck:num_calls(chttpd, start_delayed_json_response, '_')) end). 
diff --git a/src/chttpd/test/chttpd_db_bulk_get_multipart_test.erl b/src/chttpd/test/chttpd_db_bulk_get_multipart_test.erl index 601f720a031..8a95c92acca 100644 --- a/src/chttpd/test/chttpd_db_bulk_get_multipart_test.erl +++ b/src/chttpd/test/chttpd_db_bulk_get_multipart_test.erl @@ -57,24 +57,27 @@ bulk_get_test_() -> should_require_docs_field(_) -> Req = fake_request({[{}]}), - ?_assertThrow({bad_request, _}, chttpd_db:db_req(Req, nil)). + Db = test_util:fake_db([{name, <<"foo">>}]), + ?_assertThrow({bad_request, _}, chttpd_db:db_req(Req, Db)). should_not_accept_specific_query_params(_) -> Req = fake_request({[{<<"docs">>, []}]}), + Db = test_util:fake_db([{name, <<"foo">>}]), lists:map(fun (Param) -> {Param, ?_assertThrow({bad_request, _}, begin ok = meck:expect(chttpd, qs, fun(_) -> [{Param, ""}] end), - chttpd_db:db_req(Req, nil) + chttpd_db:db_req(Req, Db) end)} end, ["rev", "open_revs", "atts_since", "w", "new_edits"]). should_return_empty_results_on_no_docs(Pid) -> Req = fake_request({[{<<"docs">>, []}]}), - chttpd_db:db_req(Req, nil), + Db = test_util:fake_db([{name, <<"foo">>}]), + chttpd_db:db_req(Req, Db), Results = get_results_from_response(Pid), ?_assertEqual([], Results). @@ -82,12 +85,13 @@ should_return_empty_results_on_no_docs(Pid) -> should_get_doc_with_all_revs(Pid) -> DocId = <<"docudoc">>, Req = fake_request(DocId), + Db = test_util:fake_db([{name, <<"foo">>}]), DocRevA = #doc{id = DocId, body = {[{<<"_rev">>, <<"1-ABC">>}]}}, DocRevB = #doc{id = DocId, body = {[{<<"_rev">>, <<"1-CDE">>}]}}, mock_open_revs(all, {ok, [{ok, DocRevA}, {ok, DocRevB}]}), - chttpd_db:db_req(Req, nil), + chttpd_db:db_req(Req, Db), Result = get_results_from_response(Pid), ?_assertEqual(DocId, couch_util:get_value(<<"_id">>, Result)). 
@@ -97,7 +101,8 @@ should_validate_doc_with_bad_id(Pid) -> DocId = <<"_docudoc">>, Req = fake_request(DocId), - chttpd_db:db_req(Req, nil), + Db = test_util:fake_db([{name, <<"foo">>}]), + chttpd_db:db_req(Req, Db), Result = get_results_from_response(Pid), ?assertEqual(DocId, couch_util:get_value(<<"id">>, Result)), @@ -113,7 +118,8 @@ should_validate_doc_with_bad_rev(Pid) -> Rev = <<"revorev">>, Req = fake_request(DocId, Rev), - chttpd_db:db_req(Req, nil), + Db = test_util:fake_db([{name, <<"foo">>}]), + chttpd_db:db_req(Req, Db), Result = get_results_from_response(Pid), ?assertEqual(DocId, couch_util:get_value(<<"id">>, Result)), @@ -129,8 +135,9 @@ should_validate_missing_doc(Pid) -> Rev = <<"1-revorev">>, Req = fake_request(DocId, Rev), + Db = test_util:fake_db([{name, <<"foo">>}]), mock_open_revs([{1,<<"revorev">>}], {ok, []}), - chttpd_db:db_req(Req, nil), + chttpd_db:db_req(Req, Db), Result = get_results_from_response(Pid), ?assertEqual(DocId, couch_util:get_value(<<"id">>, Result)), @@ -146,8 +153,9 @@ should_validate_bad_atts_since(Pid) -> Rev = <<"1-revorev">>, Req = fake_request(DocId, Rev, <<"badattsince">>), + Db = test_util:fake_db([{name, <<"foo">>}]), mock_open_revs([{1,<<"revorev">>}], {ok, []}), - chttpd_db:db_req(Req, nil), + chttpd_db:db_req(Req, Db), Result = get_results_from_response(Pid), ?assertEqual(DocId, couch_util:get_value(<<"id">>, Result)), @@ -163,11 +171,12 @@ should_include_attachments_when_atts_since_specified(_) -> Rev = <<"1-revorev">>, Req = fake_request(DocId, Rev, [<<"1-abc">>]), + Db = test_util:fake_db([{name, <<"foo">>}]), mock_open_revs([{1,<<"revorev">>}], {ok, []}), - chttpd_db:db_req(Req, nil), + chttpd_db:db_req(Req, Db), ?_assert(meck:called(fabric, open_revs, - [nil, DocId, [{1, <<"revorev">>}], + ['_', DocId, [{1, <<"revorev">>}], [{atts_since, [{1, <<"abc">>}]}, attachments, {user_ctx, undefined}]])). 
diff --git a/src/chttpd/test/chttpd_db_bulk_get_test.erl b/src/chttpd/test/chttpd_db_bulk_get_test.erl index 908d1f02237..864e7079a83 100644 --- a/src/chttpd/test/chttpd_db_bulk_get_test.erl +++ b/src/chttpd/test/chttpd_db_bulk_get_test.erl @@ -95,7 +95,7 @@ should_get_doc_with_all_revs(Pid) -> DocRevB = #doc{id = DocId, body = {[{<<"_rev">>, <<"1-CDE">>}]}}, mock_open_revs(all, {ok, [{ok, DocRevA}, {ok, DocRevB}]}), - chttpd_db:db_req(Req, nil), + chttpd_db:db_req(Req, test_util:fake_db([{name, <<"foo">>}])), [{Result}] = get_results_from_response(Pid), ?assertEqual(DocId, couch_util:get_value(<<"id">>, Result)), @@ -115,7 +115,7 @@ should_validate_doc_with_bad_id(Pid) -> DocId = <<"_docudoc">>, Req = fake_request(DocId), - chttpd_db:db_req(Req, nil), + chttpd_db:db_req(Req, test_util:fake_db([{name, <<"foo">>}])), [{Result}] = get_results_from_response(Pid), ?assertEqual(DocId, couch_util:get_value(<<"id">>, Result)), @@ -138,7 +138,7 @@ should_validate_doc_with_bad_rev(Pid) -> Rev = <<"revorev">>, Req = fake_request(DocId, Rev), - chttpd_db:db_req(Req, nil), + chttpd_db:db_req(Req, test_util:fake_db([{name, <<"foo">>}])), [{Result}] = get_results_from_response(Pid), ?assertEqual(DocId, couch_util:get_value(<<"id">>, Result)), @@ -162,7 +162,7 @@ should_validate_missing_doc(Pid) -> Req = fake_request(DocId, Rev), mock_open_revs([{1,<<"revorev">>}], {ok, []}), - chttpd_db:db_req(Req, nil), + chttpd_db:db_req(Req, test_util:fake_db([{name, <<"foo">>}])), [{Result}] = get_results_from_response(Pid), ?assertEqual(DocId, couch_util:get_value(<<"id">>, Result)), @@ -186,7 +186,7 @@ should_validate_bad_atts_since(Pid) -> Req = fake_request(DocId, Rev, <<"badattsince">>), mock_open_revs([{1,<<"revorev">>}], {ok, []}), - chttpd_db:db_req(Req, nil), + chttpd_db:db_req(Req, test_util:fake_db([{name, <<"foo">>}])), [{Result}] = get_results_from_response(Pid), ?assertEqual(DocId, couch_util:get_value(<<"id">>, Result)), @@ -210,10 +210,10 @@ 
should_include_attachments_when_atts_since_specified(_) -> Req = fake_request(DocId, Rev, [<<"1-abc">>]), mock_open_revs([{1,<<"revorev">>}], {ok, []}), - chttpd_db:db_req(Req, nil), + chttpd_db:db_req(Req, test_util:fake_db([{name, <<"foo">>}])), ?_assert(meck:called(fabric, open_revs, - [nil, DocId, [{1, <<"revorev">>}], + ['_', DocId, [{1, <<"revorev">>}], [{atts_since, [{1, <<"abc">>}]}, attachments, {user_ctx, undefined}]])). diff --git a/src/couch/src/couch_btree.erl b/src/couch/src/couch_btree.erl index daf846ba89e..ea0cf69e967 100644 --- a/src/couch/src/couch_btree.erl +++ b/src/couch/src/couch_btree.erl @@ -133,7 +133,9 @@ make_group_fun(Bt, exact) -> end; make_group_fun(Bt, GroupLevel) when is_integer(GroupLevel), GroupLevel > 0 -> fun - ({[_|_] = Key1, _}, {[_|_] = Key2, _}) -> + GF({{p, Partition, Key1}, Val1}, {{p, Partition, Key2}, Val2}) -> + GF({Key1, Val1}, {Key2, Val2}); + GF({[_|_] = Key1, _}, {[_|_] = Key2, _}) -> SL1 = lists:sublist(Key1, GroupLevel), SL2 = lists:sublist(Key2, GroupLevel), case less(Bt, {SL1, nil}, {SL2, nil}) of @@ -147,7 +149,7 @@ make_group_fun(Bt, GroupLevel) when is_integer(GroupLevel), GroupLevel > 0 -> _ -> false end; - ({Key1, _}, {Key2, _}) -> + GF({Key1, _}, {Key2, _}) -> case less(Bt, {Key1, nil}, {Key2, nil}) of false -> case less(Bt, {Key2, nil}, {Key1, nil}) of diff --git a/src/couch/src/couch_ejson_compare.erl b/src/couch/src/couch_ejson_compare.erl index 81adbb8f547..ca36c865664 100644 --- a/src/couch/src/couch_ejson_compare.erl +++ b/src/couch/src/couch_ejson_compare.erl @@ -22,6 +22,10 @@ init() -> Dir = code:priv_dir(couch), ok = erlang:load_nif(filename:join(Dir, ?MODULE), NumScheds). 
+% partitioned row comparison +less({p, PA, A}, {p, PB, B}) -> + less([PA, A], [PB, B]); + less(A, B) -> try less_nif(A, B) diff --git a/src/couch/src/couch_partition.erl b/src/couch/src/couch_partition.erl index 9ff77a0ff79..f2efcaa5ea3 100644 --- a/src/couch/src/couch_partition.erl +++ b/src/couch/src/couch_partition.erl @@ -20,6 +20,7 @@ start_key/1, end_key/1, + shard_key/1, validate_dbname/2, validate_docid/1, @@ -70,6 +71,10 @@ end_key(Partition) -> <>. +shard_key(Partition) -> + <>. + + validate_dbname(DbName, Options) when is_list(DbName) -> validate_dbname(?l2b(DbName), Options); validate_dbname(DbName, Options) when is_binary(DbName) -> diff --git a/src/couch_mrview/include/couch_mrview.hrl b/src/couch_mrview/include/couch_mrview.hrl index a341e30dbfb..e17aaba9337 100644 --- a/src/couch_mrview/include/couch_mrview.hrl +++ b/src/couch_mrview/include/couch_mrview.hrl @@ -20,6 +20,7 @@ design_opts=[], seq_indexed=false, keyseq_indexed=false, + partitioned=false, lib, views, id_btree=nil, diff --git a/src/couch_mrview/src/couch_mrview.erl b/src/couch_mrview/src/couch_mrview.erl index 391acf41200..ae1d8d6f5d2 100644 --- a/src/couch_mrview/src/couch_mrview.erl +++ b/src/couch_mrview/src/couch_mrview.erl @@ -59,6 +59,7 @@ validate_ddoc_fields(DDoc) -> [{<<"options">>, object}], [{<<"options">>, object}, {<<"include_design">>, boolean}], [{<<"options">>, object}, {<<"local_seq">>, boolean}], + [{<<"options">>, object}, {<<"partitioned">>, boolean}], [{<<"rewrites">>, [string, array]}], [{<<"shows">>, object}, {any, [object, string]}], [{<<"updates">>, object}, {any, [object, string]}], @@ -200,9 +201,19 @@ validate(Db, DDoc) -> end, {ok, #mrst{ language = Lang, - views = Views + views = Views, + partitioned = Partitioned }} = couch_mrview_util:ddoc_to_mrst(couch_db:name(Db), DDoc), + case {couch_db:is_partitioned(Db), Partitioned} of + {false, true} -> + throw({invalid_design_doc, + <<"partitioned option cannot be true in a " + "non-partitioned database.">>}); + 
{_, _} -> + ok + end, + try Views =/= [] andalso couch_query_servers:get_os_process(Lang) of false -> ok; @@ -230,7 +241,7 @@ query_all_docs(Db, Args0, Callback, Acc) -> couch_index_util:hexsig(couch_hash:md5_hash(term_to_binary(Info))) end), Args1 = Args0#mrargs{view_type=map}, - Args2 = couch_mrview_util:validate_args(Args1), + Args2 = couch_mrview_util:validate_all_docs_args(Db, Args1), {ok, Acc1} = case Args2#mrargs.preflight_fun of PFFun when is_function(PFFun, 2) -> PFFun(Sig, Acc); _ -> {ok, Acc} @@ -616,6 +627,8 @@ red_fold(Db, {NthRed, _Lang, View}=RedView, Args, Callback, UAcc) -> end, Acc, OptList), finish_fold(Acc2, []). +red_fold({p, _Partition, Key}, Red, Acc) -> + red_fold(Key, Red, Acc); red_fold(_Key, _Red, #mracc{skip=N}=Acc) when N > 0 -> {ok, Acc#mracc{skip=N-1, last_go=ok}}; red_fold(Key, Red, #mracc{meta_sent=false}=Acc) -> diff --git a/src/couch_mrview/src/couch_mrview_http.erl b/src/couch_mrview/src/couch_mrview_http.erl index 004caef09fa..cdf498e5d76 100644 --- a/src/couch_mrview/src/couch_mrview_http.erl +++ b/src/couch_mrview/src/couch_mrview_http.erl @@ -296,7 +296,7 @@ multi_query_view(Req, Db, DDoc, ViewName, Queries) -> {ok, _, _, Args1} = couch_mrview_util:get_view(Db, DDoc, ViewName, Args0), ArgQueries = lists:map(fun({Query}) -> QueryArg = parse_params(Query, undefined, Args1), - couch_mrview_util:validate_args(QueryArg) + couch_mrview_util:validate_args(Db, DDoc, QueryArg) end, Queries), {ok, Resp2} = couch_httpd:etag_maybe(Req, fun() -> Max = chttpd:chunked_response_buffer_size(), @@ -582,6 +582,10 @@ parse_param(Key, Val, Args, IsDecoded) -> Args#mrargs{callback=couch_util:to_binary(Val)}; "sorted" -> Args#mrargs{sorted=parse_boolean(Val)}; + "partition" -> + Partition = couch_util:to_binary(Val), + couch_partition:validate_partition(Partition), + couch_mrview_util:set_extra(Args, partition, Partition); _ -> BKey = couch_util:to_binary(Key), BVal = couch_util:to_binary(Val), diff --git 
a/src/couch_mrview/src/couch_mrview_index.erl b/src/couch_mrview/src/couch_mrview_index.erl index d3bcfe04bc6..ac433335cbb 100644 --- a/src/couch_mrview/src/couch_mrview_index.erl +++ b/src/couch_mrview/src/couch_mrview_index.erl @@ -40,10 +40,12 @@ get(update_options, #mrst{design_opts = Opts}) -> LocalSeq = couch_util:get_value(<<"local_seq">>, Opts, false), SeqIndexed = couch_util:get_value(<<"seq_indexed">>, Opts, false), KeySeqIndexed = couch_util:get_value(<<"keyseq_indexed">>, Opts, false), + Partitioned = couch_util:get_value(<<"partitioned">>, Opts, false), if IncDesign -> [include_design]; true -> [] end ++ if LocalSeq -> [local_seq]; true -> [] end ++ if KeySeqIndexed -> [keyseq_indexed]; true -> [] end - ++ if SeqIndexed -> [seq_indexed]; true -> [] end; + ++ if SeqIndexed -> [seq_indexed]; true -> [] end + ++ if Partitioned -> [partitioned]; true -> [] end; get(fd, #mrst{fd = Fd}) -> Fd; get(language, #mrst{language = Language}) -> @@ -94,14 +96,15 @@ get(Other, _) -> init(Db, DDoc) -> - couch_mrview_util:ddoc_to_mrst(couch_db:name(Db), DDoc). + {ok, State} = couch_mrview_util:ddoc_to_mrst(couch_db:name(Db), DDoc), + {ok, set_partitioned(Db, State)}. -open(Db, State) -> +open(Db, State0) -> #mrst{ db_name=DbName, sig=Sig - } = State, + } = State = set_partitioned(Db, State0), IndexFName = couch_mrview_util:index_file(DbName, Sig), % If we are upgrading from <=1.2.x, we upgrade the view @@ -244,6 +247,26 @@ verify_index_exists(DbName, Props) -> end. +set_partitioned(Db, State) -> + #mrst{ + design_opts = DesignOpts + } = State, + DbPartitioned = couch_db:is_partitioned(Db), + ViewPartitioned = couch_util:get_value( + <<"partitioned">>, DesignOpts, DbPartitioned), + IsPartitioned = case {DbPartitioned, ViewPartitioned} of + {true, true} -> + true; + {true, false} -> + false; + {false, false} -> + false; + _ -> + throw({bad_request, <<"invalid partition option">>}) + end, + State#mrst{partitioned = IsPartitioned}. 
+ + ensure_local_purge_docs(DbName, DDocs) -> couch_util:with_db(DbName, fun(Db) -> lists:foreach(fun(DDoc) -> diff --git a/src/couch_mrview/src/couch_mrview_updater.erl b/src/couch_mrview/src/couch_mrview_updater.erl index 3383b49b60a..9740e6a28c0 100644 --- a/src/couch_mrview/src/couch_mrview_updater.erl +++ b/src/couch_mrview/src/couch_mrview_updater.erl @@ -65,7 +65,8 @@ purge(_Db, PurgeSeq, PurgedIdRevs, State) -> #mrst{ id_btree=IdBtree, log_btree=LogBtree, - views=Views + views=Views, + partitioned=Partitioned } = State, Ids = [Id || {Id, _Revs} <- PurgedIdRevs], @@ -84,7 +85,11 @@ purge(_Db, PurgeSeq, PurgedIdRevs, State) -> FoldFun = fun ({ViewNum, {Key, Seq, _Op}}, DictAcc2) -> dict:append(ViewNum, {Key, Seq, DocId}, DictAcc2); - ({ViewNum, RowKey}, DictAcc2) -> + ({ViewNum, RowKey0}, DictAcc2) -> + RowKey = if not Partitioned -> RowKey0; true -> + [{RK, _}] = inject_partition([{RowKey0, DocId}]), + RK + end, dict:append(ViewNum, {RowKey, DocId}, DictAcc2) end, lists:foldl(FoldFun, DictAcc, ViewNumRowKeys); @@ -315,7 +320,8 @@ write_kvs(State, UpdateSeq, ViewKVs, DocIdKeys, Seqs, Log0) -> #mrst{ id_btree=IdBtree, log_btree=LogBtree, - first_build=FirstBuild + first_build=FirstBuild, + partitioned=Partitioned } = State, Revs = dict:from_list(dict:fetch_keys(Log0)), @@ -332,9 +338,17 @@ write_kvs(State, UpdateSeq, ViewKVs, DocIdKeys, Seqs, Log0) -> _ -> update_log(LogBtree, Log, Revs, Seqs, FirstBuild) end, - UpdateView = fun(#mrview{id_num=ViewId}=View, {ViewId, {KVs, SKVs}}) -> + UpdateView = fun(#mrview{id_num=ViewId}=View, {ViewId, {KVs0, SKVs}}) -> #mrview{seq_indexed=SIndexed, keyseq_indexed=KSIndexed} = View, - ToRem = couch_util:dict_find(ViewId, ToRemByView, []), + ToRem0 = couch_util:dict_find(ViewId, ToRemByView, []), + {KVs, ToRem} = case Partitioned of + true -> + KVs1 = inject_partition(KVs0), + ToRem1 = inject_partition(ToRem0), + {KVs1, ToRem1}; + false -> + {KVs0, ToRem0} + end, {ok, VBtree2} = couch_btree:add_remove(View#mrview.btree, KVs, 
ToRem), NewUpdateSeq = case VBtree2 =/= View#mrview.btree of true -> UpdateSeq; @@ -382,6 +396,20 @@ write_kvs(State, UpdateSeq, ViewKVs, DocIdKeys, Seqs, Log0) -> log_btree=LogBtree2 }. + +inject_partition(Rows) -> + lists:map(fun + ({{Key, DocId}, Value}) -> + % Adding a row to the view + {Partition, _} = couch_partition:extract(DocId), + {{{p, Partition, Key}, DocId}, Value}; + ({Key, DocId}) -> + % Removing a row based on values in id_tree + {Partition, _} = couch_partition:extract(DocId), + {{p, Partition, Key}, DocId} + end, Rows). + + update_id_btree(Btree, DocIdKeys, true) -> ToAdd = [{Id, DIKeys} || {Id, DIKeys} <- DocIdKeys, DIKeys /= []], couch_btree:query_modify(Btree, [], ToAdd, []); diff --git a/src/couch_mrview/src/couch_mrview_util.erl b/src/couch_mrview/src/couch_mrview_util.erl index 4fd82e0af1e..b879d1242c8 100644 --- a/src/couch_mrview/src/couch_mrview_util.erl +++ b/src/couch_mrview/src/couch_mrview_util.erl @@ -26,12 +26,13 @@ -export([temp_view_to_ddoc/1]). -export([calculate_external_size/1]). -export([calculate_active_size/1]). --export([validate_args/1]). +-export([validate_all_docs_args/2, validate_args/1, validate_args/3]). -export([maybe_load_doc/3, maybe_load_doc/4]). -export([maybe_update_index_file/1]). -export([extract_view/4, extract_view_reduce/1]). -export([get_view_keys/1, get_view_queries/1]). -export([set_view_type/3]). +-export([set_extra/3, get_extra/2, get_extra/3]). -export([changes_key_opts/2]). -export([fold_changes/4]). -export([to_key_seq/1]). @@ -39,6 +40,10 @@ -define(MOD, couch_mrview_index). -define(GET_VIEW_RETRY_COUNT, 1). -define(GET_VIEW_RETRY_DELAY, 50). +-define(LOWEST_KEY, null). +-define(HIGHEST_KEY, {<<255, 255, 255, 255>>}). +-define(LOWEST(A, B), (if A < B -> A; true -> B end)). +-define(HIGHEST(A, B), (if A > B -> A; true -> B end)). -include_lib("couch/include/couch_db.hrl"). -include_lib("couch_mrview/include/couch_mrview.hrl"). 
@@ -94,7 +99,7 @@ get_view(Db, DDoc, ViewName, Args0) -> get_view_index_pid(Db, DDoc, ViewName, Args0) -> ArgCheck = fun(InitState) -> Args1 = set_view_type(Args0, ViewName, InitState#mrst.views), - {ok, validate_args(Args1)} + {ok, validate_args(InitState, Args1)} end, couch_index_server:get_index(?MOD, Db, DDoc, ArgCheck). @@ -169,6 +174,7 @@ ddoc_to_mrst(DbName, #doc{id=Id, body={Fields}}) -> {DesignOpts} = proplists:get_value(<<"options">>, Fields, {[]}), SeqIndexed = proplists:get_value(<<"seq_indexed">>, DesignOpts, false), KeySeqIndexed = proplists:get_value(<<"keyseq_indexed">>, DesignOpts, false), + Partitioned = proplists:get_value(<<"partitioned">>, DesignOpts, false), {RawViews} = couch_util:get_value(<<"views">>, Fields, {[]}), BySrc = lists:foldl(MakeDict, dict:new(), RawViews), @@ -189,7 +195,8 @@ ddoc_to_mrst(DbName, #doc{id=Id, body={Fields}}) -> language=Language, design_opts=DesignOpts, seq_indexed=SeqIndexed, - keyseq_indexed=KeySeqIndexed + keyseq_indexed=KeySeqIndexed, + partitioned=Partitioned }, SigInfo = {Views, Language, DesignOpts, couch_index_util:sort_lib(Lib)}, {ok, IdxState#mrst{sig=couch_hash:md5_hash(term_to_binary(SigInfo))}}. @@ -213,6 +220,19 @@ set_view_type(Args, ViewName, [View | Rest]) -> end. +set_extra(#mrargs{} = Args, Key, Value) -> + Extra0 = Args#mrargs.extra, + Extra1 = lists:ukeysort(1, [{Key, Value} | Extra0]), + Args#mrargs{extra = Extra1}. + + +get_extra(#mrargs{} = Args, Key) -> + couch_util:get_value(Key, Args#mrargs.extra). + +get_extra(#mrargs{} = Args, Key, Default) -> + couch_util:get_value(Key, Args#mrargs.extra, Default). + + extract_view(_Lang, _Args, _ViewName, []) -> throw({not_found, missing_named_view}); extract_view(Lang, #mrargs{view_type=map}=Args, Name, [View | Rest]) -> @@ -476,6 +496,49 @@ fold_reduce({NthRed, Lang, View}, Fun, Acc, Options) -> couch_btree:fold_reduce(Bt, WrapperFun, Acc, Options). 
+validate_args(Db, DDoc, Args) -> + {ok, State} = couch_mrview_index:init(Db, DDoc), + validate_args(State, Args). + + +validate_args(#mrst{} = State, Args0) -> + Args = validate_args(Args0), + + ViewPartitioned = State#mrst.partitioned, + Partition = get_extra(Args, partition), + + case {ViewPartitioned, Partition} of + {true, undefined} -> + Msg1 = <<"`partition` parameter is mandatory " + "for queries to this view.">>, + mrverror(Msg1); + {true, _} -> + apply_partition(Args, Partition); + {false, undefined} -> + Args; + {false, Value} when is_binary(Value) -> + Msg2 = <<"`partition` parameter is not " + "supported in this design doc">>, + mrverror(Msg2) + end. + + +validate_all_docs_args(Db, Args0) -> + Args = validate_args(Args0), + + DbPartitioned = couch_db:is_partitioned(Db), + Partition = get_extra(Args, partition), + + case {DbPartitioned, Partition} of + {false, <<_/binary>>} -> + mrverror(<<"`partition` parameter is not supported on this db">>); + {_, <<_/binary>>} -> + apply_all_docs_partition(Args, Partition); + _ -> + Args + end. + + validate_args(Args) -> GroupLevel = determine_group_level(Args), Reduce = Args#mrargs.reduce, @@ -598,6 +661,12 @@ validate_args(Args) -> _ -> mrverror(<<"Invalid value for `sorted`.">>) end, + case get_extra(Args, partition) of + undefined -> ok; + Partition when is_binary(Partition), Partition /= <<>> -> ok; + _ -> mrverror(<<"Invalid value for `partition`.">>) + end, + Args#mrargs{ start_key_docid=SKDocId, end_key_docid=EKDocId, @@ -616,6 +685,70 @@ determine_group_level(#mrargs{group=true, group_level=undefined}) -> determine_group_level(#mrargs{group_level=GroupLevel}) -> GroupLevel. 
+apply_partition(#mrargs{keys=[{p, _, _} | _]} = Args, _Partition) -> + Args; % already applied + +apply_partition(#mrargs{keys=Keys} = Args, Partition) when Keys /= undefined -> + Args#mrargs{keys=[{p, Partition, K} || K <- Keys]}; + +apply_partition(#mrargs{start_key={p, _, _}, end_key={p, _, _}} = Args, _Partition) -> + Args; % already applied. + +apply_partition(Args, Partition) -> + #mrargs{ + direction = Dir, + start_key = StartKey, + end_key = EndKey + } = Args, + + {DefSK, DefEK} = case Dir of + fwd -> {?LOWEST_KEY, ?HIGHEST_KEY}; + rev -> {?HIGHEST_KEY, ?LOWEST_KEY} + end, + + SK0 = if StartKey /= undefined -> StartKey; true -> DefSK end, + EK0 = if EndKey /= undefined -> EndKey; true -> DefEK end, + + Args#mrargs{ + start_key = {p, Partition, SK0}, + end_key = {p, Partition, EK0} + }. + +%% all_docs is special as it's not really a view and is already +%% effectively partitioned as the partition is a prefix of all keys. +apply_all_docs_partition(#mrargs{} = Args, Partition) -> + #mrargs{ + direction = Dir, + start_key = StartKey, + end_key = EndKey + } = Args, + + {DefSK, DefEK} = case Dir of + fwd -> + { + couch_partition:start_key(Partition), + couch_partition:end_key(Partition) + }; + rev -> + { + couch_partition:end_key(Partition), + couch_partition:start_key(Partition) + } + end, + + SK0 = if StartKey == undefined -> DefSK; true -> StartKey end, + EK0 = if EndKey == undefined -> DefEK; true -> EndKey end, + + {SK1, EK1} = case Dir of + fwd -> {?HIGHEST(DefSK, SK0), ?LOWEST(DefEK, EK0)}; + rev -> {?LOWEST(DefSK, SK0), ?HIGHEST(DefEK, EK0)} + end, + + Args#mrargs{ + start_key = SK1, + end_key = EK1 + }. 
+ check_range(#mrargs{start_key=undefined}, _Cmp) -> ok; diff --git a/src/fabric/src/fabric.erl b/src/fabric/src/fabric.erl index 7476ff7b225..6d04184e63e 100644 --- a/src/fabric/src/fabric.erl +++ b/src/fabric/src/fabric.erl @@ -392,10 +392,11 @@ query_view(Db, Options, GroupId, ViewName, Callback, Acc0, QueryArgs) when is_binary(GroupId) -> DbName = dbname(Db), {ok, DDoc} = ddoc_cache:open(DbName, <<"_design/", GroupId/binary>>), - query_view(DbName, Options, DDoc, ViewName, Callback, Acc0, QueryArgs); -query_view(DbName, Options, DDoc, ViewName, Callback, Acc0, QueryArgs0) -> - Db = dbname(DbName), View = name(ViewName), - case fabric_util:is_users_db(Db) of + query_view(Db, Options, DDoc, ViewName, Callback, Acc0, QueryArgs); +query_view(Db, Options, DDoc, ViewName, Callback, Acc0, QueryArgs0) -> + DbName = dbname(Db), + View = name(ViewName), + case fabric_util:is_users_db(DbName) of true -> FakeDb = fabric_util:open_cluster_db(DbName, Options), couch_users_db:after_doc_read(DDoc, FakeDb); @@ -403,9 +404,9 @@ query_view(DbName, Options, DDoc, ViewName, Callback, Acc0, QueryArgs0) -> ok end, {ok, #mrst{views=Views, language=Lang}} = - couch_mrview_util:ddoc_to_mrst(Db, DDoc), + couch_mrview_util:ddoc_to_mrst(DbName, DDoc), QueryArgs1 = couch_mrview_util:set_view_type(QueryArgs0, View, Views), - QueryArgs2 = couch_mrview_util:validate_args(QueryArgs1), + QueryArgs2 = fabric_util:validate_args(Db, DDoc, QueryArgs1), VInfo = couch_mrview_util:extract_view(Lang, QueryArgs2, View, Views), case is_reduce_view(QueryArgs2) of true -> diff --git a/src/fabric/src/fabric_streams.erl b/src/fabric/src/fabric_streams.erl index ae0c2be55d4..288c67cab94 100644 --- a/src/fabric/src/fabric_streams.erl +++ b/src/fabric/src/fabric_streams.erl @@ -39,7 +39,6 @@ start(Workers0, Keypos, StartFun, Replacements) -> Timeout = fabric_util:request_timeout(), case rexi_utils:recv(Workers0, Keypos, Fun, Acc, Timeout, infinity) of {ok, #stream_acc{workers=Workers}} -> - true = 
fabric_view:is_progress_possible(Workers), AckedWorkers = fabric_dict:fold(fun(Worker, From, WorkerAcc) -> rexi:stream_start(From), [Worker | WorkerAcc] diff --git a/src/fabric/src/fabric_util.erl b/src/fabric/src/fabric_util.erl index 5a1585fbc74..d65d3c81d13 100644 --- a/src/fabric/src/fabric_util.erl +++ b/src/fabric/src/fabric_util.erl @@ -20,6 +20,7 @@ -export([is_users_db/1, is_replicator_db/1]). -export([open_cluster_db/1, open_cluster_db/2]). -export([is_partitioned/1]). +-export([validate_all_docs_args/2, validate_args/3]). -export([upgrade_mrargs/1]). -compile({inline, [{doc_id_and_rev,1}]}). @@ -248,6 +249,26 @@ is_partitioned(Db) -> couch_db:is_partitioned(Db). +validate_all_docs_args(DbName, Args) when is_binary(DbName) -> + Shards = mem3:shards(fabric:dbname(DbName)), + Db = open_cluster_db(hd(Shards)), + validate_all_docs_args(Db, Args); + +validate_all_docs_args(Db, Args) -> + true = couch_db:is_clustered(Db), + couch_mrview_util:validate_all_docs_args(Db, Args). + + +validate_args(DbName, DDoc, Args) when is_binary(DbName) -> + Shards = mem3:shards(fabric:dbname(DbName)), + Db = open_cluster_db(hd(Shards)), + validate_args(Db, DDoc, Args); + +validate_args(Db, DDoc, Args) -> + true = couch_db:is_clustered(Db), + couch_mrview_util:validate_args(Db, DDoc, Args). 
+ + upgrade_mrargs(#mrargs{} = Args) -> Args; diff --git a/src/fabric/src/fabric_view.erl b/src/fabric/src/fabric_view.erl index 69f42909abe..0ba980a6589 100644 --- a/src/fabric/src/fabric_view.erl +++ b/src/fabric/src/fabric_view.erl @@ -128,8 +128,11 @@ maybe_send_row(State) -> try get_next_row(State) of {_, NewState} when Skip > 0 -> maybe_send_row(NewState#collector{skip=Skip-1}); - {Row, NewState} -> - case Callback(transform_row(possibly_embed_doc(NewState,Row)), AccIn) of + {Row0, NewState} -> + Row1 = possibly_embed_doc(NewState, Row0), + Row2 = detach_partition(Row1), + Row3 = transform_row(Row2), + case Callback(Row3, AccIn) of {stop, Acc} -> {stop, NewState#collector{user_acc=Acc, limit=Limit-1}}; {ok, Acc} -> @@ -194,6 +197,10 @@ possibly_embed_doc(#collector{db_name=DbName, query_args=Args}, _ -> Row end. +detach_partition(#view_row{key={p, _Partition, Key}} = Row) -> + Row#view_row{key = Key}; +detach_partition(#view_row{} = Row) -> + Row. keydict(undefined) -> undefined; @@ -309,10 +316,26 @@ index_of(X, [X|_Rest], I) -> index_of(X, [_|Rest], I) -> index_of(X, Rest, I+1). -get_shards(DbName, #mrargs{stable=true}) -> - mem3:ushards(DbName); -get_shards(DbName, #mrargs{stable=false}) -> - mem3:shards(DbName). +get_shards(Db, #mrargs{} = Args) -> + DbPartitioned = fabric_util:is_partitioned(Db), + Partition = couch_mrview_util:get_extra(Args, partition), + if DbPartitioned orelse Partition == undefined -> ok; true -> + throw({bad_request, <<"partition specified on non-partitioned db">>}) + end, + DbName = fabric:dbname(Db), + % Decide which version of mem3:shards/1,2 or + % mem3:ushards/1,2 to use for the current + % request. + case {Args#mrargs.stable, Partition} of + {true, undefined} -> + mem3:ushards(DbName); + {true, Partition} -> + mem3:ushards(DbName, couch_partition:shard_key(Partition)); + {false, undefined} -> + mem3:shards(DbName); + {false, Partition} -> + mem3:shards(DbName, couch_partition:shard_key(Partition)) + end. 
maybe_update_others(DbName, DDoc, ShardsInvolved, ViewName, #mrargs{update=lazy} = Args) -> diff --git a/src/fabric/src/fabric_view_all_docs.erl b/src/fabric/src/fabric_view_all_docs.erl index a404125faeb..263538f65ef 100644 --- a/src/fabric/src/fabric_view_all_docs.erl +++ b/src/fabric/src/fabric_view_all_docs.erl @@ -20,8 +20,9 @@ -include_lib("couch/include/couch_db.hrl"). -include_lib("couch_mrview/include/couch_mrview.hrl"). -go(DbName, Options, #mrargs{keys=undefined} = QueryArgs, Callback, Acc) -> - Shards = mem3:shards(DbName), +go(Db, Options, #mrargs{keys=undefined} = QueryArgs, Callback, Acc) -> + DbName = fabric:dbname(Db), + Shards = shards(Db, QueryArgs), Workers0 = fabric_util:submit_jobs( Shards, fabric_rpc, all_docs, [Options, QueryArgs]), RexiMon = fabric_util:create_monitors(Workers0), diff --git a/src/fabric/src/fabric_view_map.erl b/src/fabric/src/fabric_view_map.erl index ee51bfe740d..0f5e8bb2341 100644 --- a/src/fabric/src/fabric_view_map.erl +++ b/src/fabric/src/fabric_view_map.erl @@ -24,8 +24,9 @@ go(DbName, Options, GroupId, View, Args, Callback, Acc, VInfo) {ok, DDoc} = fabric:open_doc(DbName, <<"_design/", GroupId/binary>>, []), go(DbName, Options, DDoc, View, Args, Callback, Acc, VInfo); -go(DbName, Options, DDoc, View, Args, Callback, Acc, VInfo) -> - Shards = fabric_view:get_shards(DbName, Args), +go(Db, Options, DDoc, View, Args, Callback, Acc, VInfo) -> + DbName = fabric:dbname(Db), + Shards = fabric_view:get_shards(Db, Args), DocIdAndRev = fabric_util:doc_id_and_rev(DDoc), fabric_view:maybe_update_others(DbName, DocIdAndRev, Shards, View, Args), Repls = fabric_view:get_shard_replacements(DbName, Shards), diff --git a/src/fabric/src/fabric_view_reduce.erl b/src/fabric/src/fabric_view_reduce.erl index b2b8a05f020..84b9bba644f 100644 --- a/src/fabric/src/fabric_view_reduce.erl +++ b/src/fabric/src/fabric_view_reduce.erl @@ -23,10 +23,11 @@ go(DbName, GroupId, View, Args, Callback, Acc0, VInfo) when is_binary(GroupId) - {ok, DDoc} = 
fabric:open_doc(DbName, <<"_design/", GroupId/binary>>, []), go(DbName, DDoc, View, Args, Callback, Acc0, VInfo); -go(DbName, DDoc, VName, Args, Callback, Acc, VInfo) -> +go(Db, DDoc, VName, Args, Callback, Acc, VInfo) -> + DbName = fabric:dbname(Db), DocIdAndRev = fabric_util:doc_id_and_rev(DDoc), RPCArgs = [DocIdAndRev, VName, Args], - Shards = fabric_view:get_shards(DbName, Args), + Shards = fabric_view:get_shards(Db, Args), fabric_view:maybe_update_others(DbName, DocIdAndRev, Shards, VName, Args), Repls = fabric_view:get_shard_replacements(DbName, Shards), StartFun = fun(Shard) -> From dc53a3f88781d4065d09b8f3940d8eff920965b5 Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Thu, 25 Oct 2018 14:26:25 -0500 Subject: [PATCH 10/14] Optimize all_docs queries in a single partition If a user specifies document ids that scope the query to a single partition key we can automatically determine that we only need to consuly a single shard range. Co-authored-by: Robert Newson --- src/fabric/src/fabric_view_all_docs.erl | 26 +++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/src/fabric/src/fabric_view_all_docs.erl b/src/fabric/src/fabric_view_all_docs.erl index 263538f65ef..fdc3bd9886a 100644 --- a/src/fabric/src/fabric_view_all_docs.erl +++ b/src/fabric/src/fabric_view_all_docs.erl @@ -135,6 +135,32 @@ go(DbName, _Options, Workers, QueryArgs, Callback, Acc0) -> {ok, Resp} end. +shards(Db, Args) -> + DbPartitioned = fabric_util:is_partitioned(Db), + Partition = couch_mrview_util:get_extra(Args, partition), + NewArgs = case {DbPartitioned, Partition} of + {true, undefined} -> + % If a user specifies the same partition on both + % the start and end keys we can optimize the + % query by limiting to the partition shard. 
+ Start = couch_partition:extract(Args#mrargs.start_key), + End = couch_partition:extract(Args#mrargs.end_key), + case {Start, End} of + {{Partition, SK}, {Partition, EK}} -> + A1 = Args#mrargs{ + start_key = SK, + end_key = EK + }, + couch_mrview_util:set_extra(A1, partition, Partition); + _ -> + Args + end; + _ -> + Args + end, + fabric_view:get_shards(Db, NewArgs). + + handle_message({rexi_DOWN, _, {_, NodeRef}, _}, _, State) -> fabric_view:check_down_shards(State, NodeRef); From fe6849f5caaafc0a21fc8b4e97b898baffe94817 Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Thu, 25 Oct 2018 14:27:32 -0500 Subject: [PATCH 11/14] Optimize offset/limit for partition queries Now that a single shard handles the entire response we can optimize work normally done in the coordinator by moving it to the RPC worker which then removes the need to send an extra `skip` number of rows to the coordinator. Co-authored-by: Robert Newson --- src/fabric/src/fabric_rpc.erl | 12 +++--------- src/fabric/src/fabric_view.erl | 16 ++++++++++++++++ src/fabric/src/fabric_view_all_docs.erl | 5 +++-- src/fabric/src/fabric_view_map.erl | 5 +++-- src/fabric/src/fabric_view_reduce.erl | 7 ++++--- 5 files changed, 29 insertions(+), 16 deletions(-) diff --git a/src/fabric/src/fabric_rpc.erl b/src/fabric/src/fabric_rpc.erl index b80cc792ecc..97374be1f8c 100644 --- a/src/fabric/src/fabric_rpc.erl +++ b/src/fabric/src/fabric_rpc.erl @@ -118,9 +118,8 @@ do_changes(Db, StartSeq, Enum, Acc0, Opts) -> all_docs(DbName, Options, Args0) -> case fabric_util:upgrade_mrargs(Args0) of - #mrargs{keys=undefined} = Args1 -> + #mrargs{keys=undefined} = Args -> set_io_priority(DbName, Options), - Args = fix_skip_and_limit(Args1), {ok, Db} = get_or_create_db(DbName, Options), CB = get_view_cb(Args), couch_mrview:query_all_docs(Db, Args, CB, Args) @@ -144,7 +143,7 @@ map_view(DbName, {DDocId, Rev}, ViewName, Args0, DbOptions) -> map_view(DbName, DDoc, ViewName, Args0, DbOptions); map_view(DbName, DDoc, ViewName, 
Args0, DbOptions) -> set_io_priority(DbName, DbOptions), - Args = fix_skip_and_limit(fabric_util:upgrade_mrargs(Args0)), + Args = fabric_util:upgrade_mrargs(Args0), {ok, Db} = get_or_create_db(DbName, DbOptions), CB = get_view_cb(Args), couch_mrview:query_view(Db, DDoc, ViewName, Args, CB, Args). @@ -158,16 +157,11 @@ reduce_view(DbName, {DDocId, Rev}, ViewName, Args0, DbOptions) -> reduce_view(DbName, DDoc, ViewName, Args0, DbOptions); reduce_view(DbName, DDoc, ViewName, Args0, DbOptions) -> set_io_priority(DbName, DbOptions), - Args = fix_skip_and_limit(fabric_util:upgrade_mrargs(Args0)), + Args = fabric_util:upgrade_mrargs(Args0), {ok, Db} = get_or_create_db(DbName, DbOptions), VAcc0 = #vacc{db=Db}, couch_mrview:query_view(Db, DDoc, ViewName, Args, fun reduce_cb/2, VAcc0). -fix_skip_and_limit(Args) -> - #mrargs{skip=Skip, limit=Limit, extra=Extra}=Args, - % the coordinator needs to finalize each row, so make sure the shards don't - Args#mrargs{skip=0, limit=Skip+Limit, extra=[{finalizer,null} | Extra]}. - create_db(DbName) -> create_db(DbName, []). diff --git a/src/fabric/src/fabric_view.erl b/src/fabric/src/fabric_view.erl index 0ba980a6589..27b0c275f5f 100644 --- a/src/fabric/src/fabric_view.erl +++ b/src/fabric/src/fabric_view.erl @@ -16,6 +16,7 @@ transform_row/1, keydict/1, extract_view/4, get_shards/2, check_down_shards/2, handle_worker_exit/3, get_shard_replacements/2, maybe_update_others/5]). +-export([fix_skip_and_limit/1]). -include_lib("fabric/include/fabric.hrl"). -include_lib("mem3/include/mem3.hrl"). @@ -375,6 +376,21 @@ get_shard_replacements(DbName, UsedShards0) -> end end, [], UsedShards). +-spec fix_skip_and_limit(#mrargs{}) -> {CoordArgs::#mrargs{}, WorkerArgs::#mrargs{}}. 
+fix_skip_and_limit(#mrargs{} = Args) -> + {CoordArgs, WorkerArgs} = case couch_mrview_util:get_extra(Args, partition) of + undefined -> + #mrargs{skip=Skip, limit=Limit}=Args, + {Args, Args#mrargs{skip=0, limit=Skip+Limit}}; + _Partition -> + {Args#mrargs{skip=0}, Args} + end, + %% the coordinator needs to finalize each row, so make sure the shards don't + {CoordArgs, remove_finalizer(WorkerArgs)}. + +remove_finalizer(Args) -> + couch_mrview_util:set_extra(Args, finalizer, null). + % unit test is_progress_possible_test() -> EndPoint = 2 bsl 31, diff --git a/src/fabric/src/fabric_view_all_docs.erl b/src/fabric/src/fabric_view_all_docs.erl index fdc3bd9886a..4b412a6838d 100644 --- a/src/fabric/src/fabric_view_all_docs.erl +++ b/src/fabric/src/fabric_view_all_docs.erl @@ -21,16 +21,17 @@ -include_lib("couch_mrview/include/couch_mrview.hrl"). go(Db, Options, #mrargs{keys=undefined} = QueryArgs, Callback, Acc) -> + {CoordArgs, WorkerArgs} = fabric_view:fix_skip_and_limit(QueryArgs), DbName = fabric:dbname(Db), Shards = shards(Db, QueryArgs), Workers0 = fabric_util:submit_jobs( - Shards, fabric_rpc, all_docs, [Options, QueryArgs]), + Shards, fabric_rpc, all_docs, [Options, WorkerArgs]), RexiMon = fabric_util:create_monitors(Workers0), try case fabric_streams:start(Workers0, #shard.ref) of {ok, Workers} -> try - go(DbName, Options, Workers, QueryArgs, Callback, Acc) + go(DbName, Options, Workers, CoordArgs, Callback, Acc) after fabric_streams:cleanup(Workers) end; diff --git a/src/fabric/src/fabric_view_map.erl b/src/fabric/src/fabric_view_map.erl index 0f5e8bb2341..b3d768a5138 100644 --- a/src/fabric/src/fabric_view_map.erl +++ b/src/fabric/src/fabric_view_map.erl @@ -27,10 +27,11 @@ go(DbName, Options, GroupId, View, Args, Callback, Acc, VInfo) go(Db, Options, DDoc, View, Args, Callback, Acc, VInfo) -> DbName = fabric:dbname(Db), Shards = fabric_view:get_shards(Db, Args), + {CoordArgs, WorkerArgs} = fabric_view:fix_skip_and_limit(Args), DocIdAndRev = 
fabric_util:doc_id_and_rev(DDoc), fabric_view:maybe_update_others(DbName, DocIdAndRev, Shards, View, Args), Repls = fabric_view:get_shard_replacements(DbName, Shards), - RPCArgs = [DocIdAndRev, View, Args, Options], + RPCArgs = [DocIdAndRev, View, WorkerArgs, Options], StartFun = fun(Shard) -> hd(fabric_util:submit_jobs([Shard], fabric_rpc, map_view, RPCArgs)) end, @@ -42,7 +43,7 @@ go(Db, Options, DDoc, View, Args, Callback, Acc, VInfo) -> Callback({error, ddoc_updated}, Acc); {ok, Workers} -> try - go(DbName, Workers, VInfo, Args, Callback, Acc) + go(DbName, Workers, VInfo, CoordArgs, Callback, Acc) after fabric_streams:cleanup(Workers) end; diff --git a/src/fabric/src/fabric_view_reduce.erl b/src/fabric/src/fabric_view_reduce.erl index 84b9bba644f..f52061a4c3f 100644 --- a/src/fabric/src/fabric_view_reduce.erl +++ b/src/fabric/src/fabric_view_reduce.erl @@ -25,9 +25,10 @@ go(DbName, GroupId, View, Args, Callback, Acc0, VInfo) when is_binary(GroupId) - go(Db, DDoc, VName, Args, Callback, Acc, VInfo) -> DbName = fabric:dbname(Db), - DocIdAndRev = fabric_util:doc_id_and_rev(DDoc), - RPCArgs = [DocIdAndRev, VName, Args], Shards = fabric_view:get_shards(Db, Args), + {CoordArgs, WorkerArgs} = fabric_view:fix_skip_and_limit(Args), + DocIdAndRev = fabric_util:doc_id_and_rev(DDoc), + RPCArgs = [DocIdAndRev, VName, WorkerArgs], fabric_view:maybe_update_others(DbName, DocIdAndRev, Shards, VName, Args), Repls = fabric_view:get_shard_replacements(DbName, Shards), StartFun = fun(Shard) -> @@ -41,7 +42,7 @@ go(Db, DDoc, VName, Args, Callback, Acc, VInfo) -> Callback({error, ddoc_updated}, Acc); {ok, Workers} -> try - go2(DbName, Workers, VInfo, Args, Callback, Acc) + go2(DbName, Workers, VInfo, CoordArgs, Callback, Acc) after fabric_streams:cleanup(Workers) end; From 234a9637552ef78f40350ac03bbd13fbd3c6fc07 Mon Sep 17 00:00:00 2001 From: "Paul J. 
Davis" Date: Wed, 19 Dec 2018 11:48:45 -0600 Subject: [PATCH 12/14] Use index names when testing index selection Using the internal hash values for indexes was a brittle approach to ensuring that a specific index was or was not picked. By naming the index and design docs we can more concretely ensure that the chosen indexes match the intent of the test while also not breaking each time mango internals change. --- src/mango/test/05-index-selection-test.py | 20 +++++------- src/mango/test/user_docs.py | 37 ++++++++++++----------- 2 files changed, 28 insertions(+), 29 deletions(-) diff --git a/src/mango/test/05-index-selection-test.py b/src/mango/test/05-index-selection-test.py index e7ea329c6f9..3f7fb9f21df 100644 --- a/src/mango/test/05-index-selection-test.py +++ b/src/mango/test/05-index-selection-test.py @@ -37,8 +37,7 @@ def test_with_nested_and(self): self.assertEqual(resp["index"]["type"], "json") def test_with_or(self): - # index on ["company","manager"] - ddocid = "_design/a0c425a60cf3c3c09e3c537c9ef20059dcef9198" + ddocid = "_design/company_and_manager" resp = self.db.find( { @@ -50,8 +49,7 @@ def test_with_or(self): self.assertEqual(resp["index"]["ddoc"], ddocid) def test_use_most_columns(self): - # ddoc id for the age index - ddocid = "_design/ad3d537c03cd7c6a43cf8dff66ef70ea54c2b40f" + ddocid = "_design/age" resp = self.db.find( { "name.first": "Stephanie", @@ -60,7 +58,7 @@ def test_use_most_columns(self): }, explain=True, ) - self.assertNotEqual(resp["index"]["ddoc"], "_design/" + ddocid) + self.assertNotEqual(resp["index"]["ddoc"], ddocid) resp = self.db.find( { @@ -83,7 +81,7 @@ def test_no_valid_sort_index(self): def test_invalid_use_index(self): # ddoc id for the age index - ddocid = "_design/ad3d537c03cd7c6a43cf8dff66ef70ea54c2b40f" + ddocid = "_design/age" r = self.db.find({}, use_index=ddocid, return_raw=True) self.assertEqual( r["warning"], @@ -105,8 +103,7 @@ def test_uses_index_when_no_range_or_equals(self): 
self.assertEqual(resp_explain["index"]["type"], "json") def test_reject_use_index_invalid_fields(self): - # index on ["company","manager"] which should not be valid - ddocid = "_design/a0c425a60cf3c3c09e3c537c9ef20059dcef9198" + ddocid = "_design/company_and_manager" selector = {"company": "Pharmex"} r = self.db.find(selector, use_index=ddocid, return_raw=True) self.assertEqual( @@ -121,9 +118,8 @@ def test_reject_use_index_invalid_fields(self): self.assertEqual(d["company"], "Pharmex") def test_reject_use_index_ddoc_and_name_invalid_fields(self): - # index on ["company","manager"] which should not be valid - ddocid = "_design/a0c425a60cf3c3c09e3c537c9ef20059dcef9198" - name = "a0c425a60cf3c3c09e3c537c9ef20059dcef9198" + ddocid = "_design/company_and_manager" + name = "company_and_manager" selector = {"company": "Pharmex"} resp = self.db.find(selector, use_index=[ddocid, name], return_raw=True) @@ -141,7 +137,7 @@ def test_reject_use_index_ddoc_and_name_invalid_fields(self): def test_reject_use_index_sort_order(self): # index on ["company","manager"] which should not be valid # and there is no valid fallback (i.e. 
an index on ["company"]) - ddocid = "_design/a0c425a60cf3c3c09e3c537c9ef20059dcef9198" + ddocid = "_design/company_and_manager" selector = {"company": {"$gt": None}} try: self.db.find(selector, use_index=ddocid, sort=[{"company": "desc"}]) diff --git a/src/mango/test/user_docs.py b/src/mango/test/user_docs.py index afbea710e5e..e0495353b19 100644 --- a/src/mango/test/user_docs.py +++ b/src/mango/test/user_docs.py @@ -70,24 +70,27 @@ def setup(db, index_type="view", **kwargs): def add_view_indexes(db, kwargs): indexes = [ - ["user_id"], - ["name.last", "name.first"], - ["age"], - [ - "location.state", - "location.city", - "location.address.street", - "location.address.number", - ], - ["company", "manager"], - ["manager"], - ["favorites"], - ["favorites.3"], - ["twitter"], - ["ordered"], + (["user_id"], "user_id"), + (["name.last", "name.first"], "name"), + (["age"], "age"), + ( + [ + "location.state", + "location.city", + "location.address.street", + "location.address.number", + ], + "location", + ), + (["company", "manager"], "company_and_manager"), + (["manager"], "manager"), + (["favorites"], "favorites"), + (["favorites.3"], "favorites_3"), + (["twitter"], "twitter"), + (["ordered"], "ordered"), ] - for idx in indexes: - assert db.create_index(idx) is True + for (idx, name) in indexes: + assert db.create_index(idx, name=name, ddoc=name) is True def add_text_indexes(db, kwargs): From 0541a3124a6196a0757d7693479ebc40c61e0f27 Mon Sep 17 00:00:00 2001 From: "Paul J. 
Davis" Date: Mon, 29 Oct 2018 14:08:05 -0500 Subject: [PATCH 13/14] Support partitioned queries in Mango Co-authored-by: Garren Smith Co-authored-by: Robert Newson --- src/mango/src/mango_cursor.erl | 1 + src/mango/src/mango_cursor_text.erl | 9 +++ src/mango/src/mango_cursor_view.erl | 6 ++ src/mango/src/mango_error.erl | 7 ++ src/mango/src/mango_httpd.erl | 23 +++++- src/mango/src/mango_idx.erl | 114 +++++++++++++++++++++++++--- src/mango/src/mango_idx.hrl | 1 + src/mango/src/mango_idx_text.erl | 1 + src/mango/src/mango_idx_view.erl | 1 + src/mango/src/mango_opts.erl | 30 ++++++++ 10 files changed, 182 insertions(+), 11 deletions(-) diff --git a/src/mango/src/mango_cursor.erl b/src/mango/src/mango_cursor.erl index 5d2ea717d16..c6f21ddf8f5 100644 --- a/src/mango/src/mango_cursor.erl +++ b/src/mango/src/mango_cursor.erl @@ -71,6 +71,7 @@ explain(#cursor{}=Cursor) -> {[ {dbname, mango_idx:dbname(Idx)}, {index, mango_idx:to_json(Idx)}, + {partitioned, mango_idx:partitioned(Idx)}, {selector, Selector}, {opts, {Opts}}, {limit, Limit}, diff --git a/src/mango/src/mango_cursor_text.erl b/src/mango/src/mango_cursor_text.erl index 3883bc8f2bb..8938f3557bf 100644 --- a/src/mango/src/mango_cursor_text.erl +++ b/src/mango/src/mango_cursor_text.erl @@ -77,6 +77,7 @@ explain(Cursor) -> } = Cursor, [ {'query', mango_selector_text:convert(Selector)}, + {partition, get_partition(Opts, null)}, {sort, sort_query(Opts, Selector)} ]. @@ -93,6 +94,7 @@ execute(Cursor, UserFun, UserAcc) -> } = Cursor, QueryArgs = #index_query_args{ q = mango_selector_text:convert(Selector), + partition = get_partition(Opts, nil), sort = sort_query(Opts, Selector), raw_bookmark = true }, @@ -237,6 +239,13 @@ sort_query(Opts, Selector) -> end. +get_partition(Opts, Default) -> + case couch_util:get_value(partition, Opts) of + <<>> -> Default; + Else -> Else + end. 
+ + get_bookmark(Opts) -> case lists:keyfind(bookmark, 1, Opts) of {_, BM} when is_list(BM), BM /= [] -> diff --git a/src/mango/src/mango_cursor_view.erl b/src/mango/src/mango_cursor_view.erl index b3a7f4080e4..1c4b3423e08 100644 --- a/src/mango/src/mango_cursor_view.erl +++ b/src/mango/src/mango_cursor_view.erl @@ -71,6 +71,7 @@ explain(Cursor) -> {include_docs, Args#mrargs.include_docs}, {view_type, Args#mrargs.view_type}, {reduce, Args#mrargs.reduce}, + {partition, couch_mrview_util:get_extra(Args, partition, null)}, {start_key, maybe_replace_max_json(Args#mrargs.start_key)}, {end_key, maybe_replace_max_json(Args#mrargs.end_key)}, {direction, Args#mrargs.direction}, @@ -398,6 +399,11 @@ apply_opts([{update, false} | Rest], Args) -> update = false }, apply_opts(Rest, NewArgs); +apply_opts([{partition, <<>>} | Rest], Args) -> + apply_opts(Rest, Args); +apply_opts([{partition, Partition} | Rest], Args) when is_binary(Partition) -> + NewArgs = couch_mrview_util:set_extra(Args, partition, Partition), + apply_opts(Rest, NewArgs); apply_opts([{_, _} | Rest], Args) -> % Ignore unknown options apply_opts(Rest, Args). 
diff --git a/src/mango/src/mango_error.erl b/src/mango/src/mango_error.erl index b2bbb392a87..dcf4b9a7e91 100644 --- a/src/mango/src/mango_error.erl +++ b/src/mango/src/mango_error.erl @@ -104,6 +104,13 @@ info(mango_idx, {invalid_index_type, BadType}) -> <<"invalid_index">>, fmt("Invalid type for index: ~s", [BadType]) }; +info(mango_idx, {partitioned_option_mismatch, BadDDoc}) -> + { + 400, + <<"invalid_partitioned_option">>, + fmt("Requested partitioned option does not match existing value on" + " design document ~s", [BadDDoc]) + }; info(mango_idx, invalid_query_ddoc_language) -> { 400, diff --git a/src/mango/src/mango_httpd.erl b/src/mango/src/mango_httpd.erl index 2e8777135d5..d73ec6cb587 100644 --- a/src/mango/src/mango_httpd.erl +++ b/src/mango/src/mango_httpd.erl @@ -170,7 +170,8 @@ handle_index_req(#httpd{path_parts=[_, _, _DDocId0, _Type, _Name]}=Req, _Db) -> handle_explain_req(#httpd{method='POST'}=Req, Db) -> chttpd:validate_ctype(Req, "application/json"), - {ok, Opts0} = mango_opts:validate_find(chttpd:json_body_obj(Req)), + Body = maybe_set_partition(Req), + {ok, Opts0} = mango_opts:validate_find(Body), {value, {selector, Sel}, Opts} = lists:keytake(selector, 1, Opts0), Resp = mango_crud:explain(Db, Sel, Opts), chttpd:send_json(Req, Resp); @@ -181,7 +182,8 @@ handle_explain_req(Req, _Db) -> handle_find_req(#httpd{method='POST'}=Req, Db) -> chttpd:validate_ctype(Req, "application/json"), - {ok, Opts0} = mango_opts:validate_find(chttpd:json_body_obj(Req)), + Body = maybe_set_partition(Req), + {ok, Opts0} = mango_opts:validate_find(Body), {value, {selector, Sel}, Opts} = lists:keytake(selector, 1, Opts0), {ok, Resp0} = start_find_resp(Req), {ok, AccOut} = run_find(Resp0, Db, Sel, Opts), @@ -224,6 +226,23 @@ get_idx_del_opts(Req) -> end. 
+maybe_set_partition(Req) -> + {Props} = chttpd:json_body_obj(Req), + case chttpd:qs_value(Req, "partition", undefined) of + undefined -> + {Props}; + Partition -> + case couch_util:get_value(<<"partition">>, Props) of + undefined -> + {[{<<"partition">>, ?l2b(Partition)} | Props]}; + Partition -> + {Props}; + OtherPartition -> + ?MANGO_ERROR({bad_partition, OtherPartition}) + end + end. + + convert_to_design_id(DDocId) -> case DDocId of <<"_design/", _/binary>> -> DDocId; diff --git a/src/mango/src/mango_idx.erl b/src/mango/src/mango_idx.erl index 8af92b94627..6e2abca5c70 100644 --- a/src/mango/src/mango_idx.erl +++ b/src/mango/src/mango_idx.erl @@ -33,6 +33,7 @@ name/1, type/1, def/1, + partitioned/1, opts/1, columns/1, is_usable/3, @@ -59,18 +60,20 @@ list(Db) -> get_usable_indexes(Db, Selector, Opts) -> ExistingIndexes = mango_idx:list(Db), - - GlobalIndexes = mango_cursor:remove_indexes_with_partial_filter_selector(ExistingIndexes), + GlobalIndexes = mango_cursor:remove_indexes_with_partial_filter_selector( + ExistingIndexes + ), UserSpecifiedIndex = mango_cursor:maybe_filter_indexes_by_ddoc(ExistingIndexes, Opts), UsableIndexes0 = lists:usort(GlobalIndexes ++ UserSpecifiedIndex), + UsableIndexes1 = filter_partition_indexes(UsableIndexes0, Opts), SortFields = get_sort_fields(Opts), UsableFilter = fun(I) -> is_usable(I, Selector, SortFields) end, - case lists:filter(UsableFilter, UsableIndexes0) of - [] -> + case lists:filter(UsableFilter, UsableIndexes1) of + [] -> ?MANGO_ERROR({no_usable_index, missing_sort_index}); - UsableIndexes -> + UsableIndexes -> UsableIndexes end. @@ -110,6 +113,7 @@ new(Db, Opts) -> name = IdxName, type = Type, def = Def, + partitioned = get_idx_partitioned(Opts), opts = filter_opts(Opts) }}. 
@@ -121,10 +125,11 @@ validate_new(Idx, Db) -> add(DDoc, Idx) -> Mod = idx_mod(Idx), - {ok, NewDDoc} = Mod:add(DDoc, Idx), + {ok, NewDDoc1} = Mod:add(DDoc, Idx), + NewDDoc2 = set_ddoc_partitioned(NewDDoc1, Idx), % Round trip through JSON for normalization - Body = ?JSON_DECODE(?JSON_ENCODE(NewDDoc#doc.body)), - {ok, NewDDoc#doc{body = Body}}. + Body = ?JSON_DECODE(?JSON_ENCODE(NewDDoc2#doc.body)), + {ok, NewDDoc2#doc{body = Body}}. remove(DDoc, Idx) -> @@ -176,7 +181,8 @@ from_ddoc(Db, {Props}) -> lists:map(fun(Idx) -> Idx#idx{ dbname = DbName, - ddoc = DDoc + ddoc = DDoc, + partitioned = get_idx_partitioned(Db, Props) } end, Idxs). @@ -213,6 +219,10 @@ def(#idx{def=Def}) -> Def. +partitioned(#idx{partitioned=Partitioned}) -> + Partitioned. + + opts(#idx{opts=Opts}) -> Opts. @@ -329,6 +339,89 @@ gen_name(Idx, Opts0) -> mango_util:enc_hex(Sha). +get_idx_partitioned(Opts) -> + case proplists:get_value(partitioned, Opts) of + B when is_boolean(B) -> + B; + db_default -> + % Default to the partitioned setting on + % the database. + undefined + end. + + +set_ddoc_partitioned(DDoc, Idx) -> + % We have to verify that the new index being added + % to this design document either matches the current + % ddoc's design options *or* this is a new design doc + #doc{ + id = DDocId, + revs = Revs, + body = {BodyProps} + } = DDoc, + OldDOpts = couch_util:get_value(<<"options">>, BodyProps), + OldOpt = case OldDOpts of + {OldDOptProps} when is_list(OldDOptProps) -> + couch_util:get_value(<<"partitioned">>, OldDOptProps); + _ -> + undefined + end, + % If new matches old we're done + if Idx#idx.partitioned == OldOpt -> DDoc; true -> + % If we're creating a ddoc then we can set the options + case Revs == {0, []} of + true when Idx#idx.partitioned /= undefined -> + set_ddoc_partitioned_option(DDoc, Idx#idx.partitioned); + true when Idx#idx.partitioned == undefined -> + DDoc; + false -> + ?MANGO_ERROR({partitioned_option_mismatch, DDocId}) + end + end. 
+ + +set_ddoc_partitioned_option(DDoc, Partitioned) -> + #doc{ + body = {BodyProps} + } = DDoc, + NewProps = case couch_util:get_value(<<"options">>, BodyProps) of + {Existing} when is_list(Existing) -> + Opt = {<<"partitioned">>, Partitioned}, + New = lists:keystore(<<"partitioned">>, 1, Existing, Opt), + lists:keystore(<<"options">>, 1, BodyProps, {<<"options">>, New}); + undefined -> + New = {<<"options">>, {[{<<"partitioned">>, Partitioned}]}}, + lists:keystore(<<"options">>, 1, BodyProps, New) + end, + DDoc#doc{body = {NewProps}}. + + +get_idx_partitioned(Db, DDocProps) -> + Default = fabric_util:is_partitioned(Db), + case couch_util:get_value(<<"options">>, DDocProps) of + {DesignOpts} -> + case couch_util:get_value(<<"partitioned">>, DesignOpts) of + P when is_boolean(P) -> + P; + undefined -> + Default + end; + undefined -> + Default + end. + + +filter_partition_indexes(Indexes, Opts) -> + PFilt = case couch_util:get_value(partition, Opts) of + <<>> -> + fun(#idx{partitioned = P}) -> not P end; + Partition when is_binary(Partition) -> + fun(#idx{partitioned = P}) -> P end + end, + Filt = fun(Idx) -> type(Idx) == <<"special">> orelse PFilt(Idx) end, + lists:filter(Filt, Indexes). + + filter_opts([]) -> []; filter_opts([{user_ctx, _} | Rest]) -> @@ -341,6 +434,8 @@ filter_opts([{type, _} | Rest]) -> filter_opts(Rest); filter_opts([{w, _} | Rest]) -> filter_opts(Rest); +filter_opts([{partitioned, _} | Rest]) -> + filter_opts(Rest); filter_opts([Opt | Rest]) -> [Opt | filter_opts(Rest)]. @@ -374,6 +469,7 @@ index(SelectorName, Selector) -> <<"Selected">>,<<"json">>, {[{<<"fields">>,{[{<<"location">>,<<"asc">>}]}}, {SelectorName,{Selector}}]}, + false, [{<<"def">>,{[{<<"fields">>,[<<"location">>]}]}}] }. diff --git a/src/mango/src/mango_idx.hrl b/src/mango/src/mango_idx.hrl index 712031b758f..97259500bbe 100644 --- a/src/mango/src/mango_idx.hrl +++ b/src/mango/src/mango_idx.hrl @@ -16,5 +16,6 @@ name, type, def, + partitioned, opts }). 
diff --git a/src/mango/src/mango_idx_text.erl b/src/mango/src/mango_idx_text.erl index 29b4441a109..0b78e880db7 100644 --- a/src/mango/src/mango_idx_text.erl +++ b/src/mango/src/mango_idx_text.erl @@ -100,6 +100,7 @@ to_json(Idx) -> {ddoc, Idx#idx.ddoc}, {name, Idx#idx.name}, {type, Idx#idx.type}, + {partitioned, Idx#idx.partitioned}, {def, {def_to_json(Idx#idx.def)}} ]}. diff --git a/src/mango/src/mango_idx_view.erl b/src/mango/src/mango_idx_view.erl index 2d784b638e4..37911498c81 100644 --- a/src/mango/src/mango_idx_view.erl +++ b/src/mango/src/mango_idx_view.erl @@ -104,6 +104,7 @@ to_json(Idx) -> {ddoc, Idx#idx.ddoc}, {name, Idx#idx.name}, {type, Idx#idx.type}, + {partitioned, Idx#idx.partitioned}, {def, {def_to_json(Idx#idx.def)}} ]}. diff --git a/src/mango/src/mango_opts.erl b/src/mango/src/mango_opts.erl index 7bae9c90d20..92c07f743b7 100644 --- a/src/mango/src/mango_opts.erl +++ b/src/mango/src/mango_opts.erl @@ -34,6 +34,7 @@ validate_sort/1, validate_fields/1, validate_bulk_delete/1, + validate_partitioned/1, default_limit/0 ]). @@ -70,6 +71,12 @@ validate_idx_create({Props}) -> {optional, true}, {default, 2}, {validator, fun is_pos_integer/1} + ]}, + {<<"partitioned">>, [ + {tag, partitioned}, + {optional, true}, + {default, db_default}, + {validator, fun validate_partitioned/1} ]} ], validate(Props, Opts). @@ -117,6 +124,12 @@ validate_find({Props}) -> {default, []}, {validator, fun validate_fields/1} ]}, + {<<"partition">>, [ + {tag, partition}, + {optional, true}, + {default, <<>>}, + {validator, fun validate_partition/1} + ]}, {<<"r">>, [ {tag, r}, {optional, true}, @@ -296,6 +309,23 @@ validate_fields(Value) -> mango_fields:new(Value). +validate_partitioned(true) -> + {ok, true}; +validate_partitioned(false) -> + {ok, false}; +validate_partitioned(db_default) -> + {ok, db_default}; +validate_partitioned(Else) -> + ?MANGO_ERROR({invalid_partitioned_value, Else}). 
+ + +validate_partition(<<>>) -> + {ok, <<>>}; +validate_partition(Partition) -> + couch_partition:validate_partition(Partition), + {ok, Partition}. + + validate_opts([], Props, Acc) -> {Props, lists:reverse(Acc)}; validate_opts([{Name, Desc} | Rest], Props, Acc) -> From 91af772d609381defdd0c9e4fe22ae9f40478da0 Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Wed, 28 Nov 2018 10:58:42 -0600 Subject: [PATCH 14/14] Add Elixir tests for database partitions Co-authored-by: Garren Smith Co-authored-by: Robert Newson --- test/elixir/lib/couch/db_test.ex | 12 +- test/elixir/test/partition_all_docs_test.exs | 118 ++++ test/elixir/test/partition_crud_test.exs | 356 +++++++++++ test/elixir/test/partition_ddoc_test.exs | 171 +++++ .../test/partition_design_docs_test.exs | 16 + test/elixir/test/partition_helpers.exs | 76 +++ test/elixir/test/partition_mango_test.exs | 591 ++++++++++++++++++ test/elixir/test/partition_size_test.exs | 357 +++++++++++ test/elixir/test/partition_view_test.exs | 299 +++++++++ .../test/partition_view_update_test.exs | 155 +++++ test/elixir/test/test_helper.exs | 1 + 11 files changed, 2149 insertions(+), 3 deletions(-) create mode 100644 test/elixir/test/partition_all_docs_test.exs create mode 100644 test/elixir/test/partition_crud_test.exs create mode 100644 test/elixir/test/partition_ddoc_test.exs create mode 100644 test/elixir/test/partition_design_docs_test.exs create mode 100644 test/elixir/test/partition_helpers.exs create mode 100644 test/elixir/test/partition_mango_test.exs create mode 100644 test/elixir/test/partition_size_test.exs create mode 100644 test/elixir/test/partition_view_test.exs create mode 100644 test/elixir/test/partition_view_update_test.exs diff --git a/test/elixir/lib/couch/db_test.ex b/test/elixir/lib/couch/db_test.ex index 8992376350b..ba65a6d4eab 100644 --- a/test/elixir/lib/couch/db_test.ex +++ b/test/elixir/lib/couch/db_test.ex @@ -18,6 +18,12 @@ defmodule Couch.DBTest do |> Map.put(:db_name, 
random_db_name(db_name)) |> Map.put(:with_db, true) + %{:with_partitioned_db => true} -> + context + |> Map.put(:db_name, random_db_name()) + |> Map.put(:query, %{partitioned: true}) + |> Map.put(:with_db, true) + %{:with_db => true} -> Map.put(context, :db_name, random_db_name()) @@ -29,7 +35,7 @@ defmodule Couch.DBTest do end if Map.has_key?(context, :with_db) do - {:ok, _} = create_db(context[:db_name]) + {:ok, _} = create_db(context[:db_name], query: context[:query]) on_exit(fn -> delete_db(context[:db_name]) end) end @@ -154,8 +160,8 @@ defmodule Couch.DBTest do Map.put(user_doc, "_rev", resp.body["rev"]) end - def create_db(db_name) do - resp = Couch.put("/#{db_name}") + def create_db(db_name, opts \\ []) do + resp = Couch.put("/#{db_name}", opts) assert resp.status_code in [201, 202] assert resp.body == %{"ok" => true} {:ok, resp} diff --git a/test/elixir/test/partition_all_docs_test.exs b/test/elixir/test/partition_all_docs_test.exs new file mode 100644 index 00000000000..0941daf59f5 --- /dev/null +++ b/test/elixir/test/partition_all_docs_test.exs @@ -0,0 +1,118 @@ +defmodule PartitionAllDocsTest do + use CouchTestCase + import PartitionHelpers + + @moduledoc """ + Test Partition functionality for for all_docs + """ + + setup_all do + db_name = random_db_name() + {:ok, _} = create_db(db_name, query: %{partitioned: true, q: 1}) + on_exit(fn -> delete_db(db_name) end) + + create_partition_docs(db_name) + + {:ok, [db_name: db_name]} + end + + test "all_docs with partitioned:true returns partitioned fields", context do + db_name = context[:db_name] + + url = "/#{db_name}/_partition/foo/_all_docs" + resp = Couch.get(url) + assert resp.status_code == 200 + partitions = get_partitions(resp) + assert Enum.dedup(partitions) == ["foo"] + + url = "/#{db_name}/_partition/bar/_all_docs" + resp = Couch.get(url) + assert resp.status_code == 200 + partitions = get_partitions(resp) + assert Enum.dedup(partitions) == ["bar"] + end + + test "partition all_docs errors with 
incorrect partition supplied", context do + db_name = context[:db_name] + + url = "/#{db_name}/_partition/_bar/_all_docs" + resp = Couch.get(url) + assert resp.status_code == 400 + + url = "/#{db_name}/_partition//_all_docs" + resp = Couch.get(url) + assert resp.status_code == 400 + end + + test "partitioned _all_docs works with startkey, endkey range", context do + db_name = context[:db_name] + + url = "/#{db_name}/_partition/foo/_all_docs" + resp = Couch.get(url, query: %{start_key: "\"foo:12\"", end_key: "\"foo:2\""}) + assert resp.status_code == 200 + partitions = get_partitions(resp) + assert length(partitions) == 5 + assert Enum.dedup(partitions) == ["foo"] + end + + test "partitioned _all_docs works with keys", context do + db_name = context[:db_name] + + url = "/#{db_name}/_partition/foo/_all_docs" + resp = Couch.post(url, body: %{keys: ["foo:2", "foo:4", "foo:6"]}) + assert resp.status_code == 200 + ids = get_ids(resp) + assert length(ids) == 3 + assert ids == ["foo:2", "foo:4", "foo:6"] + end + + test "partition _all_docs works with limit", context do + db_name = context[:db_name] + + url = "/#{db_name}/_partition/foo/_all_docs" + resp = Couch.get(url, query: %{limit: 5}) + assert resp.status_code == 200 + partitions = get_partitions(resp) + assert length(partitions) == 5 + assert Enum.dedup(partitions) == ["foo"] + end + + test "partition _all_docs with descending", context do + db_name = context[:db_name] + + url = "/#{db_name}/_partition/foo/_all_docs" + resp = Couch.get(url, query: %{descending: true, limit: 5}) + assert resp.status_code == 200 + ids = get_ids(resp) + assert length(ids) == 5 + assert ids == ["foo:98", "foo:96", "foo:94", "foo:92", "foo:90"] + + resp = Couch.get(url, query: %{descending: false, limit: 5}) + assert resp.status_code == 200 + ids = get_ids(resp) + assert length(ids) == 5 + assert ids == ["foo:10", "foo:100", "foo:12", "foo:14", "foo:16"] + end + + test "partition _all_docs with skip", context do + db_name = 
context[:db_name] + + url = "/#{db_name}/_partition/foo/_all_docs" + resp = Couch.get(url, query: %{skip: 5, limit: 5}) + assert resp.status_code == 200 + ids = get_ids(resp) + assert length(ids) == 5 + assert ids == ["foo:18", "foo:2", "foo:20", "foo:22", "foo:24"] + end + + test "partition _all_docs with key", context do + db_name = context[:db_name] + + url = "/#{db_name}/_partition/foo/_all_docs" + resp = Couch.get(url, query: %{key: "\"foo:22\""}) + assert resp.status_code == 200 + ids = get_ids(resp) + assert length(ids) == 1 + assert ids == ["foo:22"] + end +end diff --git a/test/elixir/test/partition_crud_test.exs b/test/elixir/test/partition_crud_test.exs new file mode 100644 index 00000000000..415dd49bfe3 --- /dev/null +++ b/test/elixir/test/partition_crud_test.exs @@ -0,0 +1,356 @@ +defmodule PartitionCrudTest do + use CouchTestCase + + @tag :with_partitioned_db + test "Sets partition in db info", context do + db_name = context[:db_name] + resp = Couch.get("/#{db_name}") + %{body: body} = resp + assert body["props"] == %{"partitioned" => true} + end + + @tag :with_partitioned_db + test "PUT and GET document", context do + db_name = context[:db_name] + id = "my-partition:doc" + url = "/#{db_name}/#{id}" + + resp = Couch.put(url, body: %{partitioned_doc: true}) + %{body: doc} = resp + assert resp.status_code == 201 + assert doc["id"] == id + + resp = Couch.get(url) + assert resp.status_code == 200 + + %{body: doc} = resp + assert doc["_id"] == id + end + + @tag :with_partitioned_db + test "PUT fails if a partition key is not supplied", context do + db_name = context[:db_name] + id = "not-partitioned" + url = "/#{db_name}/#{id}" + + resp = Couch.put(url, body: %{partitioned_doc: false}) + assert resp.status_code == 400 + + error = %{ + "error" => "illegal_docid", + "reason" => "Doc id must be of form partition:id" + } + + assert Map.get(resp, :body) == error + end + + @tag :with_partitioned_db + test "PUT fails for partitions with _", context do + db_name = 
context[:db_name] + id = "_bad:partitioned" + url = "/#{db_name}/#{id}" + + resp = Couch.put(url, body: %{partitioned_doc: false}) + + error = %{ + "error" => "illegal_docid", + "reason" => "Only reserved document ids may start with underscore." + } + + assert resp.status_code == 400 + assert Map.get(resp, :body) == error + end + + @tag :with_partitioned_db + test "PUT fails for bad partitions", context do + db_name = context[:db_name] + id = "bad:" + url = "/#{db_name}/#{id}" + + resp = Couch.put(url, body: %{partitioned_doc: false}) + + error = %{ + "error" => "illegal_docid", + "reason" => "Document id must not be empty" + } + + assert resp.status_code == 400 + assert Map.get(resp, :body) == error + end + + @tag :with_partitioned_db + test "POST and GET document", context do + db_name = context[:db_name] + id = "my-partition-post:doc" + url = "/#{db_name}" + + resp = Couch.post(url, body: %{_id: id, partitioned_doc: true}) + assert resp.status_code == 201 + + resp = Couch.get("#{url}/#{id}") + assert resp.status_code == 200 + + %{body: doc} = resp + assert doc["_id"] == id + end + + @tag :with_partitioned_db + test "POST and _bulk_get document", context do + db_name = context[:db_name] + id = "my-partition-post:doc" + url = "/#{db_name}" + + resp = Couch.post(url, body: %{_id: id, partitioned_doc: true}) + assert resp.status_code == 201 + + resp = Couch.post("#{url}/_bulk_get", body: %{docs: [%{id: id}]}) + assert resp.status_code == 200 + + %{body: body} = resp + + assert %{ + "results" => [ + %{ + "docs" => [ + %{ + "ok" => %{ + "_id" => "my-partition-post:doc", + "_rev" => "1-43d86359741cb629c0953a2beb6e9d7a", + "partitioned_doc" => true + } + } + ], + "id" => "my-partition-post:doc" + } + ] + } == body + end + + @tag :with_partitioned_db + test "_bulk_get bad partitioned document", context do + db_name = context[:db_name] + id = "my-partition-post" + url = "/#{db_name}" + + resp = Couch.post("#{url}/_bulk_get", body: %{docs: [%{id: id}]}) + assert 
resp.status_code == 200 + %{:body => body} = resp + + assert %{ + "results" => [ + %{ + "docs" => [ + %{ + "error" => %{ + "error" => "illegal_docid", + "id" => "my-partition-post", + "reason" => "Doc id must be of form partition:id", + "rev" => :null + } + } + ], + "id" => "my-partition-post" + } + ] + } == body + end + + @tag :with_partitioned_db + test "POST fails if a partition key is not supplied", context do + db_name = context[:db_name] + id = "not-partitioned-post" + url = "/#{db_name}" + + resp = Couch.post(url, body: %{_id: id, partitited_doc: false}) + assert resp.status_code == 400 + end + + @tag :with_partitioned_db + test "_bulk_docs saves docs with partition key", context do + db_name = context[:db_name] + + docs = [ + %{_id: "foo:1"}, + %{_id: "bar:1"} + ] + + url = "/#{db_name}" + resp = Couch.post("#{url}/_bulk_docs", body: %{:docs => docs}) + assert resp.status_code == 201 + + resp = Couch.get("#{url}/foo:1") + assert resp.status_code == 200 + + resp = Couch.get("#{url}/bar:1") + assert resp.status_code == 200 + end + + @tag :with_partitioned_db + test "_bulk_docs errors with missing partition key", context do + db_name = context[:db_name] + + docs = [ + %{_id: "foo1"} + ] + + error = %{ + "error" => "illegal_docid", + "reason" => "Doc id must be of form partition:id" + } + + url = "/#{db_name}" + resp = Couch.post("#{url}/_bulk_docs", body: %{:docs => docs}) + assert resp.status_code == 400 + assert Map.get(resp, :body) == error + end + + @tag :with_partitioned_db + test "_bulk_docs errors with bad partition key", context do + db_name = context[:db_name] + + docs = [ + %{_id: "_foo:1"} + ] + + error = %{ + "error" => "illegal_docid", + "reason" => "Only reserved document ids may start with underscore." 
+ } + + url = "/#{db_name}" + resp = Couch.post("#{url}/_bulk_docs", body: %{:docs => docs}) + assert resp.status_code == 400 + assert Map.get(resp, :body) == error + end + + @tag :with_partitioned_db + test "_bulk_docs errors with bad doc key", context do + db_name = context[:db_name] + + docs = [ + %{_id: "foo:"} + ] + + error = %{ + "error" => "illegal_docid", + "reason" => "Document id must not be empty" + } + + url = "/#{db_name}" + resp = Couch.post("#{url}/_bulk_docs", body: %{:docs => docs}) + assert resp.status_code == 400 + assert Map.get(resp, :body) == error + end + + @tag :with_partitioned_db + test "saves attachment with partitioned doc", context do + db_name = context[:db_name] + id = "foo:doc-with-attachment" + + doc = %{ + _id: id, + _attachments: %{ + "foo.txt": %{ + content_type: "text/plain", + data: Base.encode64("This is a text document to save") + } + } + } + + resp = Couch.put("/#{db_name}/#{id}", body: doc) + + assert resp.status_code == 201 + + resp = Couch.get("/#{db_name}/#{id}") + assert resp.status_code == 200 + body = Map.get(resp, :body) + rev = Map.get(body, "_rev") + + assert body["_attachments"] == %{ + "foo.txt" => %{ + "content_type" => "text/plain", + "digest" => "md5-OW2BoZAtMqs1E+fAnLpNBw==", + "length" => 31, + "revpos" => 1, + "stub" => true + } + } + + resp = Couch.get("/#{db_name}/#{id}/foo.txt") + assert Map.get(resp, :body) == "This is a text document to save" + + resp = + Couch.put("/#{db_name}/#{id}/bar.txt?rev=#{rev}", + headers: ["Content-Type": "text/plain"], + body: "This is another document" + ) + + assert resp.status_code == 201 + %{:body => body} = resp + assert body["ok"] == true + assert body["id"] == id + end + + @tag :with_partitioned_db + test "can purge partitioned db docs", context do + db_name = context[:db_name] + + doc = %{ + _id: "foo:bar", + value: "some value" + } + + resp = Couch.post("/#{db_name}", query: [w: 3], body: doc) + assert resp.status_code == 201 + %{body: body} = resp + rev = 
body["rev"] + + resp = Couch.get("/#{db_name}/foo:bar") + assert resp.status_code == 200 + + body = %{"foo:bar" => [rev]} + resp = Couch.post("/#{db_name}/_purge", query: [w: 3], body: body) + assert resp.status_code == 201 + + resp = Couch.get("/#{db_name}/foo:bar") + assert resp.status_code == 404 + assert resp.body == %{"error" => "not_found", "reason" => "missing"} + end + + @tag :with_partitioned_db + test "purge rejects unpartitioned docid", context do + db_name = context[:db_name] + body = %{"no_partition" => ["1-967a00dff5e02add41819138abb3284d"]} + resp = Couch.post("/#{db_name}/_purge", query: [w: 3], body: body) + assert resp.status_code == 400 + %{body: body} = resp + assert body["error"] == "illegal_docid" + end + + test "create database with bad `partitioned` value", _context do + resp = Couch.put("/bad-db?partitioned=tru") + assert resp.status_code == 400 + + assert Map.get(resp, :body) == %{ + "error" => "bad_request", + "reason" => "Invalid `partitioned` parameter" + } + end + + test "can create unpartitioned system db", _context do + Couch.delete("/_replicator") + resp = Couch.put("/_replicator") + assert resp.status_code == 201 + assert resp.body == %{"ok" => true} + end + + test "cannot create partitioned system db", _context do + Couch.delete("/_replicator") + + resp = Couch.put("/_replicator?partitioned=true") + assert resp.status_code == 400 + + %{:body => %{"reason" => reason}} = resp + assert Regex.match?(~r/Cannot partition a system database/, reason) + end +end diff --git a/test/elixir/test/partition_ddoc_test.exs b/test/elixir/test/partition_ddoc_test.exs new file mode 100644 index 00000000000..4b1f00d8bd2 --- /dev/null +++ b/test/elixir/test/partition_ddoc_test.exs @@ -0,0 +1,171 @@ +defmodule PartitionDDocTest do + use CouchTestCase + + @moduledoc """ + Test partition design doc interactions + """ + + setup do + db_name = random_db_name() + {:ok, _} = create_db(db_name, query: %{partitioned: true, q: 1}) + on_exit(fn -> 
delete_db(db_name) end) + + {:ok, [db_name: db_name]} + end + + test "PUT /dbname/_design/foo", context do + db_name = context[:db_name] + resp = Couch.put("/#{db_name}/_design/foo", body: %{stuff: "here"}) + assert resp.status_code == 201 + end + + test "PUT /dbname/_design/foo to update", context do + db_name = context[:db_name] + ddoc_id = "_design/foo" + + ddoc = %{ + _id: ddoc_id, + stuff: "here" + } + + resp = Couch.put("/#{db_name}/#{ddoc_id}", body: ddoc) + assert resp.status_code == 201 + %{body: body} = resp + + ddoc = Map.put(ddoc, :_rev, body["rev"]) + ddoc = Map.put(ddoc, :other, "attribute") + resp = Couch.put("/#{db_name}/#{ddoc_id}", body: ddoc) + assert resp.status_code == 201 + end + + test "PUT /dbname/_design/foo/readme.txt", context do + db_name = context[:db_name] + ddoc_id = "_design/foo" + + ddoc = %{ + _id: ddoc_id, + stuff: "here" + } + + resp = Couch.put("/#{db_name}/#{ddoc_id}", body: ddoc) + assert resp.status_code == 201 + %{body: body} = resp + + att = "This is a readme.txt" + + opts = [ + headers: [{:"Content-Type", "text/plain"}], + query: [rev: body["rev"]], + body: att + ] + + resp = Couch.put("/#{db_name}/#{ddoc_id}/readme.txt", opts) + assert resp.status_code == 201 + end + + test "DELETE /dbname/_design/foo", context do + db_name = context[:db_name] + ddoc_id = "_design/foo" + + ddoc = %{ + _id: ddoc_id, + stuff: "here" + } + + resp = Couch.put("/#{db_name}/#{ddoc_id}", body: ddoc) + assert resp.status_code == 201 + %{body: body} = resp + + resp = Couch.delete("/#{db_name}/#{ddoc_id}", query: [rev: body["rev"]]) + assert resp.status_code == 200 + end + + test "POST /dbname with design doc", context do + db_name = context[:db_name] + body = %{_id: "_design/foo", stuff: "here"} + resp = Couch.post("/#{db_name}", body: body) + assert resp.status_code == 201 + end + + test "POST /dbname/_bulk_docs with design doc", context do + db_name = context[:db_name] + body = %{:docs => [%{_id: "_design/foo", stuff: "here"}]} + resp = 
Couch.post("/#{db_name}/_bulk_docs", body: body) + assert resp.status_code == 201 + end + + test "GET /dbname/_design/foo", context do + db_name = context[:db_name] + resp = Couch.put("/#{db_name}/_design/foo", body: %{stuff: "here"}) + assert resp.status_code == 201 + + resp = Couch.get("/#{db_name}/_design/foo") + assert resp.status_code == 200 + end + + test "GET /dbname/_design/foo?rev=$rev", context do + db_name = context[:db_name] + resp = Couch.put("/#{db_name}/_design/foo", body: %{stuff: "here"}) + assert resp.status_code == 201 + %{body: body} = resp + + resp = Couch.get("/#{db_name}/_design/foo", query: [rev: body["rev"]]) + assert resp.status_code == 200 + end + + test "GET /dbname/_bulk_get", context do + db_name = context[:db_name] + resp = Couch.put("/#{db_name}/_design/foo", body: %{stuff: "here"}) + assert resp.status_code == 201 + + body = %{docs: [%{id: "_design/foo"}]} + resp = Couch.post("/#{db_name}/_bulk_get", body: body) + assert resp.status_code == 200 + %{body: body} = resp + + assert length(body["results"]) == 1 + + %{"results" => [%{"id" => "_design/foo", "docs" => [%{"ok" => _}]}]} = body + end + + test "GET /dbname/_bulk_get with rev", context do + db_name = context[:db_name] + resp = Couch.put("/#{db_name}/_design/foo", body: %{stuff: "here"}) + assert resp.status_code == 201 + %{body: body} = resp + + body = %{docs: [%{id: "_design/foo", rev: body["rev"]}]} + resp = Couch.post("/#{db_name}/_bulk_get", body: body) + assert resp.status_code == 200 + %{body: body} = resp + + assert length(body["results"]) == 1 + %{"results" => [%{"id" => "_design/foo", "docs" => [%{"ok" => _}]}]} = body + end + + test "GET /dbname/_all_docs?key=$ddoc_id", context do + db_name = context[:db_name] + resp = Couch.put("/#{db_name}/_design/foo", body: %{stuff: "here"}) + assert resp.status_code == 201 + + resp = Couch.get("/#{db_name}/_all_docs", query: [key: "\"_design/foo\""]) + assert resp.status_code == 200 + %{body: body} = resp + + assert 
length(body["rows"]) == 1 + %{"rows" => [%{"id" => "_design/foo"}]} = body + end + + test "GET /dbname/_design_docs", context do + db_name = context[:db_name] + resp = Couch.put("/#{db_name}/_design/foo", body: %{stuff: "here"}) + assert resp.status_code == 201 + + resp = Couch.get("/#{db_name}/_design_docs") + assert resp.status_code == 200 + %{body: body} = resp + + assert length(body["rows"]) == 1 + %{"rows" => [%{"id" => "_design/foo"}]} = body + end +end diff --git a/test/elixir/test/partition_design_docs_test.exs b/test/elixir/test/partition_design_docs_test.exs new file mode 100644 index 00000000000..42a2ced7726 --- /dev/null +++ b/test/elixir/test/partition_design_docs_test.exs @@ -0,0 +1,16 @@ +defmodule PartitionDesignDocsTest do + use CouchTestCase + + @moduledoc """ + Test Partition functionality for partition design docs + """ + + @tag :with_partitioned_db + test "/_partition/:pk/_design/doc 404", context do + db_name = context[:db_name] + + url = "/#{db_name}/_partition/fake-key/_design/mrtest/" + resp = Couch.get(url) + assert resp.status_code == 404 + end +end diff --git a/test/elixir/test/partition_helpers.exs b/test/elixir/test/partition_helpers.exs new file mode 100644 index 00000000000..6eac2b1a49e --- /dev/null +++ b/test/elixir/test/partition_helpers.exs @@ -0,0 +1,76 @@ +defmodule PartitionHelpers do + use ExUnit.Case + + def create_partition_docs(db_name, pk1 \\ "foo", pk2 \\ "bar") do + docs = + for i <- 1..100 do + id = + if rem(i, 2) == 0 do + "#{pk1}:#{i}" + else + "#{pk2}:#{i}" + end + + group = + if rem(i, 3) == 0 do + "one" + else + "two" + end + + %{ + :_id => id, + :value => i, + :some => "field", + :group => group + } + end + + resp = Couch.post("/#{db_name}/_bulk_docs", body: %{:w => 3, :docs => docs}) + assert resp.status_code == 201 + end + + def create_partition_ddoc(db_name, opts \\ %{}) do + map_fn = """ + function(doc) { + if (doc.some) { + emit(doc.value, doc.some); + } + } + """ + + default_ddoc = %{ + views: %{ + some: %{ 
+ map: map_fn + } + } + } + + ddoc = Enum.into(opts, default_ddoc) + + resp = Couch.put("/#{db_name}/_design/mrtest", body: ddoc) + assert resp.status_code == 201 + assert Map.has_key?(resp.body, "ok") == true + end + + def get_ids(resp) do + %{:body => %{"rows" => rows}} = resp + Enum.map(rows, fn row -> row["id"] end) + end + + def get_partitions(resp) do + %{:body => %{"rows" => rows}} = resp + + Enum.map(rows, fn row -> + [partition, _] = String.split(row["id"], ":") + partition + end) + end + + def assert_correct_partition(partitions, correct_partition) do + assert Enum.all?(partitions, fn partition -> + partition == correct_partition + end) + end +end diff --git a/test/elixir/test/partition_mango_test.exs b/test/elixir/test/partition_mango_test.exs new file mode 100644 index 00000000000..1471ddb0ace --- /dev/null +++ b/test/elixir/test/partition_mango_test.exs @@ -0,0 +1,591 @@ +defmodule PartitionMangoTest do + use CouchTestCase + import PartitionHelpers, except: [get_partitions: 1] + + @moduledoc """ + Test Partition functionality for mango + """ + def create_index(db_name, fields \\ ["some"], opts \\ %{}) do + default_index = %{ + index: %{ + fields: fields + } + } + + index = Enum.into(opts, default_index) + resp = Couch.post("/#{db_name}/_index", body: index) + + assert resp.status_code == 200 + assert resp.body["result"] == "created" + end + + def get_partitions(resp) do + %{:body => %{"docs" => docs}} = resp + + Enum.map(docs, fn doc -> + [partition, _] = String.split(doc["_id"], ":") + partition + end) + end + + @tag :with_partitioned_db + test "query using _id and partition works", context do + db_name = context[:db_name] + create_partition_docs(db_name) + create_index(db_name) + + url = "/#{db_name}/_partition/foo/_find" + + resp = + Couch.post(url, + body: %{ + selector: %{ + _id: %{ + "$gt": "foo:" + } + }, + limit: 20 + } + ) + + assert resp.status_code == 200 + partitions = get_partitions(resp) + assert length(partitions) == 20 + 
assert_correct_partition(partitions, "foo") + + url = "/#{db_name}/_find" + + resp = + Couch.post(url, + body: %{ + selector: %{ + _id: %{ + "$lt": "foo:" + } + }, + limit: 20 + } + ) + + assert resp.status_code == 200 + partitions = get_partitions(resp) + assert length(partitions) == 20 + assert_correct_partition(partitions, "bar") + end + + @tag :with_partitioned_db + test "query using _id works for global and local query", context do + db_name = context[:db_name] + create_partition_docs(db_name) + create_index(db_name) + + url = "/#{db_name}/_partition/foo/_find" + + resp = + Couch.post(url, + body: %{ + selector: %{ + _id: %{ + "$gt": 0 + } + }, + limit: 20 + } + ) + + assert resp.status_code == 200 + partitions = get_partitions(resp) + assert length(partitions) == 20 + assert_correct_partition(partitions, "foo") + + url = "/#{db_name}/_find" + + resp = + Couch.post(url, + body: %{ + selector: %{ + _id: %{ + "$gt": 0 + } + }, + limit: 20 + } + ) + + assert resp.status_code == 200 + partitions = get_partitions(resp) + assert length(partitions) == 20 + assert_correct_partition(partitions, "bar") + end + + @tag :with_partitioned_db + test "query with partitioned:true using index and $eq", context do + db_name = context[:db_name] + create_partition_docs(db_name) + create_index(db_name) + + url = "/#{db_name}/_partition/foo/_find" + + resp = + Couch.post(url, + body: %{ + selector: %{ + some: "field" + }, + limit: 20 + } + ) + + assert resp.status_code == 200 + partitions = get_partitions(resp) + assert length(partitions) == 20 + assert_correct_partition(partitions, "foo") + + url = "/#{db_name}/_partition/bar/_find" + + resp = + Couch.post(url, + body: %{ + selector: %{ + some: "field" + }, + limit: 20 + } + ) + + assert resp.status_code == 200 + partitions = get_partitions(resp) + assert length(partitions) == 20 + assert_correct_partition(partitions, "bar") + end + + @tag :with_partitioned_db + test "partitioned query using _all_docs with $eq", context do + 
db_name = context[:db_name] + create_partition_docs(db_name) + + url = "/#{db_name}/_partition/foo/_find" + + resp = + Couch.post(url, + body: %{ + selector: %{ + some: "field" + }, + limit: 20 + } + ) + + assert resp.status_code == 200 + partitions = get_partitions(resp) + assert length(partitions) == 20 + assert_correct_partition(partitions, "foo") + + url = "/#{db_name}/_partition/bar/_find" + + resp = + Couch.post(url, + body: %{ + selector: %{ + some: "field" + }, + limit: 20 + } + ) + + assert resp.status_code == 200 + partitions = get_partitions(resp) + assert length(partitions) == 20 + assert_correct_partition(partitions, "bar") + end + + @tag :with_db + test "non-partitioned query using _all_docs and $eq", context do + db_name = context[:db_name] + create_partition_docs(db_name) + + url = "/#{db_name}/_find" + + resp = + Couch.post(url, + body: %{ + selector: %{ + some: "field" + }, + skip: 40, + limit: 5 + } + ) + + assert resp.status_code == 200 + partitions = get_partitions(resp) + assert length(partitions) == 5 + assert partitions == ["bar", "bar", "bar", "bar", "bar"] + + url = "/#{db_name}/_find" + + resp = + Couch.post(url, + body: %{ + selector: %{ + some: "field" + }, + skip: 50, + limit: 5 + } + ) + + assert resp.status_code == 200 + partitions = get_partitions(resp) + assert length(partitions) == 5 + assert partitions == ["foo", "foo", "foo", "foo", "foo"] + end + + @tag :with_partitioned_db + test "partitioned query using index and range scan", context do + db_name = context[:db_name] + create_partition_docs(db_name, "foo", "bar42") + create_index(db_name, ["value"]) + + url = "/#{db_name}/_partition/foo/_find" + + resp = + Couch.post(url, + body: %{ + selector: %{ + value: %{ + "$gte": 6, + "$lt": 16 + } + } + } + ) + + assert resp.status_code == 200 + partitions = get_partitions(resp) + assert length(partitions) == 5 + assert_correct_partition(partitions, "foo") + + url = "/#{db_name}/_partition/bar42/_find" + + resp = + Couch.post(url, + 
body: %{ + selector: %{ + value: %{ + "$gte": 6, + "$lt": 16 + } + } + } + ) + + assert resp.status_code == 200 + partitions = get_partitions(resp) + assert length(partitions) == 5 + assert_correct_partition(partitions, "bar42") + end + + @tag :with_partitioned_db + test "partitioned query using _all_docs and range scan", context do + db_name = context[:db_name] + create_partition_docs(db_name) + + url = "/#{db_name}/_partition/foo/_find" + + resp = + Couch.post(url, + body: %{ + selector: %{ + value: %{ + "$gte": 6, + "$lt": 16 + } + } + } + ) + + assert resp.status_code == 200 + partitions = get_partitions(resp) + assert length(partitions) == 5 + assert_correct_partition(partitions, "foo") + + url = "/#{db_name}/_partition/bar/_find" + + resp = + Couch.post(url, + body: %{ + selector: %{ + value: %{ + "$gte": 6, + "$lt": 16 + } + } + } + ) + + assert resp.status_code == 200 + partitions = get_partitions(resp) + assert length(partitions) == 5 + assert_correct_partition(partitions, "bar") + end + + @tag :with_partitioned_db + test "partitioned query using _all_docs", context do + db_name = context[:db_name] + create_partition_docs(db_name, "foo", "bar42") + + url = "/#{db_name}/_partition/foo/_find" + + resp = + Couch.post(url, + body: %{ + selector: %{ + value: %{ + "$gte": 6, + "$lt": 16 + } + } + } + ) + + assert resp.status_code == 200 + partitions = get_partitions(resp) + assert length(partitions) == 5 + assert_correct_partition(partitions, "foo") + + url = "/#{db_name}/_partition/bar42/_find" + + resp = + Couch.post(url, + body: %{ + selector: %{ + value: %{ + "$gte": 6, + "$lt": 16 + } + } + } + ) + + assert resp.status_code == 200 + partitions = get_partitions(resp) + assert length(partitions) == 5 + assert_correct_partition(partitions, "bar42") + end + + @tag :with_partitioned_db + test "explain works with partitions", context do + db_name = context[:db_name] + create_partition_docs(db_name) + create_index(db_name, ["some"]) + + url = 
"/#{db_name}/_partition/foo/_explain" + + resp = + Couch.post(url, + body: %{ + selector: %{ + value: %{ + "$gte": 6, + "$lt": 16 + } + } + } + ) + + %{:body => body} = resp + + assert body["index"]["name"] == "_all_docs" + assert body["mrargs"]["partition"] == "foo" + + url = "/#{db_name}/_partition/bar/_explain" + + resp = + Couch.post(url, + body: %{ + selector: %{ + some: "field" + } + } + ) + + %{:body => body} = resp + + assert body["index"]["def"] == %{"fields" => [%{"some" => "asc"}]} + assert body["mrargs"]["partition"] == "bar" + end + + @tag :with_db + test "explain works with non partitioned db", context do + db_name = context[:db_name] + create_partition_docs(db_name) + create_index(db_name, ["some"]) + + url = "/#{db_name}/_explain" + + resp = + Couch.post(url, + body: %{ + selector: %{ + value: %{ + "$gte": 6, + "$lt": 16 + } + } + } + ) + + %{:body => body} = resp + + assert body["index"]["name"] == "_all_docs" + assert body["mrargs"]["partition"] == :null + + resp = + Couch.post(url, + body: %{ + selector: %{ + some: "field" + } + } + ) + + %{:body => body} = resp + + assert body["index"]["def"] == %{"fields" => [%{"some" => "asc"}]} + assert body["mrargs"]["partition"] == :null + end + + @tag :with_partitioned_db + test "partitioned query using bookmarks", context do + db_name = context[:db_name] + create_partition_docs(db_name) + create_index(db_name, ["value"]) + + url = "/#{db_name}/_partition/foo/_find" + + resp = + Couch.post(url, + body: %{ + selector: %{ + value: %{ + "$gte": 6, + "$lt": 16 + } + }, + limit: 3 + } + ) + + assert resp.status_code == 200 + partitions = get_partitions(resp) + assert length(partitions) == 3 + assert_correct_partition(partitions, "foo") + + %{:body => %{"bookmark" => bookmark}} = resp + + resp = + Couch.post(url, + body: %{ + selector: %{ + value: %{ + "$gte": 6, + "$lt": 16 + } + }, + limit: 3, + bookmark: bookmark + } + ) + + assert resp.status_code == 200 + partitions = get_partitions(resp) + assert 
length(partitions) == 2 + assert_correct_partition(partitions, "foo") + end + + @tag :with_partitioned_db + test "global query uses global index", context do + db_name = context[:db_name] + create_partition_docs(db_name) + create_index(db_name, ["some"], %{partitioned: false}) + + url = "/#{db_name}/_explain" + + selector = %{ + selector: %{ + some: "field" + }, + limit: 100 + } + + resp = Couch.post(url, body: selector) + assert resp.status_code == 200 + %{:body => body} = resp + assert body["index"]["def"] == %{"fields" => [%{"some" => "asc"}]} + + url = "/#{db_name}/_find" + resp = Couch.post(url, body: selector) + assert resp.status_code == 200 + + partitions = get_partitions(resp) + assert length(partitions) == 100 + end + + @tag :with_partitioned_db + test "global query does not use partition index", context do + db_name = context[:db_name] + create_partition_docs(db_name) + create_index(db_name, ["some"]) + + url = "/#{db_name}/_explain" + + selector = %{ + selector: %{ + some: "field" + }, + limit: 100 + } + + resp = Couch.post(url, body: selector) + %{:body => body} = resp + assert body["index"]["name"] == "_all_docs" + + url = "/#{db_name}/_find" + resp = Couch.post(url, body: selector) + + assert resp.status_code == 200 + + partitions = get_partitions(resp) + assert length(partitions) == 100 + end + + @tag :with_partitioned_db + test "partitioned query does not use global index", context do + db_name = context[:db_name] + create_partition_docs(db_name) + create_index(db_name, ["some"], %{partitioned: false}) + + url = "/#{db_name}/_partition/foo/_explain" + + selector = %{ + selector: %{ + some: "field" + }, + limit: 50 + } + + resp = Couch.post(url, body: selector) + assert resp.status_code == 200 + %{:body => body} = resp + assert body["index"]["name"] == "_all_docs" + + url = "/#{db_name}/_partition/foo/_find" + resp = Couch.post(url, body: selector) + assert resp.status_code == 200 + + partitions = get_partitions(resp) + assert length(partitions) == 
50 + assert_correct_partition(partitions, "foo") + end +end diff --git a/test/elixir/test/partition_size_test.exs b/test/elixir/test/partition_size_test.exs new file mode 100644 index 00000000000..c4d235b77c5 --- /dev/null +++ b/test/elixir/test/partition_size_test.exs @@ -0,0 +1,357 @@ +defmodule PartitionSizeTest do + use CouchTestCase + + @moduledoc """ + Test Partition size functionality + """ + + setup do + db_name = random_db_name() + {:ok, _} = create_db(db_name, query: %{partitioned: true, q: 1}) + on_exit(fn -> delete_db(db_name) end) + + {:ok, [db_name: db_name]} + end + + def get_db_info(dbname) do + resp = Couch.get("/#{dbname}") + assert resp.status_code == 200 + %{:body => body} = resp + body + end + + def get_partition_info(dbname, partition) do + resp = Couch.get("/#{dbname}/_partition/#{partition}") + assert resp.status_code == 200 + %{:body => body} = resp + body + end + + def mk_partition(i) do + i |> rem(10) |> Integer.to_string() |> String.pad_leading(3, "0") + end + + def mk_docid(i) do + id = i |> Integer.to_string() |> String.pad_leading(4, "0") + "#{mk_partition(i)}:#{id}" + end + + def mk_docs(db_name) do + docs = + for i <- 1..1000 do + group = Integer.to_string(rem(i, 3)) + + %{ + :_id => mk_docid(i), + :value => i, + :some => "field", + :group => group + } + end + + body = %{:w => 3, :docs => docs} + resp = Couch.post("/#{db_name}/_bulk_docs", body: body) + assert resp.status_code == 201 + end + + def save_doc(db_name, doc) do + resp = Couch.post("/#{db_name}", query: [w: 3], body: doc) + assert resp.status_code == 201 + %{:body => body} = resp + body["rev"] + end + + test "get empty partition", context do + db_name = context[:db_name] + partition = "non_existent_partition" + + info = get_partition_info(db_name, partition) + + assert info["doc_count"] == 0 + assert info["doc_del_count"] == 0 + assert info["partition"] == partition + assert info["sizes"]["external"] == 0 + assert info["sizes"]["active"] == 0 + end + + test "unknown 
partition return's zero", context do + db_name = context[:db_name] + mk_docs(db_name) + + info = get_partition_info(db_name, "unknown") + assert info["doc_count"] == 0 + assert info["doc_del_count"] == 0 + assert info["sizes"]["external"] == 0 + assert info["sizes"]["active"] == 0 + end + + test "simple partition size", context do + db_name = context[:db_name] + save_doc(db_name, %{_id: "foo:bar", val: 42}) + + info = get_partition_info(db_name, "foo") + assert info["doc_count"] == 1 + assert info["doc_del_count"] == 0 + assert info["sizes"]["external"] > 0 + assert info["sizes"]["active"] > 0 + end + + test "adding docs increases partition sizes", context do + db_name = context[:db_name] + save_doc(db_name, %{_id: "foo:bar", val: 42}) + pre_info = get_partition_info(db_name, "foo") + + save_doc(db_name, %{_id: "foo:baz", val: 24}) + post_info = get_partition_info(db_name, "foo") + + assert post_info["doc_count"] == 2 + assert post_info["doc_del_count"] == 0 + assert post_info["sizes"]["external"] > pre_info["sizes"]["external"] + assert post_info["sizes"]["active"] > pre_info["sizes"]["active"] + end + + test "updating docs affects partition sizes", context do + db_name = context[:db_name] + rev1 = save_doc(db_name, %{_id: "foo:bar", val: ""}) + info1 = get_partition_info(db_name, "foo") + + rev2 = + save_doc(db_name, %{ + _id: "foo:bar", + _rev: rev1, + val: "this is a very long string that is so super long its beyond long" + }) + + info2 = get_partition_info(db_name, "foo") + + save_doc(db_name, %{ + _id: "foo:bar", + _rev: rev2, + val: "this string is shorter" + }) + + info3 = get_partition_info(db_name, "foo") + + assert info3["doc_count"] == 1 + assert info3["doc_del_count"] == 0 + + assert info3["sizes"]["external"] > info1["sizes"]["external"] + assert info2["sizes"]["external"] > info3["sizes"]["external"] + end + + test "deleting a doc affects partition sizes", context do + db_name = context[:db_name] + rev1 = save_doc(db_name, %{_id: "foo:bar", val: 
"some stuff here"}) + info1 = get_partition_info(db_name, "foo") + + save_doc(db_name, %{_id: "foo:bar", _rev: rev1, _deleted: true}) + info2 = get_partition_info(db_name, "foo") + + assert info1["doc_count"] == 1 + assert info1["doc_del_count"] == 0 + + assert info2["doc_count"] == 0 + assert info2["doc_del_count"] == 1 + + assert info2["sizes"]["external"] < info1["sizes"]["external"] + end + + test "design docs do not affect partition sizes", context do + db_name = context[:db_name] + mk_docs(db_name) + + pre_infos = + 0..9 + |> Enum.map(fn i -> + get_partition_info(db_name, mk_partition(i)) + end) + + 0..5 + |> Enum.map(fn i -> + base = i |> Integer.to_string() |> String.pad_leading(5, "0") + docid = "_design/#{base}" + save_doc(db_name, %{_id: docid, value: "some stuff here"}) + end) + + post_infos = + 0..9 + |> Enum.map(fn i -> + get_partition_info(db_name, mk_partition(i)) + end) + + assert post_infos == pre_infos + end + + test "get all partition sizes", context do + db_name = context[:db_name] + mk_docs(db_name) + + {esum, asum} = + 0..9 + |> Enum.reduce({0, 0}, fn i, {esize, asize} -> + partition = mk_partition(i) + info = get_partition_info(db_name, partition) + assert info["doc_count"] == 100 + assert info["doc_del_count"] == 0 + assert info["sizes"]["external"] > 0 + assert info["sizes"]["active"] > 0 + {esize + info["sizes"]["external"], asize + info["sizes"]["active"]} + end) + + db_info = get_db_info(db_name) + assert db_info["sizes"]["external"] >= esum + assert db_info["sizes"]["active"] >= asum + end + + test "get partition size with attachment", context do + db_name = context[:db_name] + + doc = %{ + _id: "foo:doc-with-attachment", + _attachments: %{ + "foo.txt": %{ + content_type: "text/plain", + data: Base.encode64("This is a text document to save") + } + } + } + + save_doc(db_name, doc) + + db_info = get_db_info(db_name) + foo_info = get_partition_info(db_name, "foo") + + assert foo_info["doc_count"] == 1 + assert foo_info["doc_del_count"] == 
0 + assert foo_info["sizes"]["active"] > 0 + assert foo_info["sizes"]["external"] > 0 + + assert foo_info["sizes"]["active"] <= db_info["sizes"]["active"] + assert foo_info["sizes"]["external"] <= db_info["sizes"]["external"] + end + + test "attachments don't affect other partitions", context do + db_name = context[:db_name] + mk_docs(db_name) + + pre_infos = + 0..9 + |> Enum.map(fn i -> + get_partition_info(db_name, mk_partition(i)) + end) + + doc = %{ + _id: "foo:doc-with-attachment", + _attachments: %{ + "foo.txt": %{ + content_type: "text/plain", + data: Base.encode64("This is a text document to save") + } + } + } + + save_doc(db_name, doc) + + att_info = get_partition_info(db_name, "foo") + assert att_info["doc_count"] == 1 + assert att_info["sizes"]["external"] > 0 + + post_infos = + 0..9 + |> Enum.map(fn i -> + get_partition_info(db_name, mk_partition(i)) + end) + + assert post_infos == pre_infos + + esize = + ([att_info] ++ post_infos) + |> Enum.reduce(0, fn info, acc -> + info["sizes"]["external"] + acc + end) + + db_info = get_db_info(db_name) + assert esize == db_info["sizes"]["external"] + end + + test "partition activity not affect other partition sizes", context do + db_name = context[:db_name] + mk_docs(db_name) + + partition1 = "000" + partition2 = "001" + + info2 = get_partition_info(db_name, partition2) + + doc_id = "#{partition1}:doc-with-attachment" + + doc = %{ + _id: doc_id, + _attachments: %{ + "foo.txt": %{ + content_type: "text/plain", + data: Base.encode64("This is a text document to save") + } + } + } + + doc_rev = save_doc(db_name, doc) + + info2_attach = get_partition_info(db_name, partition2) + assert info2_attach == info2 + + doc = + Enum.into( + %{ + another: "add another field", + _rev: doc_rev + }, + doc + ) + + doc_rev = save_doc(db_name, doc) + + info2_update = get_partition_info(db_name, partition2) + assert info2_update == info2 + + resp = Couch.delete("/#{db_name}/#{doc_id}", query: %{rev: doc_rev}) + assert resp.status_code 
== 200 + + info2_delete = get_partition_info(db_name, partition2) + assert info2_delete == info2 + end + + test "purging docs decreases partition size", context do + db_name = context[:db_name] + mk_docs(db_name) + + partition = "000" + + query = [ + start_key: "\"#{partition}:0000\"", + end_key: "\"#{partition}:9999\"", + limit: 50 + ] + + resp = Couch.get("/#{db_name}/_all_docs", query: query) + assert resp.status_code == 200 + %{body: body} = resp + + pre_info = get_partition_info(db_name, partition) + + pbody = + body["rows"] + |> Enum.reduce(%{}, fn row, acc -> + Map.put(acc, row["id"], [row["value"]["rev"]]) + end) + + resp = Couch.post("/#{db_name}/_purge", query: [w: 3], body: pbody) + assert resp.status_code == 201 + + post_info = get_partition_info(db_name, partition) + assert post_info["doc_count"] == pre_info["doc_count"] - 50 + assert post_info["doc_del_count"] == 0 + assert post_info["sizes"]["active"] < pre_info["sizes"]["active"] + assert post_info["sizes"]["external"] < pre_info["sizes"]["external"] + end +end diff --git a/test/elixir/test/partition_view_test.exs b/test/elixir/test/partition_view_test.exs new file mode 100644 index 00000000000..a255391725b --- /dev/null +++ b/test/elixir/test/partition_view_test.exs @@ -0,0 +1,299 @@ +defmodule ViewPartitionTest do + use CouchTestCase + import PartitionHelpers + + @moduledoc """ + Test Partition functionality for views + """ + + setup_all do + db_name = random_db_name() + {:ok, _} = create_db(db_name, query: %{partitioned: true, q: 1}) + on_exit(fn -> delete_db(db_name) end) + + create_partition_docs(db_name) + + map_fun1 = """ + function(doc) { + if (doc.some) { + emit(doc.value, doc.some); + } + } + """ + + map_fun2 = """ + function(doc) { + if (doc.group) { + emit([doc.some, doc.group], 1); + } + } + """ + + query = %{:w => 3} + + body = %{ + :docs => [ + %{ + _id: "_design/map", + views: %{some: %{map: map_fun1}} + }, + %{ + _id: "_design/map_some", + views: %{some: %{map: map_fun2}} + }, + %{ 
+ _id: "_design/partitioned_true", + views: %{some: %{map: map_fun1}}, + options: %{partitioned: true} + }, + %{ + _id: "_design/partitioned_false", + views: %{some: %{map: map_fun1}}, + options: %{partitioned: false} + }, + %{ + _id: "_design/reduce", + views: %{some: %{map: map_fun2, reduce: "_count"}} + }, + %{ + _id: "_design/include_ddocs", + views: %{some: %{map: map_fun1}}, + options: %{include_design: true} + } + ] + } + + resp = Couch.post("/#{db_name}/_bulk_docs", query: query, body: body) + Enum.each(resp.body, &assert(&1["ok"])) + + {:ok, [db_name: db_name]} + end + + def get_reduce_result(resp) do + %{:body => %{"rows" => rows}} = resp + rows + end + + test "query with partitioned:true returns partitioned fields", context do + db_name = context[:db_name] + + url = "/#{db_name}/_partition/foo/_design/partitioned_true/_view/some" + resp = Couch.get(url) + assert resp.status_code == 200 + partitions = get_partitions(resp) + assert Enum.dedup(partitions) == ["foo"] + + url = "/#{db_name}/_partition/bar/_design/partitioned_true/_view/some" + resp = Couch.get(url) + assert resp.status_code == 200 + partitions = get_partitions(resp) + assert Enum.dedup(partitions) == ["bar"] + end + + test "default view query returns partitioned fields", context do + db_name = context[:db_name] + + url = "/#{db_name}/_partition/foo/_design/map/_view/some" + resp = Couch.get(url) + assert resp.status_code == 200 + partitions = get_partitions(resp) + assert Enum.dedup(partitions) == ["foo"] + + url = "/#{db_name}/_partition/bar/_design/map/_view/some" + resp = Couch.get(url) + assert resp.status_code == 200 + partitions = get_partitions(resp) + assert Enum.dedup(partitions) == ["bar"] + end + + test "query will return zero results for wrong inputs", context do + db_name = context[:db_name] + + url = "/#{db_name}/_partition/foo/_design/map/_view/some" + resp = Couch.get(url, query: %{start_key: "\"foo:12\""}) + assert resp.status_code == 200 + assert Map.get(resp, :body)["rows"] 
== [] + end + + test "partitioned ddoc cannot be used in global query", context do + db_name = context[:db_name] + + url = "/#{db_name}/_design/map/_view/some" + resp = Couch.get(url) + %{:body => %{"reason" => reason}} = resp + assert resp.status_code == 400 + assert Regex.match?(~r/mandatory for queries to this view./, reason) + end + + test "partitioned query cannot be used with global ddoc", context do + db_name = context[:db_name] + + url = "/#{db_name}/_partition/foo/_design/partitioned_false/_view/some" + resp = Couch.get(url) + %{:body => %{"reason" => reason}} = resp + assert resp.status_code == 400 + assert Regex.match?(~r/is not supported in this design doc/, reason) + end + + test "view query returns all docs for global query", context do + db_name = context[:db_name] + + url = "/#{db_name}/_design/partitioned_false/_view/some" + resp = Couch.get(url) + assert resp.status_code == 200 + ids = get_ids(resp) + assert length(ids) == 100 + end + + test "partition query errors with incorrect partition supplied", context do + db_name = context[:db_name] + + url = "/#{db_name}/_partition/_bar/_design/map/_view/some" + resp = Couch.get(url) + assert resp.status_code == 400 + + url = "/#{db_name}/_partition//_design/map/_view/some" + resp = Couch.get(url) + assert resp.status_code == 400 + end + + test "partitioned query works with startkey, endkey range", context do + db_name = context[:db_name] + + url = "/#{db_name}/_partition/foo/_design/map/_view/some" + resp = Couch.get(url, query: %{start_key: 12, end_key: 20}) + assert resp.status_code == 200 + partitions = get_partitions(resp) + assert length(partitions) == 5 + assert Enum.dedup(partitions) == ["foo"] + end + + test "partitioned query works with keys", context do + db_name = context[:db_name] + + url = "/#{db_name}/_partition/foo/_design/map/_view/some" + resp = Couch.post(url, body: %{keys: [2, 4, 6]}) + assert resp.status_code == 200 + ids = get_ids(resp) + assert length(ids) == 3 + assert ids == 
["foo:2", "foo:4", "foo:6"] + end + + test "global query works with keys", context do + db_name = context[:db_name] + + url = "/#{db_name}/_design/partitioned_false/_view/some" + resp = Couch.post(url, body: %{keys: [2, 4, 6]}) + assert resp.status_code == 200 + ids = get_ids(resp) + assert length(ids) == 3 + assert ids == ["foo:2", "foo:4", "foo:6"] + end + + test "partition query works with limit", context do + db_name = context[:db_name] + + url = "/#{db_name}/_partition/foo/_design/map/_view/some" + resp = Couch.get(url, query: %{limit: 5}) + assert resp.status_code == 200 + partitions = get_partitions(resp) + assert length(partitions) == 5 + assert Enum.dedup(partitions) == ["foo"] + end + + test "partition query with descending", context do + db_name = context[:db_name] + + url = "/#{db_name}/_partition/foo/_design/map/_view/some" + resp = Couch.get(url, query: %{descending: true, limit: 5}) + assert resp.status_code == 200 + ids = get_ids(resp) + assert length(ids) == 5 + assert ids == ["foo:100", "foo:98", "foo:96", "foo:94", "foo:92"] + + resp = Couch.get(url, query: %{descending: false, limit: 5}) + assert resp.status_code == 200 + ids = get_ids(resp) + assert length(ids) == 5 + assert ids == ["foo:2", "foo:4", "foo:6", "foo:8", "foo:10"] + end + + test "partition query with skip", context do + db_name = context[:db_name] + + url = "/#{db_name}/_partition/foo/_design/map/_view/some" + resp = Couch.get(url, query: %{skip: 5, limit: 5}) + assert resp.status_code == 200 + ids = get_ids(resp) + assert length(ids) == 5 + assert ids == ["foo:12", "foo:14", "foo:16", "foo:18", "foo:20"] + end + + test "partition query with key", context do + db_name = context[:db_name] + + url = "/#{db_name}/_partition/foo/_design/map/_view/some" + resp = Couch.get(url, query: %{key: 22}) + assert resp.status_code == 200 + ids = get_ids(resp) + assert length(ids) == 1 + assert ids == ["foo:22"] + end + + test "partition query with startkey_docid and endkey_docid", context do + 
db_name = context[:db_name] + + url = "/#{db_name}/_partition/foo/_design/map_some/_view/some" + + resp = + Couch.get(url, + query: %{ + startkey: "[\"field\",\"one\"]", + endkey: "[\"field\",\"one\"]", + startkey_docid: "foo:12", + endkey_docid: "foo:30" + } + ) + + assert resp.status_code == 200 + ids = get_ids(resp) + assert ids == ["foo:12", "foo:18", "foo:24", "foo:30"] + end + + test "query with reduce works", context do + db_name = context[:db_name] + + url = "/#{db_name}/_partition/foo/_design/reduce/_view/some" + resp = Couch.get(url, query: %{reduce: true, group_level: 1}) + assert resp.status_code == 200 + results = get_reduce_result(resp) + assert results == [%{"key" => ["field"], "value" => 50}] + + resp = Couch.get(url, query: %{reduce: true, group_level: 2}) + results = get_reduce_result(resp) + + assert results == [ + %{"key" => ["field", "one"], "value" => 16}, + %{"key" => ["field", "two"], "value" => 34} + ] + + resp = Couch.get(url, query: %{reduce: true, group: true}) + results = get_reduce_result(resp) + + assert results == [ + %{"key" => ["field", "one"], "value" => 16}, + %{"key" => ["field", "two"], "value" => 34} + ] + end + + test "include_design works correctly", context do + db_name = context[:db_name] + + url = "/#{db_name}/_partition/foo/_design/include_ddocs/_view/some" + resp = Couch.get(url) + assert resp.status_code == 200 + partitions = get_partitions(resp) + assert length(partitions) == 50 + assert Enum.dedup(partitions) == ["foo"] + end +end diff --git a/test/elixir/test/partition_view_update_test.exs b/test/elixir/test/partition_view_update_test.exs new file mode 100644 index 00000000000..502d5fabeca --- /dev/null +++ b/test/elixir/test/partition_view_update_test.exs @@ -0,0 +1,155 @@ +defmodule PartitionViewUpdateTest do + use CouchTestCase + import PartitionHelpers + + @moduledoc """ + Test Partition view update functionality + """ + @tag :with_partitioned_db + test "view updates properly remove old keys", context do + 
db_name = context[:db_name] + create_partition_docs(db_name, "foo", "bar") + create_partition_ddoc(db_name) + + check_key = fn key, num_rows -> + url = "/#{db_name}/_partition/foo/_design/mrtest/_view/some" + resp = Couch.get(url, query: [key: key]) + assert resp.status_code == 200 + assert length(resp.body["rows"]) == num_rows + end + + check_key.(2, 1) + + resp = Couch.get("/#{db_name}/foo:2") + doc = Map.put(resp.body, "value", 4) + resp = Couch.put("/#{db_name}/foo:2", query: [w: 3], body: doc) + assert resp.status_code >= 201 and resp.status_code <= 202 + + check_key.(4, 2) + check_key.(2, 0) + end + + @tag :with_partitioned_db + test "query with update=false works", context do + db_name = context[:db_name] + create_partition_docs(db_name) + create_partition_ddoc(db_name) + + url = "/#{db_name}/_partition/foo/_design/mrtest/_view/some" + + resp = + Couch.get(url, + query: %{ + update: "true", + limit: 3 + } + ) + + assert resp.status_code == 200 + ids = get_ids(resp) + assert ids == ["foo:2", "foo:4", "foo:6"] + + # Avoid race conditions by attempting to get a full response + # from every shard before we do our update:false test + for _ <- 1..12 do + resp = Couch.get(url) + assert resp.status_code == 200 + end + + Couch.put("/#{db_name}/foo:1", body: %{some: "field"}) + + resp = + Couch.get(url, + query: %{ + update: "false", + limit: 3 + } + ) + + assert resp.status_code == 200 + ids = get_ids(resp) + assert ids == ["foo:2", "foo:4", "foo:6"] + end + + @tag :with_partitioned_db + test "purge removes view rows", context do + db_name = context[:db_name] + create_partition_docs(db_name) + create_partition_ddoc(db_name) + + url = "/#{db_name}/_partition/foo/_design/mrtest/_view/some" + + resp = Couch.get(url) + assert resp.status_code == 200 + %{body: body} = resp + assert length(body["rows"]) == 50 + + resp = Couch.get("/#{db_name}/foo:2") + assert resp.status_code == 200 + %{body: body} = resp + rev = body["_rev"] + + body = %{"foo:2" => [rev]} + resp = 
Couch.post("/#{db_name}/_purge", query: [w: 3], body: body) + assert resp.status_code == 201 + + resp = Couch.get(url) + assert resp.status_code == 200 + %{body: body} = resp + assert length(body["rows"]) == 49 + end + + @tag :with_partitioned_db + test "purged conflict changes view rows", context do + db_name = context[:db_name] + create_partition_docs(db_name) + create_partition_ddoc(db_name) + + url = "/#{db_name}/_partition/foo/_design/mrtest/_view/some" + + resp = Couch.get(url) + assert resp.status_code == 200 + %{body: body} = resp + assert length(body["rows"]) == 50 + + # Create a conflict on foo:2. Since the 4096 + # value is deeper than the conflict we can assert + # that's in the view before the purge and assert + # that 8192 is in the view after the purge. + resp = Couch.get("/#{db_name}/foo:2") + assert resp.status_code == 200 + %{body: body} = resp + rev1 = body["_rev"] + + doc = %{_id: "foo:2", _rev: rev1, value: 4096, some: "field"} + resp = Couch.post("/#{db_name}", query: [w: 3], body: doc) + assert resp.status_code == 201 + %{body: body} = resp + rev2 = body["rev"] + + query = [w: 3, new_edits: false] + conflict_rev = "1-4a75b4efa0804859b3dfd327cbc1c2f9" + doc = %{_id: "foo:2", _rev: conflict_rev, value: 8192, some: "field"} + resp = Couch.put("/#{db_name}/foo:2", query: query, body: doc) + assert resp.status_code == 201 + + # Check that our expected row exists + resp = Couch.get(url, query: [key: 4096]) + assert resp.status_code == 200 + %{body: body} = resp + [row] = body["rows"] + assert row["id"] == "foo:2" + + # Remove the current row to be replaced with + # a row from the conflict + body = %{"foo:2" => [rev2]} + resp = Couch.post("/#{db_name}/_purge", query: [w: 3], body: body) + assert resp.status_code == 201 + + resp = Couch.get(url, query: [key: 8192]) + assert resp.status_code == 200 + %{body: body} = resp + [row] = body["rows"] + assert row["id"] == "foo:2" + end +end diff --git a/test/elixir/test/test_helper.exs 
b/test/elixir/test/test_helper.exs index 33041fd02bb..d6843eb2181 100644 --- a/test/elixir/test/test_helper.exs +++ b/test/elixir/test/test_helper.exs @@ -1,2 +1,3 @@ ExUnit.configure(exclude: [pending: true]) ExUnit.start() +Code.require_file("partition_helpers.exs", __DIR__)