Skip to content

Commit

Permalink
Implement a background ddoc scanner
Browse files Browse the repository at this point in the history
[WIP]
  - plugin API completed
  - db / shard / ddoc traversal completed
  - ddoc compilation check implemented
  - make traversal skipping / and limits configurable

TODO:

  - feed sample documents to map and reduce function and compare results
  - add tests
  - more plugins?

The app scans all the dbs and docs. It has a plugin system to allow gathering
various things from a cluster. The first use is to scan all the javascript
design docs and run them through the new QuickJS javascript engine.

Other possible uses:
 - Detect features used in design docs
 - Gather total db and view sizes
 - Scan for document features (docs of certain sizes, contained certain fields
   and values).
 - Scan replication dbs/doc features

The plugins are managed as individual process by the couch_scanner_server with
the start_link/1 and stop/1 functions. After a plugin runner is spawned, the
only thing couch_scanner_server does is wait for it to exit.

The plugin runner process may exit normally, crash, or exit with {shutdown,
{reschedule, TSec}} if they want to reschedule to run again at some point the
future (next day, a week later, etc).

After the process starts, it will load and validate the plugin module. Then, it
will start scanning all the dbs and docs on the local node. Shard ranges will
be scanned only on one of the cluster nodes to avoid duplicating work. For
instance, if there are 2 shard ranges, 0-7, 8-f, with copies on nodes n1, n2,
n3. Then, 0-7 might be scanned on n1 only, and 8-f on n3.

The plugin API is the following (as OTP callback definitions):

```erlang
-callback start(EJson :: #{}) ->
    {ok, St :: term()} | {reschedule, TSec :: integer()}.
-callback resume(Json :: #{}) ->
    {ok, St :: term()} | {reschedule, TSec :: integer()}.
-callback stop(St :: term()) ->
    {ok | {reschedule, TSec :: integer()}, EJson :: #{}}.
-callback checkpoint(St :: term()) ->
    {ok, EJson :: #{}}.
-callback db(St :: term(), DbName :: binary()) ->
    {ok | skip | stop, St1 :: term()}.
-callback ddoc(St :: term(), DbName :: binary(), #doc{}) ->
    {ok | stop, St1 :: term()}.
-callback shards(St :: term(), [#shard{}]) ->
    {[#shard{}], St1 :: term()}.
-callback db_opened(St :: term(), Db :: term()) ->
    {ok, St :: term()}.
-callback doc_id(St :: term(), DocId :: binary(), Db :: term()) ->
    {ok | skip | stop, St1 :: term()}.
-callback doc(St :: term(), Db :: term(), #doc{}) ->
    {ok | stop, St1 :: term()}.
-callback db_closing(St :: term(), Db :: term()) ->
    {ok, St1 :: term()}.
```
  • Loading branch information
nickva committed Mar 8, 2024
1 parent a73e117 commit 79e9f8a
Show file tree
Hide file tree
Showing 13 changed files with 1,340 additions and 0 deletions.
1 change: 1 addition & 0 deletions rebar.config.script
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,7 @@ SubDirs = [
"src/smoosh",
"src/weatherreport",
"src/couch_prometheus",
"src/couch_scanner",
"rel"
].

Expand Down
2 changes: 2 additions & 0 deletions rel/reltool.config
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@
snappy,
weatherreport,
couch_prometheus,
couch_scanner,

%% extra
nouveau,
Expand Down Expand Up @@ -127,6 +128,7 @@
{app, snappy, [{incl_cond, include}]},
{app, weatherreport, [{incl_cond, include}]},
{app, couch_prometheus, [{incl_cond, include}]},
{app, couch_scanner, [{incl_cond, include}]},

%% extra
{app, nouveau, [{incl_cond, include}]},
Expand Down
4 changes: 4 additions & 0 deletions src/couch_scanner/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Couch Scanner
================

Traverse all dbs periodically and emit various reports
29 changes: 29 additions & 0 deletions src/couch_scanner/src/couch_scanner.app.src
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
% Licensed under the Apache License, Version 2.0 (the "License"); you may not
% use this file except in compliance with the License. You may obtain a copy of
% the License at
%
% http://www.apache.org/licenses/LICENSE-2.0
%
% Unless required by applicable law or agreed to in writing, software
% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
% License for the specific language governing permissions and limitations under
% the License.

{application, couch_scanner, [
{description, "CouchDB Scanner"},
{vsn, git},
{registered, [
couch_scanner_server
]},
{applications, [
kernel,
stdlib,
crypto,
config,
couch_log,
couch_stats,
fabric
]},
{mod, {couch_scanner_app, []}}
]}.
20 changes: 20 additions & 0 deletions src/couch_scanner/src/couch_scanner.erl
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
% Licensed under the Apache License, Version 2.0 (the "License"); you may not
% use this file except in compliance with the License. You may obtain a copy of
% the License at
%
% http://www.apache.org/licenses/LICENSE-2.0
%
% Unless required by applicable law or agreed to in writing, software
% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
% License for the specific language governing permissions and limitations under
% the License.

-module(couch_scanner).

-export([
status/0
]).

status() ->
couch_scanner_server:status().
23 changes: 23 additions & 0 deletions src/couch_scanner/src/couch_scanner_app.erl
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
% Licensed under the Apache License, Version 2.0 (the "License"); you may not
% use this file except in compliance with the License. You may obtain a copy of
% the License at
%
% http://www.apache.org/licenses/LICENSE-2.0
%
% Unless required by applicable law or agreed to in writing, software
% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
% License for the specific language governing permissions and limitations under
% the License.

-module(couch_scanner_app).

-behaviour(application).

-export([start/2, stop/1]).

start(_StartType, _StartArgs) ->
couch_scanner_sup:start_link().

stop(_State) ->
ok.
105 changes: 105 additions & 0 deletions src/couch_scanner/src/couch_scanner_checkpoint.erl
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
% Licensed under the Apache License, Version 2.0 (the "License"); you may not
% use this file except in compliance with the License. You may obtain a copy of
% the License at
%
% http://www.apache.org/licenses/LICENSE-2.0
%
% Unless required by applicable law or agreed to in writing, software
% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
% License for the specific language governing permissions and limitations under
% the License.

-module(couch_scanner_checkpoint).

-export([
write/2,
read/1,
reset/1
]).

-include_lib("couch/include/couch_db.hrl").

write(<<Plugin/binary>>, #{} = State) ->
with_db(fun(Db) -> update_doc(Db, doc_id(Plugin), State) end).

read(<<Plugin/binary>>) ->
with_db(fun(Db) -> load_doc(Db, doc_id(Plugin)) end).

reset(<<Plugin/binary>>) ->
with_db(fun(Db) -> delete_doc(Db, doc_id(Plugin)) end).

% Private functions

doc_id(Plugin) ->
% Dashes are more conventional for doc ids
Plugin1 = binary:replace(Plugin, <<"_">>, <<"-">>, [global]),
<<?LOCAL_DOC_PREFIX, Plugin1/binary>>.

delete_doc(Db, DocId) ->
case couch_db:open_doc(Db, DocId, []) of
{ok, #doc{revs = {_, RevList}}} ->
{ok, _} = couch_db:delete_doc(Db, DocId, RevList),
ok;
{not_found, _} ->
ok
end.

update_doc(Db, DocId, #{} = Body) ->
EJsonBody = ?JSON_DECODE(?JSON_ENCODE(Body#{<<"_id">> => DocId})),
Doc = couch_doc:from_json_obj(EJsonBody),
case couch_db:open_doc(Db, DocId, []) of
{ok, #doc{revs = Revs}} ->
{ok, _} = couch_db:update_doc(Db, Doc#doc{revs = Revs}, []);
{not_found, _} ->
{ok, _} = couch_db:update_doc(Db, Doc, [])
end,
ok.

load_doc(Db, DocId) ->
case couch_db:open_doc(Db, DocId, [ejson_body]) of
{ok, #doc{body = EJsonBody}} ->
?JSON_DECODE(?JSON_ENCODE(EJsonBody), [return_maps]);
{not_found, _} ->
not_found
end.

with_db(Fun) when is_function(Fun, 1) ->
DbName = config:get("mem3", "shards_db", "_dbs"),
case mem3_util:ensure_exists(DbName) of
{ok, Db} ->
try
Fun(Db)
after
catch couch_db:close(Db)
end;
Else ->
throw(Else)
end.

-ifdef(TEST).

-include_lib("couch/include/couch_eunit.hrl").

couch_scanner_checkpoint_test_() ->
{
foreach,
fun test_util:start_couch/0,
fun test_util:stop_couch/1,
[
?TDEF_FE(t_read_write_reset)
]
}.

t_read_write_reset(_) ->
Plugin = <<"scanner_plugin_abc">>,
ok = reset(Plugin),
?assertEqual(ok, write(Plugin, #{<<"foo">> => 1})),
?assertEqual(#{<<"foo">> => 1}, read(Plugin)),
?assertEqual(ok, write(Plugin, #{<<"bar">> => 2})),
?assertEqual(#{<<"bar">> => 2}, read(Plugin)),
?assertEqual(not_found, read(<<"scanner_plugin_other">>)),
?assertEqual(ok, reset(Plugin)),
?assertEqual(not_found, read(Plugin)).

-endif.

0 comments on commit 79e9f8a

Please sign in to comment.