From d6e5d3223512b0b87e4f884ccbf7924efc287802 Mon Sep 17 00:00:00 2001
From: Robert Newson
Date: Mon, 22 Sep 2025 11:12:03 +0100
Subject: [PATCH 1/3] add get/set interface for auto purge properties

---
 src/chttpd/src/chttpd_db.erl             | 33 +++++++++++++++++++-
 src/chttpd/src/chttpd_httpd_handlers.erl |  1 +
 src/fabric/src/fabric.erl                | 10 ++++++-
 src/fabric/src/fabric_auto_purge.erl     | 38 ++++++++++++++++++++++++
 4 files changed, 80 insertions(+), 2 deletions(-)
 create mode 100644 src/fabric/src/fabric_auto_purge.erl

diff --git a/src/chttpd/src/chttpd_db.erl b/src/chttpd/src/chttpd_db.erl
index 4e1b75f11cb..ff97e7935cb 100644
--- a/src/chttpd/src/chttpd_db.erl
+++ b/src/chttpd/src/chttpd_db.erl
@@ -30,7 +30,8 @@
     handle_view_cleanup_req/2,
     update_doc/4,
     http_code_from_status/1,
-    handle_partition_req/2
+    handle_partition_req/2,
+    handle_auto_purge_req/2
 ]).
 
 -import(
@@ -390,6 +391,36 @@ update_partition_stats(PathParts) ->
             ok
     end.
 
+% HTTP handler for the `_auto_purge` database endpoint.
+%
+% GET replies with the database's auto purge properties as a JSON object.
+% PUT validates the JSON object in the request body, stores it as the
+% database's auto purge properties and replies 201 with {"ok": true}.
+% Any other method is answered through send_method_not_allowed/2.
+handle_auto_purge_req(#httpd{method = Method} = Req, Db) ->
+    case Method of
+        'GET' -> auto_purge_get(Req, Db);
+        'PUT' -> auto_purge_put(Req, Db);
+        _ -> send_method_not_allowed(Req, "GET,PUT,HEAD")
+    end.
+
+% Reply with the stored properties, or with the error fabric reported.
+auto_purge_get(Req, Db) ->
+    case fabric:get_auto_purge_props(Db) of
+        {ok, Stored} -> send_json(Req, {Stored});
+        {error, Reason} -> chttpd:send_error(Req, Reason)
+    end.
+
+% Validate and store the properties from the request body.
+% validate_auto_purge_props/1 throws bad_request for anything it rejects,
+% so nothing is stored unless the whole body is valid.
+auto_purge_put(Req, Db) ->
+    {Submitted} = chttpd:json_body_obj(Req),
+    validate_auto_purge_props(Submitted),
+    case fabric:set_auto_purge_props(Db, Submitted) of
+        ok -> send_json(Req, 201, {[{ok, true}]});
+        {error, Reason} -> chttpd:send_error(Req, Reason)
+    end.
+
+% Check the property list taken from a PUT `_auto_purge` body.
+%
+% Returns ok when every entry is `deleted_document_ttl` with an integer
+% value (an empty list is accepted). Throws {bad_request, Message} on the
+% first entry that is a non-integer TTL, an unknown property, or not a
+% {Key, Value} pair at all.
+validate_auto_purge_props([]) ->
+    ok;
+validate_auto_purge_props([{<<"deleted_document_ttl">>, TTL} | Tail]) ->
+    case is_integer(TTL) of
+        true ->
+            validate_auto_purge_props(Tail);
+        false ->
+            throw({bad_request, <<"deleted_document_ttl must be an integer">>})
+    end;
+validate_auto_purge_props([{_Key, _Value} | _Tail]) ->
+    throw({bad_request, <<"invalid auto purge property">>});
+validate_auto_purge_props(_Malformed) ->
+    throw({bad_request, <<"malformed auto purge body">>}).
+
 handle_design_req(
     #httpd{
         path_parts = [_DbName, _Design, Name, <<"_", _/binary>> = Action | _Rest]
diff --git a/src/chttpd/src/chttpd_httpd_handlers.erl b/src/chttpd/src/chttpd_httpd_handlers.erl
index 932b52e5f6e..3e499b72d05 100644
--- a/src/chttpd/src/chttpd_httpd_handlers.erl
+++ b/src/chttpd/src/chttpd_httpd_handlers.erl
@@ -35,6 +35,7 @@ db_handler(<<"_design">>) -> fun chttpd_db:handle_design_req/2;
 db_handler(<<"_partition">>) -> fun chttpd_db:handle_partition_req/2;
 db_handler(<<"_temp_view">>) -> fun chttpd_view:handle_temp_view_req/2;
 db_handler(<<"_changes">>) -> fun chttpd_db:handle_changes_req/2;
+db_handler(<<"_auto_purge">>) -> fun chttpd_db:handle_auto_purge_req/2;
 db_handler(_) -> no_match.
 
 design_handler(<<"_view">>) -> fun chttpd_view:handle_view_req/3;
diff --git a/src/fabric/src/fabric.erl b/src/fabric/src/fabric.erl
index 0878c4ebcfc..99d370eaf36 100644
--- a/src/fabric/src/fabric.erl
+++ b/src/fabric/src/fabric.erl
@@ -35,7 +35,9 @@
     set_purge_infos_limit/3,
     get_purged_infos/1,
     compact/1, compact/2,
-    get_partition_info/2
+    get_partition_info/2,
+    get_auto_purge_props/1,
+    set_auto_purge_props/2
 ]).
 
 % Documents
@@ -132,6 +134,12 @@ get_db_info(DbName) ->
 get_partition_info(DbName, Partition) ->
     fabric_db_partition_info:go(dbname(DbName), Partition).
 
+%% @doc fetch the auto purge properties of a database; delegates to
+%% fabric_auto_purge:get/1 after normalising the name with dbname/1
+get_auto_purge_props(DbName) ->
+    fabric_auto_purge:get(dbname(DbName)).
+
+%% @doc store the auto purge properties of a database; delegates to
+%% fabric_auto_purge:set/2 after normalising the name with dbname/1
+set_auto_purge_props(DbName, AutoPurgeProps) ->
+    fabric_auto_purge:set(dbname(DbName), AutoPurgeProps).
+
 %% @doc the number of docs in a database
 %% @equiv get_doc_count(DbName, <<"_all_docs">>)
 get_doc_count(DbName) ->
diff --git a/src/fabric/src/fabric_auto_purge.erl b/src/fabric/src/fabric_auto_purge.erl
new file mode 100644
index 00000000000..abc754c29d0
--- /dev/null
+++ b/src/fabric/src/fabric_auto_purge.erl
@@ -0,0 +1,38 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+%   http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+-module(fabric_auto_purge).
+
+-export([get/1, set/2]).
+
+-include_lib("couch/include/couch_db.hrl").
+-define(KEY, <<"auto_purge">>).
+-define(PROPS, <<"props">>).
+
+% Read the auto purge properties of DbName.
+%
+% Looks up the <<"auto_purge">> entry in mem3:props(DbName) and returns its
+% contents as {ok, PropList}; a database without that entry yields {ok, []}.
+get(DbName) when is_binary(DbName) ->
+    {Stored} = couch_util:get_value(?KEY, mem3:props(DbName), {[]}),
+    {ok, Stored}.
+
+% Replace the auto purge properties of DbName with AutoPurgeProps.
+%
+% The database's document is fetched with mem3:get_db_doc/1, converted to
+% JSON, and written back through mem3:update_db_doc/1 with
+% props.auto_purge set to the new object. Any previous auto_purge object is
+% overwritten, not merged. Returns ok, or the {error, Reason} tuple from
+% the update.
+set(DbName, AutoPurgeProps) when is_binary(DbName) ->
+    {ok, #doc{} = DbDoc} = mem3:get_db_doc(DbName),
+    {Fields} = couch_doc:to_json_obj(DbDoc, []),
+    {DbProps} = couch_util:get_value(?PROPS, Fields, {[]}),
+    NewDbProps = lists:keystore(?KEY, 1, DbProps, {?KEY, {AutoPurgeProps}}),
+    NewFields = lists:keystore(?PROPS, 1, Fields, {?PROPS, {NewDbProps}}),
+    case mem3:update_db_doc(couch_doc:from_json_obj({NewFields})) of
+        {ok, _NewRev} -> ok;
+        {error, _Reason} = Error -> Error
+    end.
From c267eed9110cfd8853d2e018a9601481f96d0b25 Mon Sep 17 00:00:00 2001
From: Robert Newson
Date: Mon, 22 Sep 2025 11:12:37 +0100
Subject: [PATCH 2/3] purge deleted documents that exceed TTL

---
 src/couch/src/couch_auto_purge_plugin.erl     | 161 ++++++++++++++++++
 .../eunit/couch_auto_purge_plugin_tests.erl   | 118 +++++++++++++
 2 files changed, 279 insertions(+)
 create mode 100644 src/couch/src/couch_auto_purge_plugin.erl
 create mode 100644 src/couch/test/eunit/couch_auto_purge_plugin_tests.erl

diff --git a/src/couch/src/couch_auto_purge_plugin.erl b/src/couch/src/couch_auto_purge_plugin.erl
new file mode 100644
index 00000000000..a6088cd42af
--- /dev/null
+++ b/src/couch/src/couch_auto_purge_plugin.erl
@@ -0,0 +1,161 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+%   http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+-module(couch_auto_purge_plugin).
+-behaviour(couch_scanner_plugin).
+
+-export([
+    start/2,
+    resume/2,
+    complete/1,
+    checkpoint/1,
+    db/2,
+    db_opened/2,
+    db_closing/2,
+    doc_fdi/3
+]).
+
+-include_lib("couch_scanner/include/couch_scanner_plugin.hrl").
+-include_lib("stdlib/include/assert.hrl").
+
+% Scanner callback: begin a new scan. Builds the plugin state (scan id and
+% rate limiter) and logs the start.
+start(ScanId, #{}) ->
+    St = init_config(ScanId),
+    % Log with meta(St), like every other log call in this module, so the
+    % rate limiter held in St is not handed to the logger as metadata.
+    ?INFO("Starting.", [], meta(St)),
+    {ok, St}.
+
+% Scanner callback: resume a scan. The state is rebuilt from scratch exactly
+% as in start/2; nothing is read from the checkpoint map.
+resume(ScanId, #{}) ->
+    St = init_config(ScanId),
+    ?INFO("Resuming.", [], meta(St)),
+    {ok, St}.
+
+% Scanner callback: the scan finished. Returns an empty result map.
+complete(St) ->
+    ?INFO("Completed", [], meta(St)),
+    {ok, #{}}.
+
+% Scanner callback: the plugin keeps no checkpoint data, so the checkpoint
+% is always an empty map.
+checkpoint(_St) ->
+    {ok, #{}}.
+
+% Scanner callback: decide whether DbName should be scanned.
+% The database is scanned only when ttl/2 yields an integer TTL, which is
+% then kept in the state under `ttl`; otherwise the database is skipped.
+db(St, DbName) ->
+    case ttl(St, DbName) of
+        TTL when is_integer(TTL) ->
+            {ok, St#{ttl => TTL}};
+        undefined ->
+            {skip, St}
+    end.
+
+% Scanner callback: a database was opened for scanning.
+% Asks couch_time_seq for the sequence matching "current timestamp minus
+% TTL" and, unless the answer is the atom `now`, passes it on as the
+% end_key option. Resets the per-database purge counter.
+% NOTE(review): the leading 0 in the result is presumably the sequence the
+% scan starts from - confirm against couch_scanner_plugin.
+db_opened(#{} = St, Db) ->
+    #{ttl := TTL} = St,
+    EndSeq = couch_time_seq:since(couch_db:get_time_seq(Db), couch_time_seq:timestamp() - TTL),
+    ChangeOpts =
+        if
+            EndSeq == now -> [];
+            true -> [{end_key, EndSeq}]
+        end,
+    ?INFO("scanning for deleted documents in ~s up to ~p", [couch_db:name(Db), EndSeq], meta(St)),
+    {0, ChangeOpts, St#{count => 0, end_seq => EndSeq}}.
+
+% Scanner callback: the database is about to be closed; log how many
+% purge results were counted for it.
+db_closing(#{} = St, Db) ->
+    #{count := Count} = St,
+    ?INFO("purged ~B deleted documents from ~s", [Count, couch_db:name(Db)], meta(St)),
+    {ok, St}.
+
+% Scanner callback: called per #full_doc_info{}. Deleted documents are
+% purged; everything else is left untouched.
+doc_fdi(#{} = St, #full_doc_info{deleted = true} = FDI, Db) ->
+    #{end_seq := EndSeq} = St,
+    % Sanity check that the scan respected the end_key computed in
+    % db_opened/2 (an integer always compares below the atom `now`).
+    ?assert(
+        FDI#full_doc_info.update_seq =< EndSeq, "FDI update_seq should not be greater than end seq"
+    ),
+    {ok, purge(St, FDI, Db)};
+doc_fdi(#{} = St, #full_doc_info{}, _Db) ->
+    {ok, St}.
+
+% Purge every leaf revision of FDI, at most max_batch_size revisions per
+% request. Returns the updated state.
+purge(#{} = St, #full_doc_info{} = FDI, Db) ->
+    {Id, Revs} = fdi_to_idrevs(FDI),
+    Configured = config:get_integer(atom_to_list(?MODULE), "max_batch_size", 500),
+    % purge/5 splits Revs into chunks of MaxBatchSize and recurses on the
+    % remainder. With 0 the remainder never shrinks (endless recursion) and
+    % with a negative value lists:split/2 raises, so never go below 1.
+    MaxBatchSize = max(1, Configured),
+    purge(St, Id, Revs, MaxBatchSize, Db).
+
+% Purge the given revisions of document Id through fabric.
+%
+% A list no longer than MaxBatchSize is sent as one request, run via
+% fabric_util:isolate/2 with the fabric request timeout. On an `ok` or
+% `accepted` reply the results are counted and the rate limiter is
+% consulted; any other reply or any exception raised by the request is
+% logged as a warning and the state is returned unchanged. Longer lists
+% are split and handled chunk by chunk.
+purge(#{} = St, Id, Revs, MaxBatchSize, Db) when length(Revs) =< MaxBatchSize ->
+    DbName = mem3:dbname(couch_db:name(Db)),
+    Timeout = fabric_util:request_timeout(),
+    Request = fun() -> fabric:purge_docs(DbName, [{Id, Revs}], [?ADMIN_CTX]) end,
+    try fabric_util:isolate(Request, Timeout) of
+        {ok, Results} ->
+            account_purged(St, Results);
+        {accepted, Results} ->
+            account_purged(St, Results);
+        Unexpected ->
+            ?WARN(
+                "Failed to purge deleted documents in ~s/~s for reason ~p",
+                [DbName, Id, Unexpected],
+                meta(St)
+            ),
+            St
+    catch
+        Kind:Error ->
+            ?WARN(
+                "Failed to purge deleted documents in ~s/~s for reason ~p:~p",
+                [DbName, Id, Kind, Error],
+                meta(St)
+            ),
+            St
+    end;
+purge(#{} = St0, Id, Revs, MaxBatchSize, Db) ->
+    {Chunk, Remaining} = lists:split(MaxBatchSize, Revs),
+    St1 = purge(St0, Id, Chunk, MaxBatchSize, Db),
+    purge(St1, Id, Remaining, MaxBatchSize, Db).
+
+% Record a successful purge request: report the number of results to the
+% rate limiter as doc_write activity, sleep for the wait it asks for, and
+% add the same number to the per-database counter.
+account_purged(St, Results) ->
+    #{count := Count, limiter := Limiter0} = St,
+    Purged = length(Results),
+    {Wait, Limiter1} = couch_scanner_rate_limiter:update(Limiter0, doc_write, Purged),
+    timer:sleep(Wait),
+    St#{count => Count + Purged, limiter => Limiter1}.
+
+% Return {DocId, RevStrings} with one revision string per leaf of the
+% document's revision tree. Entries returned by
+% couch_key_tree:get_all_leafs/1 whose value is not a #leaf{} are dropped.
+fdi_to_idrevs(#full_doc_info{id = Id, rev_tree = RevTree}) ->
+    LeafToRev = fun
+        ({#leaf{}, {Pos, [RevId | _]}}) -> [couch_doc:rev_to_str({Pos, RevId})];
+        (_Other) -> []
+    end,
+    {Id, lists:flatmap(LeafToRev, couch_key_tree:get_all_leafs(RevTree))}.
+
+% Initial plugin state: the scan id plus a fresh rate limiter.
+init_config(ScanId) ->
+    Limiter = couch_scanner_rate_limiter:get(),
+    #{sid => ScanId, limiter => Limiter}.
+
+% Log metadata: only the scan id is taken from the state.
+meta(#{sid := _} = St) ->
+    maps:with([sid], St).
+
+% Resolve the TTL that applies to DbName.
+%
+% The database's own `deleted_document_ttl` auto purge property wins; if it
+% is absent or unusable the `deleted_document_ttl` config default is used.
+% Returns an integer, or `undefined` when neither is set.
+ttl(St, DbName) ->
+    case db_ttl(St, DbName) of
+        undefined -> default_ttl(St);
+        DbTTL -> DbTTL
+    end.
+
+% TTL stored on the database itself. A missing property, a non-integer
+% value or a failure to read the properties all yield `undefined`; the
+% latter two are logged as warnings.
+db_ttl(St, DbName) ->
+    case fabric:get_auto_purge_props(DbName) of
+        {ok, AutoPurgeProps} ->
+            case couch_util:get_value(<<"deleted_document_ttl">>, AutoPurgeProps) of
+                TTL when is_integer(TTL) ->
+                    TTL;
+                undefined ->
+                    undefined;
+                Else ->
+                    ?WARN(
+                        "deleted_document_ttl in ~s was '~p', not an integer",
+                        [DbName, Else],
+                        meta(St)
+                    ),
+                    undefined
+            end;
+        {error, Reason} ->
+            ?WARN(
+                "Failed to fetch ttl in ~s for reason ~p",
+                [DbName, Reason],
+                meta(St)
+            ),
+            undefined
+    end.
+
+% TTL from the plugin's config section. A value that is not an integer
+% (for instance an empty `deleted_document_ttl =` line) is reported and
+% treated as unset instead of crashing the scan with badarg.
+default_ttl(St) ->
+    case config:get(atom_to_list(?MODULE), "deleted_document_ttl") of
+        undefined ->
+            undefined;
+        Value ->
+            try
+                list_to_integer(Value)
+            catch
+                error:badarg ->
+                    ?WARN(
+                        "Ignoring configured deleted_document_ttl '~p', not an integer",
+                        [Value],
+                        meta(St)
+                    ),
+                    undefined
+            end
+    end.
diff --git a/src/couch/test/eunit/couch_auto_purge_plugin_tests.erl b/src/couch/test/eunit/couch_auto_purge_plugin_tests.erl
new file mode 100644
index 00000000000..371abcb051a
--- /dev/null
+++ b/src/couch/test/eunit/couch_auto_purge_plugin_tests.erl
@@ -0,0 +1,118 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+%   http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+-module(couch_auto_purge_plugin_tests).
+
+-include_lib("couch/include/couch_eunit.hrl").
+-include_lib("couch/include/couch_db.hrl").
+
+-define(PLUGIN, couch_auto_purge_plugin).
+
+% EUnit generator: each test runs against a fresh couch instance and a
+% fresh temporary database (see setup/0 and teardown/1).
+couch_auto_purge_plugin_test_() ->
+    {
+        foreach,
+        fun setup/0,
+        fun teardown/1,
+        [
+            ?TDEF_FE(t_no_auto_purge_by_default, 10),
+            ?TDEF_FE(t_auto_purge_after_config_ttl, 10),
+            ?TDEF_FE(t_auto_purge_after_db_ttl, 10)
+        ]
+    }.
+
+% Fixture setup: mock the plugin and scanner modules (passthrough), start
+% couch with fabric and couch_scanner, and create a q=2, n=1 database.
+setup() ->
+    {module, _} = code:ensure_loaded(?PLUGIN),
+    meck:new(?PLUGIN, [passthrough]),
+    meck:new(couch_scanner_server, [passthrough]),
+    meck:new(couch_scanner_util, [passthrough]),
+    Ctx = test_util:start_couch([fabric, couch_scanner]),
+    DbName = ?tempdb(),
+    ok = fabric:create_db(DbName, [{q, "2"}, {n, "1"}]),
+    % The plugin reads "max_batch_size"; the previous key "max_batch_items"
+    % was never looked at, so the batching path was not exercised.
+    config:set(atom_to_list(?PLUGIN), "max_batch_size", "1", false),
+    reset_stats(),
+    {Ctx, DbName}.
+
+% Fixture teardown: drop the scanner and plugin config, reset the scanner,
+% delete the database, stop couch and unload all mocks.
+teardown({Ctx, DbName}) ->
+    config_delete_section("couch_scanner"),
+    config_delete_section("couch_scanner_plugins"),
+    config_delete_section(atom_to_list(?PLUGIN)),
+    couch_scanner:reset_checkpoints(),
+    couch_scanner:resume(),
+    fabric:delete_db(DbName),
+    test_util:stop_couch(Ctx),
+    meck:unload().
+
+% With no TTL configured anywhere the deleted document must survive a scan.
+t_no_auto_purge_by_default({_, DbName}) ->
+    ok = add_doc(DbName, <<"doc1">>, #{<<"_deleted">> => true}),
+    ?assertEqual(1, doc_del_count(DbName)),
+    run_plugin(),
+    ?assertEqual(1, doc_del_count(DbName)),
+    ok.
+
+% A negative config-level TTL makes the fresh deletion immediately eligible.
+t_auto_purge_after_config_ttl({_, DbName}) ->
+    config:set(atom_to_list(?PLUGIN), "deleted_document_ttl", "-1000000", false),
+    ok = add_doc(DbName, <<"doc1">>, #{<<"_deleted">> => true}),
+    ?assertEqual(1, doc_del_count(DbName)),
+    run_plugin(),
+    ?assertEqual(0, doc_del_count(DbName)),
+    ok.
+
+% Same as above, but the TTL comes from the database's auto purge props.
+t_auto_purge_after_db_ttl({_, DbName}) ->
+    ok = fabric:set_auto_purge_props(DbName, [{<<"deleted_document_ttl">>, -1000000}]),
+    ok = add_doc(DbName, <<"doc1">>, #{<<"_deleted">> => true}),
+    ?assertEqual(1, doc_del_count(DbName)),
+    run_plugin(),
+    ?assertEqual(0, doc_del_count(DbName)),
+    ok.
+
+% Enable the plugin and block until the scanner server sees a linked
+% process exit (at most 10 seconds).
+run_plugin() ->
+    meck:reset(couch_scanner_server),
+    meck:reset(?PLUGIN),
+    config:set("couch_scanner_plugins", atom_to_list(?PLUGIN), "true", false),
+    wait_exit(10000).
+
+% Bring the listed query_server counters back to zero.
+reset_stats() ->
+    Counters = [
+        [couchdb, query_server, process_error_exits],
+        [couchdb, query_server, process_errors],
+        [couchdb, query_server, process_exits]
+    ],
+    [reset_counter(C) || C <- Counters].
+
+% Decrement a counter by its current sample so that it reads zero again.
+reset_counter(Counter) ->
+    case couch_stats:sample(Counter) of
+        0 ->
+            ok;
+        N when is_integer(N), N > 0 ->
+            couch_stats:decrement_counter(Counter, N)
+    end.
+
+% Delete every key of Section (non-persistently).
+% config:get/1 yields {Key, Value} pairs and config:delete/3 expects
+% (Section, Key, Persist); the pairs must not be passed through as
+% (Section, Key).
+config_delete_section(Section) ->
+    [config:delete(Section, K, false) || {K, _V} <- config:get(Section)].
+
+% Write a document with the given id and body as admin.
+add_doc(DbName, DocId, Body) ->
+    {ok, _} = fabric:update_doc(DbName, mkdoc(DocId, Body), [?ADMIN_CTX]),
+    ok.
+
+% Build the document term: add `_id` to the map and round-trip it through
+% jiffy so that fabric receives jiffy's decoded representation.
+mkdoc(Id, #{} = Body) ->
+    Body1 = Body#{<<"_id">> => Id},
+    jiffy:decode(jiffy:encode(Body1)).
+
+% Wait up to MSec milliseconds for couch_scanner_server:handle_info/2 to
+% be called with an 'EXIT' message.
+wait_exit(MSec) ->
+    meck:wait(couch_scanner_server, handle_info, [{'EXIT', '_', '_'}, '_'], MSec).
+
+% Number of deleted documents reported by fabric:get_db_info/1.
+doc_del_count(DbName) ->
+    {ok, DbInfo} = fabric:get_db_info(DbName),
+    couch_util:get_value(doc_del_count, DbInfo).
From 12b948782f54619b8af71f8c283454cad510c643 Mon Sep 17 00:00:00 2001
From: Robert Newson
Date: Mon, 22 Sep 2025 11:12:59 +0100
Subject: [PATCH 3/3] Document new endpoints and config settings

---
 rel/overlay/etc/default.ini        |  9 +++
 src/docs/src/api/database/misc.rst | 90 ++++++++++++++++++++++++++++++
 src/docs/src/config/scanner.rst    | 11 ++++
 3 files changed, 110 insertions(+)

diff --git a/rel/overlay/etc/default.ini b/rel/overlay/etc/default.ini
index db4d37f3e17..ccc324e9f56 100644
--- a/rel/overlay/etc/default.ini
+++ b/rel/overlay/etc/default.ini
@@ -1186,6 +1186,15 @@ url = {{nouveau_url}}
 ; Scanner settings to skip dbs and docs would also work:
 ;[couch_quickjs_scanner_plugin.skip_{dbs,ddoc,docs}]
 
+[couch_auto_purge_plugin]
+; The most id/rev pairs the plugin will attempt to purge in
+; one request.
+;max_batch_size = 500
+; The default time-to-live, measured in seconds, before a
+; deleted document is eligible to be purged by the plugin.
+; Defaults to undefined, which disables auto purging.
+;deleted_document_ttl =
+
 [chttpd_auth_lockout]
 ; CouchDB can temporarily lock out IP addresses that repeatedly fail authentication
 ; mode can be set to one of three recognised values;
diff --git a/src/docs/src/api/database/misc.rst b/src/docs/src/api/database/misc.rst
index 69c7d467abc..446113bc9ac 100644
--- a/src/docs/src/api/database/misc.rst
+++ b/src/docs/src/api/database/misc.rst
@@ -330,6 +330,96 @@ following behavior:
         "ok": true
     }
 
+.. _api/db/auto_purge:
+
+=====================
+``/{db}/_auto_purge``
+=====================
+
+.. http:get:: /{db}/_auto_purge
+    :synopsis: Retrieves auto purge settings
+
+    Retrieves the auto purge settings for the database. These settings
+    are used by the :ref:`auto purge plugin <config/auto_purge_plugin>`.
+
+    :param db: Database name
+    :<header Accept: - :mimetype:`application/json`
+                     - :mimetype:`text/plain`
+    :>header Content-Type: - :mimetype:`application/json`
+                           - :mimetype:`text/plain; charset=utf-8`
+    :code 200: Request completed successfully
+    :code 401: Unauthorized request to a protected API
+    :code 403: Insufficient permissions / :ref:`Too many requests with invalid credentials<error/403>`
+    :code 500: Internal server error or timeout
+
+    **Request**:
+
+    .. code-block:: http
+
+        GET /db/_auto_purge HTTP/1.1
+        Accept: application/json
+        Host: localhost:5984
+
+    **Response**:
+
+    .. code-block:: http
+
+        HTTP/1.1 200 OK
+        Cache-Control: must-revalidate
+        Content-Length: 32
+        Content-Type: application/json
+        Date: Mon, 22 Sep 2025 11:01:00 GMT
+        Server: CouchDB (Erlang/OTP)
+
+        {"deleted_document_ttl": 259200}
+
+.. http:put:: /{db}/_auto_purge
+    :synopsis: Update auto purge settings
+
+    Sets the auto purge settings for the database, replacing any
+    previously stored settings. These settings are used by the
+    :ref:`auto purge plugin <config/auto_purge_plugin>`.
+
+    :param db: Database name
+    :<header Accept: - :mimetype:`application/json`
+                     - :mimetype:`text/plain`
+    :>header Content-Type: - :mimetype:`application/json`
+                           - :mimetype:`text/plain; charset=utf-8`
+    :code 201: Request completed successfully
+    :code 400: Invalid auto purge settings, for example a
+        ``deleted_document_ttl`` that is not an integer
+    :code 401: Unauthorized request to a protected API
+    :code 403: Insufficient permissions / :ref:`Too many requests with invalid credentials<error/403>`
+    :code 500: Internal server error or timeout
+
+    **Request**:
+
+    .. code-block:: http
+
+        PUT /db/_auto_purge HTTP/1.1
+        Accept: application/json
+        Content-Length: 32
+        Content-Type: application/json
+        Host: localhost:5984
+
+        {"deleted_document_ttl": 259200}
+
+    **Response**:
+
+    .. code-block:: http
+
+        HTTP/1.1 201 Created
+        Cache-Control: must-revalidate
+        Content-Length: 12
+        Content-Type: application/json
+        Date: Mon, 22 Sep 2025 11:01:00 GMT
+        Server: CouchDB (Erlang/OTP)
+
+        {
+            "ok": true
+        }
+
 .. _api/db/missing_revs:
 
 =======================
diff --git a/src/docs/src/config/scanner.rst b/src/docs/src/config/scanner.rst
index f36619f4acd..bad2480dbbf 100644
--- a/src/docs/src/config/scanner.rst
+++ b/src/docs/src/config/scanner.rst
@@ -249,3 +249,14 @@ settings in their ``[{plugin}]`` section.
 
         [couch_scanner_plugin_ddoc_features]
         ddoc_report = false
+
+.. _config/auto_purge_plugin:
+
+.. config:section:: couch_auto_purge_plugin :: Configure the Auto Purge plugin
+
+    .. config:option:: deleted_document_ttl
+
+        Set the default interval, in seconds, before the plugin will purge
+        a deleted document. The database may override this setting with the
+        :ref:`api/db/auto_purge` endpoint. If neither is set, the
+        plugin will not purge deleted documents.