Skip to content

Commit

Permalink
feat: optimize mria:running_nodes/0 by using local mria_membership in…
Browse files Browse the repository at this point in the history
…formation

Closes: EMQX-9152
  • Loading branch information
SergeTupchiy committed Apr 5, 2023
1 parent a4d79ba commit 1273fd2
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 22 deletions.
40 changes: 19 additions & 21 deletions src/mria.erl
Original file line number Diff line number Diff line change
Expand Up @@ -192,29 +192,27 @@ is_node_in_cluster(Node) ->
lists:member(Node, cluster_nodes(all)).

%% @doc Running nodes.
%% This function should be used with care, as it may not return the most up-to-date
%% view of replicant nodes, as changes in mria_membership are reflected asynchronously.
%% For example:
%% - a core node leaves the cluster and joins it back quickly,
%% - a replicant node receives monitor DOWN message (see mria_membership)
%% and marks the core node as leaving/stopped,
%% - mria_lb on the replicant re-discovers the core node (rlog_lb_update_interval),
%% - the replicant pings the core, the core pongs the replicant,
%%    now each node shows the other one as running.
-spec running_nodes() -> list(node()).
running_nodes() ->
%% NOTE(review): this page renders a diff without +/- markers, so two bodies
%% follow the function head back to back: an RPC-based body (up to the first
%% `end.') and a mria_membership-based body after it. Presumably the first is
%% the removed version and the second is the added one -- confirm against
%% the commit.
%% TODO: cache the results (this could be a hot call) and don't
%% fail on the first unsuccessful call, since other nodes may be
%% alive. Use info from `mria_membership'?
case mria_rlog:role() of
core ->
CoreNodes = mria_mnesia:running_nodes(),
%% Ask every running core node for its replicants (timeout argument: 15000).
%% The second element of the multicall result is ignored.
{Replicants0, _} = rpc:multicall(CoreNodes, mria_status, replicants, [], 15000),
%% Keep only per-node results that are lists (e.g. a `{badrpc, _}' tuple
%% is skipped) and flatten them into a single list of nodes.
Replicants = [Node || Nodes <- Replicants0, is_list(Nodes), Node <- Nodes],
%% Sorted and de-duplicated union of core and replicant nodes.
lists:usort(CoreNodes ++ Replicants);
replicant ->
case mria_lb:core_nodes() of
[CoreNode|_] ->
%% Delegate to the first core node in the list; an RPC failure
%% yields an empty list rather than an error.
case mria_lib:rpc_call_nothrow(CoreNode, ?MODULE, running_nodes, []) of
{badrpc, _} -> [];
{badtcp, _} -> [];
Result -> Result
end;
[] ->
%% No core nodes returned by mria_lb: report no running nodes.
[]
end
end.
%% Membership-based body: no rpc calls appear in the lines below.
CoreNodes = case mria_rlog:role() of
core -> mria_mnesia:running_nodes();
replicant ->
%% Can be used on core node as well, eliminating this
%% case statement, but mria_mnesia:running_nodes/0
%% must be more accurate than mria_membership:nodelist/0...
mria_membership:running_core_nodelist()
end,
Replicants = mria_membership:running_replicant_nodelist(),
%% Sorted and de-duplicated union of core and replicant nodes.
lists:usort(CoreNodes ++ Replicants).

%%--------------------------------------------------------------------
%% Cluster API
Expand Down
4 changes: 3 additions & 1 deletion test/mria_SUITE.erl
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
-include_lib("eunit/include/eunit.hrl").
-include_lib("snabbkaffe/include/snabbkaffe.hrl").
-include("mria_rlog.hrl").
-include("mria.hrl").

-record(kv_tab, {key, val}).

Expand Down Expand Up @@ -1022,7 +1023,8 @@ t_replicant_manual_join(_Config) ->
?tp(test_reconnect_node, #{node => N2}),
?wait_async_action(
?assertMatch(ok, rpc:call(N2, mria, join, [N1])),
#{?snk_kind := mria_exec_callback, type := start, ?snk_meta := #{node := N2}}),
%% need to wait until N3 pings re-joined N2 and N2 pongs N3
#{?snk_kind := mria_membership_pong, member := #member{node = N2}, ?snk_meta := #{node := N3}}),
?assertMatch([N1, N2, N3], lists:sort(rpc:call(N2, mria, running_nodes, []))),
ok
after
Expand Down

0 comments on commit 1273fd2

Please sign in to comment.