Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(resource): ask for metrics only when needed #10359

Merged
merged 3 commits into from
Apr 14, 2023
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions apps/emqx/priv/bpapi.versions
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
{emqx_bridge,1}.
{emqx_bridge,2}.
{emqx_bridge,3}.
{emqx_bridge,4}.
{emqx_broker,1}.
{emqx_cm,1}.
{emqx_conf,1}.
Expand Down
4 changes: 2 additions & 2 deletions apps/emqx_authn/src/emqx_authn_api.erl
Original file line number Diff line number Diff line change
Expand Up @@ -872,8 +872,8 @@ lookup_from_local_node(ChainName, AuthenticatorID) ->
case emqx_resource:get_instance(ResourceId) of
{error, not_found} ->
{error, {NodeId, not_found_resource}};
{ok, _, #{status := Status, metrics := ResourceMetrics}} ->
{ok, {NodeId, Status, Metrics, ResourceMetrics}}
{ok, _, #{status := Status}} ->
{ok, {NodeId, Status, Metrics, emqx_resource:get_metrics(ResourceId)}}
end
end;
{error, Reason} ->
Expand Down
4 changes: 2 additions & 2 deletions apps/emqx_authz/src/emqx_authz_api_sources.erl
Original file line number Diff line number Diff line change
Expand Up @@ -344,8 +344,8 @@ lookup_from_local_node(Type) ->
case emqx_resource:get_instance(ResourceId) of
{error, not_found} ->
{error, {NodeId, not_found_resource}};
{ok, _, #{status := Status, metrics := ResourceMetrics}} ->
{ok, {NodeId, Status, Metrics, ResourceMetrics}}
{ok, _, #{status := Status}} ->
{ok, {NodeId, Status, Metrics, emqx_resource:get_metrics(ResourceId)}}
end;
_ ->
Metrics = emqx_metrics_worker:get_metrics(authz_metrics, Type),
Expand Down
5 changes: 4 additions & 1 deletion apps/emqx_bridge/src/emqx_bridge.erl
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
unload/0,
lookup/1,
lookup/2,
lookup/3,
get_metrics/2,
create/3,
disable_enable/3,
remove/2,
Expand Down Expand Up @@ -271,6 +271,9 @@ lookup(Type, Name, RawConf) ->
}}
end.

%% Fetch the metrics of the resource that backs the bridge identified by
%% Type and Name. The resource id is derived with
%% emqx_bridge_resource:resource_id/2 and handed to emqx_resource:get_metrics/1.
get_metrics(Type, Name) ->
    ResourceId = emqx_bridge_resource:resource_id(Type, Name),
    emqx_resource:get_metrics(ResourceId).

maybe_upgrade(mqtt, Config) ->
emqx_bridge_compatible_config:maybe_upgrade(Config);
maybe_upgrade(webhook, Config) ->
Expand Down
137 changes: 68 additions & 69 deletions apps/emqx_bridge/src/emqx_bridge_api.erl
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
]).

-export([lookup_from_local_node/2]).
-export([get_metrics_from_local_node/2]).

-define(BRIDGE_NOT_ENABLED,
?BAD_REQUEST(<<"Forbidden operation, bridge not enabled">>)
Expand Down Expand Up @@ -467,7 +468,7 @@ schema("/bridges_probe") ->
end;
'/bridges'(get, _Params) ->
Nodes = mria:running_nodes(),
NodeReplies = emqx_bridge_proto_v3:list_bridges_on_nodes(Nodes),
NodeReplies = emqx_bridge_proto_v4:list_bridges_on_nodes(Nodes),
case is_ok(NodeReplies) of
{ok, NodeBridges} ->
AllBridges = [
Expand Down Expand Up @@ -524,7 +525,7 @@ schema("/bridges_probe") ->
).

'/bridges/:id/metrics'(get, #{bindings := #{id := Id}}) ->
?TRY_PARSE_ID(Id, lookup_from_all_nodes_metrics(BridgeType, BridgeName, 200)).
?TRY_PARSE_ID(Id, get_metrics_from_all_nodes(BridgeType, BridgeName)).

'/bridges/:id/metrics/reset'(put, #{bindings := #{id := Id}}) ->
?TRY_PARSE_ID(
Expand Down Expand Up @@ -564,19 +565,21 @@ maybe_deobfuscate_bridge_probe(#{<<"type">> := BridgeType, <<"name">> := BridgeN
maybe_deobfuscate_bridge_probe(Params) ->
Params.

lookup_from_all_nodes(BridgeType, BridgeName, SuccCode) ->
FormatFun = fun format_bridge_info/1,
do_lookup_from_all_nodes(BridgeType, BridgeName, SuccCode, FormatFun).

lookup_from_all_nodes_metrics(BridgeType, BridgeName, SuccCode) ->
FormatFun = fun format_bridge_metrics/1,
do_lookup_from_all_nodes(BridgeType, BridgeName, SuccCode, FormatFun).
%% Collect the metrics of one bridge from every running node.
%%
%% Issues the `get_metrics_from_all_nodes' bpapi call against the nodes
%% reported by mria:running_nodes/0. A list result is paired node-by-node
%% with that same node list and rendered as a 200 response through
%% format_bridge_metrics/1; an `{error, Reason}' result is turned into an
%% internal-error response.
%% NOTE(review): lists:zip/2 requires the reply list to have exactly one
%% entry per node, in node order - confirm that do_bpapi_call/3 guarantees it.
get_metrics_from_all_nodes(BridgeType, BridgeName) ->
    Nodes = mria:running_nodes(),
    case do_bpapi_call(all, get_metrics_from_all_nodes, [Nodes, BridgeType, BridgeName]) of
        {error, Reason} ->
            ?INTERNAL_ERROR(Reason);
        NodeMetrics when is_list(NodeMetrics) ->
            {200, format_bridge_metrics(lists:zip(Nodes, NodeMetrics))}
    end.

do_lookup_from_all_nodes(BridgeType, BridgeName, SuccCode, FormatFun) ->
lookup_from_all_nodes(BridgeType, BridgeName, SuccCode) ->
Nodes = mria:running_nodes(),
case is_ok(emqx_bridge_proto_v3:lookup_from_all_nodes(Nodes, BridgeType, BridgeName)) of
case is_ok(emqx_bridge_proto_v4:lookup_from_all_nodes(Nodes, BridgeType, BridgeName)) of
{ok, [{ok, _} | _] = Results} ->
{SuccCode, FormatFun([R || {ok, R} <- Results])};
{SuccCode, format_bridge_info([R || {ok, R} <- Results])};
{ok, [{error, not_found} | _]} ->
?BRIDGE_NOT_FOUND(BridgeType, BridgeName);
{error, Reason} ->
Expand All @@ -603,6 +606,9 @@ create_or_update_bridge(BridgeType, BridgeName, Conf, HttpStatusCode) ->
?BAD_REQUEST(map_to_json(Reason))
end.

%% Read the metrics of the given bridge through emqx_bridge:get_metrics/2
%% and convert them with format_metrics/1.
%% NOTE(review): this function is exported; presumably it is the per-node
%% target of the cluster-wide metrics call - confirm against the proto module.
get_metrics_from_local_node(BridgeType, BridgeName) ->
    RawMetrics = emqx_bridge:get_metrics(BridgeType, BridgeName),
    format_metrics(RawMetrics).

'/bridges/:id/enable/:enable'(put, #{bindings := #{id := Id, enable := Enable}}) ->
?TRY_PARSE_ID(
Id,
Expand Down Expand Up @@ -739,7 +745,7 @@ pick_bridges_by_id(Type, Name, BridgesAllNodes) ->
).

format_bridge_info([FirstBridge | _] = Bridges) ->
Res = maps:without([node, metrics], FirstBridge),
Res = maps:remove(node, FirstBridge),
NodeStatus = node_status(Bridges),
redact(Res#{
status => aggregate_status(NodeStatus),
Expand All @@ -766,7 +772,7 @@ aggregate_status(AllStatus) ->
end.

collect_metrics(Bridges) ->
[maps:with([node, metrics], B) || B <- Bridges].
[#{node => Node, metrics => Metrics} || {Node, Metrics} <- Bridges].

aggregate_metrics(AllMetrics) ->
InitMetrics = ?EMPTY_METRICS,
Expand Down Expand Up @@ -800,9 +806,7 @@ aggregate_metrics(
M15 + N15,
M16 + N16,
M17 + N17
);
aggregate_metrics(#{}, Metrics) ->
Metrics.
).

format_resource(
#{
Expand All @@ -826,62 +830,56 @@ format_resource(
).

format_resource_data(ResData) ->
maps:fold(fun format_resource_data/3, #{}, maps:with([status, metrics, error], ResData)).
maps:fold(fun format_resource_data/3, #{}, maps:with([status, error], ResData)).

format_resource_data(error, undefined, Result) ->
Result;
format_resource_data(error, Error, Result) ->
Result#{status_reason => emqx_misc:readable_error_msg(Error)};
format_resource_data(
metrics,
#{
counters := #{
'dropped' := Dropped,
'dropped.other' := DroppedOther,
'dropped.expired' := DroppedExpired,
'dropped.queue_full' := DroppedQueueFull,
'dropped.resource_not_found' := DroppedResourceNotFound,
'dropped.resource_stopped' := DroppedResourceStopped,
'matched' := Matched,
'retried' := Retried,
'late_reply' := LateReply,
'failed' := SentFailed,
'success' := SentSucc,
'received' := Rcvd
},
gauges := Gauges,
rate := #{
matched := #{current := Rate, last5m := Rate5m, max := RateMax}
}
format_resource_data(K, V, Result) ->
Result#{K => V}.

format_metrics(#{
counters := #{
'dropped' := Dropped,
'dropped.other' := DroppedOther,
'dropped.expired' := DroppedExpired,
'dropped.queue_full' := DroppedQueueFull,
'dropped.resource_not_found' := DroppedResourceNotFound,
'dropped.resource_stopped' := DroppedResourceStopped,
'matched' := Matched,
'retried' := Retried,
'late_reply' := LateReply,
'failed' := SentFailed,
'success' := SentSucc,
'received' := Rcvd
},
Result
) ->
gauges := Gauges,
rate := #{
matched := #{current := Rate, last5m := Rate5m, max := RateMax}
}
}) ->
Queued = maps:get('queuing', Gauges, 0),
SentInflight = maps:get('inflight', Gauges, 0),
Result#{
metrics =>
?METRICS(
Dropped,
DroppedOther,
DroppedExpired,
DroppedQueueFull,
DroppedResourceNotFound,
DroppedResourceStopped,
Matched,
Queued,
Retried,
LateReply,
SentFailed,
SentInflight,
SentSucc,
Rate,
Rate5m,
RateMax,
Rcvd
)
};
format_resource_data(K, V, Result) ->
Result#{K => V}.
?METRICS(
Dropped,
DroppedOther,
DroppedExpired,
DroppedQueueFull,
DroppedResourceNotFound,
DroppedResourceStopped,
Matched,
Queued,
Retried,
LateReply,
SentFailed,
SentInflight,
SentSucc,
Rate,
Rate5m,
RateMax,
Rcvd
).

fill_defaults(Type, RawConf) ->
PackedConf = pack_bridge_conf(Type, RawConf),
Expand Down Expand Up @@ -990,7 +988,7 @@ do_bpapi_call(Node, Call, Args) ->
do_bpapi_call_vsn(SupportedVersion, Call, Args) ->
case lists:member(SupportedVersion, supported_versions(Call)) of
true ->
apply(emqx_bridge_proto_v3, Call, Args);
apply(emqx_bridge_proto_v4, Call, Args);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: isn't this a kind of evading from some of the bpapi checks? 🤔

Copy link
Contributor Author

@keynslug keynslug Apr 11, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What scenario do you have in mind? I believe that piece of logic always (silently) assumed that the type (i.e. signature) of an existing RPC does not change across protocol versions. Which in general is not true, but I'm not aware of bpapi checks for that.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I mean, dialyzer won't check that Args type matches the bpapi's wrapper type; only that bpapi's wrapper declared type matches the type of the corresponding remote call.

false ->
{error, not_implemented}
end.
Expand All @@ -1000,9 +998,10 @@ maybe_unwrap({error, not_implemented}) ->
maybe_unwrap(RpcMulticallResult) ->
emqx_rpc:unwrap_erpc(RpcMulticallResult).

supported_versions(start_bridge_to_node) -> [2, 3];
supported_versions(start_bridges_to_all_nodes) -> [2, 3];
supported_versions(_Call) -> [1, 2, 3].
supported_versions(start_bridge_to_node) -> [2, 3, 4];
supported_versions(start_bridges_to_all_nodes) -> [2, 3, 4];
supported_versions(get_metrics_from_all_nodes) -> [4];
supported_versions(_Call) -> [1, 2, 3, 4].

%% Local shorthand: delegates redaction of Data to emqx_misc:redact/1.
redact(Data) ->
    emqx_misc:redact(Data).
Expand Down
4 changes: 4 additions & 0 deletions apps/emqx_bridge/src/proto/emqx_bridge_proto_v3.erl
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

-export([
introduced_in/0,
deprecated_since/0,

list_bridges/1,
list_bridges_on_nodes/1,
Expand All @@ -39,6 +40,9 @@
%% Returns the version string "5.0.21".
%% NOTE(review): presumably the EMQX release in which this protocol version
%% was introduced - confirm against the bpapi conventions.
introduced_in() ->
"5.0.21".

%% Returns the version string "5.0.22".
%% NOTE(review): presumably the EMQX release from which this protocol version
%% is considered deprecated - confirm against the bpapi conventions.
deprecated_since() ->
"5.0.22".

%% Invoke emqx_bridge:list/0 on Node through rpc:call/5, bounded by ?TIMEOUT.
%% Returns whatever the remote call yields, or a badrpc term on failure.
-spec list_bridges(node()) -> list() | emqx_rpc:badrpc().
list_bridges(Node) ->
    NoArgs = [],
    rpc:call(Node, emqx_bridge, list, NoArgs, ?TIMEOUT).
Expand Down