Skip to content

Commit

Permalink
Merge pull request #12663 from JimMoen/EMQX-11897/fix-cpu-usage/api
Browse files Browse the repository at this point in the history
fix(vm): cpu usage/idle handled by single worker
  • Loading branch information
JimMoen committed Mar 12, 2024
2 parents f24a76e + 0edeff4 commit 20cd47a
Show file tree
Hide file tree
Showing 9 changed files with 136 additions and 43 deletions.
92 changes: 92 additions & 0 deletions apps/emqx/src/emqx_cpu_sup_worker.erl
@@ -0,0 +1,92 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2024 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------

-module(emqx_cpu_sup_worker).

-behaviour(gen_server).

-include("logger.hrl").

%% gen_server APIs
-export([start_link/0]).

-export([
cpu_util/0,
cpu_util/1
]).

%% gen_server callbacks
-export([
init/1,
handle_continue/2,
handle_call/3,
handle_cast/2,
terminate/2,
code_change/3
]).

-define(CPU_USAGE_WORKER, ?MODULE).

%%--------------------------------------------------------------------
%% API
%%--------------------------------------------------------------------

%% Ask the registered worker process for the result of `cpu_sup:util/0`.
%% The call waits without a timeout (`infinity`) until the worker replies.
cpu_util() ->
    %% The request atom must match the `cpu_util` clause of handle_call/3.
    gen_server:call(?CPU_USAGE_WORKER, cpu_util, infinity).

%% Ask the registered worker process to call `cpu_sup:util` with `Args`.
%% `Args` is the argument list handed to `erlang:apply/3` by the worker,
%% e.g. `[[detailed]]` results in `cpu_sup:util([detailed])`.
%% The call waits without a timeout (`infinity`) until the worker replies.
cpu_util(Args) ->
    %% The request tag must match the `{cpu_util, Args}` clause of handle_call/3.
    Request = {cpu_util, Args},
    gen_server:call(?CPU_USAGE_WORKER, Request, infinity).

%%--------------------------------------------------------------------
%% gen_server callbacks
%% All cpu_sup:util/0,1 calls are funneled through this single process,
%% since cpu_sup reports utilization relative to the caller's previous call.
%%--------------------------------------------------------------------

%% Start the worker linked to the caller and register it locally under
%% ?CPU_USAGE_WORKER so the API functions can reach it by name.
start_link() ->
    ServerName = {local, ?CPU_USAGE_WORKER},
    InitArg = [],
    StartOpts = [],
    gen_server:start_link(ServerName, ?MODULE, InitArg, StartOpts).

%% Return immediately with a placeholder state and defer the real setup
%% to handle_continue/2 (`setup`).
init([]) ->
    PlaceholderState = undefined,
    {ok, PlaceholderState, {continue, setup}}.

%% Deferred setup, triggered by init/1: make sure `os_mon` is running,
%% prime `cpu_sup`, and switch to an empty map as the server state.
handle_continue(setup, undefined) ->
    %% `application:ensure_all_started/1` starts os_mon with the default
    %% (temporary) restart type; crash here if it cannot be started.
    {ok, _StartedApps} = application:ensure_all_started(os_mon),
    %% Prime cpu_sup from this process. The first util/0,1 result obtained by
    %% a process is, on most systems, the utilization since system boot, which
    %% is not guaranteed and must be treated as garbage. The same holds for
    %% the first call after cpu_sup has been restarted.
    _Discarded = cpu_sup:util(),
    {noreply, #{}}.

%% Serve CPU utilization requests from this single process.
%%   cpu_util          -> replies with cpu_sup:util()
%%   {cpu_util, Args}  -> replies with cpu_sup:util applied to the argument
%%                        list `Args`
%%   anything else     -> logged at error level and answered with `ignored`
%% The state is never modified.
handle_call(cpu_util, _From, State) ->
    {reply, cpu_sup:util(), State};
handle_call({cpu_util, Args}, _From, State) ->
    {reply, erlang:apply(cpu_sup, util, Args), State};
handle_call(Unexpected, _From, State) ->
    ?SLOG(error, #{msg => "unexpected_call", call => Unexpected}),
    {reply, ignored, State}.

%% No casts are part of this worker's protocol: log whatever arrives at
%% error level and keep the state unchanged.
handle_cast(Unexpected, State) ->
    ?SLOG(error, #{msg => "unexpected_cast", cast => Unexpected}),
    {noreply, State}.

%% Nothing to clean up on shutdown; always returns `ok`.
terminate(_StopReason, _FinalState) ->
    ok.

%% Hot code upgrade hook: the state needs no conversion, so it is
%% returned unchanged.
code_change(_FromVsn, CurrentState, _UpgradeExtra) ->
    {ok, CurrentState}.
7 changes: 4 additions & 3 deletions apps/emqx/src/emqx_os_mon.erl
Expand Up @@ -18,6 +18,7 @@

-behaviour(gen_server).

-include("emqx.hrl").
-include("logger.hrl").

-export([start_link/0]).
Expand Down Expand Up @@ -47,8 +48,6 @@
]).
-export([is_os_check_supported/0]).

-include("emqx.hrl").

-define(OS_MON, ?MODULE).

start_link() ->
Expand Down Expand Up @@ -92,6 +91,8 @@ handle_continue(setup, undefined) ->
SysHW = init_os_monitor(),
MemRef = start_mem_check_timer(),
CpuRef = start_cpu_check_timer(),
%% the value of the first call should be regarded as garbage.
_Val = cpu_sup:util(),
{noreply, #{sysmem_high_watermark => SysHW, mem_time_ref => MemRef, cpu_time_ref => CpuRef}}.

init_os_monitor() ->
Expand Down Expand Up @@ -131,7 +132,7 @@ handle_info({timeout, _Timer, mem_check}, #{sysmem_high_watermark := HWM} = Stat
handle_info({timeout, _Timer, cpu_check}, State) ->
CPUHighWatermark = emqx:get_config([sysmon, os, cpu_high_watermark]) * 100,
CPULowWatermark = emqx:get_config([sysmon, os, cpu_low_watermark]) * 100,
CPUVal = emqx_vm:cpu_util(),
CPUVal = cpu_sup:util(),
case CPUVal of
%% 0 or 0.0
Busy when Busy == 0 ->
Expand Down
4 changes: 2 additions & 2 deletions apps/emqx/src/emqx_sys_mon.erl
Expand Up @@ -58,8 +58,8 @@ remove_handler() ->
post_config_update(_, _Req, NewConf, OldConf, _AppEnvs) ->
#{os := OS1, vm := VM1} = OldConf,
#{os := OS2, vm := VM2} = NewConf,
VM1 =/= VM2 andalso ?MODULE:update(VM2),
OS1 =/= OS2 andalso emqx_os_mon:update(OS2),
(VM1 =/= VM2) andalso ?MODULE:update(VM2),
(OS1 =/= OS2) andalso emqx_os_mon:update(OS2),
ok.

update(VM) ->
Expand Down
2 changes: 1 addition & 1 deletion apps/emqx/src/emqx_sys_sup.erl
Expand Up @@ -28,7 +28,7 @@ start_link() ->
init([]) ->
OsMon =
case emqx_os_mon:is_os_check_supported() of
true -> [child_spec(emqx_os_mon)];
true -> [child_spec(emqx_os_mon), child_spec(emqx_cpu_sup_worker)];
false -> []
end,
Children =
Expand Down
41 changes: 22 additions & 19 deletions apps/emqx/src/emqx_vm.erl
Expand Up @@ -16,6 +16,8 @@

-module(emqx_vm).

-include("logger.hrl").

-export([
schedulers/0,
scheduler_usage/1,
Expand Down Expand Up @@ -376,28 +378,29 @@ avg15() ->
compat_windows(fun cpu_sup:avg15/0).

cpu_util() ->
compat_windows(fun cpu_sup:util/0).
compat_windows(fun() -> emqx_cpu_sup_worker:cpu_util() end).

cpu_util(Args) ->
compat_windows(fun cpu_sup:util/1, Args).

compat_windows(fun() -> emqx_cpu_sup_worker:cpu_util(Args) end).

-spec compat_windows(function()) -> any().
compat_windows(Fun) when is_function(Fun, 0) ->
case emqx_os_mon:is_os_check_supported() of
true ->
try Fun() of
Val when is_float(Val) -> floor(Val * 100) / 100;
Val when is_number(Val) -> Val;
Val when is_tuple(Val) -> Val;
_ -> 0.0
catch
_:_ -> 0.0
end;
false ->
0.0
end;
compat_windows(Fun) ->
case compat_windows(Fun, []) of
Val when is_float(Val) -> floor(Val * 100) / 100;
Val when is_number(Val) -> Val;
_ -> 0.0
end.

compat_windows(Fun, Args) ->
try
case emqx_os_mon:is_os_check_supported() of
false -> 0.0;
true when Args =:= [] -> Fun();
true -> Fun(Args)
end
catch
_:_ -> 0.0
end.
?SLOG(warning, "Invalid function: ~p", [Fun]),
error({badarg, Fun}).

load(Avg) ->
floor((Avg / 256) * 100) / 100.
Expand Down
3 changes: 2 additions & 1 deletion apps/emqx/test/emqx_os_mon_SUITE.erl
Expand Up @@ -132,7 +132,8 @@ do_sys_mem_check_alarm(_Config) ->
get_memory_usage,
fun() -> Mem end,
fun() ->
timer:sleep(500),
%% wait for `os_mon` started
timer:sleep(10_000),
Alarms = emqx_alarm:get_alarms(activated),
?assert(
emqx_vm_mon_SUITE:is_existing(
Expand Down
5 changes: 3 additions & 2 deletions apps/emqx/test/emqx_vm_SUITE.erl
Expand Up @@ -21,7 +21,8 @@

-include_lib("eunit/include/eunit.hrl").

all() -> emqx_common_test_helpers:all(?MODULE).
all() ->
emqx_common_test_helpers:all(?MODULE).

t_load(_Config) ->
lists:foreach(
Expand Down Expand Up @@ -97,7 +98,7 @@ t_get_process_limit(_Config) ->
emqx_vm:get_process_limit().

t_cpu_util(_Config) ->
_Cpu = emqx_vm:cpu_util().
?assertMatch(Val when is_number(Val), emqx_vm:cpu_util()).

easy_server() ->
{ok, LSock} = gen_tcp:listen(5678, [binary, {packet, 0}, {active, false}]),
Expand Down
24 changes: 9 additions & 15 deletions apps/emqx_management/src/emqx_mgmt.erl
Expand Up @@ -205,23 +205,17 @@ cpu_stats() ->
false ->
[];
true ->
Idle = vm_stats('cpu.idle'),
[
{cpu_idle, Idle},
{cpu_use, 100 - Idle}
]
vm_stats('cpu')
end.

vm_stats('cpu.idle') ->
case emqx_vm:cpu_util([detailed]) of
{_Num, _Use, List, _} when is_list(List) -> proplists:get_value(idle, List, 0);
%% return {all, 0, 0, []} when cpu_sup is not started
_ -> 0
end;
vm_stats('cpu.use') ->
case vm_stats('cpu.idle') of
0 -> 0;
Idle -> 100 - Idle
vm_stats('cpu') ->
CpuUtilArg = [],
case emqx_vm:cpu_util([CpuUtilArg]) of
%% return 0.0 when `emqx_cpu_sup_worker` is not started
{all, Use, Idle, _} ->
[{cpu_use, Use}, {cpu_idle, Idle}];
_ ->
[{cpu_use, 0}, {cpu_idle, 0}]
end;
vm_stats('total.memory') ->
{_, MemTotal} = get_sys_memory(),
Expand Down
1 change: 1 addition & 0 deletions changes/ce/fix-12663.en.md
@@ -0,0 +1 @@
Fixed an issue where the `emqx_vm_cpu_use` and `emqx_vm_cpu_idle` metrics exposed by the Prometheus endpoint `/prometheus/stats` always reported the average CPU usage since operating system boot.

0 comments on commit 20cd47a

Please sign in to comment.