Skip to content

Commit

Permalink
Implement configurable error_logger msg drop threshold
Browse files Browse the repository at this point in the history
Implement a new config option error_logger_hwm, which is a number
representing how many messages per second we should log from the
error_logger. If that threshold is exceeded, messages will be discarded
for the remainder of that second.

This is only effective if lager itself can process the messages fast
enough to satisfy the threshold. If your threshold is 1000 and lager
itself is only writing 100 messages a second (because error messages are
causing fsyncs or whatever) you'll never exceed the threshold and drops
will never happen. Thus care needs to be taken when selecting this
feature.

Setting it low is not as bad as it might seem, because when using lager,
hopefully error_logger messages are unusual. In my testing, 50/second
with the default config seemed reasonable (which has 2 file backends
installed, both of which fsync on messages at error or above).
  • Loading branch information
Vagabond committed Mar 11, 2013
1 parent c72a761 commit ef691d8
Show file tree
Hide file tree
Showing 2 changed files with 84 additions and 14 deletions.
86 changes: 73 additions & 13 deletions src/error_logger_lager_h.erl
Expand Up @@ -32,6 +32,17 @@


-export([format_reason/1]). -export([format_reason/1]).


-record(state, {
%% how many messages per second we try to deliver
hwm = undefined :: 'undefined' | pos_integer(),
%% how many messages we've received this second
mps = 0 :: non_neg_integer(),
%% the current second
lasttime = os:timestamp() :: os:timestamp(),
%% count of dropped messages this second
dropped = 0 :: non_neg_integer()
}).

-define(LOGMSG(Level, Pid, Msg), -define(LOGMSG(Level, Pid, Msg),
case ?SHOULD_LOG(Level) of case ?SHOULD_LOG(Level) of
true -> true ->
Expand All @@ -58,13 +69,73 @@
-endif. -endif.


-spec init(any()) -> {ok, {}}. -spec init(any()) -> {ok, {}}.
init(_) -> init([HighWaterMark]) ->
{ok, {}}. {ok, #state{hwm=HighWaterMark}}.


handle_call(_Request, State) -> handle_call(_Request, State) ->
{ok, ok, State}. {ok, ok, State}.


handle_event(Event, State) -> handle_event(Event, State) ->
case check_hwm(State) of
{true, NewState} ->
log_event(Event, NewState);
{false, NewState} ->
{ok, NewState}
end.

handle_info(_Info, State) ->
{ok, State}.

terminate(_Reason, _State) ->
ok.

code_change(_OldVsn, State, _Extra) ->
{ok, State}.

%% internal functions

check_hwm(State = #state{hwm = undefined}) ->
{true, State};
check_hwm(State = #state{mps = Mps, hwm = Hwm}) when Mps < Hwm ->
%% haven't hit high water mark yet, just log it
{true, State#state{mps=Mps+1}};
check_hwm(State = #state{hwm = Hwm, lasttime = Last, dropped = Drop}) ->
%% are we still in the same second?
{M, S, _} = Now = os:timestamp(),
case Last of
{M, S, _} ->
%% still in same second, but have exceeded the high water mark
NewDrops = discard_messages(Now, 0),
%io:format("dropped ~p messages~n", [NewDrops]),
{false, State#state{dropped=Drop+NewDrops}};
_ ->
%% different second, reset all counters and allow it
%% TODO - do we care if the clock goes backwards?
case Drop > 0 of
true ->
?LOGFMT(warning, self(), "lager_error_logger_h dropped ~p messages in the last second that exceeded the limit of ~p messages/sec",
[Drop, Hwm]);
false ->
ok
end,
{true, State#state{dropped = 0, mps=1, lasttime = Now}}
end.

discard_messages(Second, Count) ->
{M, S, _} = os:timestamp(),
case Second of
{M, S, _} ->
receive
_Msg ->
discard_messages(Second, Count+1)
after 0 ->
Count
end;
_ ->
Count
end.

log_event(Event, State) ->
case Event of case Event of
{error, _GL, {Pid, Fmt, Args}} -> {error, _GL, {Pid, Fmt, Args}} ->
case Fmt of case Fmt of
Expand Down Expand Up @@ -143,17 +214,6 @@ handle_event(Event, State) ->
end, end,
{ok, State}. {ok, State}.


handle_info(_Info, State) ->
{ok, State}.

terminate(_Reason, _State) ->
ok.

code_change(_OldVsn, State, _Extra) ->
{ok, State}.

%% internal functions

format_crash_report(Report, Neighbours) -> format_crash_report(Report, Neighbours) ->
Name = case proplists:get_value(registered_name, Report, []) of Name = case proplists:get_value(registered_name, Report, []) of
[] -> [] ->
Expand Down
12 changes: 11 additions & 1 deletion src/lager_app.erl
Expand Up @@ -52,6 +52,16 @@ start(_StartType, _StartArgs) ->
{_, Traces} = lager_config:get(loglevel), {_, Traces} = lager_config:get(loglevel),
lager_config:set(loglevel, {MinLog, Traces}), lager_config:set(loglevel, {MinLog, Traces}),


HighWaterMark = case application:get_env(lager, error_logger_hwm) of
{ok, HwmVal} when is_integer(HwmVal), HwmVal > 0 ->
HwmVal;
{ok, BadVal} ->
_ = lager:log(warning, self(), "Invalid error_logger high water mark: ~p, disabling", [BadVal]),
undefined;
undefined ->
undefined
end,

SavedHandlers = SavedHandlers =
case application:get_env(lager, error_logger_redirect) of case application:get_env(lager, error_logger_redirect) of
{ok, false} -> {ok, false} ->
Expand All @@ -64,7 +74,7 @@ start(_StartType, _StartArgs) ->
WhiteList WhiteList
end, end,


case supervisor:start_child(lager_handler_watcher_sup, [error_logger, error_logger_lager_h, []]) of case supervisor:start_child(lager_handler_watcher_sup, [error_logger, error_logger_lager_h, [HighWaterMark]]) of
{ok, _} -> {ok, _} ->
[begin error_logger:delete_report_handler(X), X end || [begin error_logger:delete_report_handler(X), X end ||
X <- gen_event:which_handlers(error_logger) -- [error_logger_lager_h | WhiteList]]; X <- gen_event:which_handlers(error_logger) -- [error_logger_lager_h | WhiteList]];
Expand Down

0 comments on commit ef691d8

Please sign in to comment.