Skip to content

Commit

Permalink
supervisor: Eliminate a subtle internal state corruption
Browse files Browse the repository at this point in the history
When a temporary child of a `simple_one_for_one` supervisor died, the
internal state of the supervisor would be corrupted in a way that
would cause the supervisor to retain the start arguments for
subsequent children started by the supervisor, causing
unnecessary growth of the supervisor's heap.

This bug was introduced in 59d4182.

Thanks to Zeyu Zhang (@zzydxm) for noticing this bug and to
Maxim Fedorov for making it known to me.
  • Loading branch information
bjorng committed Aug 20, 2020
1 parent 44b6531 commit e78ec22
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 3 deletions.
4 changes: 2 additions & 2 deletions lib/stdlib/src/supervisor.erl
Expand Up @@ -1605,8 +1605,8 @@ format_status(_, [_PDict, State]) ->
dyn_size(#state{dynamics = {_Kind,Db}}) ->
map_size(Db).
dyn_erase(Pid,#state{dynamics={_Kind,Db}}=State) ->
State#state{dynamics={maps,maps:remove(Pid,Db)}}.
dyn_erase(Pid,#state{dynamics={Kind,Db}}=State) ->
State#state{dynamics={Kind,maps:remove(Pid,Db)}}.
dyn_store(Pid,Args,#state{dynamics={Kind,Db}}=State) ->
case Kind of
Expand Down
60 changes: 59 additions & 1 deletion lib/stdlib/test/supervisor_SUITE.erl
Expand Up @@ -63,6 +63,7 @@
one_for_one_escalation/1, one_for_all/1,
one_for_all_escalation/1, one_for_all_other_child_fails_restart/1,
simple_one_for_one/1, simple_one_for_one_escalation/1,
simple_one_for_one_corruption/1,
rest_for_one/1, rest_for_one_escalation/1,
rest_for_one_other_child_fails_restart/1,
simple_one_for_one_extra/1, simple_one_for_one_shutdown/1]).
Expand Down Expand Up @@ -139,7 +140,8 @@ groups() ->
one_for_all_other_child_fails_restart]},
{restart_simple_one_for_one, [],
[simple_one_for_one, simple_one_for_one_shutdown,
simple_one_for_one_extra, simple_one_for_one_escalation]},
simple_one_for_one_extra, simple_one_for_one_escalation,
simple_one_for_one_corruption]},
{restart_rest_for_one, [],
[rest_for_one, rest_for_one_escalation,
rest_for_one_other_child_fails_restart]}].
Expand Down Expand Up @@ -1277,6 +1279,62 @@ simple_one_for_one_extra(Config) when is_list(Config) ->
terminate(SupPid, Pid4, Id4, abnormal),
check_exit([SupPid]).

%%-------------------------------------------------------------------------
%% Test for subtle corruption of internal state for a
%% simple_one_for_one supervisor. Thanks to Zeyu Zhang (@zzydxm) and
%% Maxim Fedorov for noticing this bug. (OTP-16804)
simple_one_for_one_corruption(Config) when is_list(Config) ->
process_flag(trap_exit, true),

logger:add_handler(?MODULE, ?MODULE, #{test_case_pid => self()}),

try
Child = #{id => child, start => {supervisor_1, start_child, []},
restart => temporary, shutdown => 1000,
type => worker, modules => []},
{ok, SupPid} = start_link({ok, {{simple_one_for_one, 2, 3600}, [Child]}}),
{ok, CPid1} = supervisor:start_child(sup_test, []),

terminate(SupPid, CPid1, child1, abnormal),

%% The first time a child of simple_one_for_one supervisor
%% with restart strategy `temporary` dies, the internal state
%% for the supervisor will become inconsistent (the `dynamics`
%% field would change from `{mapsets,Map}` to
%% `{maps,Map}`). That inconsistency will make the supervisor
%% retain the start arguments even for temporary processes.
%%
%% To test that the bug is fixed, start a new child process
%% with a large term in its argument list.

N = 50000,
BigData = binary_to_list(<<0:N/unit:8>>),
{ok, CPid2, BigData} = supervisor:start_child(sup_test, [BigData]),

%% Since the child is temporary, the supervisor should not keep
%% the argument list for the child and the supervisor's heap
%% size should shrink after a GC.

true = erlang:garbage_collect(SupPid),
{total_heap_size, HeapSize} = process_info(SupPid, total_heap_size),
if
HeapSize > 2*N ->
%% The start arguments for the child have been kept.
ct:fail({excessive_heap_size,HeapSize});
true ->
ok
end,

terminate(SupPid, CPid2, child2, abnormal),

exit(SupPid, kill)
after
logger:remove_handler(?MODULE)
end,

ok.


%%-------------------------------------------------------------------------
%% Test restart escalation on a simple_one_for_one supervisor.
simple_one_for_one_escalation(Config) when is_list(Config) ->
Expand Down

0 comments on commit e78ec22

Please sign in to comment.