Permalink
Browse files

restructure supervision tree so that folsom is an included app

All stat mods depend on folsom, yet they are not linked to it.
This change brings folsom under supervision of a core stat sup,
which also supervises the riak stat subsystem. Now when folsom exits
everyone gets to restart clean and recover.

riak_core_sup
    |
riak_core_stat_sup (rest_for_one)
    \
       - folsom_sup
       - riak_core_stats_sup (one_for_one)
        \
          - riak_*_stat
          - riak_stat_cache

riak_core_stats_sup will start and supervise gen_server stat mods at
registration time, and will re-start them should the sup crash.
  • Loading branch information...
1 parent abed2ad commit 9279d51933bde020fd86d136fff1a0c904fd4dd0 @russelldb russelldb committed Jul 25, 2012
View
@@ -55,6 +55,8 @@
riak_core_ring_util,
riak_core_stat,
riak_core_stat_cache,
+ riak_core_stat_sup,
+ riak_core_stats_sup,
riak_core_status,
riak_core_sup,
riak_core_sysmon_handler,
@@ -76,6 +78,7 @@
vclock
]},
{registered, []},
+ {included_applications, [folsom]},
{applications, [
kernel,
stdlib,
@@ -84,8 +87,7 @@
crypto,
riak_sysmon,
webmachine,
- os_mon,
- folsom
+ os_mon
]},
{mod, { riak_core_app, []}},
{env, [
View
@@ -342,7 +342,7 @@ register_mod(App, Module, Type) when is_atom(Module), is_atom(Type) ->
vnode_modules ->
riak_core_vnode_proxy_sup:start_proxies(Module);
stat_mods ->
- Module:register_stats();
+ riak_core_stats_sup:start_server(Module);
_ ->
ok
end,
View
@@ -68,6 +68,7 @@ produce_stats() ->
%% gen_server
init([]) ->
+ register_stats(),
{ok, ok}.
handle_call(_Req, _From, State) ->
@@ -70,15 +70,16 @@ init([]) ->
Tab = ets:new(?MODULE, [protected, set, named_table]),
TTL = app_helper:get_env(riak_core, stat_cache_ttl, ?TTL),
%% re-register mods, if this is a restart after a crash
- RegisteredMods = [{App, {Mod, produce_stats, [], TTL}} || {App, Mod} <- riak_core:stat_mods()],
+ RegisteredMods = lists:foldl(fun({App, Mod}, Registerd) ->
+ register_mod(App, Mod, produce_stats, [], TTL, Registerd) end,
+ orddict:new(),
+ riak_core:stat_mods()),
{ok, #state{tab=Tab, apps=orddict:from_list(RegisteredMods)}}.
handle_call({register, App, {Mod, Fun, Args}, TTL}, _From, State0=#state{apps=Apps0}) ->
Apps = case registered(App, Apps0) of
false ->
- folsom_metrics:new_histogram({?MODULE, Mod}),
- folsom_metrics:new_meter({?MODULE, App}),
- orddict:store(App, {Mod, Fun, Args, TTL}, Apps0);
+ register_mod(App, Mod, Fun, Args, TTL, Apps0);
{true, _} ->
Apps0
end,
@@ -136,6 +137,11 @@ code_change(_OldVsn, State, _Extra) ->
{ok, State}.
%% internal
+register_mod(App, Mod, Fun, Args, TTL, Apps) ->
+ folsom_metrics:new_histogram({?MODULE, Mod}),
+ folsom_metrics:new_meter({?MODULE, App}),
+ orddict:store(App, {Mod, Fun, Args, TTL}, Apps).
+
registered(App, Apps) ->
registered(orddict:find(App, Apps)).
View
@@ -0,0 +1,54 @@
+%% -------------------------------------------------------------------
+%%
+%% riak_core: Core Riak Application
+%%
+%% Copyright (c) 2007-2010 Basho Technologies, Inc. All Rights Reserved.
+%%
+%% This file is provided to you under the Apache License,
+%% Version 2.0 (the "License"); you may not use this file
+%% except in compliance with the License. You may obtain
+%% a copy of the License at
+%%
+%% http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing,
+%% software distributed under the License is distributed on an
+%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+%% KIND, either express or implied. See the License for the
+%% specific language governing permissions and limitations
+%% under the License.
+%%
+%% -------------------------------------------------------------------
+
+-module(riak_core_stat_sup).
+
+-behaviour(supervisor).
+
+%% API
+-export([start_link/0]).
+
+%% Supervisor callbacks
+-export([init/1]).
+
+%% Helper macro for declaring children of supervisor
+-define(CHILD(I, Type, Timeout), {I, {I, start_link, []}, permanent, Timeout, Type, [I]}).
+-define(CHILD(I, Type), ?CHILD(I, Type, 5000)).
+
+%% ===================================================================
+%% API functions
+%% ===================================================================
+
+start_link() ->
+ supervisor:start_link({local, ?MODULE}, ?MODULE, []).
+
+%% ===================================================================
+%% Supervisor callbacks
+%% ===================================================================
+
+init([]) ->
+ Children = lists:flatten(
+ [?CHILD(folsom_sup, supervisor),
+ ?CHILD(riak_core_stats_sup, supervisor)
+ ]),
+
+ {ok, {{rest_for_one, 10, 10}, Children}}.
@@ -0,0 +1,50 @@
+%% -------------------------------------------------------------------
+%% Copyright (c) 2007-2011 Basho Technologies, Inc. All Rights Reserved.
+%%
+%% This file is provided to you under the Apache License,
+%% Version 2.0 (the "License"); you may not use this file
+%% except in compliance with the License. You may obtain
+%% a copy of the License at
+%%
+%% http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing,
+%% software distributed under the License is distributed on an
+%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+%% KIND, either express or implied. See the License for the
+%% specific language governing permissions and limitations
+%% under the License.
+%%
+%% -------------------------------------------------------------------
+-module(riak_core_stats_sup).
+-behaviour(supervisor).
+-export([start_link/0, init/1]).
+-export([start_server/1, stop_server/1]).
+
+%% Helper macro for declaring children of supervisor
+-define(CHILD(I, Type, Timeout), {I, {I, start_link, []}, permanent, Timeout, Type, [I]}).
+-define(CHILD(I, Type), ?CHILD(I, Type, 5000)).
+
+start_link() ->
+ supervisor:start_link({local, ?MODULE}, ?MODULE, []).
+
+init([]) ->
+ Children = [stat_server(Mod) || {_App, Mod} <- riak_core:stat_mods()],
+ {ok, {{one_for_one, 5, 10}, [?CHILD(riak_core_stat_cache, worker)|Children]}}.
+
+start_server(Mod) ->
+ Ref = stat_server(Mod),
+ Pid = case supervisor:start_child(?MODULE, Ref) of
+ {ok, Child} -> Child;
+ {error, {already_started, Child}} -> Child
+ end,
+ Pid.
+
+stop_server(Mod) ->
+ supervisor:terminate_child(?MODULE, Mod),
+ supervisor:delete_child(?MODULE, Mod),
+ ok.
+
+%% @private
+stat_server(Mod) ->
+ ?CHILD(Mod, worker).
View
@@ -67,14 +67,11 @@ init([]) ->
?CHILD(riak_core_node_watcher_events, worker),
?CHILD(riak_core_node_watcher, worker),
?CHILD(riak_core_vnode_manager, worker),
- ?CHILD(riak_core_stat_cache, worker),
- ?CHILD(riak_core_stat, worker),
?CHILD(riak_core_capability, worker),
?CHILD(riak_core_gossip, worker),
?CHILD(riak_core_claimant, worker),
+ ?CHILD(riak_core_stat_sup, supervisor),
RiakWebs
]),
{ok, {{one_for_one, 10, 10}, Children}}.
-
-

2 comments on commit 9279d51

Unfortunately this prevents riak_core from starting when having an application that depends on riak_core and folsom :(

@russelldb could you ELI5 the purpose of it? I'm unable to comprehend the commit message tbh. Were you experiencing folsom crashing? Was folsom's own supervision not good enough?

Contributor

russelldb replied Jan 19, 2015

ELI15? Folsom's supervisor is not good enough. Ets tables containing histogram data were crashing and then forever errorring as some other table would not let you re-create them, nor delete them. TBH we're removing folsom in preference to exometer, so a PR that removes this change might get some attention.

Please sign in to comment.