Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

0330 exit with non zero code if conf init failed #10286

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 4 additions & 0 deletions apps/emqx/src/emqx_app.erl
Original file line number Diff line number Diff line change
Expand Up @@ -72,9 +72,13 @@ set_init_config_load_done() ->
%% Read the `init_config_load_done' flag from the `emqx' application
%% environment. Evaluates to `false' when the flag has not been set.
get_init_config_load_done() ->
    case application:get_env(emqx, init_config_load_done) of
        {ok, Done} -> Done;
        undefined -> false
    end.

%% @doc Record the transaction id from which this node should resume applying
%% once it has booted. The id is handed over by the core node from which the
%% latest config was just copied. It is kept in the `emqx' application
%% environment under the `cluster_rpc_init_tnx_id' key.
set_init_tnx_id(TnxId) ->
    EnvKey = cluster_rpc_init_tnx_id,
    application:set_env(emqx, EnvKey, TnxId).

%% @doc Fetch the transaction id from which this node should start applying
%% after boot. Evaluates to `-1' when no id has been stored in the `emqx'
%% application environment.
get_init_tnx_id() ->
    case application:get_env(emqx, cluster_rpc_init_tnx_id) of
        {ok, TnxId} -> TnxId;
        undefined -> -1
    end.

Expand Down
6 changes: 6 additions & 0 deletions apps/emqx_conf/src/emqx_cluster_rpc.erl
Original file line number Diff line number Diff line change
Expand Up @@ -275,8 +275,13 @@ init([Node, RetryMs]) ->
_ = mria:wait_for_tables([?CLUSTER_MFA, ?CLUSTER_COMMIT]),
{ok, _} = mnesia:subscribe({table, ?CLUSTER_MFA, simple}),
State = #{node => Node, retry_interval => RetryMs},
%% The init transaction ID is set in emqx_conf_app after
%% it has fetched the latest config from one of the core nodes
TnxId = emqx_app:get_init_tnx_id(),
ok = maybe_init_tnx_id(Node, TnxId),
%% Now continue with the normal catch-up process,
%% i.e. apply the transactions that were committed after the
%% config was copied, up to the present.
{ok, State, {continue, ?CATCH_UP}}.

%% @private
Expand Down Expand Up @@ -396,6 +401,7 @@ get_cluster_tnx_id() ->
Id -> Id
end.

%% The entry point of a config change transaction.
init_mfa(Node, MFA) ->
mnesia:write_lock_table(?CLUSTER_MFA),
LatestId = get_cluster_tnx_id(),
Expand Down
2 changes: 1 addition & 1 deletion apps/emqx_conf/src/emqx_conf_app.erl
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ start(_StartType, _StartArgs) ->
reason => E,
stacktrace => St
}),
init:stop()
init:stop(1)
end,
ok = emqx_config_logger:refresh_config(),
emqx_conf_sup:start_link().
Expand Down
2 changes: 1 addition & 1 deletion bin/emqx
Original file line number Diff line number Diff line change
Expand Up @@ -766,7 +766,7 @@ generate_config() {
local node_name="$2"
## Delete the *.siz files first or it can't start after
## changing the config 'log.rotation.size'
rm -rf "${RUNNER_LOG_DIR}"/*.siz
rm -f "${RUNNER_LOG_DIR}"/*.siz

## timestamp for each generation
local NOW_TIME
Expand Down
2 changes: 2 additions & 0 deletions changes/ce/fix-10286.en.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Enhance logging behaviour during boot failure.
When EMQX fails to start due to corrupted configuration files, excessive logging is eliminated and no crash dump file is generated.
2 changes: 2 additions & 0 deletions changes/ce/fix-10286.zh.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
优化启动失败的错误日志。
如果 EMQX 因为损坏的配置文件无法启动时,不会再打印过多的错误日志,也不再生成 crash.dump 文件。
21 changes: 21 additions & 0 deletions scripts/test/emqx-boot.bats
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#!/usr/bin/env bats

# https://github.com/bats-core/bats-core
# env PROFILE=emqx bats -t -p --verbose-run scripts/test/emqx-boot.bats

# Guard test: the other tests in this file build the release path from
# $PROFILE, so fail when it is unset or empty.
@test "PROFILE must be set" {
    test -n "$PROFILE"
}

# Boot the release console with a node name that is deliberately invalid and
# check that the captured output (stdout and stderr) reports the problem.
@test "emqx boot with invalid node name" {
    # `|| true` keeps the non-zero exit of the failed boot from aborting the
    # test before the output can be inspected. The path is quoted so that a
    # PROFILE value containing whitespace or glob characters is not split.
    output="$(env EMQX_NODE_NAME="invliadename#" "./_build/${PROFILE}/rel/emqx/bin/emqx" console 2>&1|| true)"
    [[ "$output" =~ "ERROR: Invalid node name,".+ ]]
}

# Boot the release console with a deliberately corrupted cluster override
# config and check that the command exits with a non-zero status.
@test "corrupted cluster config file" {
    conffile="./_build/${PROFILE}/rel/emqx/data/configs/cluster-override.conf"
    # Overwrite the file with a lone "{" to corrupt it.
    echo "{" > "$conffile"
    run "./_build/${PROFILE}/rel/emqx/bin/emqx" console
    # Clean up before asserting: a failed assertion ends the test right away,
    # which would otherwise leave the corrupted file behind for later runs.
    # `rm` does not touch $status, which was set by `run` above.
    rm -f "$conffile"
    [[ $status -ne 0 ]]
}