Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

0211 fix remsh #9961

Merged
merged 7 commits into from
Feb 13, 2023
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
92 changes: 46 additions & 46 deletions bin/emqx
Original file line number Diff line number Diff line change
Expand Up @@ -299,6 +299,8 @@ fi
# Make sure log directory exists
mkdir -p "$RUNNER_LOG_DIR"

# turn off debug as this is static
set +x
COMPATIBILITY_CHECK='
io:format("BEAM_OK~n", []),
try
Expand All @@ -321,14 +323,15 @@ COMPATIBILITY_CHECK='
end,
halt(0).
'
[ "$DEBUG" -eq 1 ] && set -x

# Run "$BINDIR/$PROGNAME" without a shell, booting "$REL_DIR/start_clean",
# and evaluate the $COMPATIBILITY_CHECK snippet; whatever that snippet prints
# goes to this function's stdout.
# Globals read: BINDIR, PROGNAME, ERTS_LIB_DIR, REL_DIR, COMPATIBILITY_CHECK
compatiblity_info() {
# RELEASE_LIB is used by Elixir
# set crash-dump bytes to zero to ensure no crash dump is generated when erl crashes
# NOTE(review): '-boot_var RELEASE_LIB' is passed both before and after '-boot';
# one of the two looks redundant -- confirm which ordering is intended.
env ERL_CRASH_DUMP_BYTES=0 "$BINDIR/$PROGNAME" \
-noshell \
-boot_var RELEASE_LIB "$ERTS_LIB_DIR/lib" \
-boot "$REL_DIR/start_clean" \
-boot_var RELEASE_LIB "$ERTS_LIB_DIR/lib" \
-eval "$COMPATIBILITY_CHECK"
}

Expand Down Expand Up @@ -464,28 +467,52 @@ if [ "$IS_BOOT_COMMAND" = 'yes' ]; then
export EMQX_BOOT_CONFIGS
fi
else
# For non-boot commands, we try to get data_dir and ssl_dist_optfile from 'ps -ef' output
# Turn off debug as the ps output can be quite noisy
set +x
# For non-boot commands, we need below runtime facts to connect to the running node:
# 1. The running node name.
# 2. The Erlang cookie in use by the running node.
# 3. SSL options if the node is using TLS for Erlang distribution
#
# There are 3 sources of truth for this runtime information.
# Listed in the order of preference:
# 1. The boot command (which can be inspected from 'ps -ef' command output)
# 2. The generated app.<time>.config file located in the dir pointed by 'node.data_dir'
# 3. The bootstrap config 'etc/emqx.conf'
#
# NOTES:
# * We should avoid getting runtime information with the 3rd approach because 'etc/emqx.conf' might
# be updated after the node is started. e.g. if a user starts the node with name 'emqx@127.0.0.1'
# and then updates the config in the file to 'node.name = "emqx@local.net"', after this change
# there would be no way to stop the running node 'emqx@127.0.0.1', because the 'emqx stop' command
# would try to stop the new node instead.
# * The primary grep pattern is $RUNNER_ROOT_DIR because one can start multiple nodes at the same time
# * The grep pattern is written as '[e]mqx' rather than 'emqx' to avoid matching the grep command itself
# * The running 'remsh' and 'nodetool' processes must be excluded
# shellcheck disable=SC2009
PS_LINE="$(ps -ef | grep "\-[r]oot $RUNNER_ROOT_DIR" || true)"
PS_LINE="$(ps -ef | grep '[e]mqx' | grep -v -E '(remsh|nodetool)' | grep -oE "\-[r]oot ${RUNNER_ROOT_DIR}.*" || true)"
[ "$DEBUG" -eq 1 ] && echo "EMQX processes: $PS_LINE"
if [ "$(echo -e "$PS_LINE" | wc -l)" -eq 1 ]; then
## only one emqx node is running
## strip 'emqx_data_dir ' and ' --' because the dir in between may contain spaces
DATA_DIR="$(echo -e "$PS_LINE" | grep -oE "\-emqx_data_dir.*" | sed -E 's#.+emqx_data_dir[[:blank:]]##g' | sed -E 's#[[:blank:]]--$##g' || true)"
if [ "$DATA_DIR" = '' ]; then
## this should not happen unless -emqx_data_dir is not set
die "node_is_not_running!" 1
fi
# get ssl_dist_optfile option
SSL_DIST_OPTFILE="$(echo -e "$PS_LINE" | grep -oE '\-ssl_dist_optfile\s.+\s' | awk '{print $2}' || true)"
if [ -z "$SSL_DIST_OPTFILE" ]; then
EMQX_BOOT_CONFIGS="node.data_dir=${DATA_DIR}\ncluster.proto_dist=inet_tcp"
## only one emqx node is running, get running args from 'ps -ef' output
tmp_nodename=$(echo -e "$PS_LINE" | grep -oE "\s\-s?name.*" | awk '{print $2}' || true)
id marked this conversation as resolved.
Show resolved Hide resolved
tmp_cookie=$(echo -e "$PS_LINE" | grep -oE "\s\-setcookie.*" | awk '{print $2}' || true)
tmp_dist="$(echo -e "$PS_LINE" | grep -oE '\-ssl_dist_optfile\s.+\s' | awk '{print $2}' || true)"
# data_dir is actually not needed, but kept anyway
tmp_daadir="$(echo -e "$PS_LINE" | grep -oE "\-emqx_data_dir.*" | sed -E 's#.+emqx_data_dir[[:blank:]]##g' | sed -E 's#[[:blank:]]--$##g' || true)"
zmstone marked this conversation as resolved.
Show resolved Hide resolved
if [ -z "$tmp_dist" ]; then
tmp_proto='inet_tcp'
else
EMQX_BOOT_CONFIGS="node.data_dir=${DATA_DIR}\ncluster.proto_dist=inet_tls"
tmp_proto='inet_tls'
fi
## Make the format like what call_hocon multi_get prints out, but only need 4 args
EMQX_BOOT_CONFIGS="node.name=${tmp_nodename}\nnode.cookie=${tmp_cookie}\ncluster.proto_dist=${tmp_proto}\nnode.data_dir=${tmp_daadir}"
zmstone marked this conversation as resolved.
Show resolved Hide resolved
[ "$DEBUG" -eq 1 ] && echo "EMQX boot-configs: $EMQX_BOOT_CONFIGS"
else
## None or more than one node is running, resolve from boot config
## we have no choice but to read the bootstrap config (with environment overrides available in the current shell)
EMQX_BOOT_CONFIGS="$(call_hocon -s "$SCHEMA_MOD" -c "$EMQX_ETC_DIR"/emqx.conf multi_get "${CONF_KEYS[@]}")"
fi
[ "$DEBUG" -eq 1 ] && set -x
fi

get_boot_config() {
Expand Down Expand Up @@ -792,23 +819,6 @@ wait_until_return_val() {
done
}

# Print the path of the last (in sort order) generated vm.*.args file found
# under $CONFIGS_DIR.
# Globals:   CONFIGS_DIR (read), COMMAND (read, only for the error message)
# Arguments: $1 - name of the environment variable to suggest to the user
#                 when no vm.*.args file can be found
# Outputs:   the file path on stdout; diagnostics via logerr on failure
# Exits:     with status 1 when no such file exists
latest_vm_args() {
    local env_hint="$1"
    local newest
    newest="$(find "$CONFIGS_DIR" -type f -name "vm.*.args" | sort | tail -1)"
    if [ ! -f "$newest" ]; then
        # Stop tracing so the error report is not drowned in 'set -x' output
        set +x
        logerr "Node not initialized?"
        logerr "Generated config file vm.*.args is not found for command '$COMMAND'"
        logerr "in config dir: $CONFIGS_DIR"
        logerr "In case the file has been deleted while the node is running,"
        logerr "set environment variable '$env_hint' to continue"
        exit 1
    fi
    echo "$newest"
}

# backward compatible with 4.x
tr_log_to_env() {
local log_to=${EMQX_LOG__TO:-undefined}
Expand Down Expand Up @@ -851,6 +861,7 @@ maybe_log_to_console() {
fi
}

## To be backward compatible, read and then unset EMQX_NODE_NAME
if [ -n "${EMQX_NODE_NAME:-}" ]; then
export EMQX_NODE__NAME="${EMQX_NODE_NAME}"
unset EMQX_NODE_NAME
Expand All @@ -862,13 +873,7 @@ fi
## or long name (with '@') e.g. 'emqx@example.net' or 'emqx@127.0.0.1'
NAME="${EMQX_NODE__NAME:-}"
if [ -z "$NAME" ]; then
if [ "$IS_BOOT_COMMAND" = 'yes' ]; then
# for boot commands, inspect emqx.conf for node name
NAME="$(get_boot_config 'node.name')"
else
vm_args_file="$(latest_vm_args 'EMQX_NODE__NAME')"
NAME="$(grep -E '^-s?name' "${vm_args_file}" | awk '{print $2}')"
fi
NAME="$(get_boot_config 'node.name')"
fi

# force to use 'emqx' short name
Expand All @@ -894,18 +899,13 @@ PIPE_DIR="${PIPE_DIR:-/$DATA_DIR/${WHOAMI}_erl_pipes/$NAME/}"

## Resolve Erlang cookie.
if [ -n "${EMQX_NODE_COOKIE:-}" ]; then
## To be backward compatible, read EMQX_NODE_COOKIE
## To be backward compatible, read and unset EMQX_NODE_COOKIE
export EMQX_NODE__COOKIE="${EMQX_NODE_COOKIE}"
unset EMQX_NODE_COOKIE
fi
COOKIE="${EMQX_NODE__COOKIE:-}"
if [ -z "$COOKIE" ]; then
if [ "$IS_BOOT_COMMAND" = 'yes' ]; then
COOKIE="$(get_boot_config 'node.cookie')"
else
vm_args_file="$(latest_vm_args 'EMQX_NODE__COOKIE')"
COOKIE="$(grep -E '^-setcookie' "${vm_args_file}" | awk '{print $2}')"
fi
COOKIE="$(get_boot_config 'node.cookie')"
fi
[ -z "$COOKIE" ] && COOKIE="$EMQX_DEFAULT_ERLANG_COOKIE"
if [ $IS_BOOT_COMMAND = 'yes' ] && [ "$COOKIE" = "$EMQX_DEFAULT_ERLANG_COOKIE" ]; then
Expand Down
1 change: 1 addition & 0 deletions changes/v5.0.18/fix-9961.en.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Avoid parsing config files for node name and cookie when executing non-boot commands in bin/emqx
1 change: 1 addition & 0 deletions changes/v5.0.18/fix-9961.zh.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
在 bin/emqx 脚本中,避免在运行非启动命令时解析 emqx.conf 来获取节点名称和 cookie