From 5f7cad902b9fc97b642ba7b8ea90178e978ad70f Mon Sep 17 00:00:00 2001 From: "Yoshiaki Ueda (bootjp)" Date: Fri, 24 Apr 2026 16:50:16 +0900 Subject: [PATCH 1/5] ops(rolling-update): add GOMEMLIMIT=1800MiB + --memory=2500m defaults Prevents kernel OOM-SIGKILL cascades observed on 2026-04-24 where all 4 live nodes were killed 22-169 times in 24h under a traffic spike, corrupting WAL tails and triggering election storms. GOMEMLIMIT makes Go GC before the ceiling; --memory keeps any surviving OOM scoped to the container (not qemu-guest-agent / systemd). --- scripts/rolling-update.env.example | 7 +++ scripts/rolling-update.sh | 73 +++++++++++++++++++++++++++++- 2 files changed, 78 insertions(+), 2 deletions(-) diff --git a/scripts/rolling-update.env.example b/scripts/rolling-update.env.example index 1f1e8c31..387e3cb6 100644 --- a/scripts/rolling-update.env.example +++ b/scripts/rolling-update.env.example @@ -51,3 +51,10 @@ SSH_STRICT_HOST_KEY_CHECKING="accept-new" RAFTADMIN_REMOTE_BIN="/tmp/elastickv-raftadmin" RAFTADMIN_RPC_TIMEOUT_SECONDS="5" RAFTADMIN_ALLOW_INSECURE="true" + +# OOM defenses applied on 2026-04-24 after kernel OOM-SIGKILL cascades. +# GOMEMLIMIT makes Go GC before the container hits --memory; --memory keeps +# any kill scoped to the container, not host processes. Set either to "" to +# opt out. User EXTRA_ENV keys override matching keys in DEFAULT_EXTRA_ENV. +DEFAULT_EXTRA_ENV="GOMEMLIMIT=1800MiB" +CONTAINER_MEMORY_LIMIT="2500m" diff --git a/scripts/rolling-update.sh b/scripts/rolling-update.sh index d84c16f6..81a72d3e 100755 --- a/scripts/rolling-update.sh +++ b/scripts/rolling-update.sh @@ -62,6 +62,18 @@ Optional environment: Each pair must be KEY=VALUE with a non-empty KEY; pairs themselves must not contain whitespace. + DEFAULT_EXTRA_ENV defaults to "GOMEMLIMIT=1800MiB" (Go runtime soft memory + ceiling; GC works harder before approaching the hard --memory limit so the + kernel OOM killer is not triggered). Merged with EXTRA_ENV before forwarding; + if a user-supplied EXTRA_ENV entry sets the same KEY, the user value wins. + Set DEFAULT_EXTRA_ENV="" to disable the default. + + CONTAINER_MEMORY_LIMIT + docker run --memory value (default: 2500m). Hard container-scoped memory + ceiling; any OOM kill is contained to the elastickv container rather than + cascading to host processes (e.g. qemu-guest-agent, systemd). Paired with + GOMEMLIMIT=1800MiB so Go GC preempts the kill. Set to "" to disable. + Notes: - If RAFT_TO_REDIS_MAP is unset, it is derived automatically from NODES, RAFT_PORT, and REDIS_PORT. @@ -113,6 +125,9 @@ SSH_TARGETS="${SSH_TARGETS:-}" ROLLING_ORDER="${ROLLING_ORDER:-}" RAFT_TO_REDIS_MAP="${RAFT_TO_REDIS_MAP:-}" RAFT_TO_S3_MAP="${RAFT_TO_S3_MAP:-}" +# Container OOM defenses. See usage() for rationale. Empty string disables. +DEFAULT_EXTRA_ENV="${DEFAULT_EXTRA_ENV-GOMEMLIMIT=1800MiB}" +CONTAINER_MEMORY_LIMIT="${CONTAINER_MEMORY_LIMIT-2500m}" if [[ -z "$NODES" ]]; then echo "NODES is required" >&2 @@ -427,6 +442,7 @@ update_one_node() { RAFT_TO_REDIS_MAP="$RAFT_TO_REDIS_MAP_Q" \ RAFT_TO_S3_MAP="$RAFT_TO_S3_MAP_Q" \ EXTRA_ENV="$EXTRA_ENV_Q" \ + CONTAINER_MEMORY_LIMIT="$CONTAINER_MEMORY_LIMIT_Q" \ bash -s <<'REMOTE' set -euo pipefail @@ -707,10 +723,20 @@ run_container() { done fi + # Optional hard container-scoped memory limit. Keeps any OOM kill contained + # to the elastickv container rather than cascading to host processes + # (e.g. qemu-guest-agent, systemd). Pair with GOMEMLIMIT via EXTRA_ENV so + # the Go runtime GCs before the kernel kills the container. + local memory_flags=() + if [[ -n "${CONTAINER_MEMORY_LIMIT:-}" ]]; then + memory_flags=(--memory "$CONTAINER_MEMORY_LIMIT") + fi + docker run -d \ --name "$CONTAINER_NAME" \ --restart unless-stopped \ --network host \ + "${memory_flags[@]}" \ -v "$DATA_DIR:$DATA_DIR" \ "${s3_creds_volume[@]}" \ "${extra_env_flags[@]}" \ @@ -868,9 +894,52 @@ ensure_remote_raftadmin_binaries # CR handling additionally covers deploy.env files edited on Windows. # `${EXTRA_ENV:-}` is required because `set -u` is active and EXTRA_ENV # may be unset (the variable is optional in deploy.env). -EXTRA_ENV_NORMALISED="${EXTRA_ENV:-}" -EXTRA_ENV_NORMALISED="${EXTRA_ENV_NORMALISED//[$'\t\r\n']/ }" +# Merge DEFAULT_EXTRA_ENV (operator-safety defaults like GOMEMLIMIT) with any +# user-supplied EXTRA_ENV. User-supplied KEYs win over defaults for the same +# KEY; the remote parser forwards pairs via `-e KEY=VALUE` so docker evaluates +# the last occurrence, which means pre-pending defaults is correct: later user +# entries override earlier defaults. We still de-duplicate here so the printed +# command line stays clean. +EXTRA_ENV_USER_NORMALISED="${EXTRA_ENV:-}" +EXTRA_ENV_USER_NORMALISED="${EXTRA_ENV_USER_NORMALISED//[$'\t\r\n']/ }" +EXTRA_ENV_DEFAULT_NORMALISED="${DEFAULT_EXTRA_ENV:-}" +EXTRA_ENV_DEFAULT_NORMALISED="${EXTRA_ENV_DEFAULT_NORMALISED//[$'\t\r\n']/ }" + +merge_extra_env() { + local defaults="$1" + local user="$2" + local -A seen=() + local -a user_pairs=() + local -a default_pairs=() + local pair key merged="" + + read -r -a user_pairs <<< "$user" + for pair in "${user_pairs[@]}"; do + [[ -n "$pair" ]] || continue + [[ "$pair" == *=* ]] || continue + key="${pair%%=*}" + seen["$key"]=1 + done + + read -r -a default_pairs <<< "$defaults" + for pair in "${default_pairs[@]}"; do + [[ -n "$pair" ]] || continue + [[ "$pair" == *=* ]] || continue + key="${pair%%=*}" + if [[ -z "${seen[$key]:-}" ]]; then + merged+="${merged:+ }$pair" + fi + done + for pair in "${user_pairs[@]}"; do + [[ -n "$pair" ]] || continue + merged+="${merged:+ }$pair" + done + printf '%s' "$merged" +} + +EXTRA_ENV_NORMALISED="$(merge_extra_env "$EXTRA_ENV_DEFAULT_NORMALISED" "$EXTRA_ENV_USER_NORMALISED")" EXTRA_ENV_Q="$(printf '%q' "$EXTRA_ENV_NORMALISED")" +CONTAINER_MEMORY_LIMIT_Q="$(printf '%q' "${CONTAINER_MEMORY_LIMIT:-}")" S3_CREDENTIALS_FILE_Q="$(printf '%q' "${S3_CREDENTIALS_FILE:-}")" IMAGE_Q="$(printf '%q' "$IMAGE")" DATA_DIR_Q="$(printf '%q' "$DATA_DIR")" From a3f0cf00afc3dd88c8e9d4d4d8e507dc3e4acc61 Mon Sep 17 00:00:00 2001 From: "Yoshiaki Ueda (bootjp)" Date: Sat, 25 Apr 2026 00:11:37 +0900 Subject: [PATCH 2/5] fix(rolling-update): merge_extra_env without bash 4 associative arrays merge_extra_env used `local -A seen=()` which requires Bash 4+. macOS ships Bash 3.2 by default, so operators invoking the script from a Mac workstation hit a syntax error on source (Gemini MEDIUM feedback). Replace the associative array with a space-padded string + substring match (" KEY "). The EXTRA_ENV list is a handful of entries, so the linear lookup cost is trivial. Verified behaviourally unchanged: the five merge cases (defaults+user disjoint, user overrides default, default only, user only, both empty) all produce the same output as the Bash-4 implementation. --- scripts/rolling-update.sh | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/scripts/rolling-update.sh b/scripts/rolling-update.sh index 81a72d3e..8296fb4d 100755 --- a/scripts/rolling-update.sh +++ b/scripts/rolling-update.sh @@ -908,17 +908,20 @@ EXTRA_ENV_DEFAULT_NORMALISED="${EXTRA_ENV_DEFAULT_NORMALISED//[$'\t\r\n']/ }" merge_extra_env() { local defaults="$1" local user="$2" - local -A seen=() + # Portable across Bash 3.2 (macOS default) which lacks associative + # arrays: concatenate user KEYs into a space-padded string and match + # with " KEY " to test set membership. The EXTRA_ENV list is typically + # a handful of entries, so the linear check is negligible. local -a user_pairs=() local -a default_pairs=() - local pair key merged="" + local pair key seen=" " merged="" read -r -a user_pairs <<< "$user" for pair in "${user_pairs[@]}"; do [[ -n "$pair" ]] || continue [[ "$pair" == *=* ]] || continue key="${pair%%=*}" - seen["$key"]=1 + seen+="${key} " done read -r -a default_pairs <<< "$defaults" @@ -926,7 +929,7 @@ merge_extra_env() { [[ -n "$pair" ]] || continue [[ "$pair" == *=* ]] || continue key="${pair%%=*}" - if [[ -z "${seen[$key]:-}" ]]; then + if [[ "$seen" != *" ${key} "* ]]; then merged+="${merged:+ }$pair" fi done From edd2a9106e32c3f5617737cfe65a25624514b499 Mon Sep 17 00:00:00 2001 From: "Yoshiaki Ueda (bootjp)" Date: Sat, 25 Apr 2026 00:34:51 +0900 Subject: [PATCH 3/5] fix(rolling-update): guard read -a against empty here-strings under set -e MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Gemini HIGH (line 919 + 927): on Bash 3.2 (macOS default, which this script must stay compatible with after the previous fix) `read -a` on an empty here-string returns non-zero, and `set -euo pipefail` then terminates the script. Skip the read when the source string is empty — the empty array is the intended result either way. Verified under `set -euo pipefail` with all five merge cases (both empty, only default, only user, user overrides default, disjoint user + default) — all produce the same output as before. --- scripts/rolling-update.sh | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/scripts/rolling-update.sh b/scripts/rolling-update.sh index 8296fb4d..dcdb415f 100755 --- a/scripts/rolling-update.sh +++ b/scripts/rolling-update.sh @@ -916,7 +916,13 @@ merge_extra_env() { local -a default_pairs=() local pair key seen=" " merged="" - read -r -a user_pairs <<< "$user" + # Guard the here-strings: on Bash 3.2 (macOS default) `read` on an + # empty here-string returns non-zero, which trips `set -e`. Skip the + # read when the source string is empty — the empty array is the + # intended result either way. + if [[ -n "$user" ]]; then + read -r -a user_pairs <<< "$user" + fi for pair in "${user_pairs[@]}"; do [[ -n "$pair" ]] || continue [[ "$pair" == *=* ]] || continue @@ -924,7 +930,9 @@ merge_extra_env() { seen+="${key} " done - read -r -a default_pairs <<< "$defaults" + if [[ -n "$defaults" ]]; then + read -r -a default_pairs <<< "$defaults" + fi for pair in "${default_pairs[@]}"; do [[ -n "$pair" ]] || continue [[ "$pair" == *=* ]] || continue From 941ed32dd84cea7e2d474efbbdd6c2c7aeeb6588 Mon Sep 17 00:00:00 2001 From: "Yoshiaki Ueda (bootjp)" Date: Sat, 25 Apr 2026 00:55:08 +0900 Subject: [PATCH 4/5] fix(rolling-update): explicit IFS + fail loud on malformed DEFAULT_EXTRA_ENV - Gemini Medium (lines 924, 934): set IFS=$' \t\n' per read -a call so the split is stable regardless of any surrounding IFS mutation. - Codex P2 (line 938): validate DEFAULT_EXTRA_ENV entries explicitly. Unlike user-supplied EXTRA_ENV (forgivable typo, silently dropped for compatibility with existing deploy.env habits), DEFAULT_EXTRA_ENV is where we install safeguards like GOMEMLIMIT; a typo there means the safeguard is silently absent. Fail the merge with a clear message instead. --- scripts/rolling-update.sh | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/scripts/rolling-update.sh b/scripts/rolling-update.sh index dcdb415f..44dc5167 100755 --- a/scripts/rolling-update.sh +++ b/scripts/rolling-update.sh @@ -920,8 +920,10 @@ merge_extra_env() { # empty here-string returns non-zero, which trips `set -e`. Skip the # read when the source string is empty — the empty array is the # intended result either way. + # IFS is explicitly set per-read so a caller's surrounding IFS + # doesn't change how DEFAULT_EXTRA_ENV / EXTRA_ENV are split. if [[ -n "$user" ]]; then - read -r -a user_pairs <<< "$user" + IFS=$' \t\n' read -r -a user_pairs <<< "$user" fi for pair in "${user_pairs[@]}"; do [[ -n "$pair" ]] || continue @@ -931,7 +933,18 @@ merge_extra_env() { done if [[ -n "$defaults" ]]; then - read -r -a default_pairs <<< "$defaults" + IFS=$' \t\n' read -r -a default_pairs <<< "$defaults" + # Unlike EXTRA_ENV (user-supplied, forgivable typos), DEFAULT_EXTRA_ENV + # is baked into deploy.env — a malformed token there means a + # safeguard we installed deliberately is silently ignored. Fail + # loudly instead of dropping it. + for pair in "${default_pairs[@]}"; do + [[ -n "$pair" ]] || continue + if [[ "$pair" != *=* ]]; then + echo "rolling-update: malformed DEFAULT_EXTRA_ENV entry '$pair' (expected KEY=VALUE)" >&2 + return 1 + fi + done fi for pair in "${default_pairs[@]}"; do [[ -n "$pair" ]] || continue From 75db3bfdf4b4d52dfcec740128c59b0a7468b0de Mon Sep 17 00:00:00 2001 From: "Yoshiaki Ueda (bootjp)" Date: Sat, 25 Apr 2026 01:11:43 +0900 Subject: [PATCH 5/5] fix(rolling-update): reject empty-key DEFAULT_EXTRA_ENV entries - Codex P2 (rolling-update.sh:944, new angle over round 3): the prior validator only rejected entries without any equals sign; an entry like =1800MiB satisfied the *=* match and was let through, only to be rejected later by run_container key regex. Fail the merge explicitly for empty-key entries at the same point we fail on missing equals sign, so a malformed safeguard is caught with a clear message. The round-3 Gemini Medium about read -a / set -e was verified against bash 3.2 (macOS default): the existing [[ -n "$user" ]] guard prevents the empty-here-string case and whitespace-only input reads successfully into an empty array under bash 3.2. Not changing the || true shape the reviewer suggested; the guard is sufficient and appending || true would swallow real read failures. --- scripts/rolling-update.sh | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/scripts/rolling-update.sh b/scripts/rolling-update.sh index 44dc5167..97e23de6 100755 --- a/scripts/rolling-update.sh +++ b/scripts/rolling-update.sh @@ -938,12 +938,21 @@ merge_extra_env() { # is baked into deploy.env — a malformed token there means a # safeguard we installed deliberately is silently ignored. Fail # loudly instead of dropping it. + # Three failure modes to catch early: + # - no `=` at all (e.g. GOMEMLIMIT) -> malformed + # - empty key before `=` (e.g. =1800MiB) -> malformed + # (the `*=*` pattern match alone accepts this) + # - empty pair (covered by the continue above) for pair in "${default_pairs[@]}"; do [[ -n "$pair" ]] || continue if [[ "$pair" != *=* ]]; then echo "rolling-update: malformed DEFAULT_EXTRA_ENV entry '$pair' (expected KEY=VALUE)" >&2 return 1 fi + if [[ "${pair%%=*}" == "" ]]; then + echo "rolling-update: malformed DEFAULT_EXTRA_ENV entry '$pair' (empty key)" >&2 + return 1 + fi done fi for pair in "${default_pairs[@]}"; do