From 68be24976a3eb006c988955f1b40a88079f2d9a1 Mon Sep 17 00:00:00 2001 From: margaretjgu Date: Mon, 4 May 2026 15:54:42 -0400 Subject: [PATCH 01/38] test: add KB data-views functional tests --- test/functional/kb/alerting.sh | 64 ++++++++++++++++++++++++++ test/functional/kb/connectors.sh | 79 ++++++++++++++++++++++++++++++++ test/functional/kb/data_views.sh | 58 +++++++++++++++++++++++ test/functional/kb/run.sh | 41 +++++++++++++++++ test/functional/kb/spaces.sh | 60 ++++++++++++++++++++++++ 5 files changed, 302 insertions(+) create mode 100755 test/functional/kb/alerting.sh create mode 100755 test/functional/kb/connectors.sh create mode 100755 test/functional/kb/data_views.sh create mode 100755 test/functional/kb/run.sh create mode 100755 test/functional/kb/spaces.sh diff --git a/test/functional/kb/alerting.sh b/test/functional/kb/alerting.sh new file mode 100755 index 0000000..1c6ade5 --- /dev/null +++ b/test/functional/kb/alerting.sh @@ -0,0 +1,64 @@ +#!/usr/bin/env bash +# Copyright Elasticsearch B.V. and contributors +# SPDX-License-Identifier: Apache-2.0 +# +# Functional tests for the Kibana alerting API namespace. +# Exercises create rule / get rule / find rules / delete rule. + +set -euo pipefail +exec < /dev/null + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(cd "$SCRIPT_DIR/../../.." && pwd)" +CLI="node $REPO_ROOT/dist/cli.js" + +RULE_ID="cli-ft-rule" + +# Use the built-in .es-query rule type (available in all Kibana deployments). 
+RULE_PARAMS='{"searchType":"esqlQuery","esqlQuery":{"esql":"FROM * | LIMIT 1"},"timeWindowSize":5,"timeWindowUnit":"m","threshold":[0],"thresholdComparator":">","size":0,"timeField":"@timestamp"}' + +teardown() { + $CLI stack kb alerting delete-alerting-rule-id --id "$RULE_ID" --json >/dev/null 2>&1 || true +} +trap teardown EXIT + +# ── create ──────────────────────────────────────────────────────────── + +output=$($CLI stack kb alerting post-alerting-rule-id \ + --id "$RULE_ID" \ + --consumer "alerts" \ + --name "CLI FT Rule" \ + --rule-type-id ".es-query" \ + --schedule '{"interval":"1m"}' \ + --params "$RULE_PARAMS" \ + --json 2>/dev/null) +[ "$(echo "$output" | jq -r '.id')" = "$RULE_ID" ] \ + || { echo "FAIL: alerting create — id mismatch"; exit 1; } +[ "$(echo "$output" | jq -r '.name')" = "CLI FT Rule" ] \ + || { echo "FAIL: alerting create — name mismatch"; exit 1; } +[ "$(echo "$output" | jq -r '.rule_type_id')" = ".es-query" ] \ + || { echo "FAIL: alerting create — rule_type_id mismatch"; exit 1; } + +# ── get ─────────────────────────────────────────────────────────────── + +output=$($CLI stack kb alerting get-alerting-rule-id --id "$RULE_ID" --json 2>/dev/null) +[ "$(echo "$output" | jq -r '.id')" = "$RULE_ID" ] \ + || { echo "FAIL: alerting get — id mismatch"; exit 1; } +[ "$(echo "$output" | jq -r '.enabled')" = "true" ] \ + || { echo "FAIL: alerting get — rule should be enabled by default"; exit 1; } + +# ── find ────────────────────────────────────────────────────────────── + +output=$($CLI stack kb alerting get-alerting-rules-find --json 2>/dev/null) +count=$(echo "$output" | jq '[.data[] | select(.id == "'"$RULE_ID"'")] | length') +[ "$count" -eq 1 ] || { echo "FAIL: alerting find — created rule not found"; exit 1; } + +# ── delete ──────────────────────────────────────────────────────────── + +$CLI stack kb alerting delete-alerting-rule-id --id "$RULE_ID" --json >/dev/null 2>/dev/null + +output=$($CLI stack kb alerting get-alerting-rules-find 
--json 2>/dev/null) +count=$(echo "$output" | jq '[.data[] | select(.id == "'"$RULE_ID"'")] | length') +[ "$count" -eq 0 ] || { echo "FAIL: alerting delete — rule still present after delete"; exit 1; } + +echo "PASS: kb/alerting.sh" diff --git a/test/functional/kb/connectors.sh b/test/functional/kb/connectors.sh new file mode 100755 index 0000000..2555a9c --- /dev/null +++ b/test/functional/kb/connectors.sh @@ -0,0 +1,79 @@ +#!/usr/bin/env bash +# Copyright Elasticsearch B.V. and contributors +# SPDX-License-Identifier: Apache-2.0 +# +# Functional tests for the Kibana connectors API namespace. +# Exercises list-types / create / get / list / delete. + +set -euo pipefail +exec < /dev/null + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(cd "$SCRIPT_DIR/../../.." && pwd)" +CLI="node $REPO_ROOT/dist/cli.js" + +CONNECTOR_ID="" + +teardown() { + if [ -n "$CONNECTOR_ID" ]; then + $CLI stack kb connectors delete-actions-connector-id --id "$CONNECTOR_ID" --json >/dev/null 2>&1 || true + fi +} +trap teardown EXIT + +# ── list connector types ─────────────────────────────────────────────── + +output=$($CLI stack kb connectors get-actions-connector-types --json 2>/dev/null) +count=$(echo "$output" | jq 'length') +[ "$count" -gt 0 ] || { echo "FAIL: connectors list-types — empty list"; exit 1; } + +# The .index connector type ships with all Kibana deployments. +index_type=$(echo "$output" | jq -r '[.[] | select(.id == ".index")] | .[0].id') +[ "$index_type" = ".index" ] \ + || { echo "FAIL: connectors list-types — .index type not found"; exit 1; } + +# ── create ──────────────────────────────────────────────────────────── + +# Kibana requires a UUID for connectors with encrypted secrets (most types). +# The .index connector does not have encrypted secrets so any UUID works. 
+CONNECTOR_UUID=$(python3 -c "import uuid; print(str(uuid.uuid4()))" 2>/dev/null \ + || uuidgen 2>/dev/null | tr '[:upper:]' '[:lower:]') +CONNECTOR_ID="$CONNECTOR_UUID" + +output=$($CLI stack kb connectors post-actions-connector-id \ + --id "$CONNECTOR_UUID" \ + --connector-type-id ".index" \ + --name "CLI FT Index Connector" \ + --kb-config '{"index":"cli-ft-connector-*"}' \ + --json 2>/dev/null) +[ "$(echo "$output" | jq -r '.id')" = "$CONNECTOR_UUID" ] \ + || { echo "FAIL: connectors create — id mismatch"; exit 1; } +[ "$(echo "$output" | jq -r '.name')" = "CLI FT Index Connector" ] \ + || { echo "FAIL: connectors create — name mismatch"; exit 1; } +[ "$(echo "$output" | jq -r '.connector_type_id')" = ".index" ] \ + || { echo "FAIL: connectors create — connector_type_id mismatch"; exit 1; } + +# ── get ─────────────────────────────────────────────────────────────── + +output=$($CLI stack kb connectors get-actions-connector-id --id "$CONNECTOR_ID" --json 2>/dev/null) +[ "$(echo "$output" | jq -r '.id')" = "$CONNECTOR_ID" ] \ + || { echo "FAIL: connectors get — id mismatch"; exit 1; } +[ "$(echo "$output" | jq -r '.name')" = "CLI FT Index Connector" ] \ + || { echo "FAIL: connectors get — name mismatch"; exit 1; } + +# ── list ────────────────────────────────────────────────────────────── + +output=$($CLI stack kb connectors get-actions-connectors --json 2>/dev/null) +count=$(echo "$output" | jq '[.[] | select(.id == "'"$CONNECTOR_ID"'")] | length') +[ "$count" -eq 1 ] || { echo "FAIL: connectors list — created connector not found"; exit 1; } + +# ── delete ──────────────────────────────────────────────────────────── + +$CLI stack kb connectors delete-actions-connector-id --id "$CONNECTOR_ID" --json >/dev/null 2>/dev/null +CONNECTOR_ID="" + +output=$($CLI stack kb connectors get-actions-connectors --json 2>/dev/null) +count=$(echo "$output" | jq '[.[] | select(.name == "CLI FT Index Connector")] | length') +[ "$count" -eq 0 ] || { echo "FAIL: connectors delete — 
connector still present after delete"; exit 1; }
+
+echo "PASS: kb/connectors.sh"
diff --git a/test/functional/kb/data_views.sh b/test/functional/kb/data_views.sh
new file mode 100755
index 0000000..7950c8b
--- /dev/null
+++ b/test/functional/kb/data_views.sh
@@ -0,0 +1,58 @@
+#!/usr/bin/env bash
+# Copyright Elasticsearch B.V. and contributors
+# SPDX-License-Identifier: Apache-2.0
+#
+# Functional tests for the Kibana data-views API namespace.
+# Exercises create / get / list / delete for a single data view.
+
+set -euo pipefail
+exec < /dev/null
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+REPO_ROOT="$(cd "$SCRIPT_DIR/../../.." && pwd)"
+CLI="node $REPO_ROOT/dist/cli.js"
+
+VIEW_ID=""
+
+teardown() {
+  if [ -n "$VIEW_ID" ]; then
+    $CLI stack kb data-views delete-data-view-default --view-id "$VIEW_ID" --json >/dev/null 2>&1 || true
+  fi
+}
+trap teardown EXIT
+
+# ── create ────────────────────────────────────────────────────────────
+
+output=$(echo '{"data_view":{"title":"cli-ft-dv-*","name":"cli-ft-dv"}}' \
+  | $CLI stack kb data-views create-data-view-default --json 2>/dev/null)
+VIEW_ID=$(echo "$output" | jq -r '.data_view.id // empty')
+[ -n "$VIEW_ID" ] || { echo "FAIL: data_views create — empty id"; exit 1; }
+[ "$(echo "$output" | jq -r '.data_view.name')" = "cli-ft-dv" ] \
+  || { echo "FAIL: data_views create — name mismatch"; exit 1; }
+[ "$(echo "$output" | jq -r '.data_view.title')" = "cli-ft-dv-*" ] \
+  || { echo "FAIL: data_views create — title mismatch"; exit 1; }
+
+# ── get ───────────────────────────────────────────────────────────────
+
+output=$($CLI stack kb data-views get-data-view-default --view-id "$VIEW_ID" --json 2>/dev/null)
+[ "$(echo "$output" | jq -r '.data_view.id')" = "$VIEW_ID" ] \
+  || { echo "FAIL: data_views get — id mismatch"; exit 1; }
+[ "$(echo "$output" | jq -r '.data_view.name')" = "cli-ft-dv" ] \
+  || { echo "FAIL: data_views get — name mismatch"; exit 1; }
+
+# ── list 
────────────────────────────────────────────────────────────── + +output=$($CLI stack kb data-views get-all-data-views-default --json 2>/dev/null) +count=$(echo "$output" | jq '[.data_view[] | select(.id == "'"$VIEW_ID"'")] | length') +[ "$count" -eq 1 ] || { echo "FAIL: data_views list — created view not found in list"; exit 1; } + +# ── delete ──────────────────────────────────────────────────────────── + +$CLI stack kb data-views delete-data-view-default --view-id "$VIEW_ID" --json >/dev/null 2>/dev/null +VIEW_ID="" + +output=$($CLI stack kb data-views get-all-data-views-default --json 2>/dev/null) +count=$(echo "$output" | jq '[.data_view[] | select(.name == "cli-ft-dv")] | length') +[ "$count" -eq 0 ] || { echo "FAIL: data_views delete — view still present after delete"; exit 1; } + +echo "PASS: kb/data_views.sh" diff --git a/test/functional/kb/run.sh b/test/functional/kb/run.sh new file mode 100755 index 0000000..a5e0a13 --- /dev/null +++ b/test/functional/kb/run.sh @@ -0,0 +1,41 @@ +#!/usr/bin/env bash +# Copyright Elasticsearch B.V. and contributors +# SPDX-License-Identifier: Apache-2.0 +# +# Runner for Kibana functional tests. +# Each test file is run in a subshell; failures are collected and reported. 
+ +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PASSED=0 +FAILED=0 +ERRORS="" + +run_test() { + local name="$1" + local file="$SCRIPT_DIR/$2" + if OUTPUT=$(bash "$file" 2>&1); then + PASSED=$((PASSED + 1)) + echo "PASS: $name" + else + FAILED=$((FAILED + 1)) + ERRORS="$ERRORS\n FAIL: $name" + echo "FAIL: $name" + echo "$OUTPUT" | tail -5 + fi +} + +run_test "data_views" "data_views.sh" +run_test "spaces" "spaces.sh" +run_test "alerting" "alerting.sh" +run_test "connectors" "connectors.sh" + +echo "" +echo "================================" +echo "Results: $PASSED passed, $FAILED failed" +if [ "$FAILED" -gt 0 ]; then + printf "Failures:%b\n" "$ERRORS" + exit 1 +fi +echo "================================" diff --git a/test/functional/kb/spaces.sh b/test/functional/kb/spaces.sh new file mode 100755 index 0000000..d676d24 --- /dev/null +++ b/test/functional/kb/spaces.sh @@ -0,0 +1,60 @@ +#!/usr/bin/env bash +# Copyright Elasticsearch B.V. and contributors +# SPDX-License-Identifier: Apache-2.0 +# +# Functional tests for the Kibana spaces API namespace. +# Exercises create / get / list / delete for a non-default space. + +set -euo pipefail +exec < /dev/null + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(cd "$SCRIPT_DIR/../../.." 
&& pwd)" +CLI="node $REPO_ROOT/dist/cli.js" + +SPACE_ID="cli-ft-space" + +teardown() { + $CLI stack kb spaces delete-spaces-space-id --id "$SPACE_ID" --json >/dev/null 2>&1 || true +} +trap teardown EXIT + +# ── create ──────────────────────────────────────────────────────────── + +output=$($CLI stack kb spaces post-spaces-space \ + --id "$SPACE_ID" --name "CLI FT Space" --description "Created by functional test" \ + --json 2>/dev/null) +[ "$(echo "$output" | jq -r '.id')" = "$SPACE_ID" ] \ + || { echo "FAIL: spaces create — id mismatch"; exit 1; } +[ "$(echo "$output" | jq -r '.name')" = "CLI FT Space" ] \ + || { echo "FAIL: spaces create — name mismatch"; exit 1; } + +# ── get ─────────────────────────────────────────────────────────────── + +output=$($CLI stack kb spaces get-spaces-space-id --id "$SPACE_ID" --json 2>/dev/null) +[ "$(echo "$output" | jq -r '.id')" = "$SPACE_ID" ] \ + || { echo "FAIL: spaces get — id mismatch"; exit 1; } +[ "$(echo "$output" | jq -r '.description')" = "Created by functional test" ] \ + || { echo "FAIL: spaces get — description mismatch"; exit 1; } + +# ── list ────────────────────────────────────────────────────────────── + +# include_authorized_purposes is required by the generated schema even though +# the Kibana docs treat it as optional; pass false to satisfy validation. +output=$($CLI stack kb spaces get-spaces-space --include-authorized-purposes false --json 2>/dev/null) +count=$(echo "$output" | jq '[.[] | select(.id == "'"$SPACE_ID"'")] | length') +[ "$count" -eq 1 ] || { echo "FAIL: spaces list — created space not found"; exit 1; } + +# Default space must always be present. 
+default_count=$(echo "$output" | jq '[.[] | select(.id == "default")] | length')
+[ "$default_count" -eq 1 ] || { echo "FAIL: spaces list — default space missing"; exit 1; }
+
+# ── delete ────────────────────────────────────────────────────────────
+
+$CLI stack kb spaces delete-spaces-space-id --id "$SPACE_ID" --json >/dev/null 2>/dev/null
+
+output=$($CLI stack kb spaces get-spaces-space --include-authorized-purposes false --json 2>/dev/null)
+count=$(echo "$output" | jq '[.[] | select(.id == "'"$SPACE_ID"'")] | length')
+[ "$count" -eq 0 ] || { echo "FAIL: spaces delete — space still present after delete"; exit 1; }
+
+echo "PASS: kb/spaces.sh"

From 1cc0758020f371b2d711685d337d7760ea65f094 Mon Sep 17 00:00:00 2001
From: margaretjgu
Date: Mon, 4 May 2026 15:54:42 -0400
Subject: [PATCH 02/38] ci: add Buildkite runner script for KB functional tests

---
 .buildkite/run-kb-tests.sh | 130 +++++++++++++++++++++++++++++++++++++
 1 file changed, 130 insertions(+)
 create mode 100755 .buildkite/run-kb-tests.sh

diff --git a/.buildkite/run-kb-tests.sh b/.buildkite/run-kb-tests.sh
new file mode 100755
index 0000000..cf6bb2a
--- /dev/null
+++ b/.buildkite/run-kb-tests.sh
@@ -0,0 +1,130 @@
+#!/usr/bin/env bash
+# Copyright Elasticsearch B.V. and contributors
+# SPDX-License-Identifier: Apache-2.0
+#
+# Buildkite entry point for Kibana functional tests.
+# Starts an Elasticsearch container, then a Kibana container that connects to it,
+# generates a CLI config pointing at both, and runs the hand-authored KB test suite. 
+ +set -euo pipefail + +STACK_VERSION="${STACK_VERSION:-9.3.0}" +ES_CONTAINER_NAME="elastic-cli-kb-es" +KB_CONTAINER_NAME="elastic-cli-kb" +NETWORK_NAME="elastic-cli-kb-net" + +cleanup() { + echo "--- Cleaning up" + docker rm -f "$KB_CONTAINER_NAME" 2>/dev/null || true + docker rm -f "$ES_CONTAINER_NAME" 2>/dev/null || true + docker network rm "$NETWORK_NAME" 2>/dev/null || true +} +trap cleanup EXIT + +echo "--- Setting up Node.js ${NODE_VERSION}" +export NVM_DIR="${NVM_DIR:-$HOME/.nvm}" +if [ ! -s "$NVM_DIR/nvm.sh" ]; then + echo "nvm not found, installing..." + mkdir -p "$NVM_DIR" + NVM_VERSION=$(curl -s https://api.github.com/repos/nvm-sh/nvm/releases/latest | jq -r '.tag_name // "v0.39.7"') + echo "Installing nvm ${NVM_VERSION}" + curl -o- "https://raw.githubusercontent.com/nvm-sh/nvm/${NVM_VERSION}/install.sh" | bash +fi +# shellcheck source=/dev/null +. "$NVM_DIR/nvm.sh" +nvm install "$NODE_VERSION" +nvm use "$NODE_VERSION" + +echo "--- Installing jq 1.7.1" +JQ_VERSION="1.7.1" +if ! 
jq --version 2>/dev/null | grep -q "$JQ_VERSION"; then + mkdir -p "$HOME/.local/bin" + curl -sfL "https://github.com/jqlang/jq/releases/download/jq-${JQ_VERSION}/jq-linux-amd64" -o "$HOME/.local/bin/jq" + chmod +x "$HOME/.local/bin/jq" + export PATH="$HOME/.local/bin:$PATH" +fi +echo "Using jq $(jq --version)" + +echo "--- Installing dependencies" +npm ci + +export NODE_OPTIONS="${NODE_OPTIONS:-} --max-old-space-size=6144" + +echo "--- Building CLI" +npm run build +npm link + +echo "--- Creating Docker network" +docker network create "$NETWORK_NAME" 2>/dev/null || true + +echo "--- Starting Elasticsearch ${STACK_VERSION}" +docker run \ + --name "$ES_CONTAINER_NAME" \ + --network "$NETWORK_NAME" \ + --network-alias elasticsearch \ + --env "discovery.type=single-node" \ + --env "xpack.security.enabled=false" \ + --env "xpack.license.self_generated.type=trial" \ + --env "action.destructive_requires_name=false" \ + --env "ES_JAVA_OPTS=-Xms512m -Xmx512m" \ + --detach \ + --rm \ + "docker.elastic.co/elasticsearch/elasticsearch:${STACK_VERSION}" + +echo "--- Waiting for Elasticsearch to be healthy" +RETRIES=0 +MAX_RETRIES=60 +until docker exec "$ES_CONTAINER_NAME" curl -sf http://localhost:9200/_cluster/health > /dev/null 2>&1; do + RETRIES=$((RETRIES + 1)) + if [ "$RETRIES" -ge "$MAX_RETRIES" ]; then + echo "Elasticsearch did not become healthy in time" + docker logs "$ES_CONTAINER_NAME" + exit 1 + fi + sleep 2 +done +echo "Elasticsearch is ready" + +echo "--- Starting Kibana ${STACK_VERSION}" +docker run \ + --name "$KB_CONTAINER_NAME" \ + --network "$NETWORK_NAME" \ + --publish 5601:5601 \ + --env "ELASTICSEARCH_HOSTS=http://elasticsearch:9200" \ + --env "xpack.security.enabled=false" \ + --env "xpack.encryptedSavedObjects.encryptionKey=aaaabbbbccccddddeeeeffffgggghhhh" \ + --env "xpack.reporting.encryptionKey=aaaabbbbccccddddeeeeffffgggghhhh" \ + --env "xpack.security.encryptionKey=aaaabbbbccccddddeeeeffffgggghhhh" \ + --detach \ + --rm \ + 
"docker.elastic.co/kibana/kibana:${STACK_VERSION}" + +echo "--- Waiting for Kibana to be healthy" +RETRIES=0 +MAX_RETRIES=90 +until curl -sf http://localhost:5601/api/status | jq -e '.status.overall.level == "available"' > /dev/null 2>&1; do + RETRIES=$((RETRIES + 1)) + if [ "$RETRIES" -ge "$MAX_RETRIES" ]; then + echo "Kibana did not become healthy in time" + docker logs "$KB_CONTAINER_NAME" + exit 1 + fi + sleep 3 +done +echo "Kibana is ready" + +echo "--- Generating CI config file" +CI_CONFIG_FILE="$(pwd)/.elasticrc-kb-ci.yml" +cat > "$CI_CONFIG_FILE" < Date: Mon, 4 May 2026 15:54:46 -0400 Subject: [PATCH 03/38] ci: add KB functional tests step to Buildkite pipeline --- .buildkite/pipeline.yml | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index 567e33f..a5f00ab 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -30,6 +30,23 @@ steps: STACK_VERSION: "9.3.0" command: ".buildkite/run-es-tests.sh" + - group: "KB Functional Tests" + steps: + - label: ":kibana: KB functional tests — Node {{matrix.node}}" + key: "kb-functional" + matrix: + setup: + node: + - "22" + - "24" + agents: + provider: gcp + image: family/core-ubuntu-2204 + env: + NODE_VERSION: "{{matrix.node}}" + STACK_VERSION: "9.3.0" + command: ".buildkite/run-kb-tests.sh" + - group: "Cloud Smoke Tests" steps: - label: ":cloud: Cloud smoke tests — Node {{matrix.node}}" From b24359b2c273122ae73626e375f32e647d744633 Mon Sep 17 00:00:00 2001 From: margaretjgu Date: Mon, 4 May 2026 15:54:46 -0400 Subject: [PATCH 04/38] chore: add test:functional:kb npm script --- package.json | 1 + 1 file changed, 1 insertion(+) diff --git a/package.json b/package.json index a518122..20ef319 100644 --- a/package.json +++ b/package.json @@ -17,6 +17,7 @@ "codegen:functional": "npx tsx codegen/functional/index.ts --tests-dir ../elasticsearch-clients-tests/tests", "test:functional:es": "bash test/functional/es/run.sh", 
"test:functional:cloud": "bash test/functional/cloud/smoke.sh", + "test:functional:kb": "bash test/functional/kb/run.sh", "test:license": "license-checker --production", "test:spdx": "./scripts/check-spdx", "generate:notice": "node scripts/generate-notice.mjs", From 7dba2f0255f41dd104a67833e0608a368447b945 Mon Sep 17 00:00:00 2001 From: margaretjgu Date: Mon, 4 May 2026 16:13:57 -0400 Subject: [PATCH 05/38] feat(kb): add requestType and responseType to KbApiDefinition --- src/kb/types.ts | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/kb/types.ts b/src/kb/types.ts index f5fbd6e..731774e 100644 --- a/src/kb/types.ts +++ b/src/kb/types.ts @@ -70,6 +70,10 @@ export interface KbApiDefinition { pathParams?: KbPathParam[] queryParams?: KbQueryParam[] bodyParams?: KbBodyParam[] + /** When 'multipart', the request body must be sent as multipart/form-data. */ + requestType?: 'multipart' + /** When 'ndjson', the success response is newline-delimited JSON (parsed into an array). */ + responseType?: 'ndjson' } const VALID_NAME = /^[a-z0-9][a-z0-9-]*$/ From 447bf4a305bdba6717ae882ad17becaf637d6587 Mon Sep 17 00:00:00 2001 From: margaretjgu Date: Mon, 4 May 2026 16:13:57 -0400 Subject: [PATCH 06/38] fix(kb): handle ndjson response and multipart/form-data request --- src/lib/kibana-client.ts | 31 ++++++++++++++++++++++++++++--- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/src/lib/kibana-client.ts b/src/lib/kibana-client.ts index 015ad65..d9d898c 100644 --- a/src/lib/kibana-client.ts +++ b/src/lib/kibana-client.ts @@ -3,6 +3,8 @@ * SPDX-License-Identifier: Apache-2.0 */ +import fs from 'node:fs' +import path from 'node:path' import { getResolvedConfig } from '../config/store.ts' import { isLoopbackUrl } from './is-loopback-host.ts' @@ -16,6 +18,8 @@ export interface KibanaRequestParams { path: string querystring?: Record body?: unknown + /** When set, the request is sent as multipart/form-data. 
Keys map to form field names; string values that resolve to an existing file path are sent as file uploads. */ + multipartFields?: Record } /** @@ -66,7 +70,6 @@ export class KibanaClient { const headers: Record = { 'Authorization': this.authHeader, 'Accept': 'application/json', - 'Content-Type': 'application/json', } // Kibana requires kbn-xsrf for all state-mutating requests to protect against CSRF @@ -76,7 +79,21 @@ export class KibanaClient { const init: RequestInit = { method, headers, redirect: 'error' } - if (params.body !== undefined) { + if (params.multipartFields != null) { + // Send as multipart/form-data; do NOT set Content-Type manually (fetch sets it with the boundary) + const form = new FormData() + for (const [field, value] of Object.entries(params.multipartFields)) { + const resolved = path.resolve(value) + if (fs.existsSync(resolved)) { + const blob = new Blob([fs.readFileSync(resolved)], { type: 'application/octet-stream' }) + form.append(field, blob, path.basename(resolved)) + } else { + form.append(field, value) + } + } + init.body = form + } else if (params.body !== undefined) { + headers['Content-Type'] = 'application/json' init.body = JSON.stringify(params.body) } @@ -88,7 +105,15 @@ export class KibanaClient { } const text = await response.text() - return text.length > 0 ? JSON.parse(text) : {} + if (text.length === 0) return {} + + // application/x-ndjson: parse each non-empty line as a JSON object + const contentType = response.headers.get('content-type') ?? 
'' + if (contentType.includes('ndjson')) { + return text.split('\n').filter((l) => l.trim().length > 0).map((l) => JSON.parse(l)) + } + + return JSON.parse(text) } /** From bee9d662fb0d3beda216ac3a957547736d4cf0be Mon Sep 17 00:00:00 2001 From: margaretjgu Date: Mon, 4 May 2026 16:13:57 -0400 Subject: [PATCH 07/38] fix(kb): route multipart body params to form fields in request builder --- src/kb/request-builder.ts | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/src/kb/request-builder.ts b/src/kb/request-builder.ts index 4871208..8de8b18 100644 --- a/src/kb/request-builder.ts +++ b/src/kb/request-builder.ts @@ -27,11 +27,17 @@ export function buildKibanaRequestParams ( const path = interpolatePath(def, input) const querystring = buildQuerystring(def, input) - const body = collectBody(def, input) const params: KibanaRequestParams = { method: def.method, path } if (Object.keys(querystring).length > 0) params.querystring = querystring - if (body !== undefined) params.body = body + + if (def.requestType === 'multipart') { + const fields = collectMultipartFields(def, input) + if (fields != null) params.multipartFields = fields + } else { + const body = collectBody(def, input) + if (body !== undefined) params.body = body + } return params } @@ -72,6 +78,24 @@ function buildQuerystring ( return qs } +/** + * Collects multipart form fields from body params. + * Each body param value is treated as a string (file path or literal value). + * Returns `undefined` when no fields are present. + */ +function collectMultipartFields ( + def: KbApiDefinition, + input: Record +): Record | undefined { + const fields: Record = {} + for (const param of def.bodyParams ?? []) { + const key = param.cliFlag ?? param.name + const value = input[key] + if (value !== undefined) fields[param.name] = String(value) + } + return Object.keys(fields).length === 0 ? undefined : fields +} + /** * Collects request body fields from body params. 
* Returns `undefined` when no body fields are present. From 7dd5841f17664f3313032ae0d37cc70ac0107ee8 Mon Sep 17 00:00:00 2001 From: margaretjgu Date: Mon, 4 May 2026 16:13:57 -0400 Subject: [PATCH 08/38] chore(kb): regenerate saved-objects with requestType and responseType --- src/kb/apis/saved-objects.ts | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/src/kb/apis/saved-objects.ts b/src/kb/apis/saved-objects.ts index b322ac9..09a92dc 100644 --- a/src/kb/apis/saved-objects.ts +++ b/src/kb/apis/saved-objects.ts @@ -26,6 +26,7 @@ export const savedObjectsApis: KbApiDefinition[] = [ { name: "search", type: "string", description: "Search for documents to export using the Elasticsearch Simple Query String syntax." }, { name: "type", type: "string", description: "The saved object types to include in the export. Use `*` to export all the types. Valid options depend on enabled plugins, but may include `visualization`, `dashboard`, `search`, `index-pattern`, `tag`, `config`, `config-global`, `lens`, `map`, `event-annotation-group`, `query`, `url`, `action`, `alert`, `alerting_rule_template`, `apm-indices`, `cases-user-actions`, `cases`, `cases-comments`, `infrastructure-monitoring-log-view`, `ml-trained-model`, `osquery-saved-query`, `osquery-pack`, `osquery-pack-asset`." }, ], + responseType: "ndjson", }, { name: "post-saved-objects-import", @@ -38,5 +39,25 @@ export const savedObjectsApis: KbApiDefinition[] = [ { name: "createNewCopies", type: "boolean", description: "Creates copies of saved objects, regenerates each object ID, and resets the origin. When used, potential conflict errors are avoided. NOTE: This option cannot be used with the `overwrite` and `compatibilityMode` options." }, { name: "compatibilityMode", type: "boolean", description: "Applies various adjustments to the saved objects that are being imported to maintain compatibility between different Kibana versions. 
Use this option only if you encounter issues with imported saved objects. NOTE: This option cannot be used with the `createNewCopies` option." }, ], + bodyParams: [ + { name: "file", type: "string", description: "A file exported using the export API. Changing the contents of the exported file in any way before importing it can cause errors, crashes or data loss. NOTE: The `savedObjects.maxImportExportSize` configuration setting limits the number of saved objects which may be included in this file. Similarly, the `savedObjects.maxImportPayloadBytes` setting limits the overall size of the file that can be imported.", required: true }, + ], + requestType: "multipart", + }, + { + name: "post-saved-objects-resolve-import-errors", + namespace: "saved-objects", + description: "Resolve import errors", + method: "POST", + path: "/api/saved_objects/_resolve_import_errors", + queryParams: [ + { name: "createNewCopies", type: "boolean", description: "Creates copies of saved objects, regenerates each object ID, and resets the origin." }, + { name: "compatibilityMode", type: "boolean", description: "Applies adjustments to maintain compatibility between different Kibana versions." 
}, + ], + bodyParams: [ + { name: "file", type: "string", description: "", required: true }, + { name: "retries", type: "string", description: "", required: true }, + ], + requestType: "multipart", }, ] From abbeb90d2a977308d3055e13222fb8d94bd9f29e Mon Sep 17 00:00:00 2001 From: margaretjgu Date: Mon, 4 May 2026 16:13:57 -0400 Subject: [PATCH 09/38] test(kb): add saved-objects export/import functional test --- test/functional/kb/run.sh | 9 ++--- test/functional/kb/saved_objects.sh | 56 +++++++++++++++++++++++++++++ 2 files changed, 61 insertions(+), 4 deletions(-) create mode 100755 test/functional/kb/saved_objects.sh diff --git a/test/functional/kb/run.sh b/test/functional/kb/run.sh index a5e0a13..c89097a 100755 --- a/test/functional/kb/run.sh +++ b/test/functional/kb/run.sh @@ -26,10 +26,11 @@ run_test() { fi } -run_test "data_views" "data_views.sh" -run_test "spaces" "spaces.sh" -run_test "alerting" "alerting.sh" -run_test "connectors" "connectors.sh" +run_test "data_views" "data_views.sh" +run_test "spaces" "spaces.sh" +run_test "alerting" "alerting.sh" +run_test "connectors" "connectors.sh" +run_test "saved_objects" "saved_objects.sh" echo "" echo "================================" diff --git a/test/functional/kb/saved_objects.sh b/test/functional/kb/saved_objects.sh new file mode 100755 index 0000000..493d617 --- /dev/null +++ b/test/functional/kb/saved_objects.sh @@ -0,0 +1,56 @@ +#!/usr/bin/env bash +# Copyright Elasticsearch B.V. and contributors +# SPDX-License-Identifier: Apache-2.0 +# +# Functional tests for the Kibana saved-objects API namespace. +# Exercises export (ndjson response) and import (multipart/form-data request) +# using the built-in 'config' saved-object type which is always present. + +set -euo pipefail +exec < /dev/null + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(cd "$SCRIPT_DIR/../../.." 
&& pwd)"
+CLI="node $REPO_ROOT/dist/cli.js"
+
+EXPORT_FILE="/tmp/cli-ft-so-export-$$.ndjson"
+
+teardown() {
+  rm -f "$EXPORT_FILE"
+}
+trap teardown EXIT
+
+# ── export ────────────────────────────────────────────────────────────
+# Export returns application/x-ndjson; the CLI parses it into a JSON array.
+
+output=$($CLI stack kb saved-objects post-saved-objects-export \
+  --type "config" --exclude-export-details true --json 2>/dev/null)
+
+# Response must be a non-empty JSON array.
+arr_type=$(echo "$output" | jq -r 'type')
+[ "$arr_type" = "array" ] || { echo "FAIL: saved_objects export — response is not an array (got $arr_type)"; exit 1; }
+
+count=$(echo "$output" | jq 'length')
+[ "$count" -gt 0 ] || { echo "FAIL: saved_objects export — empty array"; exit 1; }
+
+# Each element must have a type field.
+first_type=$(echo "$output" | jq -r '.[0].type // empty')
+[ -n "$first_type" ] || { echo "FAIL: saved_objects export — first element missing type field"; exit 1; }
+
+# ── import ────────────────────────────────────────────────────────────
+# Re-serialise the JSON array back to ndjson (one compact object per line)
+# then send as multipart/form-data via --file. 
+ +echo "$output" | jq -c '.[]' > "$EXPORT_FILE" +ndjson_lines=$(wc -l < "$EXPORT_FILE" | tr -d ' ') +[ "$ndjson_lines" -gt 0 ] || { echo "FAIL: saved_objects import — ndjson file is empty"; exit 1; } + +import_result=$($CLI stack kb saved-objects post-saved-objects-import \ + --overwrite true --file "$EXPORT_FILE" --json 2>/dev/null) +success=$(echo "$import_result" | jq -r '.success') +[ "$success" = "true" ] || { echo "FAIL: saved_objects import — success != true (got $success)"; exit 1; } + +success_count=$(echo "$import_result" | jq -r '.successCount') +[ "$success_count" -gt 0 ] || { echo "FAIL: saved_objects import — successCount = 0"; exit 1; } + +echo "PASS: kb/saved_objects.sh" From 33e7e21d14c1f1dc1ca8112986293b4732679792 Mon Sep 17 00:00:00 2001 From: margaretjgu Date: Mon, 4 May 2026 16:26:07 -0400 Subject: [PATCH 10/38] fix(kb): make auth optional in KibanaClient for security-disabled deployments --- src/lib/kibana-client.ts | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/src/lib/kibana-client.ts b/src/lib/kibana-client.ts index d9d898c..113e72d 100644 --- a/src/lib/kibana-client.ts +++ b/src/lib/kibana-client.ts @@ -35,12 +35,14 @@ export interface KibanaRequestParams { */ export class KibanaClient { readonly baseUrl: string - private readonly authHeader: string + private readonly authHeader: string | undefined private _fetch: typeof fetch = globalThis.fetch - constructor (baseUrl: string, auth: { api_key: string } | { username: string; password: string }) { + constructor (baseUrl: string, auth?: { api_key: string } | { username: string; password: string }) { this.baseUrl = baseUrl.replace(/\/+$/, '') - if ('api_key' in auth) { + if (auth == null) { + this.authHeader = undefined + } else if ('api_key' in auth) { this.authHeader = `ApiKey ${auth.api_key}` } else { const encoded = Buffer.from(`${auth.username}:${auth.password}`).toString('base64') @@ -68,9 +70,11 @@ export class KibanaClient { const 
method = params.method.toUpperCase() const headers: Record = { - 'Authorization': this.authHeader, 'Accept': 'application/json', } + if (this.authHeader != null) { + headers['Authorization'] = this.authHeader + } // Kibana requires kbn-xsrf for all state-mutating requests to protect against CSRF if (method !== 'GET' && method !== 'HEAD') { @@ -148,19 +152,15 @@ export function getKibanaClient (): KibanaClient { } const { url, auth } = kb - const authRecord = auth as Record - - let typedAuth: { api_key: string } | { username: string; password: string } - if (typeof authRecord['api_key'] === 'string') { - typedAuth = { api_key: authRecord['api_key'] } - } else if (typeof authRecord['username'] === 'string' && typeof authRecord['password'] === 'string') { - typedAuth = { username: authRecord['username'], password: authRecord['password'] } - } else { - throw new Error( - 'missing_config: Kibana auth requires either api_key or username/password. ' + - 'Check your .elasticrc.yml config file.' - ) + const authRecord = auth as Record | undefined + + let typedAuth: { api_key: string } | { username: string; password: string } | undefined + if (typeof authRecord?.['api_key'] === 'string') { + typedAuth = { api_key: authRecord['api_key'] as string } + } else if (typeof authRecord?.['username'] === 'string' && typeof authRecord?.['password'] === 'string') { + typedAuth = { username: authRecord['username'] as string, password: authRecord['password'] as string } } + // auth is optional — when absent (e.g. 
security disabled), requests are sent without credentials _client = new KibanaClient(url, typedAuth) return _client From 15a8ef53689e14ff18ba00180f26b16e79b78ad4 Mon Sep 17 00:00:00 2001 From: margaretjgu Date: Mon, 4 May 2026 16:34:30 -0400 Subject: [PATCH 11/38] test(kb): update unit tests for optional auth in KibanaClient --- test/kb/kibana-client.test.ts | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/test/kb/kibana-client.test.ts b/test/kb/kibana-client.test.ts index 64e734b..d43f154 100644 --- a/test/kb/kibana-client.test.ts +++ b/test/kb/kibana-client.test.ts @@ -52,6 +52,16 @@ function makeCapturingFetch (status = 200, body = '{}'): { // Constructor / auth header // --------------------------------------------------------------------------- +describe('KibanaClient — no auth', () => { + it('omits Authorization header when no auth is provided', async () => { + const { fetch, calls } = makeCapturingFetch() + const client = new KibanaClient('https://kb.example.com') + client._testSetFetch(fetch) + await client.request({ method: 'GET', path: '/api/status' }) + assert.equal((calls[0]!.init.headers as Record)['Authorization'], undefined) + }) +}) + describe('KibanaClient — API key auth', () => { it('sets Authorization header to ApiKey ', async () => { const { fetch, calls } = makeCapturingFetch() @@ -222,15 +232,11 @@ describe('getKibanaClient', () => { ) }) - it('throws missing_config when auth is missing api_key and credentials', () => { + it('returns an unauthenticated client when auth block is empty or absent', () => { setResolvedConfig(makeConfig({ kibana: { url: 'https://kb.example.com', auth: {} as never } })) - assert.throws( - () => getKibanaClient(), - (err: Error) => { - assert.ok(err.message.includes('missing_config')) - return true - } - ) + const client = getKibanaClient() + assert.ok(client instanceof KibanaClient) + assert.equal(client.baseUrl, 'https://kb.example.com') }) it('returns a KibanaClient 
instance configured with api_key auth', () => { From 620a99d133f593d625befa6a0f092836173dcdf0 Mon Sep 17 00:00:00 2001 From: margaretjgu Date: Mon, 4 May 2026 16:47:31 -0400 Subject: [PATCH 12/38] fix(test): use stackAlerts consumer and Node UUID in KB functional tests --- test/functional/kb/alerting.sh | 5 +++-- test/functional/kb/connectors.sh | 12 ++++++------ 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/test/functional/kb/alerting.sh b/test/functional/kb/alerting.sh index 1c6ade5..3aebe67 100755 --- a/test/functional/kb/alerting.sh +++ b/test/functional/kb/alerting.sh @@ -26,12 +26,13 @@ trap teardown EXIT output=$($CLI stack kb alerting post-alerting-rule-id \ --id "$RULE_ID" \ - --consumer "alerts" \ + --consumer "stackAlerts" \ --name "CLI FT Rule" \ --rule-type-id ".es-query" \ --schedule '{"interval":"1m"}' \ --params "$RULE_PARAMS" \ - --json 2>/dev/null) + --json 2>/dev/null) \ + || { echo "FAIL: alerting create — command failed"; exit 1; } [ "$(echo "$output" | jq -r '.id')" = "$RULE_ID" ] \ || { echo "FAIL: alerting create — id mismatch"; exit 1; } [ "$(echo "$output" | jq -r '.name')" = "CLI FT Rule" ] \ diff --git a/test/functional/kb/connectors.sh b/test/functional/kb/connectors.sh index 2555a9c..4744a34 100755 --- a/test/functional/kb/connectors.sh +++ b/test/functional/kb/connectors.sh @@ -23,7 +23,8 @@ trap teardown EXIT # ── list connector types ─────────────────────────────────────────────── -output=$($CLI stack kb connectors get-actions-connector-types --json 2>/dev/null) +output=$($CLI stack kb connectors get-actions-connector-types --json 2>/dev/null) \ + || { echo "FAIL: connectors list-types — command failed"; exit 1; } count=$(echo "$output" | jq 'length') [ "$count" -gt 0 ] || { echo "FAIL: connectors list-types — empty list"; exit 1; } @@ -34,10 +35,8 @@ index_type=$(echo "$output" | jq -r '[.[] | select(.id == ".index")] | .[0].id') # ── create ──────────────────────────────────────────────────────────── -# Kibana 
requires a UUID for connectors with encrypted secrets (most types). -# The .index connector does not have encrypted secrets so any UUID works. -CONNECTOR_UUID=$(python3 -c "import uuid; print(str(uuid.uuid4()))" 2>/dev/null \ - || uuidgen 2>/dev/null | tr '[:upper:]' '[:lower:]') +# Use Node.js crypto to generate a UUID (Node is always available in CI). +CONNECTOR_UUID=$(node -e "process.stdout.write(require('crypto').randomUUID())") CONNECTOR_ID="$CONNECTOR_UUID" output=$($CLI stack kb connectors post-actions-connector-id \ @@ -45,7 +44,8 @@ output=$($CLI stack kb connectors post-actions-connector-id \ --connector-type-id ".index" \ --name "CLI FT Index Connector" \ --kb-config '{"index":"cli-ft-connector-*"}' \ - --json 2>/dev/null) + --json 2>/dev/null) \ + || { echo "FAIL: connectors create — command failed"; exit 1; } [ "$(echo "$output" | jq -r '.id')" = "$CONNECTOR_UUID" ] \ || { echo "FAIL: connectors create — id mismatch"; exit 1; } [ "$(echo "$output" | jq -r '.name')" = "CLI FT Index Connector" ] \ From e690f8e342a89b8d1c79326357cafebbe9e42e8e Mon Sep 17 00:00:00 2001 From: margaretjgu Date: Mon, 4 May 2026 16:56:59 -0400 Subject: [PATCH 13/38] fix(ci): wait for alerting and actions plugins before running KB tests --- .buildkite/run-kb-tests.sh | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/.buildkite/run-kb-tests.sh b/.buildkite/run-kb-tests.sh index cf6bb2a..d6daa4d 100755 --- a/.buildkite/run-kb-tests.sh +++ b/.buildkite/run-kb-tests.sh @@ -111,7 +111,24 @@ until curl -sf http://localhost:5601/api/status | jq -e '.status.overall.level = fi sleep 3 done -echo "Kibana is ready" +echo "Kibana core is ready" + +# The actions and alerting plugins initialise after the main health check passes. +# Wait until their APIs return 200 before running tests. 
+echo "--- Waiting for alerting and actions plugins to be ready" +RETRIES=0 +MAX_RETRIES=30 +until curl -sf http://localhost:5601/api/actions/connector_types > /dev/null 2>&1 && \ + curl -sf "http://localhost:5601/api/alerting/rules/_find" > /dev/null 2>&1; do + RETRIES=$((RETRIES + 1)) + if [ "$RETRIES" -ge "$MAX_RETRIES" ]; then + echo "Alerting/actions plugins did not become ready in time" + docker logs "$KB_CONTAINER_NAME" --tail 50 + exit 1 + fi + sleep 3 +done +echo "Kibana plugins are ready" echo "--- Generating CI config file" CI_CONFIG_FILE="$(pwd)/.elasticrc-kb-ci.yml" From 98bd3e0dd5d5181075e9b6b1569ba33db3c133d4 Mon Sep 17 00:00:00 2001 From: margaretjgu Date: Mon, 4 May 2026 17:13:36 -0400 Subject: [PATCH 14/38] fix(ci): enable ES/Kibana security so actions and alerting plugins work --- .buildkite/run-kb-tests.sh | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/.buildkite/run-kb-tests.sh b/.buildkite/run-kb-tests.sh index d6daa4d..46af32a 100755 --- a/.buildkite/run-kb-tests.sh +++ b/.buildkite/run-kb-tests.sh @@ -57,15 +57,18 @@ npm link echo "--- Creating Docker network" docker network create "$NETWORK_NAME" 2>/dev/null || true +# Use a fixed password so the CLI config can reference it without secrets management. 
+ES_PASSWORD="changeme" + echo "--- Starting Elasticsearch ${STACK_VERSION}" docker run \ --name "$ES_CONTAINER_NAME" \ --network "$NETWORK_NAME" \ --network-alias elasticsearch \ --env "discovery.type=single-node" \ - --env "xpack.security.enabled=false" \ --env "xpack.license.self_generated.type=trial" \ --env "action.destructive_requires_name=false" \ + --env "ELASTIC_PASSWORD=${ES_PASSWORD}" \ --env "ES_JAVA_OPTS=-Xms512m -Xmx512m" \ --detach \ --rm \ @@ -74,7 +77,7 @@ docker run \ echo "--- Waiting for Elasticsearch to be healthy" RETRIES=0 MAX_RETRIES=60 -until docker exec "$ES_CONTAINER_NAME" curl -sf http://localhost:9200/_cluster/health > /dev/null 2>&1; do +until curl -sf -u "elastic:${ES_PASSWORD}" http://localhost:9200/_cluster/health > /dev/null 2>&1; do RETRIES=$((RETRIES + 1)) if [ "$RETRIES" -ge "$MAX_RETRIES" ]; then echo "Elasticsearch did not become healthy in time" @@ -91,10 +94,11 @@ docker run \ --network "$NETWORK_NAME" \ --publish 5601:5601 \ --env "ELASTICSEARCH_HOSTS=http://elasticsearch:9200" \ - --env "xpack.security.enabled=false" \ - --env "xpack.encryptedSavedObjects.encryptionKey=aaaabbbbccccddddeeeeffffgggghhhh" \ - --env "xpack.reporting.encryptionKey=aaaabbbbccccddddeeeeffffgggghhhh" \ - --env "xpack.security.encryptionKey=aaaabbbbccccddddeeeeffffgggghhhh" \ + --env "ELASTICSEARCH_USERNAME=elastic" \ + --env "ELASTICSEARCH_PASSWORD=${ES_PASSWORD}" \ + --env "xpack.encryptedSavedObjects.encryptionKey=xP9mfMqnRrNHmSmzPoBtLQvLFzYdHxKj" \ + --env "xpack.reporting.encryptionKey=xP9mfMqnRrNHmSmzPoBtLQvLFzYdHxKj" \ + --env "xpack.security.encryptionKey=xP9mfMqnRrNHmSmzPoBtLQvLFzYdHxKj" \ --detach \ --rm \ "docker.elastic.co/kibana/kibana:${STACK_VERSION}" @@ -102,7 +106,8 @@ docker run \ echo "--- Waiting for Kibana to be healthy" RETRIES=0 MAX_RETRIES=90 -until curl -sf http://localhost:5601/api/status | jq -e '.status.overall.level == "available"' > /dev/null 2>&1; do +until curl -sf -u "elastic:${ES_PASSWORD}" 
http://localhost:5601/api/status \ + | jq -e '.status.overall.level == "available"' > /dev/null 2>&1; do RETRIES=$((RETRIES + 1)) if [ "$RETRIES" -ge "$MAX_RETRIES" ]; then echo "Kibana did not become healthy in time" @@ -118,8 +123,8 @@ echo "Kibana core is ready" echo "--- Waiting for alerting and actions plugins to be ready" RETRIES=0 MAX_RETRIES=30 -until curl -sf http://localhost:5601/api/actions/connector_types > /dev/null 2>&1 && \ - curl -sf "http://localhost:5601/api/alerting/rules/_find" > /dev/null 2>&1; do +until curl -sf -u "elastic:${ES_PASSWORD}" http://localhost:5601/api/actions/connector_types > /dev/null 2>&1 && \ + curl -sf -u "elastic:${ES_PASSWORD}" http://localhost:5601/api/alerting/rules/_find > /dev/null 2>&1; do RETRIES=$((RETRIES + 1)) if [ "$RETRIES" -ge "$MAX_RETRIES" ]; then echo "Alerting/actions plugins did not become ready in time" @@ -137,8 +142,14 @@ contexts: ci: elasticsearch: url: http://localhost:9200 + auth: + username: elastic + password: "${ES_PASSWORD}" kibana: url: http://localhost:5601 + auth: + username: elastic + password: "${ES_PASSWORD}" current_context: ci EOF export ELASTIC_CLI_CONFIG_FILE="$CI_CONFIG_FILE" From 72ea637c93a2b12c7ea5b81dbb8ced61b960bdb8 Mon Sep 17 00:00:00 2001 From: margaretjgu Date: Mon, 4 May 2026 17:23:34 -0400 Subject: [PATCH 15/38] fix(ci): publish ES port 9200 and use docker exec for health check --- .buildkite/run-kb-tests.sh | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.buildkite/run-kb-tests.sh b/.buildkite/run-kb-tests.sh index 46af32a..88392cd 100755 --- a/.buildkite/run-kb-tests.sh +++ b/.buildkite/run-kb-tests.sh @@ -65,6 +65,7 @@ docker run \ --name "$ES_CONTAINER_NAME" \ --network "$NETWORK_NAME" \ --network-alias elasticsearch \ + --publish 9200:9200 \ --env "discovery.type=single-node" \ --env "xpack.license.self_generated.type=trial" \ --env "action.destructive_requires_name=false" \ @@ -76,8 +77,9 @@ docker run \ echo "--- Waiting for Elasticsearch to be 
healthy" RETRIES=0 -MAX_RETRIES=60 -until curl -sf -u "elastic:${ES_PASSWORD}" http://localhost:9200/_cluster/health > /dev/null 2>&1; do +MAX_RETRIES=90 +until docker exec "$ES_CONTAINER_NAME" \ + curl -sf -u "elastic:${ES_PASSWORD}" http://localhost:9200/_cluster/health > /dev/null 2>&1; do RETRIES=$((RETRIES + 1)) if [ "$RETRIES" -ge "$MAX_RETRIES" ]; then echo "Elasticsearch did not become healthy in time" From 24c600b5de45dbc62d3af3990ebe70b5d87a7096 Mon Sep 17 00:00:00 2001 From: margaretjgu Date: Tue, 5 May 2026 13:52:48 -0400 Subject: [PATCH 16/38] fix(ci): use host curl for ES health check now that port 9200 is published --- .buildkite/run-kb-tests.sh | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.buildkite/run-kb-tests.sh b/.buildkite/run-kb-tests.sh index 88392cd..f2bedac 100755 --- a/.buildkite/run-kb-tests.sh +++ b/.buildkite/run-kb-tests.sh @@ -77,9 +77,8 @@ docker run \ echo "--- Waiting for Elasticsearch to be healthy" RETRIES=0 -MAX_RETRIES=90 -until docker exec "$ES_CONTAINER_NAME" \ - curl -sf -u "elastic:${ES_PASSWORD}" http://localhost:9200/_cluster/health > /dev/null 2>&1; do +MAX_RETRIES=120 +until curl -sf -u "elastic:${ES_PASSWORD}" http://localhost:9200/_cluster/health > /dev/null 2>&1; do RETRIES=$((RETRIES + 1)) if [ "$RETRIES" -ge "$MAX_RETRIES" ]; then echo "Elasticsearch did not become healthy in time" From 7dcf50b14e7fcce39d94fb435edaf973566dc8cd Mon Sep 17 00:00:00 2001 From: margaretjgu Date: Tue, 5 May 2026 13:58:38 -0400 Subject: [PATCH 17/38] fix(ci): suppress gitleaks false positives for dummy encryption keys --- .buildkite/run-kb-tests.sh | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/.buildkite/run-kb-tests.sh b/.buildkite/run-kb-tests.sh index f2bedac..22808a7 100755 --- a/.buildkite/run-kb-tests.sh +++ b/.buildkite/run-kb-tests.sh @@ -57,8 +57,10 @@ npm link echo "--- Creating Docker network" docker network create "$NETWORK_NAME" 2>/dev/null || true -# Use a fixed 
password so the CLI config can reference it without secrets management. +# Use fixed dummy values so the CLI config can reference them without secrets management. ES_PASSWORD="changeme" +# Dummy key used only for local/CI testing — not a real secret. # gitleaks:allow +KIBANA_ENCRYPTION_KEY="xP9mfMqnRrNHmSmzPoBtLQvLFzYdHxKj" echo "--- Starting Elasticsearch ${STACK_VERSION}" docker run \ @@ -97,9 +99,9 @@ docker run \ --env "ELASTICSEARCH_HOSTS=http://elasticsearch:9200" \ --env "ELASTICSEARCH_USERNAME=elastic" \ --env "ELASTICSEARCH_PASSWORD=${ES_PASSWORD}" \ - --env "xpack.encryptedSavedObjects.encryptionKey=xP9mfMqnRrNHmSmzPoBtLQvLFzYdHxKj" \ - --env "xpack.reporting.encryptionKey=xP9mfMqnRrNHmSmzPoBtLQvLFzYdHxKj" \ - --env "xpack.security.encryptionKey=xP9mfMqnRrNHmSmzPoBtLQvLFzYdHxKj" \ + --env "xpack.encryptedSavedObjects.encryptionKey=${KIBANA_ENCRYPTION_KEY}" \ + --env "xpack.reporting.encryptionKey=${KIBANA_ENCRYPTION_KEY}" \ + --env "xpack.security.encryptionKey=${KIBANA_ENCRYPTION_KEY}" \ --detach \ --rm \ "docker.elastic.co/kibana/kibana:${STACK_VERSION}" From 7896fce483de6247d4d898cfbaf2f53609f4e7b6 Mon Sep 17 00:00:00 2001 From: margaretjgu Date: Tue, 5 May 2026 14:07:08 -0400 Subject: [PATCH 18/38] fix(ci): allowlist dummy Kibana encryption key in gitleaks config --- .buildkite/run-kb-tests.sh | 3 +-- .gitleaks.toml | 8 ++++++++ 2 files changed, 9 insertions(+), 2 deletions(-) create mode 100644 .gitleaks.toml diff --git a/.buildkite/run-kb-tests.sh b/.buildkite/run-kb-tests.sh index 22808a7..12d20c6 100755 --- a/.buildkite/run-kb-tests.sh +++ b/.buildkite/run-kb-tests.sh @@ -59,8 +59,7 @@ docker network create "$NETWORK_NAME" 2>/dev/null || true # Use fixed dummy values so the CLI config can reference them without secrets management. ES_PASSWORD="changeme" -# Dummy key used only for local/CI testing — not a real secret. 
# gitleaks:allow -KIBANA_ENCRYPTION_KEY="xP9mfMqnRrNHmSmzPoBtLQvLFzYdHxKj" +KIBANA_ENCRYPTION_KEY="xP9mfMqnRrNHmSmzPoBtLQvLFzYdHxKj" # gitleaks:allow echo "--- Starting Elasticsearch ${STACK_VERSION}" docker run \ diff --git a/.gitleaks.toml b/.gitleaks.toml new file mode 100644 index 0000000..82a0f1b --- /dev/null +++ b/.gitleaks.toml @@ -0,0 +1,8 @@ +[extend] +# Extend the default gitleaks ruleset. +useDefault = true + +[[allowlist]] +description = "Dummy Kibana encryption key used only in CI test scripts" +regexTarget = "line" +regex = '''xP9mfMqnRrNHmSmzPoBtLQvLFzYdHxKj''' From 80e025416e0573137b040b96f98fc9d4485d903d Mon Sep 17 00:00:00 2001 From: margaretjgu Date: Tue, 5 May 2026 14:13:53 -0400 Subject: [PATCH 19/38] fix(ci): correct gitleaks allowlist syntax (singular table, regexes array) --- .gitleaks.toml | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/.gitleaks.toml b/.gitleaks.toml index 82a0f1b..d8877be 100644 --- a/.gitleaks.toml +++ b/.gitleaks.toml @@ -2,7 +2,10 @@ # Extend the default gitleaks ruleset. 
useDefault = true -[[allowlist]] -description = "Dummy Kibana encryption key used only in CI test scripts" +[allowlist] +description = "Dummy values used only in CI test scripts — not real secrets" regexTarget = "line" -regex = '''xP9mfMqnRrNHmSmzPoBtLQvLFzYdHxKj''' +regexes = [ + # Dummy Kibana encryption key in run-kb-tests.sh + '''xP9mfMqnRrNHmSmzPoBtLQvLFzYdHxKj''', +] From 3eda510a4a853730ebfad88d6e43040b0479ba71 Mon Sep 17 00:00:00 2001 From: margaretjgu Date: Tue, 5 May 2026 14:27:10 -0400 Subject: [PATCH 20/38] fix(ci): increase ES health timeout to 6 min, add progress logging --- .buildkite/run-kb-tests.sh | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/.buildkite/run-kb-tests.sh b/.buildkite/run-kb-tests.sh index 12d20c6..dd87e3d 100755 --- a/.buildkite/run-kb-tests.sh +++ b/.buildkite/run-kb-tests.sh @@ -78,14 +78,18 @@ docker run \ echo "--- Waiting for Elasticsearch to be healthy" RETRIES=0 -MAX_RETRIES=120 +MAX_RETRIES=180 until curl -sf -u "elastic:${ES_PASSWORD}" http://localhost:9200/_cluster/health > /dev/null 2>&1; do RETRIES=$((RETRIES + 1)) if [ "$RETRIES" -ge "$MAX_RETRIES" ]; then - echo "Elasticsearch did not become healthy in time" + echo "Elasticsearch did not become healthy in time after $((MAX_RETRIES * 2))s" docker logs "$ES_CONTAINER_NAME" exit 1 fi + # Print progress every 30 seconds so CI logs show we are still waiting. + if [ $((RETRIES % 15)) -eq 0 ]; then + echo " still waiting for Elasticsearch... 
(${RETRIES}/${MAX_RETRIES})" + fi sleep 2 done echo "Elasticsearch is ready" From 3042673676fb076e7176c76d88a249283d8a3b08 Mon Sep 17 00:00:00 2001 From: margaretjgu Date: Tue, 5 May 2026 14:50:59 -0400 Subject: [PATCH 21/38] fix(ci): wait for ES security index before starting Kibana --- .buildkite/run-kb-tests.sh | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/.buildkite/run-kb-tests.sh b/.buildkite/run-kb-tests.sh index dd87e3d..4582ed2 100755 --- a/.buildkite/run-kb-tests.sh +++ b/.buildkite/run-kb-tests.sh @@ -86,12 +86,32 @@ until curl -sf -u "elastic:${ES_PASSWORD}" http://localhost:9200/_cluster/health docker logs "$ES_CONTAINER_NAME" exit 1 fi - # Print progress every 30 seconds so CI logs show we are still waiting. if [ $((RETRIES % 15)) -eq 0 ]; then echo " still waiting for Elasticsearch... (${RETRIES}/${MAX_RETRIES})" fi sleep 2 done +echo "Elasticsearch cluster is up" + +# The cluster can report healthy before the .security index is fully bootstrapped. +# Kibana's alerting/connectors plugins depend on ES API keys (encryptedSavedObjects), +# so we must wait for the security index to be ready before starting Kibana. +# Technique borrowed from Kibana's own kbn-es tooling (wait_for_security_index.ts). 
+echo "--- Waiting for Elasticsearch security index to be ready" +RETRIES=0 +MAX_RETRIES=60 +until curl -sf -u "elastic:${ES_PASSWORD}" \ + -X POST "http://localhost:9200/_security/api_key" \ + -H "Content-Type: application/json" \ + -d '{"name":"healthcheck","expiration":"1m"}' > /dev/null 2>&1; do + RETRIES=$((RETRIES + 1)) + if [ "$RETRIES" -ge "$MAX_RETRIES" ]; then + echo "Elasticsearch security index did not become ready in time" + docker logs "$ES_CONTAINER_NAME" + exit 1 + fi + sleep 2 +done echo "Elasticsearch is ready" echo "--- Starting Kibana ${STACK_VERSION}" From 24038291b57a6349321330a9490a1f3c5e74c2bc Mon Sep 17 00:00:00 2001 From: margaretjgu Date: Tue, 5 May 2026 15:04:48 -0400 Subject: [PATCH 22/38] fix(ci): pull Docker images before health check timer to avoid image download timeout --- .buildkite/run-kb-tests.sh | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.buildkite/run-kb-tests.sh b/.buildkite/run-kb-tests.sh index 4582ed2..d53af38 100755 --- a/.buildkite/run-kb-tests.sh +++ b/.buildkite/run-kb-tests.sh @@ -61,6 +61,12 @@ docker network create "$NETWORK_NAME" 2>/dev/null || true ES_PASSWORD="changeme" KIBANA_ENCRYPTION_KEY="xP9mfMqnRrNHmSmzPoBtLQvLFzYdHxKj" # gitleaks:allow +# Pull images up front so the health-check timer measures actual startup time, +# not image download time (image pull can take several minutes on cold CI agents). 
+echo "--- Pulling Docker images" +docker pull "docker.elastic.co/elasticsearch/elasticsearch:${STACK_VERSION}" +docker pull "docker.elastic.co/kibana/kibana:${STACK_VERSION}" + echo "--- Starting Elasticsearch ${STACK_VERSION}" docker run \ --name "$ES_CONTAINER_NAME" \ From f1afbd29fc6a091d21e933ea44b538ab01531db0 Mon Sep 17 00:00:00 2001 From: margaretjgu Date: Tue, 5 May 2026 15:19:13 -0400 Subject: [PATCH 23/38] fix(ci): start ES before npm build so it boots during the ~15 min build phase --- .buildkite/run-kb-tests.sh | 58 ++++++++++++++++++++------------------ 1 file changed, 30 insertions(+), 28 deletions(-) diff --git a/.buildkite/run-kb-tests.sh b/.buildkite/run-kb-tests.sh index d53af38..3cea589 100755 --- a/.buildkite/run-kb-tests.sh +++ b/.buildkite/run-kb-tests.sh @@ -21,6 +21,34 @@ cleanup() { } trap cleanup EXIT +# Use fixed dummy values so the CLI config can reference them without secrets management. +ES_PASSWORD="changeme" +KIBANA_ENCRYPTION_KEY="xP9mfMqnRrNHmSmzPoBtLQvLFzYdHxKj" # gitleaks:allow + +# Pull images and start ES as early as possible so it can boot in the background +# while npm install and the CLI build run (which together take ~15-20 minutes). +# On slow CI agents, Docker container startup + ES security bootstrap alone can +# take 7-10 minutes, so giving it a head start is critical. 
+echo "--- Pulling Docker images" +docker network create "$NETWORK_NAME" 2>/dev/null || true +docker pull "docker.elastic.co/elasticsearch/elasticsearch:${STACK_VERSION}" +docker pull "docker.elastic.co/kibana/kibana:${STACK_VERSION}" + +echo "--- Starting Elasticsearch ${STACK_VERSION} (background)" +docker run \ + --name "$ES_CONTAINER_NAME" \ + --network "$NETWORK_NAME" \ + --network-alias elasticsearch \ + --publish 9200:9200 \ + --env "discovery.type=single-node" \ + --env "xpack.license.self_generated.type=trial" \ + --env "action.destructive_requires_name=false" \ + --env "ELASTIC_PASSWORD=${ES_PASSWORD}" \ + --env "ES_JAVA_OPTS=-Xms512m -Xmx512m" \ + --detach \ + --rm \ + "docker.elastic.co/elasticsearch/elasticsearch:${STACK_VERSION}" + echo "--- Setting up Node.js ${NODE_VERSION}" export NVM_DIR="${NVM_DIR:-$HOME/.nvm}" if [ ! -s "$NVM_DIR/nvm.sh" ]; then @@ -54,34 +82,8 @@ echo "--- Building CLI" npm run build npm link -echo "--- Creating Docker network" -docker network create "$NETWORK_NAME" 2>/dev/null || true - -# Use fixed dummy values so the CLI config can reference them without secrets management. -ES_PASSWORD="changeme" -KIBANA_ENCRYPTION_KEY="xP9mfMqnRrNHmSmzPoBtLQvLFzYdHxKj" # gitleaks:allow - -# Pull images up front so the health-check timer measures actual startup time, -# not image download time (image pull can take several minutes on cold CI agents). 
-echo "--- Pulling Docker images" -docker pull "docker.elastic.co/elasticsearch/elasticsearch:${STACK_VERSION}" -docker pull "docker.elastic.co/kibana/kibana:${STACK_VERSION}" - -echo "--- Starting Elasticsearch ${STACK_VERSION}" -docker run \ - --name "$ES_CONTAINER_NAME" \ - --network "$NETWORK_NAME" \ - --network-alias elasticsearch \ - --publish 9200:9200 \ - --env "discovery.type=single-node" \ - --env "xpack.license.self_generated.type=trial" \ - --env "action.destructive_requires_name=false" \ - --env "ELASTIC_PASSWORD=${ES_PASSWORD}" \ - --env "ES_JAVA_OPTS=-Xms512m -Xmx512m" \ - --detach \ - --rm \ - "docker.elastic.co/elasticsearch/elasticsearch:${STACK_VERSION}" - +# ES has been running in the background during the entire npm install + build phase. +# It should be healthy (or close to it) by now. echo "--- Waiting for Elasticsearch to be healthy" RETRIES=0 MAX_RETRIES=180 From 060a36d50c0a30d74b63b258e98d46dd2db51611 Mon Sep 17 00:00:00 2001 From: margaretjgu Date: Tue, 5 May 2026 15:34:09 -0400 Subject: [PATCH 24/38] fix(ci): start ES+Kibana before build, use kibana-ubuntu-2404 agent with n2-standard-4 --- .buildkite/pipeline.yml | 3 +- .buildkite/run-kb-tests.sh | 79 ++++++++++++++++++++++++-------------- 2 files changed, 52 insertions(+), 30 deletions(-) diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index a5f00ab..0892f41 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -41,7 +41,8 @@ steps: - "24" agents: provider: gcp - image: family/core-ubuntu-2204 + image: family/kibana-ubuntu-2404 + machineType: n2-standard-4 env: NODE_VERSION: "{{matrix.node}}" STACK_VERSION: "9.3.0" diff --git a/.buildkite/run-kb-tests.sh b/.buildkite/run-kb-tests.sh index 3cea589..1b94dcf 100755 --- a/.buildkite/run-kb-tests.sh +++ b/.buildkite/run-kb-tests.sh @@ -3,8 +3,8 @@ # SPDX-License-Identifier: Apache-2.0 # # Buildkite entry point for Kibana functional tests. 
-# Starts an Elasticsearch container, then a Kibana container that connects to it, -# generates a CLI config pointing at both, and runs the hand-authored KB test suite. +# Starts Elasticsearch and Kibana containers in the background, then builds the +# CLI concurrently, and runs the hand-authored KB test suite once everything is ready. set -euo pipefail @@ -25,14 +25,30 @@ trap cleanup EXIT ES_PASSWORD="changeme" KIBANA_ENCRYPTION_KEY="xP9mfMqnRrNHmSmzPoBtLQvLFzYdHxKj" # gitleaks:allow -# Pull images and start ES as early as possible so it can boot in the background -# while npm install and the CLI build run (which together take ~15-20 minutes). -# On slow CI agents, Docker container startup + ES security bootstrap alone can -# take 7-10 minutes, so giving it a head start is critical. -echo "--- Pulling Docker images" +ES_IMAGE="docker.elastic.co/elasticsearch/elasticsearch:${STACK_VERSION}" +KB_IMAGE="docker.elastic.co/kibana/kibana:${STACK_VERSION}" + +# ── Docker setup ──────────────────────────────────────────────────────────── +# Start containers as early as possible so ES and Kibana boot in the background +# while npm install + build run (~15-20 min). On slow CI agents ES overlay2 +# setup + security bootstrap alone can take 7-10 min, so the head start is critical. + +echo "--- Creating Docker network" docker network create "$NETWORK_NAME" 2>/dev/null || true -docker pull "docker.elastic.co/elasticsearch/elasticsearch:${STACK_VERSION}" -docker pull "docker.elastic.co/kibana/kibana:${STACK_VERSION}" + +# Use the pre-cached ES snapshot on kibana-ubuntu-2404 agents if available, +# otherwise fall back to a registry pull (same technique as Kibana's kbn-es tooling). 
+echo "--- Loading Elasticsearch image" +ES_CACHE_DIR="${ES_CACHE_DIR:-}" +if [[ -n "$ES_CACHE_DIR" ]] && compgen -G "$ES_CACHE_DIR/elasticsearch-$STACK_VERSION*.tar.gz" > /dev/null 2>&1; then + echo " Loading from agent cache: $ES_CACHE_DIR" + docker load < "$(ls "$ES_CACHE_DIR/elasticsearch-$STACK_VERSION"*.tar.gz | head -1)" +else + docker pull "$ES_IMAGE" +fi + +echo "--- Loading Kibana image" +docker pull "$KB_IMAGE" echo "--- Starting Elasticsearch ${STACK_VERSION} (background)" docker run \ @@ -47,7 +63,26 @@ docker run \ --env "ES_JAVA_OPTS=-Xms512m -Xmx512m" \ --detach \ --rm \ - "docker.elastic.co/elasticsearch/elasticsearch:${STACK_VERSION}" + "$ES_IMAGE" + +# Start Kibana immediately alongside ES — Kibana will retry the ES connection +# until ES is ready, so the order doesn't matter for correctness. +echo "--- Starting Kibana ${STACK_VERSION} (background)" +docker run \ + --name "$KB_CONTAINER_NAME" \ + --network "$NETWORK_NAME" \ + --publish 5601:5601 \ + --env "ELASTICSEARCH_HOSTS=http://elasticsearch:9200" \ + --env "ELASTICSEARCH_USERNAME=elastic" \ + --env "ELASTICSEARCH_PASSWORD=${ES_PASSWORD}" \ + --env "xpack.encryptedSavedObjects.encryptionKey=${KIBANA_ENCRYPTION_KEY}" \ + --env "xpack.reporting.encryptionKey=${KIBANA_ENCRYPTION_KEY}" \ + --env "xpack.security.encryptionKey=${KIBANA_ENCRYPTION_KEY}" \ + --detach \ + --rm \ + "$KB_IMAGE" + +# ── Build CLI (runs concurrently with ES + Kibana startup) ────────────────── echo "--- Setting up Node.js ${NODE_VERSION}" export NVM_DIR="${NVM_DIR:-$HOME/.nvm}" @@ -82,8 +117,8 @@ echo "--- Building CLI" npm run build npm link -# ES has been running in the background during the entire npm install + build phase. -# It should be healthy (or close to it) by now. 
+# ── Wait for services (should be near-instant after the ~15 min build) ────── + echo "--- Waiting for Elasticsearch to be healthy" RETRIES=0 MAX_RETRIES=180 @@ -103,7 +138,7 @@ echo "Elasticsearch cluster is up" # The cluster can report healthy before the .security index is fully bootstrapped. # Kibana's alerting/connectors plugins depend on ES API keys (encryptedSavedObjects), -# so we must wait for the security index to be ready before starting Kibana. +# so we must confirm the security index is ready. # Technique borrowed from Kibana's own kbn-es tooling (wait_for_security_index.ts). echo "--- Waiting for Elasticsearch security index to be ready" RETRIES=0 @@ -122,21 +157,6 @@ until curl -sf -u "elastic:${ES_PASSWORD}" \ done echo "Elasticsearch is ready" -echo "--- Starting Kibana ${STACK_VERSION}" -docker run \ - --name "$KB_CONTAINER_NAME" \ - --network "$NETWORK_NAME" \ - --publish 5601:5601 \ - --env "ELASTICSEARCH_HOSTS=http://elasticsearch:9200" \ - --env "ELASTICSEARCH_USERNAME=elastic" \ - --env "ELASTICSEARCH_PASSWORD=${ES_PASSWORD}" \ - --env "xpack.encryptedSavedObjects.encryptionKey=${KIBANA_ENCRYPTION_KEY}" \ - --env "xpack.reporting.encryptionKey=${KIBANA_ENCRYPTION_KEY}" \ - --env "xpack.security.encryptionKey=${KIBANA_ENCRYPTION_KEY}" \ - --detach \ - --rm \ - "docker.elastic.co/kibana/kibana:${STACK_VERSION}" - echo "--- Waiting for Kibana to be healthy" RETRIES=0 MAX_RETRIES=90 @@ -153,7 +173,6 @@ done echo "Kibana core is ready" # The actions and alerting plugins initialise after the main health check passes. -# Wait until their APIs return 200 before running tests. 
echo "--- Waiting for alerting and actions plugins to be ready" RETRIES=0 MAX_RETRIES=30 @@ -169,6 +188,8 @@ until curl -sf -u "elastic:${ES_PASSWORD}" http://localhost:5601/api/actions/con done echo "Kibana plugins are ready" +# ── Run tests ──────────────────────────────────────────────────────────────── + echo "--- Generating CI config file" CI_CONFIG_FILE="$(pwd)/.elasticrc-kb-ci.yml" cat > "$CI_CONFIG_FILE" < Date: Tue, 5 May 2026 15:48:34 -0400 Subject: [PATCH 25/38] fix(ci): use container IP instead of published port for ES/Kibana health checks Ubuntu 24.04 agents use nftables which can break Docker --publish port forwarding. Resolve container IPs on the Docker bridge network immediately after docker run and use those for all health checks and CLI config. The Linux host can always reach bridge network container IPs directly without going through port publishing. --- .buildkite/run-kb-tests.sh | 33 +++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/.buildkite/run-kb-tests.sh b/.buildkite/run-kb-tests.sh index 1b94dcf..436444a 100755 --- a/.buildkite/run-kb-tests.sh +++ b/.buildkite/run-kb-tests.sh @@ -30,8 +30,11 @@ KB_IMAGE="docker.elastic.co/kibana/kibana:${STACK_VERSION}" # ── Docker setup ──────────────────────────────────────────────────────────── # Start containers as early as possible so ES and Kibana boot in the background -# while npm install + build run (~15-20 min). On slow CI agents ES overlay2 -# setup + security bootstrap alone can take 7-10 min, so the head start is critical. +# while npm install + build run. +# +# Note: we do NOT use --publish to expose ports on the host. Ubuntu 24.04 agents +# use nftables, which can break Docker port publishing. Instead we connect directly +# to the container IPs on the bridge network, which always works on Linux hosts. 
echo "--- Creating Docker network" docker network create "$NETWORK_NAME" 2>/dev/null || true @@ -55,7 +58,6 @@ docker run \ --name "$ES_CONTAINER_NAME" \ --network "$NETWORK_NAME" \ --network-alias elasticsearch \ - --publish 9200:9200 \ --env "discovery.type=single-node" \ --env "xpack.license.self_generated.type=trial" \ --env "action.destructive_requires_name=false" \ @@ -71,7 +73,6 @@ echo "--- Starting Kibana ${STACK_VERSION} (background)" docker run \ --name "$KB_CONTAINER_NAME" \ --network "$NETWORK_NAME" \ - --publish 5601:5601 \ --env "ELASTICSEARCH_HOSTS=http://elasticsearch:9200" \ --env "ELASTICSEARCH_USERNAME=elastic" \ --env "ELASTICSEARCH_PASSWORD=${ES_PASSWORD}" \ @@ -82,6 +83,14 @@ docker run \ --rm \ "$KB_IMAGE" +# Resolve container IPs on the Docker bridge network. +# We use these directly instead of published ports to avoid iptables/nftables issues +# on Ubuntu 24.04 agents. On Linux the host can always reach bridge network IPs. +ES_IP=$(docker inspect --format='{{(index .NetworkSettings.Networks "'"$NETWORK_NAME"'").IPAddress}}' "$ES_CONTAINER_NAME") +KB_IP=$(docker inspect --format='{{(index .NetworkSettings.Networks "'"$NETWORK_NAME"'").IPAddress}}' "$KB_CONTAINER_NAME") +echo "Elasticsearch IP: ${ES_IP}" +echo "Kibana IP: ${KB_IP}" + # ── Build CLI (runs concurrently with ES + Kibana startup) ────────────────── echo "--- Setting up Node.js ${NODE_VERSION}" @@ -117,12 +126,12 @@ echo "--- Building CLI" npm run build npm link -# ── Wait for services (should be near-instant after the ~15 min build) ────── +# ── Wait for services (should be near-instant after the build) ────────────── echo "--- Waiting for Elasticsearch to be healthy" RETRIES=0 MAX_RETRIES=180 -until curl -sf -u "elastic:${ES_PASSWORD}" http://localhost:9200/_cluster/health > /dev/null 2>&1; do +until curl -sf -u "elastic:${ES_PASSWORD}" "http://${ES_IP}:9200/_cluster/health" > /dev/null 2>&1; do RETRIES=$((RETRIES + 1)) if [ "$RETRIES" -ge "$MAX_RETRIES" ]; then echo 
"Elasticsearch did not become healthy in time after $((MAX_RETRIES * 2))s" @@ -144,7 +153,7 @@ echo "--- Waiting for Elasticsearch security index to be ready" RETRIES=0 MAX_RETRIES=60 until curl -sf -u "elastic:${ES_PASSWORD}" \ - -X POST "http://localhost:9200/_security/api_key" \ + -X POST "http://${ES_IP}:9200/_security/api_key" \ -H "Content-Type: application/json" \ -d '{"name":"healthcheck","expiration":"1m"}' > /dev/null 2>&1; do RETRIES=$((RETRIES + 1)) @@ -160,7 +169,7 @@ echo "Elasticsearch is ready" echo "--- Waiting for Kibana to be healthy" RETRIES=0 MAX_RETRIES=90 -until curl -sf -u "elastic:${ES_PASSWORD}" http://localhost:5601/api/status \ +until curl -sf -u "elastic:${ES_PASSWORD}" "http://${KB_IP}:5601/api/status" \ | jq -e '.status.overall.level == "available"' > /dev/null 2>&1; do RETRIES=$((RETRIES + 1)) if [ "$RETRIES" -ge "$MAX_RETRIES" ]; then @@ -176,8 +185,8 @@ echo "Kibana core is ready" echo "--- Waiting for alerting and actions plugins to be ready" RETRIES=0 MAX_RETRIES=30 -until curl -sf -u "elastic:${ES_PASSWORD}" http://localhost:5601/api/actions/connector_types > /dev/null 2>&1 && \ - curl -sf -u "elastic:${ES_PASSWORD}" http://localhost:5601/api/alerting/rules/_find > /dev/null 2>&1; do +until curl -sf -u "elastic:${ES_PASSWORD}" "http://${KB_IP}:5601/api/actions/connector_types" > /dev/null 2>&1 && \ + curl -sf -u "elastic:${ES_PASSWORD}" "http://${KB_IP}:5601/api/alerting/rules/_find" > /dev/null 2>&1; do RETRIES=$((RETRIES + 1)) if [ "$RETRIES" -ge "$MAX_RETRIES" ]; then echo "Alerting/actions plugins did not become ready in time" @@ -196,12 +205,12 @@ cat > "$CI_CONFIG_FILE" < Date: Tue, 5 May 2026 16:03:05 -0400 Subject: [PATCH 26/38] fix(ci): use --network host for ES and Kibana containers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Docker bridge/port-publishing both fail on kibana-ubuntu-2404 agents (Ubuntu 24.04 uses nftables which breaks Docker NAT rules). 
--network host puts both containers directly on the host network stack so localhost:9200 and localhost:5601 work unconditionally — equivalent to how Kibana's own CI runs ES as a native process via node scripts/es snapshot. --- .buildkite/run-kb-tests.sh | 55 ++++++++++++++++---------------------- 1 file changed, 23 insertions(+), 32 deletions(-) diff --git a/.buildkite/run-kb-tests.sh b/.buildkite/run-kb-tests.sh index 436444a..01ddfc3 100755 --- a/.buildkite/run-kb-tests.sh +++ b/.buildkite/run-kb-tests.sh @@ -3,21 +3,22 @@ # SPDX-License-Identifier: Apache-2.0 # # Buildkite entry point for Kibana functional tests. -# Starts Elasticsearch and Kibana containers in the background, then builds the -# CLI concurrently, and runs the hand-authored KB test suite once everything is ready. +# Starts Elasticsearch and Kibana using --network host so both services are +# reachable at localhost:9200 and localhost:5601 from the host — no Docker +# port publishing or bridge routing needed. This mirrors how Kibana's own CI +# runs ES natively (via node scripts/es snapshot) and avoids iptables/nftables +# issues present on Ubuntu 24.04 agents. set -euo pipefail STACK_VERSION="${STACK_VERSION:-9.3.0}" ES_CONTAINER_NAME="elastic-cli-kb-es" KB_CONTAINER_NAME="elastic-cli-kb" -NETWORK_NAME="elastic-cli-kb-net" cleanup() { echo "--- Cleaning up" docker rm -f "$KB_CONTAINER_NAME" 2>/dev/null || true docker rm -f "$ES_CONTAINER_NAME" 2>/dev/null || true - docker network rm "$NETWORK_NAME" 2>/dev/null || true } trap cleanup EXIT @@ -32,15 +33,15 @@ KB_IMAGE="docker.elastic.co/kibana/kibana:${STACK_VERSION}" # Start containers as early as possible so ES and Kibana boot in the background # while npm install + build run. # -# Note: we do NOT use --publish to expose ports on the host. Ubuntu 24.04 agents -# use nftables, which can break Docker port publishing. Instead we connect directly -# to the container IPs on the bridge network, which always works on Linux hosts. 
- -echo "--- Creating Docker network" -docker network create "$NETWORK_NAME" 2>/dev/null || true +# Both containers use --network host so they bind directly to the host's network +# stack. This means: +# - ES is reachable at localhost:9200 from the host and from Kibana +# - Kibana is reachable at localhost:5601 from the host +# - No iptables/nftables port forwarding rules are needed +# This is functionally equivalent to Kibana's approach of running ES natively. # Use the pre-cached ES snapshot on kibana-ubuntu-2404 agents if available, -# otherwise fall back to a registry pull (same technique as Kibana's kbn-es tooling). +# otherwise fall back to a registry pull. echo "--- Loading Elasticsearch image" ES_CACHE_DIR="${ES_CACHE_DIR:-}" if [[ -n "$ES_CACHE_DIR" ]] && compgen -G "$ES_CACHE_DIR/elasticsearch-$STACK_VERSION*.tar.gz" > /dev/null 2>&1; then @@ -56,8 +57,7 @@ docker pull "$KB_IMAGE" echo "--- Starting Elasticsearch ${STACK_VERSION} (background)" docker run \ --name "$ES_CONTAINER_NAME" \ - --network "$NETWORK_NAME" \ - --network-alias elasticsearch \ + --network host \ --env "discovery.type=single-node" \ --env "xpack.license.self_generated.type=trial" \ --env "action.destructive_requires_name=false" \ @@ -67,13 +67,12 @@ docker run \ --rm \ "$ES_IMAGE" -# Start Kibana immediately alongside ES — Kibana will retry the ES connection -# until ES is ready, so the order doesn't matter for correctness. +# Kibana connects to ES at localhost:9200 since both share the host network. 
echo "--- Starting Kibana ${STACK_VERSION} (background)" docker run \ --name "$KB_CONTAINER_NAME" \ - --network "$NETWORK_NAME" \ - --env "ELASTICSEARCH_HOSTS=http://elasticsearch:9200" \ + --network host \ + --env "ELASTICSEARCH_HOSTS=http://localhost:9200" \ --env "ELASTICSEARCH_USERNAME=elastic" \ --env "ELASTICSEARCH_PASSWORD=${ES_PASSWORD}" \ --env "xpack.encryptedSavedObjects.encryptionKey=${KIBANA_ENCRYPTION_KEY}" \ @@ -83,14 +82,6 @@ docker run \ --rm \ "$KB_IMAGE" -# Resolve container IPs on the Docker bridge network. -# We use these directly instead of published ports to avoid iptables/nftables issues -# on Ubuntu 24.04 agents. On Linux the host can always reach bridge network IPs. -ES_IP=$(docker inspect --format='{{(index .NetworkSettings.Networks "'"$NETWORK_NAME"'").IPAddress}}' "$ES_CONTAINER_NAME") -KB_IP=$(docker inspect --format='{{(index .NetworkSettings.Networks "'"$NETWORK_NAME"'").IPAddress}}' "$KB_CONTAINER_NAME") -echo "Elasticsearch IP: ${ES_IP}" -echo "Kibana IP: ${KB_IP}" - # ── Build CLI (runs concurrently with ES + Kibana startup) ────────────────── echo "--- Setting up Node.js ${NODE_VERSION}" @@ -131,7 +122,7 @@ npm link echo "--- Waiting for Elasticsearch to be healthy" RETRIES=0 MAX_RETRIES=180 -until curl -sf -u "elastic:${ES_PASSWORD}" "http://${ES_IP}:9200/_cluster/health" > /dev/null 2>&1; do +until curl -sf -u "elastic:${ES_PASSWORD}" "http://localhost:9200/_cluster/health" > /dev/null 2>&1; do RETRIES=$((RETRIES + 1)) if [ "$RETRIES" -ge "$MAX_RETRIES" ]; then echo "Elasticsearch did not become healthy in time after $((MAX_RETRIES * 2))s" @@ -153,7 +144,7 @@ echo "--- Waiting for Elasticsearch security index to be ready" RETRIES=0 MAX_RETRIES=60 until curl -sf -u "elastic:${ES_PASSWORD}" \ - -X POST "http://${ES_IP}:9200/_security/api_key" \ + -X POST "http://localhost:9200/_security/api_key" \ -H "Content-Type: application/json" \ -d '{"name":"healthcheck","expiration":"1m"}' > /dev/null 2>&1; do RETRIES=$((RETRIES + 1)) @@ 
-169,7 +160,7 @@ echo "Elasticsearch is ready" echo "--- Waiting for Kibana to be healthy" RETRIES=0 MAX_RETRIES=90 -until curl -sf -u "elastic:${ES_PASSWORD}" "http://${KB_IP}:5601/api/status" \ +until curl -sf -u "elastic:${ES_PASSWORD}" "http://localhost:5601/api/status" \ | jq -e '.status.overall.level == "available"' > /dev/null 2>&1; do RETRIES=$((RETRIES + 1)) if [ "$RETRIES" -ge "$MAX_RETRIES" ]; then @@ -185,8 +176,8 @@ echo "Kibana core is ready" echo "--- Waiting for alerting and actions plugins to be ready" RETRIES=0 MAX_RETRIES=30 -until curl -sf -u "elastic:${ES_PASSWORD}" "http://${KB_IP}:5601/api/actions/connector_types" > /dev/null 2>&1 && \ - curl -sf -u "elastic:${ES_PASSWORD}" "http://${KB_IP}:5601/api/alerting/rules/_find" > /dev/null 2>&1; do +until curl -sf -u "elastic:${ES_PASSWORD}" "http://localhost:5601/api/actions/connector_types" > /dev/null 2>&1 && \ + curl -sf -u "elastic:${ES_PASSWORD}" "http://localhost:5601/api/alerting/rules/_find" > /dev/null 2>&1; do RETRIES=$((RETRIES + 1)) if [ "$RETRIES" -ge "$MAX_RETRIES" ]; then echo "Alerting/actions plugins did not become ready in time" @@ -205,12 +196,12 @@ cat > "$CI_CONFIG_FILE" < Date: Tue, 5 May 2026 16:25:35 -0400 Subject: [PATCH 27/38] fix(ci): run health checks and tests inside Docker network via test-runner container MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On kibana-ubuntu-2404 agents, all host→container networking is broken: --network host is blocked (user namespace remapping), --publish doesn't work (nftables replaces iptables), and direct bridge IPs are not routed to the host. The only reliable networking is inter-container communication on a custom bridge network. 
This matches how Kibana's own CI works: Kibana runs ES natively so localhost always works; we achieve the same by running a dedicated test-runner container (node:NODE_VERSION-bookworm-slim) on the same network as ES and Kibana, using Docker DNS aliases (elasticsearch:9200, kibana:5601). The built workspace is mounted read-only so node dist/cli.js works without rebuilding. --- .buildkite/run-kb-tests-runner.sh | 100 ++++++++++++++++++ .buildkite/run-kb-tests.sh | 165 +++++++++--------------------- 2 files changed, 150 insertions(+), 115 deletions(-) create mode 100755 .buildkite/run-kb-tests-runner.sh diff --git a/.buildkite/run-kb-tests-runner.sh b/.buildkite/run-kb-tests-runner.sh new file mode 100755 index 0000000..4ec283e --- /dev/null +++ b/.buildkite/run-kb-tests-runner.sh @@ -0,0 +1,100 @@ +#!/usr/bin/env bash +# Copyright Elasticsearch B.V. and contributors +# SPDX-License-Identifier: Apache-2.0 +# +# Runs INSIDE the test-runner container on the same Docker network as ES/Kibana. +# Uses Docker DNS aliases (elasticsearch:9200, kibana:5601) for all connectivity, +# which is the only networking that works reliably on kibana-ubuntu-2404 agents. + +set -euo pipefail + +ES_PASSWORD="${ES_PASSWORD:-changeme}" + +echo "--- Installing curl and jq" +apt-get update -qq && apt-get install -y -q --no-install-recommends curl jq + +echo "--- Waiting for Elasticsearch to be healthy" +RETRIES=0 +MAX_RETRIES=180 +until curl -sf -u "elastic:${ES_PASSWORD}" "http://elasticsearch:9200/_cluster/health" > /dev/null 2>&1; do + RETRIES=$((RETRIES + 1)) + if [ "$RETRIES" -ge "$MAX_RETRIES" ]; then + echo "Elasticsearch did not become healthy in time after $((MAX_RETRIES * 2))s" + exit 1 + fi + if [ $((RETRIES % 15)) -eq 0 ]; then + echo " still waiting for Elasticsearch... (${RETRIES}/${MAX_RETRIES})" + fi + sleep 2 +done +echo "Elasticsearch cluster is up" + +# The cluster can report healthy before the .security index is fully bootstrapped. 
+# Kibana's alerting/connectors plugins depend on ES API keys (encryptedSavedObjects), +# so we must confirm the security index is ready. +# Technique borrowed from Kibana's own kbn-es tooling (wait_for_security_index.ts). +echo "--- Waiting for Elasticsearch security index to be ready" +RETRIES=0 +MAX_RETRIES=60 +until curl -sf -u "elastic:${ES_PASSWORD}" \ + -X POST "http://elasticsearch:9200/_security/api_key" \ + -H "Content-Type: application/json" \ + -d '{"name":"healthcheck","expiration":"1m"}' > /dev/null 2>&1; do + RETRIES=$((RETRIES + 1)) + if [ "$RETRIES" -ge "$MAX_RETRIES" ]; then + echo "Elasticsearch security index did not become ready in time" + exit 1 + fi + sleep 2 +done +echo "Elasticsearch is ready" + +echo "--- Waiting for Kibana to be healthy" +RETRIES=0 +MAX_RETRIES=90 +until curl -sf -u "elastic:${ES_PASSWORD}" "http://kibana:5601/api/status" \ + | jq -e '.status.overall.level == "available"' > /dev/null 2>&1; do + RETRIES=$((RETRIES + 1)) + if [ "$RETRIES" -ge "$MAX_RETRIES" ]; then + echo "Kibana did not become healthy in time" + exit 1 + fi + sleep 3 +done +echo "Kibana core is ready" + +# The actions and alerting plugins initialise after the main health check passes. 
+echo "--- Waiting for alerting and actions plugins to be ready" +RETRIES=0 +MAX_RETRIES=30 +until curl -sf -u "elastic:${ES_PASSWORD}" "http://kibana:5601/api/actions/connector_types" > /dev/null 2>&1 && \ + curl -sf -u "elastic:${ES_PASSWORD}" "http://kibana:5601/api/alerting/rules/_find" > /dev/null 2>&1; do + RETRIES=$((RETRIES + 1)) + if [ "$RETRIES" -ge "$MAX_RETRIES" ]; then + echo "Alerting/actions plugins did not become ready in time" + exit 1 + fi + sleep 3 +done +echo "Kibana plugins are ready" + +echo "--- Generating CLI config file" +cat > /tmp/elastic-rc.yml </dev/null || true docker rm -f "$KB_CONTAINER_NAME" 2>/dev/null || true docker rm -f "$ES_CONTAINER_NAME" 2>/dev/null || true + docker network rm "$NETWORK_NAME" 2>/dev/null || true } trap cleanup EXIT @@ -30,15 +41,10 @@ ES_IMAGE="docker.elastic.co/elasticsearch/elasticsearch:${STACK_VERSION}" KB_IMAGE="docker.elastic.co/kibana/kibana:${STACK_VERSION}" # ── Docker setup ──────────────────────────────────────────────────────────── -# Start containers as early as possible so ES and Kibana boot in the background -# while npm install + build run. -# -# Both containers use --network host so they bind directly to the host's network -# stack. This means: -# - ES is reachable at localhost:9200 from the host and from Kibana -# - Kibana is reachable at localhost:5601 from the host -# - No iptables/nftables port forwarding rules are needed -# This is functionally equivalent to Kibana's approach of running ES natively. +# Start all containers as early as possible so they boot while the CLI builds. + +echo "--- Creating Docker network" +docker network create "$NETWORK_NAME" 2>/dev/null || true # Use the pre-cached ES snapshot on kibana-ubuntu-2404 agents if available, # otherwise fall back to a registry pull. 
@@ -54,10 +60,17 @@ fi echo "--- Loading Kibana image" docker pull "$KB_IMAGE" +# Pull the test-runner image in the background while ES/Kibana boot and the +# CLI builds — it's only needed at the very end. +echo "--- Pulling test-runner image (background)" +docker pull "$NODE_RUNNER_IMAGE" & +NODE_PULL_PID=$! + echo "--- Starting Elasticsearch ${STACK_VERSION} (background)" docker run \ --name "$ES_CONTAINER_NAME" \ - --network host \ + --network "$NETWORK_NAME" \ + --network-alias elasticsearch \ --env "discovery.type=single-node" \ --env "xpack.license.self_generated.type=trial" \ --env "action.destructive_requires_name=false" \ @@ -67,12 +80,12 @@ docker run \ --rm \ "$ES_IMAGE" -# Kibana connects to ES at localhost:9200 since both share the host network. echo "--- Starting Kibana ${STACK_VERSION} (background)" docker run \ --name "$KB_CONTAINER_NAME" \ - --network host \ - --env "ELASTICSEARCH_HOSTS=http://localhost:9200" \ + --network "$NETWORK_NAME" \ + --network-alias kibana \ + --env "ELASTICSEARCH_HOSTS=http://elasticsearch:9200" \ --env "ELASTICSEARCH_USERNAME=elastic" \ --env "ELASTICSEARCH_PASSWORD=${ES_PASSWORD}" \ --env "xpack.encryptedSavedObjects.encryptionKey=${KIBANA_ENCRYPTION_KEY}" \ @@ -82,7 +95,7 @@ docker run \ --rm \ "$KB_IMAGE" -# ── Build CLI (runs concurrently with ES + Kibana startup) ────────────────── +# ── Build CLI (concurrent with container startup + test-runner image pull) ── echo "--- Setting up Node.js ${NODE_VERSION}" export NVM_DIR="${NVM_DIR:-$HOME/.nvm}" @@ -115,99 +128,21 @@ export NODE_OPTIONS="${NODE_OPTIONS:-} --max-old-space-size=6144" echo "--- Building CLI" npm run build -npm link - -# ── Wait for services (should be near-instant after the build) ────────────── - -echo "--- Waiting for Elasticsearch to be healthy" -RETRIES=0 -MAX_RETRIES=180 -until curl -sf -u "elastic:${ES_PASSWORD}" "http://localhost:9200/_cluster/health" > /dev/null 2>&1; do - RETRIES=$((RETRIES + 1)) - if [ "$RETRIES" -ge "$MAX_RETRIES" ]; then - 
echo "Elasticsearch did not become healthy in time after $((MAX_RETRIES * 2))s" - docker logs "$ES_CONTAINER_NAME" - exit 1 - fi - if [ $((RETRIES % 15)) -eq 0 ]; then - echo " still waiting for Elasticsearch... (${RETRIES}/${MAX_RETRIES})" - fi - sleep 2 -done -echo "Elasticsearch cluster is up" - -# The cluster can report healthy before the .security index is fully bootstrapped. -# Kibana's alerting/connectors plugins depend on ES API keys (encryptedSavedObjects), -# so we must confirm the security index is ready. -# Technique borrowed from Kibana's own kbn-es tooling (wait_for_security_index.ts). -echo "--- Waiting for Elasticsearch security index to be ready" -RETRIES=0 -MAX_RETRIES=60 -until curl -sf -u "elastic:${ES_PASSWORD}" \ - -X POST "http://localhost:9200/_security/api_key" \ - -H "Content-Type: application/json" \ - -d '{"name":"healthcheck","expiration":"1m"}' > /dev/null 2>&1; do - RETRIES=$((RETRIES + 1)) - if [ "$RETRIES" -ge "$MAX_RETRIES" ]; then - echo "Elasticsearch security index did not become ready in time" - docker logs "$ES_CONTAINER_NAME" - exit 1 - fi - sleep 2 -done -echo "Elasticsearch is ready" - -echo "--- Waiting for Kibana to be healthy" -RETRIES=0 -MAX_RETRIES=90 -until curl -sf -u "elastic:${ES_PASSWORD}" "http://localhost:5601/api/status" \ - | jq -e '.status.overall.level == "available"' > /dev/null 2>&1; do - RETRIES=$((RETRIES + 1)) - if [ "$RETRIES" -ge "$MAX_RETRIES" ]; then - echo "Kibana did not become healthy in time" - docker logs "$KB_CONTAINER_NAME" - exit 1 - fi - sleep 3 -done -echo "Kibana core is ready" - -# The actions and alerting plugins initialise after the main health check passes. 
-echo "--- Waiting for alerting and actions plugins to be ready" -RETRIES=0 -MAX_RETRIES=30 -until curl -sf -u "elastic:${ES_PASSWORD}" "http://localhost:5601/api/actions/connector_types" > /dev/null 2>&1 && \ - curl -sf -u "elastic:${ES_PASSWORD}" "http://localhost:5601/api/alerting/rules/_find" > /dev/null 2>&1; do - RETRIES=$((RETRIES + 1)) - if [ "$RETRIES" -ge "$MAX_RETRIES" ]; then - echo "Alerting/actions plugins did not become ready in time" - docker logs "$KB_CONTAINER_NAME" --tail 50 - exit 1 - fi - sleep 3 -done -echo "Kibana plugins are ready" - -# ── Run tests ──────────────────────────────────────────────────────────────── - -echo "--- Generating CI config file" -CI_CONFIG_FILE="$(pwd)/.elasticrc-kb-ci.yml" -cat > "$CI_CONFIG_FILE" < Date: Tue, 5 May 2026 16:43:37 -0400 Subject: [PATCH 28/38] debug(ci): add network diagnostics and IP fallback for kb test runner The test runner container couldn't reach ES via Docker DNS (elasticsearch:9200) in build 268 for an unknown reason. This adds: - Network diagnostics in the runner (resolv.conf, routes, first verbose curl) so the next failure gives us the exact error (DNS failure, TCP refused, etc.) - IP fallback: docker inspect fetches ES/Kibana container IPs on the host and passes them as ES_IP / KB_IP; the runner uses these if DNS lookup fails - docker logs for ES and Kibana in the cleanup trap for visibility - Lower Node.js heap limit (6GB -> 4GB) to reduce memory pressure during build --- .buildkite/run-kb-tests-runner.sh | 67 ++++++++++++++++++++++++++----- .buildkite/run-kb-tests.sh | 29 +++++++++---- 2 files changed, 79 insertions(+), 17 deletions(-) diff --git a/.buildkite/run-kb-tests-runner.sh b/.buildkite/run-kb-tests-runner.sh index 4ec283e..1a18d67 100755 --- a/.buildkite/run-kb-tests-runner.sh +++ b/.buildkite/run-kb-tests-runner.sh @@ -3,8 +3,9 @@ # SPDX-License-Identifier: Apache-2.0 # # Runs INSIDE the test-runner container on the same Docker network as ES/Kibana. 
-# Uses Docker DNS aliases (elasticsearch:9200, kibana:5601) for all connectivity, -# which is the only networking that works reliably on kibana-ubuntu-2404 agents. +# Prefers Docker DNS aliases (elasticsearch / kibana) but falls back to the +# container IPs passed via ES_IP / KB_IP if the embedded DNS server is +# unavailable (known issue with some rootless/userns Docker configurations). set -euo pipefail @@ -13,13 +14,59 @@ ES_PASSWORD="${ES_PASSWORD:-changeme}" echo "--- Installing curl and jq" apt-get update -qq && apt-get install -y -q --no-install-recommends curl jq +# ── Network diagnostics ────────────────────────────────────────────────────── +echo "--- Network diagnostics" +echo "resolv.conf:" +cat /etc/resolv.conf || true +echo "Routes:" +ip route 2>/dev/null || true + +# Determine whether to use DNS names or IPs. +ES_HOST="elasticsearch" +KB_HOST="kibana" + +if getent hosts elasticsearch > /dev/null 2>&1; then + RESOLVED=$(getent hosts elasticsearch | awk '{print $1}') + echo "DNS OK: elasticsearch -> $RESOLVED" +else + echo "DNS lookup for 'elasticsearch' failed" + if [[ -n "${ES_IP:-}" ]]; then + echo "Falling back to ES_IP=${ES_IP}" + ES_HOST="$ES_IP" + else + echo "No ES_IP provided and DNS failed — health checks will fail" + fi +fi + +if getent hosts kibana > /dev/null 2>&1; then + RESOLVED=$(getent hosts kibana | awk '{print $1}') + echo "DNS OK: kibana -> $RESOLVED" +else + echo "DNS lookup for 'kibana' failed" + if [[ -n "${KB_IP:-}" ]]; then + echo "Falling back to KB_IP=${KB_IP}" + KB_HOST="$KB_IP" + else + echo "No KB_IP provided and DNS failed — health checks will fail" + fi +fi + +echo "Using ES_HOST=${ES_HOST}, KB_HOST=${KB_HOST}" + +# First connection attempt with full output for debugging. 
+echo "First curl attempt (verbose):" +curl -v -u "elastic:${ES_PASSWORD}" "http://${ES_HOST}:9200/_cluster/health" 2>&1 || true + +# ── Wait for Elasticsearch ─────────────────────────────────────────────────── echo "--- Waiting for Elasticsearch to be healthy" RETRIES=0 MAX_RETRIES=180 -until curl -sf -u "elastic:${ES_PASSWORD}" "http://elasticsearch:9200/_cluster/health" > /dev/null 2>&1; do +until curl -sf -u "elastic:${ES_PASSWORD}" "http://${ES_HOST}:9200/_cluster/health" > /dev/null 2>&1; do RETRIES=$((RETRIES + 1)) if [ "$RETRIES" -ge "$MAX_RETRIES" ]; then echo "Elasticsearch did not become healthy in time after $((MAX_RETRIES * 2))s" + echo "Last curl attempt:" + curl -v -u "elastic:${ES_PASSWORD}" "http://${ES_HOST}:9200/_cluster/health" 2>&1 || true exit 1 fi if [ $((RETRIES % 15)) -eq 0 ]; then @@ -37,7 +84,7 @@ echo "--- Waiting for Elasticsearch security index to be ready" RETRIES=0 MAX_RETRIES=60 until curl -sf -u "elastic:${ES_PASSWORD}" \ - -X POST "http://elasticsearch:9200/_security/api_key" \ + -X POST "http://${ES_HOST}:9200/_security/api_key" \ -H "Content-Type: application/json" \ -d '{"name":"healthcheck","expiration":"1m"}' > /dev/null 2>&1; do RETRIES=$((RETRIES + 1)) @@ -52,11 +99,13 @@ echo "Elasticsearch is ready" echo "--- Waiting for Kibana to be healthy" RETRIES=0 MAX_RETRIES=90 -until curl -sf -u "elastic:${ES_PASSWORD}" "http://kibana:5601/api/status" \ +until curl -sf -u "elastic:${ES_PASSWORD}" "http://${KB_HOST}:5601/api/status" \ | jq -e '.status.overall.level == "available"' > /dev/null 2>&1; do RETRIES=$((RETRIES + 1)) if [ "$RETRIES" -ge "$MAX_RETRIES" ]; then echo "Kibana did not become healthy in time" + echo "Last Kibana status:" + curl -sf -u "elastic:${ES_PASSWORD}" "http://${KB_HOST}:5601/api/status" 2>&1 || true exit 1 fi sleep 3 @@ -67,8 +116,8 @@ echo "Kibana core is ready" echo "--- Waiting for alerting and actions plugins to be ready" RETRIES=0 MAX_RETRIES=30 -until curl -sf -u "elastic:${ES_PASSWORD}" 
"http://kibana:5601/api/actions/connector_types" > /dev/null 2>&1 && \ - curl -sf -u "elastic:${ES_PASSWORD}" "http://kibana:5601/api/alerting/rules/_find" > /dev/null 2>&1; do +until curl -sf -u "elastic:${ES_PASSWORD}" "http://${KB_HOST}:5601/api/actions/connector_types" > /dev/null 2>&1 && \ + curl -sf -u "elastic:${ES_PASSWORD}" "http://${KB_HOST}:5601/api/alerting/rules/_find" > /dev/null 2>&1; do RETRIES=$((RETRIES + 1)) if [ "$RETRIES" -ge "$MAX_RETRIES" ]; then echo "Alerting/actions plugins did not become ready in time" @@ -83,12 +132,12 @@ cat > /tmp/elastic-rc.yml <&1 | tail -50 || true + echo "--- Kibana logs (last 20 lines)" + docker logs "$KB_CONTAINER_NAME" 2>&1 | tail -20 || true echo "--- Cleaning up" docker rm -f "$TEST_RUNNER_NAME" 2>/dev/null || true docker rm -f "$KB_CONTAINER_NAME" 2>/dev/null || true @@ -95,6 +98,15 @@ docker run \ --rm \ "$KB_IMAGE" +# Fetch container IPs immediately after starting — Docker assigns them before +# the main process runs. We pass these to the test runner as a fallback in case +# the embedded DNS server (127.0.0.11) is unavailable on this agent. 
+ES_IP=$(docker inspect "$ES_CONTAINER_NAME" \ + --format="{{(index .NetworkSettings.Networks \"$NETWORK_NAME\").IPAddress}}") +KB_IP=$(docker inspect "$KB_CONTAINER_NAME" \ + --format="{{(index .NetworkSettings.Networks \"$NETWORK_NAME\").IPAddress}}") +echo "Container IPs — ES: ${ES_IP}, Kibana: ${KB_IP}" + # ── Build CLI (concurrent with container startup + test-runner image pull) ── echo "--- Setting up Node.js ${NODE_VERSION}" @@ -124,7 +136,7 @@ echo "Using jq $(jq --version)" echo "--- Installing dependencies" npm ci -export NODE_OPTIONS="${NODE_OPTIONS:-} --max-old-space-size=6144" +export NODE_OPTIONS="${NODE_OPTIONS:-} --max-old-space-size=4096" echo "--- Building CLI" npm run build @@ -133,8 +145,7 @@ echo "--- Waiting for test-runner image pull to finish" wait "$NODE_PULL_PID" # ── Run health checks and tests inside the Docker network ─────────────────── -# The test-runner container has access to ES and Kibana via Docker DNS aliases. -# The workspace (including the built CLI at dist/cli.js) is mounted read-only. +# The test-runner container uses ES_IP / KB_IP directly if DNS is unavailable. echo "--- Running tests inside Docker network" docker run \ @@ -144,5 +155,7 @@ docker run \ --volume "$(pwd):/workspace" \ --workdir /workspace \ --env "ES_PASSWORD=${ES_PASSWORD}" \ + --env "ES_IP=${ES_IP}" \ + --env "KB_IP=${KB_IP}" \ "$NODE_RUNNER_IMAGE" \ bash /workspace/.buildkite/run-kb-tests-runner.sh From 34980d12abff77a6be2d3f0eee875bf7dce53a78 Mon Sep 17 00:00:00 2001 From: margaretjgu Date: Tue, 5 May 2026 16:51:26 -0400 Subject: [PATCH 29/38] fix(ci): disable ES HTTP TLS to allow plain HTTP connections ES 8.0+ auto-enables HTTPS on the HTTP layer when ELASTIC_PASSWORD is set. This caused "Empty reply from server" errors because the test runner and Kibana were connecting via http:// to a port expecting TLS. Kibana then crashed and its DNS entry was removed, explaining the secondary "kibana DNS failed" symptom. 
Setting xpack.security.http.ssl.enabled=false keeps security (auth, RBAC, API keys) enabled while allowing plain HTTP access, which is fine for CI. --- .buildkite/run-kb-tests.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/.buildkite/run-kb-tests.sh b/.buildkite/run-kb-tests.sh index 97cd46a..31b9faf 100755 --- a/.buildkite/run-kb-tests.sh +++ b/.buildkite/run-kb-tests.sh @@ -78,6 +78,7 @@ docker run \ --env "xpack.license.self_generated.type=trial" \ --env "action.destructive_requires_name=false" \ --env "ELASTIC_PASSWORD=${ES_PASSWORD}" \ + --env "xpack.security.http.ssl.enabled=false" \ --env "ES_JAVA_OPTS=-Xms512m -Xmx512m" \ --detach \ --rm \ From 863f980a25e5530e76c468d929abd6d4bebd6f84 Mon Sep 17 00:00:00 2001 From: margaretjgu Date: Tue, 5 May 2026 17:08:52 -0400 Subject: [PATCH 30/38] fix(ci): start Kibana after build and capture its crash logs Two fixes: - Kibana was crashing silently because --rm auto-removed the container before the cleanup trap could collect logs. Removed --rm so docker logs always works. - Kibana was likely crashing because it tried to connect to ES before ES finished bootstrapping. Moved Kibana startup to after the npm build (~3 min buffer), so ES is fully ready when Kibana first connects. ES still starts early. - Also adds xpack.security.transport.ssl.enabled=false to ES for consistency with the http.ssl flag (aligns with elastic/start-local reference setup). --- .buildkite/run-kb-tests.sh | 96 +++++++++++++++++++++----------------- 1 file changed, 54 insertions(+), 42 deletions(-) diff --git a/.buildkite/run-kb-tests.sh b/.buildkite/run-kb-tests.sh index 31b9faf..886d084 100755 --- a/.buildkite/run-kb-tests.sh +++ b/.buildkite/run-kb-tests.sh @@ -13,6 +13,12 @@ # the same Docker bridge network as ES and Kibana. Container IPs are fetched via # docker inspect on the host and passed to the runner in case embedded DNS is # unavailable (known issue with some rootless/userns Docker configurations). +# +# Startup order: +# 1. 
Start ES early so it is fully ready before Kibana connects. +# 2. Pull Kibana + test-runner images while the CLI builds. +# 3. Start Kibana only after the build completes (~3 min buffer for ES). +# 4. Run the test-runner container for health checks + tests. set -euo pipefail @@ -26,8 +32,8 @@ NODE_RUNNER_IMAGE="node:${NODE_VERSION}-bookworm-slim" cleanup() { echo "--- ES logs (last 50 lines)" docker logs "$ES_CONTAINER_NAME" 2>&1 | tail -50 || true - echo "--- Kibana logs (last 20 lines)" - docker logs "$KB_CONTAINER_NAME" 2>&1 | tail -20 || true + echo "--- Kibana logs (last 50 lines)" + docker logs "$KB_CONTAINER_NAME" 2>&1 | tail -50 || true echo "--- Cleaning up" docker rm -f "$TEST_RUNNER_NAME" 2>/dev/null || true docker rm -f "$KB_CONTAINER_NAME" 2>/dev/null || true @@ -43,14 +49,15 @@ KIBANA_ENCRYPTION_KEY="xP9mfMqnRrNHmSmzPoBtLQvLFzYdHxKj" # gitleaks:allow ES_IMAGE="docker.elastic.co/elasticsearch/elasticsearch:${STACK_VERSION}" KB_IMAGE="docker.elastic.co/kibana/kibana:${STACK_VERSION}" -# ── Docker setup ──────────────────────────────────────────────────────────── -# Start all containers as early as possible so they boot while the CLI builds. - +# ── Docker network ─────────────────────────────────────────────────────────── echo "--- Creating Docker network" docker network create "$NETWORK_NAME" 2>/dev/null || true -# Use the pre-cached ES snapshot on kibana-ubuntu-2404 agents if available, -# otherwise fall back to a registry pull. +# ── Elasticsearch ──────────────────────────────────────────────────────────── +# Start ES as early as possible. It needs ~1-2 minutes to bootstrap the +# security index. Kibana will not start until after the build so ES has +# plenty of time to be fully ready before Kibana connects. 
+ echo "--- Loading Elasticsearch image" ES_CACHE_DIR="${ES_CACHE_DIR:-}" if [[ -n "$ES_CACHE_DIR" ]] && compgen -G "$ES_CACHE_DIR/elasticsearch-$STACK_VERSION*.tar.gz" > /dev/null 2>&1; then @@ -60,15 +67,6 @@ else docker pull "$ES_IMAGE" fi -echo "--- Loading Kibana image" -docker pull "$KB_IMAGE" - -# Pull the test-runner image in the background while ES/Kibana boot and the -# CLI builds — it's only needed at the very end. -echo "--- Pulling test-runner image (background)" -docker pull "$NODE_RUNNER_IMAGE" & -NODE_PULL_PID=$! - echo "--- Starting Elasticsearch ${STACK_VERSION} (background)" docker run \ --name "$ES_CONTAINER_NAME" \ @@ -79,36 +77,22 @@ docker run \ --env "action.destructive_requires_name=false" \ --env "ELASTIC_PASSWORD=${ES_PASSWORD}" \ --env "xpack.security.http.ssl.enabled=false" \ + --env "xpack.security.transport.ssl.enabled=false" \ --env "ES_JAVA_OPTS=-Xms512m -Xmx512m" \ --detach \ --rm \ "$ES_IMAGE" -echo "--- Starting Kibana ${STACK_VERSION} (background)" -docker run \ - --name "$KB_CONTAINER_NAME" \ - --network "$NETWORK_NAME" \ - --network-alias kibana \ - --env "ELASTICSEARCH_HOSTS=http://elasticsearch:9200" \ - --env "ELASTICSEARCH_USERNAME=elastic" \ - --env "ELASTICSEARCH_PASSWORD=${ES_PASSWORD}" \ - --env "xpack.encryptedSavedObjects.encryptionKey=${KIBANA_ENCRYPTION_KEY}" \ - --env "xpack.reporting.encryptionKey=${KIBANA_ENCRYPTION_KEY}" \ - --env "xpack.security.encryptionKey=${KIBANA_ENCRYPTION_KEY}" \ - --detach \ - --rm \ - "$KB_IMAGE" +# Pull Kibana and the test-runner images while ES boots and the CLI builds. +echo "--- Pulling Kibana image (background)" +docker pull "$KB_IMAGE" & +KB_PULL_PID=$! -# Fetch container IPs immediately after starting — Docker assigns them before -# the main process runs. We pass these to the test runner as a fallback in case -# the embedded DNS server (127.0.0.11) is unavailable on this agent. 
-ES_IP=$(docker inspect "$ES_CONTAINER_NAME" \ - --format="{{(index .NetworkSettings.Networks \"$NETWORK_NAME\").IPAddress}}") -KB_IP=$(docker inspect "$KB_CONTAINER_NAME" \ - --format="{{(index .NetworkSettings.Networks \"$NETWORK_NAME\").IPAddress}}") -echo "Container IPs — ES: ${ES_IP}, Kibana: ${KB_IP}" +echo "--- Pulling test-runner image (background)" +docker pull "$NODE_RUNNER_IMAGE" & +NODE_PULL_PID=$! -# ── Build CLI (concurrent with container startup + test-runner image pull) ── +# ── Build CLI (concurrent with ES startup + image pulls) ──────────────────── echo "--- Setting up Node.js ${NODE_VERSION}" export NVM_DIR="${NVM_DIR:-$HOME/.nvm}" @@ -142,11 +126,39 @@ export NODE_OPTIONS="${NODE_OPTIONS:-} --max-old-space-size=4096" echo "--- Building CLI" npm run build -echo "--- Waiting for test-runner image pull to finish" -wait "$NODE_PULL_PID" +# ── Start Kibana (after build, so ES has had ~3 min to fully boot) ─────────── + +echo "--- Waiting for Kibana image pull to finish" +wait "$KB_PULL_PID" + +echo "--- Starting Kibana ${STACK_VERSION}" +# Intentionally no --rm so crash logs are always available in cleanup. +docker run \ + --name "$KB_CONTAINER_NAME" \ + --network "$NETWORK_NAME" \ + --network-alias kibana \ + --env "ELASTICSEARCH_HOSTS=http://elasticsearch:9200" \ + --env "ELASTICSEARCH_USERNAME=elastic" \ + --env "ELASTICSEARCH_PASSWORD=${ES_PASSWORD}" \ + --env "xpack.encryptedSavedObjects.encryptionKey=${KIBANA_ENCRYPTION_KEY}" \ + --env "xpack.reporting.encryptionKey=${KIBANA_ENCRYPTION_KEY}" \ + --env "xpack.security.encryptionKey=${KIBANA_ENCRYPTION_KEY}" \ + --detach \ + "$KB_IMAGE" + +# Fetch container IPs immediately after starting — Docker assigns them before +# the main process runs. We pass these to the test runner as a fallback in case +# the embedded DNS server (127.0.0.11) is unavailable on this agent. 
+ES_IP=$(docker inspect "$ES_CONTAINER_NAME" \ + --format="{{(index .NetworkSettings.Networks \"$NETWORK_NAME\").IPAddress}}") +KB_IP=$(docker inspect "$KB_CONTAINER_NAME" \ + --format="{{(index .NetworkSettings.Networks \"$NETWORK_NAME\").IPAddress}}") +echo "Container IPs — ES: ${ES_IP}, Kibana: ${KB_IP}" # ── Run health checks and tests inside the Docker network ─────────────────── -# The test-runner container uses ES_IP / KB_IP directly if DNS is unavailable. + +echo "--- Waiting for test-runner image pull to finish" +wait "$NODE_PULL_PID" echo "--- Running tests inside Docker network" docker run \ From fa6f0d359b2b559a925b345b0e4d70e1e367a76f Mon Sep 17 00:00:00 2001 From: margaretjgu Date: Tue, 5 May 2026 17:23:20 -0400 Subject: [PATCH 31/38] fix(ci): use kibana_system user via setup container Kibana 9.x explicitly forbids ELASTICSEARCH_USERNAME=elastic with a fatal config validation error. We must use kibana_system instead. Since the host cannot reach ES directly on this agent, a one-shot Node.js container (setup-kibana.js) runs on the same Docker network, waits for ES cluster health and the security index, sets the kibana_system password, then exits. Kibana is then started with ELASTICSEARCH_USERNAME=kibana_system. --- .buildkite/run-kb-tests.sh | 25 ++++++++++--- .buildkite/setup-kibana.js | 75 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 95 insertions(+), 5 deletions(-) create mode 100644 .buildkite/setup-kibana.js diff --git a/.buildkite/run-kb-tests.sh b/.buildkite/run-kb-tests.sh index 886d084..43bbd3d 100755 --- a/.buildkite/run-kb-tests.sh +++ b/.buildkite/run-kb-tests.sh @@ -126,7 +126,25 @@ export NODE_OPTIONS="${NODE_OPTIONS:-} --max-old-space-size=4096" echo "--- Building CLI" npm run build -# ── Start Kibana (after build, so ES has had ~3 min to fully boot) ─────────── +# ── Configure kibana_system user (after build, ES has had ~3 min to boot) ─── +# Kibana 9.x forbids using the elastic superuser as ELASTICSEARCH_USERNAME. 
+# We must use kibana_system instead, which requires setting its password via the +# ES API. A one-shot Node.js container on the same network handles this without +# needing the host to reach ES directly. + +echo "--- Waiting for node runner image pull to finish" +wait "$NODE_PULL_PID" + +echo "--- Configuring kibana_system user" +docker run \ + --rm \ + --network "$NETWORK_NAME" \ + --volume "$(pwd):/workspace:ro" \ + --env "ES_PASSWORD=${ES_PASSWORD}" \ + "$NODE_RUNNER_IMAGE" \ + node /workspace/.buildkite/setup-kibana.js + +# ── Start Kibana ───────────────────────────────────────────────────────────── echo "--- Waiting for Kibana image pull to finish" wait "$KB_PULL_PID" @@ -138,7 +156,7 @@ docker run \ --network "$NETWORK_NAME" \ --network-alias kibana \ --env "ELASTICSEARCH_HOSTS=http://elasticsearch:9200" \ - --env "ELASTICSEARCH_USERNAME=elastic" \ + --env "ELASTICSEARCH_USERNAME=kibana_system" \ --env "ELASTICSEARCH_PASSWORD=${ES_PASSWORD}" \ --env "xpack.encryptedSavedObjects.encryptionKey=${KIBANA_ENCRYPTION_KEY}" \ --env "xpack.reporting.encryptionKey=${KIBANA_ENCRYPTION_KEY}" \ @@ -157,9 +175,6 @@ echo "Container IPs — ES: ${ES_IP}, Kibana: ${KB_IP}" # ── Run health checks and tests inside the Docker network ─────────────────── -echo "--- Waiting for test-runner image pull to finish" -wait "$NODE_PULL_PID" - echo "--- Running tests inside Docker network" docker run \ --name "$TEST_RUNNER_NAME" \ diff --git a/.buildkite/setup-kibana.js b/.buildkite/setup-kibana.js new file mode 100644 index 0000000..e50fff2 --- /dev/null +++ b/.buildkite/setup-kibana.js @@ -0,0 +1,75 @@ +'use strict'; +// Runs inside a Node.js container on the same Docker network as Elasticsearch. +// Waits for ES to be fully ready (cluster health + security index), then sets +// the kibana_system password so Kibana can connect as that user. 
+const http = require('http'); + +const ES_PASSWORD = process.env.ES_PASSWORD || 'changeme'; +const KB_PASSWORD = process.env.KB_PASSWORD || ES_PASSWORD; +const auth = 'Basic ' + Buffer.from(`elastic:${ES_PASSWORD}`).toString('base64'); + +function request(method, path, body) { + return new Promise((resolve, reject) => { + const data = body ? JSON.stringify(body) : null; + const req = http.request( + { + hostname: 'elasticsearch', + port: 9200, + path, + method, + headers: { + Authorization: auth, + ...(data && { 'Content-Type': 'application/json', 'Content-Length': Buffer.byteLength(data) }), + }, + }, + res => { + let raw = ''; + res.on('data', chunk => (raw += chunk)); + res.on('end', () => { + try { resolve({ status: res.statusCode, body: JSON.parse(raw) }); } + catch { resolve({ status: res.statusCode, body: raw }); } + }); + } + ); + req.on('error', reject); + if (data) req.write(data); + req.end(); + }); +} + +function delay(ms) { return new Promise(r => setTimeout(r, ms)); } + +async function retry(fn, label, maxRetries = 180, intervalMs = 2000) { + for (let i = 0; i < maxRetries; i++) { + try { + const ok = await fn(); + if (ok) return; + } catch { /* not ready yet */ } + if (i > 0 && i % 15 === 0) console.log(` still waiting for ${label}... 
(${i}/${maxRetries})`); + await delay(intervalMs); + } + throw new Error(`${label} did not become ready in time`); +} + +async function main() { + console.log('Waiting for Elasticsearch cluster health...'); + await retry(async () => { + const { body } = await request('GET', '/_cluster/health'); + return ['green', 'yellow'].includes(body.status); + }, 'ES cluster health'); + console.log('ES cluster is up'); + + console.log('Waiting for ES security index...'); + await retry(async () => { + const { status } = await request('POST', '/_security/api_key', { name: 'setup-check', expiration: '1m' }); + return status >= 200 && status < 300; + }, 'ES security index', 60); + console.log('ES security index is ready'); + + console.log('Setting kibana_system password...'); + const { status } = await request('POST', '/_security/user/kibana_system/_password', { password: KB_PASSWORD }); + if (status < 200 || status >= 300) throw new Error(`HTTP ${status}`); + console.log('kibana_system password configured'); +} + +main().catch(e => { console.error('Setup failed:', e.message); process.exit(1); }); From 08908a64466b34db9d6cd2e53c2d278d49a735a0 Mon Sep 17 00:00:00 2001 From: margaretjgu Date: Tue, 5 May 2026 17:30:43 -0400 Subject: [PATCH 32/38] fix(ci): rename setup-kibana to .cjs and add SPDX header The repo has "type": "module" in package.json so .js files are treated as ESM, causing "require is not defined in ES module scope". Renaming to .cjs forces Node to treat it as CommonJS regardless of package.json. Also adds the missing SPDX-License-Identifier header to pass the test:spdx check. 
--- .buildkite/run-kb-tests.sh | 2 +- .buildkite/{setup-kibana.js => setup-kibana.cjs} | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) rename .buildkite/{setup-kibana.js => setup-kibana.cjs} (96%) diff --git a/.buildkite/run-kb-tests.sh b/.buildkite/run-kb-tests.sh index 43bbd3d..5fca254 100755 --- a/.buildkite/run-kb-tests.sh +++ b/.buildkite/run-kb-tests.sh @@ -142,7 +142,7 @@ docker run \ --volume "$(pwd):/workspace:ro" \ --env "ES_PASSWORD=${ES_PASSWORD}" \ "$NODE_RUNNER_IMAGE" \ - node /workspace/.buildkite/setup-kibana.js + node /workspace/.buildkite/setup-kibana.cjs # ── Start Kibana ───────────────────────────────────────────────────────────── diff --git a/.buildkite/setup-kibana.js b/.buildkite/setup-kibana.cjs similarity index 96% rename from .buildkite/setup-kibana.js rename to .buildkite/setup-kibana.cjs index e50fff2..971196e 100644 --- a/.buildkite/setup-kibana.js +++ b/.buildkite/setup-kibana.cjs @@ -1,4 +1,6 @@ -'use strict'; +// Copyright Elasticsearch B.V. and contributors +// SPDX-License-Identifier: Apache-2.0 +// // Runs inside a Node.js container on the same Docker network as Elasticsearch. // Waits for ES to be fully ready (cluster health + security index), then sets // the kibana_system password so Kibana can connect as that user. 
From 756570adcdb6badd7cad0f5a6ff5ed4ec5784ccc Mon Sep 17 00:00:00 2001 From: margaretjgu Date: Tue, 5 May 2026 17:33:42 -0400 Subject: [PATCH 33/38] fix(ci): add 5s HTTP timeout to setup-kibana requests --- .buildkite/setup-kibana.cjs | 1 + 1 file changed, 1 insertion(+) diff --git a/.buildkite/setup-kibana.cjs b/.buildkite/setup-kibana.cjs index 971196e..7b1aadf 100644 --- a/.buildkite/setup-kibana.cjs +++ b/.buildkite/setup-kibana.cjs @@ -33,6 +33,7 @@ function request(method, path, body) { }); } ); + req.setTimeout(5000, () => { req.destroy(new Error('request timed out')); }); req.on('error', reject); if (data) req.write(data); req.end(); From 90df631b016b8318c27f35094c47ae03928cf208 Mon Sep 17 00:00:00 2001 From: margaretjgu Date: Tue, 5 May 2026 17:42:16 -0400 Subject: [PATCH 34/38] fix(ci): replace fragile plugin endpoint check with a short sleep The /api/actions/connector_types and /api/alerting/rules/_find health checks were timing out because the Fleet plugin's retry loop (FleetEncryptedSaved ObjectEncryptionKeyRequired for agent binary source) was causing those endpoints to return non-200 responses. Fleet's issue is unrelated to our tests. Replace the 30-retry polling loop with a 15-second sleep after Kibana reports "available". By that point all essential plugins (alerting, actions) are initialised as part of the "available" state. --- .buildkite/run-kb-tests-runner.sh | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/.buildkite/run-kb-tests-runner.sh b/.buildkite/run-kb-tests-runner.sh index 1a18d67..cbeeb12 100755 --- a/.buildkite/run-kb-tests-runner.sh +++ b/.buildkite/run-kb-tests-runner.sh @@ -112,20 +112,12 @@ until curl -sf -u "elastic:${ES_PASSWORD}" "http://${KB_HOST}:5601/api/status" \ done echo "Kibana core is ready" -# The actions and alerting plugins initialise after the main health check passes. 
-echo "--- Waiting for alerting and actions plugins to be ready" -RETRIES=0 -MAX_RETRIES=30 -until curl -sf -u "elastic:${ES_PASSWORD}" "http://${KB_HOST}:5601/api/actions/connector_types" > /dev/null 2>&1 && \ - curl -sf -u "elastic:${ES_PASSWORD}" "http://${KB_HOST}:5601/api/alerting/rules/_find" > /dev/null 2>&1; do - RETRIES=$((RETRIES + 1)) - if [ "$RETRIES" -ge "$MAX_RETRIES" ]; then - echo "Alerting/actions plugins did not become ready in time" - exit 1 - fi - sleep 3 -done -echo "Kibana plugins are ready" +# Give essential plugins (alerting, actions) a moment to finish initialising +# after the overall status flips to "available". Fleet may be retrying in the +# background (it needs an encrypted API key for agent binary source) but that +# does not block the plugins we actually test. +sleep 15 +echo "Kibana is ready" echo "--- Generating CLI config file" cat > /tmp/elastic-rc.yml < Date: Tue, 5 May 2026 18:25:50 -0400 Subject: [PATCH 35/38] fix(kb-tests): fix alerting/connectors failures and improve readiness check Three root causes addressed: 1. `params` (alerting create) and `config`/`secrets` (connector create/update) were typed as "string" in the generated API definitions. The CLI factory only JSON-parses flag values for "object"/"array" typed params, so these were sent as raw string literals instead of JSON objects, producing 400 errors from the Kibana API. Fixed in the generator (elastic-client-generator-js#174) and reflected here. 2. The previous `sleep 15` after Kibana's "available" status was not reliable. Kibana's actions plugin serves 403 "license information is not available" until its license subscription fires after connecting to ES. Replaced with an active poll on GET /api/actions/connector_types which directly confirms the license is loaded and the actions API is ready. 3. 
Added stderr capture (2>/tmp/cli-err.txt + cat on failure) to the first CLI call in alerting.sh and connectors.sh so the actual HTTP error is visible in the Buildkite log if any future failure occurs. --- .buildkite/run-kb-tests-runner.sh | 28 +++++++++++++++++++++++----- src/kb/apis/alerting.ts | 2 +- src/kb/apis/connectors.ts | 10 +++++----- test/functional/kb/alerting.sh | 4 ++-- test/functional/kb/connectors.sh | 8 ++++---- 5 files changed, 35 insertions(+), 17 deletions(-) diff --git a/.buildkite/run-kb-tests-runner.sh b/.buildkite/run-kb-tests-runner.sh index cbeeb12..2c742b4 100755 --- a/.buildkite/run-kb-tests-runner.sh +++ b/.buildkite/run-kb-tests-runner.sh @@ -112,11 +112,29 @@ until curl -sf -u "elastic:${ES_PASSWORD}" "http://${KB_HOST}:5601/api/status" \ done echo "Kibana core is ready" -# Give essential plugins (alerting, actions) a moment to finish initialising -# after the overall status flips to "available". Fleet may be retrying in the -# background (it needs an encrypted API key for agent binary source) but that -# does not block the plugins we actually test. -sleep 15 +# Poll the actions API directly — it requires the license to be loaded from ES. +# Kibana's actions plugin returns 403 with "license information is not available" +# until its licensing subscription fires (usually a few seconds after "available", +# but can be longer on cold starts). Polling the real endpoint is more reliable +# than a fixed sleep. 
+echo "--- Waiting for Kibana actions API (license must be loaded)" +RETRIES=0 +MAX_RETRIES=60 +until curl -sf -u "elastic:${ES_PASSWORD}" \ + "http://${KB_HOST}:5601/api/actions/connector_types" > /dev/null 2>&1; do + RETRIES=$((RETRIES + 1)) + if [ "$RETRIES" -ge "$MAX_RETRIES" ]; then + echo "Kibana actions API did not become ready in time" + echo "Last response:" + curl -u "elastic:${ES_PASSWORD}" \ + "http://${KB_HOST}:5601/api/actions/connector_types" 2>&1 || true + exit 1 + fi + if [ $((RETRIES % 10)) -eq 0 ]; then + echo " still waiting for actions API... (${RETRIES}/${MAX_RETRIES})" + fi + sleep 2 +done echo "Kibana is ready" echo "--- Generating CLI config file" diff --git a/src/kb/apis/alerting.ts b/src/kb/apis/alerting.ts index 20f05ec..82aeff8 100644 --- a/src/kb/apis/alerting.ts +++ b/src/kb/apis/alerting.ts @@ -54,7 +54,7 @@ export const alertingApis: KbApiDefinition[] = [ { name: "schedule", type: "object", description: "The check interval, which specifies how frequently the rule conditions are checked.", required: true }, { name: "tags", type: "array", description: "The tags for the rule." }, { name: "throttle", type: "string", description: "Use the `throttle` property in the action `frequency` object instead. The throttle interval, which defines how often an alert generates repeated actions. NOTE: You cannot specify the throttle interval at both the rule and action level. If you set it at the rule level then update the rule in Kibana, it is automatically changed to use action-specific values." }, - { name: "params", type: "string", description: "The parameters for the rule." }, + { name: "params", type: "object", description: "The parameters for the rule." 
}, ], }, { diff --git a/src/kb/apis/connectors.ts b/src/kb/apis/connectors.ts index 0d082fe..7d5a7db 100644 --- a/src/kb/apis/connectors.ts +++ b/src/kb/apis/connectors.ts @@ -68,8 +68,8 @@ export const connectorsApis: KbApiDefinition[] = [ bodyParams: [ { name: "connector_type_id", type: "string", description: "The type of connector.", required: true }, { name: "name", type: "string", description: "The display name for the connector.", required: true }, - { name: "config", cliFlag: "kb-config", type: "string", description: "The connector configuration details." }, - { name: "secrets", type: "string", description: "" }, + { name: "config", cliFlag: "kb-config", type: "object", description: "The connector configuration details." }, + { name: "secrets", type: "object", description: "" }, ], }, { @@ -83,8 +83,8 @@ export const connectorsApis: KbApiDefinition[] = [ ], bodyParams: [ { name: "name", type: "string", description: "The display name for the connector.", required: true }, - { name: "config", cliFlag: "kb-config", type: "string", description: "The connector configuration details." }, - { name: "secrets", type: "string", description: "" }, + { name: "config", cliFlag: "kb-config", type: "object", description: "The connector configuration details." 
}, + { name: "secrets", type: "object", description: "" }, ], }, { @@ -97,7 +97,7 @@ export const connectorsApis: KbApiDefinition[] = [ { name: "id", description: "An identifier for the connector.", required: true }, ], bodyParams: [ - { name: "params", type: "string", description: "", required: true }, + { name: "params", type: "object", description: "", required: true }, ], }, { diff --git a/test/functional/kb/alerting.sh b/test/functional/kb/alerting.sh index 3aebe67..5a114be 100755 --- a/test/functional/kb/alerting.sh +++ b/test/functional/kb/alerting.sh @@ -31,8 +31,8 @@ output=$($CLI stack kb alerting post-alerting-rule-id \ --rule-type-id ".es-query" \ --schedule '{"interval":"1m"}' \ --params "$RULE_PARAMS" \ - --json 2>/dev/null) \ - || { echo "FAIL: alerting create — command failed"; exit 1; } + --json 2>/tmp/cli-err.txt) \ + || { echo "FAIL: alerting create — command failed"; cat /tmp/cli-err.txt; exit 1; } [ "$(echo "$output" | jq -r '.id')" = "$RULE_ID" ] \ || { echo "FAIL: alerting create — id mismatch"; exit 1; } [ "$(echo "$output" | jq -r '.name')" = "CLI FT Rule" ] \ diff --git a/test/functional/kb/connectors.sh b/test/functional/kb/connectors.sh index 4744a34..26c7af3 100755 --- a/test/functional/kb/connectors.sh +++ b/test/functional/kb/connectors.sh @@ -23,8 +23,8 @@ trap teardown EXIT # ── list connector types ─────────────────────────────────────────────── -output=$($CLI stack kb connectors get-actions-connector-types --json 2>/dev/null) \ - || { echo "FAIL: connectors list-types — command failed"; exit 1; } +output=$($CLI stack kb connectors get-actions-connector-types --json 2>/tmp/cli-err.txt) \ + || { echo "FAIL: connectors list-types — command failed"; cat /tmp/cli-err.txt; exit 1; } count=$(echo "$output" | jq 'length') [ "$count" -gt 0 ] || { echo "FAIL: connectors list-types — empty list"; exit 1; } @@ -44,8 +44,8 @@ output=$($CLI stack kb connectors post-actions-connector-id \ --connector-type-id ".index" \ --name "CLI FT Index 
Connector" \ --kb-config '{"index":"cli-ft-connector-*"}' \ - --json 2>/dev/null) \ - || { echo "FAIL: connectors create — command failed"; exit 1; } + --json 2>/tmp/cli-err.txt) \ + || { echo "FAIL: connectors create — command failed"; cat /tmp/cli-err.txt; exit 1; } [ "$(echo "$output" | jq -r '.id')" = "$CONNECTOR_UUID" ] \ || { echo "FAIL: connectors create — id mismatch"; exit 1; } [ "$(echo "$output" | jq -r '.name')" = "CLI FT Index Connector" ] \ From c4b4944ad94258b25d64197183bb33a548012c4a Mon Sep 17 00:00:00 2001 From: margaretjgu Date: Tue, 5 May 2026 18:44:14 -0400 Subject: [PATCH 36/38] fix(ci): use plugin status check instead of polling actions endpoint Polling GET /api/actions/connector_types directly was causing repeated 500 Server Errors in Kibana's HTTP access log (the actions plugin HTTP context is not yet wired when Kibana first reports 'available', so early requests get a 500). This looked like the Fleet error resurfacing. Switch to polling /api/status and checking .status.plugins.actions.level == 'available' .status.plugins.alerting.level == 'available' The status endpoint always returns 200 and never causes log noise. Fleet degradation appears only in plugins.fleet and does not affect plugins.actions or plugins.alerting. --- .buildkite/run-kb-tests-runner.sh | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/.buildkite/run-kb-tests-runner.sh b/.buildkite/run-kb-tests-runner.sh index 2c742b4..28bfc2f 100755 --- a/.buildkite/run-kb-tests-runner.sh +++ b/.buildkite/run-kb-tests-runner.sh @@ -112,26 +112,31 @@ until curl -sf -u "elastic:${ES_PASSWORD}" "http://${KB_HOST}:5601/api/status" \ done echo "Kibana core is ready" -# Poll the actions API directly — it requires the license to be loaded from ES. 
-# Kibana's actions plugin returns 403 with "license information is not available" -# until its licensing subscription fires (usually a few seconds after "available", -# but can be longer on cold starts). Polling the real endpoint is more reliable -# than a fixed sleep. -echo "--- Waiting for Kibana actions API (license must be loaded)" +# Wait for the actions and alerting plugins to be individually "available" by +# polling /api/status. That endpoint always returns HTTP 200, so it never +# causes the 500 Server Error noise in Kibana logs that polling the actions +# endpoint directly does (the actions HTTP context isn't wired until slightly +# after overall "available", returning 500 during that window). +# Fleet being degraded shows up only in plugins.fleet — it does not affect +# plugins.actions or plugins.alerting. +echo "--- Waiting for actions + alerting plugins to be available" RETRIES=0 MAX_RETRIES=60 -until curl -sf -u "elastic:${ES_PASSWORD}" \ - "http://${KB_HOST}:5601/api/actions/connector_types" > /dev/null 2>&1; do +until curl -sf -u "elastic:${ES_PASSWORD}" "http://${KB_HOST}:5601/api/status" \ + | jq -e ' + (.status.plugins.actions.level // "") == "available" and + (.status.plugins.alerting.level // "") == "available" + ' > /dev/null 2>&1; do RETRIES=$((RETRIES + 1)) if [ "$RETRIES" -ge "$MAX_RETRIES" ]; then - echo "Kibana actions API did not become ready in time" - echo "Last response:" - curl -u "elastic:${ES_PASSWORD}" \ - "http://${KB_HOST}:5601/api/actions/connector_types" 2>&1 || true + echo "Actions/alerting plugins did not reach 'available' in time" + echo "Last plugin statuses:" + curl -sf -u "elastic:${ES_PASSWORD}" "http://${KB_HOST}:5601/api/status" \ + | jq '.status.plugins | with_entries(select(.value.level != "available"))' 2>&1 || true exit 1 fi if [ $((RETRIES % 10)) -eq 0 ]; then - echo " still waiting for actions API... (${RETRIES}/${MAX_RETRIES})" + echo " still waiting... 
(${RETRIES}/${MAX_RETRIES})" fi sleep 2 done From 5c6ddb9b32fca045a7b61ec26c2a4513f99a501a Mon Sep 17 00:00:00 2001 From: margaretjgu Date: Tue, 5 May 2026 18:55:19 -0400 Subject: [PATCH 37/38] fix(ci): use SCREAMING_SNAKE_CASE env vars for Kibana encryption keys Kibana's Docker entrypoint only processes environment variables in SCREAMING_SNAKE_CASE format (e.g. XPACK_ENCRYPTEDSAVEDOBJECTS_ENCRYPTIONKEY). Dotted-notation names (e.g. xpack.encryptedSavedObjects.encryptionKey) are not picked up, so encryptedSavedObjects.canEncrypt stayed false in CI. Every call to getActionsClient() checks canEncrypt and throws 'Unable to create actions client because the Encrypted Saved Objects plugin is missing encryption key' causing a 500 on both POST /api/alerting/rule and GET /api/actions/connector_types. Confirmed: local Kibana (start-local) sets XPACK_ENCRYPTEDSAVEDOBJECTS_ENCRYPTIONKEY and all 5 functional tests pass (5/5 locally). --- .buildkite/run-kb-tests.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.buildkite/run-kb-tests.sh b/.buildkite/run-kb-tests.sh index 5fca254..b9da071 100755 --- a/.buildkite/run-kb-tests.sh +++ b/.buildkite/run-kb-tests.sh @@ -158,9 +158,9 @@ docker run \ --env "ELASTICSEARCH_HOSTS=http://elasticsearch:9200" \ --env "ELASTICSEARCH_USERNAME=kibana_system" \ --env "ELASTICSEARCH_PASSWORD=${ES_PASSWORD}" \ - --env "xpack.encryptedSavedObjects.encryptionKey=${KIBANA_ENCRYPTION_KEY}" \ - --env "xpack.reporting.encryptionKey=${KIBANA_ENCRYPTION_KEY}" \ - --env "xpack.security.encryptionKey=${KIBANA_ENCRYPTION_KEY}" \ + --env "XPACK_ENCRYPTEDSAVEDOBJECTS_ENCRYPTIONKEY=${KIBANA_ENCRYPTION_KEY}" \ + --env "XPACK_REPORTING_ENCRYPTIONKEY=${KIBANA_ENCRYPTION_KEY}" \ + --env "XPACK_SECURITY_ENCRYPTIONKEY=${KIBANA_ENCRYPTION_KEY}" \ --detach \ "$KB_IMAGE" From a1ce6a9a5263118e65c8b4f88db48f5de529c7cb Mon Sep 17 00:00:00 2001 From: margaretjgu Date: Tue, 5 May 2026 19:02:57 -0400 Subject: [PATCH 38/38] chore(ci): remove verbose 
debug logging from kb test runner --- .buildkite/run-kb-tests-runner.sh | 60 ++++++++----------------------- 1 file changed, 14 insertions(+), 46 deletions(-) diff --git a/.buildkite/run-kb-tests-runner.sh b/.buildkite/run-kb-tests-runner.sh index 28bfc2f..fe25332 100755 --- a/.buildkite/run-kb-tests-runner.sh +++ b/.buildkite/run-kb-tests-runner.sh @@ -14,48 +14,20 @@ ES_PASSWORD="${ES_PASSWORD:-changeme}" echo "--- Installing curl and jq" apt-get update -qq && apt-get install -y -q --no-install-recommends curl jq -# ── Network diagnostics ────────────────────────────────────────────────────── -echo "--- Network diagnostics" -echo "resolv.conf:" -cat /etc/resolv.conf || true -echo "Routes:" -ip route 2>/dev/null || true - -# Determine whether to use DNS names or IPs. +# Prefer Docker DNS aliases; fall back to container IPs if DNS is unavailable. ES_HOST="elasticsearch" KB_HOST="kibana" -if getent hosts elasticsearch > /dev/null 2>&1; then - RESOLVED=$(getent hosts elasticsearch | awk '{print $1}') - echo "DNS OK: elasticsearch -> $RESOLVED" -else - echo "DNS lookup for 'elasticsearch' failed" - if [[ -n "${ES_IP:-}" ]]; then - echo "Falling back to ES_IP=${ES_IP}" - ES_HOST="$ES_IP" - else - echo "No ES_IP provided and DNS failed — health checks will fail" - fi +if ! getent hosts elasticsearch > /dev/null 2>&1; then + echo "DNS for 'elasticsearch' unavailable; falling back to ES_IP=${ES_IP:-}" + ES_HOST="${ES_IP:-elasticsearch}" fi - -if getent hosts kibana > /dev/null 2>&1; then - RESOLVED=$(getent hosts kibana | awk '{print $1}') - echo "DNS OK: kibana -> $RESOLVED" -else - echo "DNS lookup for 'kibana' failed" - if [[ -n "${KB_IP:-}" ]]; then - echo "Falling back to KB_IP=${KB_IP}" - KB_HOST="$KB_IP" - else - echo "No KB_IP provided and DNS failed — health checks will fail" - fi +if ! 
getent hosts kibana > /dev/null 2>&1; then + echo "DNS for 'kibana' unavailable; falling back to KB_IP=${KB_IP:-}" + KB_HOST="${KB_IP:-kibana}" fi -echo "Using ES_HOST=${ES_HOST}, KB_HOST=${KB_HOST}" - -# First connection attempt with full output for debugging. -echo "First curl attempt (verbose):" -curl -v -u "elastic:${ES_PASSWORD}" "http://${ES_HOST}:9200/_cluster/health" 2>&1 || true +echo "ES_HOST=${ES_HOST} KB_HOST=${KB_HOST}" # ── Wait for Elasticsearch ─────────────────────────────────────────────────── echo "--- Waiting for Elasticsearch to be healthy" @@ -64,9 +36,8 @@ MAX_RETRIES=180 until curl -sf -u "elastic:${ES_PASSWORD}" "http://${ES_HOST}:9200/_cluster/health" > /dev/null 2>&1; do RETRIES=$((RETRIES + 1)) if [ "$RETRIES" -ge "$MAX_RETRIES" ]; then - echo "Elasticsearch did not become healthy in time after $((MAX_RETRIES * 2))s" - echo "Last curl attempt:" - curl -v -u "elastic:${ES_PASSWORD}" "http://${ES_HOST}:9200/_cluster/health" 2>&1 || true + echo "Elasticsearch did not become healthy after $((MAX_RETRIES * 2))s" + curl -s -u "elastic:${ES_PASSWORD}" "http://${ES_HOST}:9200/_cluster/health" 2>&1 || true exit 1 fi if [ $((RETRIES % 15)) -eq 0 ]; then @@ -112,13 +83,10 @@ until curl -sf -u "elastic:${ES_PASSWORD}" "http://${KB_HOST}:5601/api/status" \ done echo "Kibana core is ready" -# Wait for the actions and alerting plugins to be individually "available" by -# polling /api/status. That endpoint always returns HTTP 200, so it never -# causes the 500 Server Error noise in Kibana logs that polling the actions -# endpoint directly does (the actions HTTP context isn't wired until slightly -# after overall "available", returning 500 during that window). -# Fleet being degraded shows up only in plugins.fleet — it does not affect -# plugins.actions or plugins.alerting. 
+# Poll /api/status for plugin-level readiness rather than calling the actions +# endpoint directly (the actions HTTP context returns 500 briefly after +# Kibana's overall "available", and Fleet degradation is isolated to +# plugins.fleet and does not affect plugins.actions or plugins.alerting). echo "--- Waiting for actions + alerting plugins to be available" RETRIES=0 MAX_RETRIES=60