From 53f74d12b7f94804a7ea8959fb5a377ce8be4075 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miko=C5=82aj=20=C5=9Awi=C4=85tek?= Date: Fri, 14 Nov 2025 20:48:50 +0100 Subject: [PATCH 1/3] Ensure monitoring the Otel collector never runs in a beat process --- .../application/coordinator/coordinator.go | 6 +++++ .../integration/ess/beat_receivers_test.go | 23 ++++++++++--------- 2 files changed, 18 insertions(+), 11 deletions(-) diff --git a/internal/pkg/agent/application/coordinator/coordinator.go b/internal/pkg/agent/application/coordinator/coordinator.go index a0067d3190b..945163b1830 100644 --- a/internal/pkg/agent/application/coordinator/coordinator.go +++ b/internal/pkg/agent/application/coordinator/coordinator.go @@ -1817,6 +1817,12 @@ func (c *Coordinator) splitModelBetweenManagers(model *component.Model) (runtime case component.OtelRuntimeManager: otelComponents = append(otelComponents, comp) case component.ProcessRuntimeManager: + // Hack to fix https://github.com/elastic/elastic-agent/issues/11169 + // TODO: Remove this after https://github.com/elastic/elastic-agent/issues/10220 is resolved + if comp.ID == "prometheus/metrics-monitoring" { + c.logger.Warnf("The Otel prometheus metrics monitoring input can't run in a beats process, skipping") + continue + } runtimeComponents = append(runtimeComponents, comp) default: // this should be impossible if we parse the configuration correctly diff --git a/testing/integration/ess/beat_receivers_test.go b/testing/integration/ess/beat_receivers_test.go index 6e7e8fb8a6a..6ed4fe207a8 100644 --- a/testing/integration/ess/beat_receivers_test.go +++ b/testing/integration/ess/beat_receivers_test.go @@ -848,7 +848,6 @@ outputs: hosts: [http://localhost:9200] api_key: placeholder indices: [] # not supported by the elasticsearch exporter -agent.monitoring.enabled: false ` // this is the context for the whole test, with a global timeout defined @@ -872,13 +871,14 @@ agent.monitoring.enabled: false status, statusErr := 
fixture.ExecStatus(ctx) assert.NoError(collect, statusErr) // we should be running beats processes even though the otel runtime was requested - assertBeatsHealthy(collect, &status, component.ProcessRuntimeManager, 1) + assertBeatsHealthy(collect, &status, component.ProcessRuntimeManager, 4) }, 1*time.Minute, 1*time.Second) logsBytes, err := fixture.Exec(ctx, []string{"logs", "-n", "1000", "--exclude-events"}) require.NoError(t, err) // verify we've logged a warning about using the process runtime - var unsupportedLogRecord map[string]any + var unsupportedLogRecords []map[string]any + var prometheusUnsupportedLogRecord map[string]any for _, line := range strings.Split(string(logsBytes), "\n") { line = strings.TrimSpace(line) if line == "" { @@ -889,9 +889,13 @@ agent.monitoring.enabled: false continue } - if message, ok := logRecord["message"].(string); ok && strings.HasPrefix(message, "otel runtime is not supported") { - unsupportedLogRecord = logRecord - break + if message, ok := logRecord["message"].(string); ok { + if strings.HasPrefix(message, "otel runtime is not supported") { + unsupportedLogRecords = append(unsupportedLogRecords, logRecord) + } + if strings.HasPrefix(message, "The Otel prometheus metrics monitoring input can't run in a beats process, skipping") { + prometheusUnsupportedLogRecord = logRecord + } } } @@ -902,11 +906,8 @@ agent.monitoring.enabled: false } }) - require.NotNil(t, unsupportedLogRecord, "unsupported log message should be present") - message, ok := unsupportedLogRecord["message"].(string) - require.True(t, ok, "log message field should be a string") - expectedMessage := "otel runtime is not supported for component system/metrics-default, switching to process runtime, reason: unsupported configuration for system/metrics-default: error translating config for output: default, unit: system/metrics-default, error: indices is currently not supported: unsupported operation" - assert.Equal(t, expectedMessage, message) + assert.Len(t, 
unsupportedLogRecords, 5, "one log line for each component we try to run") + assert.NotEmpty(t, prometheusUnsupportedLogRecord, "should get a log line about Otel prometheus metrics input being skipped") } // TestComponentWorkDir verifies that the component working directory is not deleted when moving the component from From 0c875986e0d055cbb36a45d0ffb7886b3ecf8810 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miko=C5=82aj=20=C5=9Awi=C4=85tek?= Date: Mon, 17 Nov 2025 13:26:34 +0100 Subject: [PATCH 2/3] Add changelog entry --- ...82303-prometheus-otel-self-monitoring.yaml | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 changelog/fragments/1763382303-prometheus-otel-self-monitoring.yaml diff --git a/changelog/fragments/1763382303-prometheus-otel-self-monitoring.yaml b/changelog/fragments/1763382303-prometheus-otel-self-monitoring.yaml new file mode 100644 index 00000000000..46dd652f3fa --- /dev/null +++ b/changelog/fragments/1763382303-prometheus-otel-self-monitoring.yaml @@ -0,0 +1,32 @@ +# Kind can be one of: +# - breaking-change: a change to previously-documented behavior +# - deprecation: functionality that is being removed in a later release +# - bug-fix: fixes a problem in a previous version +# - enhancement: extends functionality but does not break or fix existing behavior +# - feature: new functionality +# - known-issue: problems that we are aware of in a given version +# - security: impacts on the security of a product or a user’s deployment. +# - upgrade: important information for someone upgrading from a prior version +# - other: does not fit into any of the other categories +kind: bug-fix + +# Change summary; an 80ish-character description of the change. +summary: Ensure the monitoring input for the Otel collector can only run inside the collector. + +# Long description; in case the summary is not enough to describe the change +# this field accommodates a description without length limits.
+# NOTE: This field will be rendered only for breaking-change and known-issue kinds at the moment. +#description: + +# Affected component; usually one of "elastic-agent", "fleet-server", "filebeat", "metricbeat", "auditbeat", "all", etc. +component: elastic-agent + +# PR URL; optional; the PR number that added the changeset. +# If not present, it is automatically filled in by the tooling, which finds the PR where this changelog fragment was added. +# NOTE: the tooling supports backports, so it's able to fill the original PR number instead of the backport PR number. +# Please provide it if you are adding a fragment for a different PR. +#pr: https://github.com/owner/repo/1234 + +# Issue URL; optional; the GitHub issue related to this changeset (either closes or is part of). +# If not present, it is automatically filled in by the tooling with the issue linked to the PR number. +#issue: https://github.com/owner/repo/1234 From f9294cb4ea6d9c255da98af83ff484b4b78da3c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miko=C5=82aj=20=C5=9Awi=C4=85tek?= Date: Wed, 19 Nov 2025 15:58:22 +0100 Subject: [PATCH 3/3] Move log lines to constants --- testing/integration/ess/beat_receivers_test.go | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/testing/integration/ess/beat_receivers_test.go b/testing/integration/ess/beat_receivers_test.go index 6ed4fe207a8..936f2780846 100644 --- a/testing/integration/ess/beat_receivers_test.go +++ b/testing/integration/ess/beat_receivers_test.go @@ -817,6 +817,12 @@ agent.monitoring.enabled: false } } +// Log lines TestBeatsReceiverProcessRuntimeFallback checks for +const ( + otelRuntimeUnsupportedLogLineStart = "otel runtime is not supported" + prometheusInputSkippedLogLine = "The Otel prometheus metrics monitoring input can't run in a beats process, skipping" +) + // TestBeatsReceiverProcessRuntimeFallback verifies that we fall back to the process runtime if the otel runtime // does not support the requested configuration.
func TestBeatsReceiverProcessRuntimeFallback(t *testing.T) { @@ -890,10 +896,10 @@ outputs: } if message, ok := logRecord["message"].(string); ok { - if strings.HasPrefix(message, "otel runtime is not supported") { + if strings.HasPrefix(message, otelRuntimeUnsupportedLogLineStart) { unsupportedLogRecords = append(unsupportedLogRecords, logRecord) } - if strings.HasPrefix(message, "The Otel prometheus metrics monitoring input can't run in a beats process, skipping") { + if strings.HasPrefix(message, prometheusInputSkippedLogLine) { prometheusUnsupportedLogRecord = logRecord } }