Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions changelog/fragments/1763650536-self-monitoring-runtime.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Kind can be one of:
# - breaking-change: a change to previously-documented behavior
# - deprecation: functionality that is being removed in a later release
# - bug-fix: fixes a problem in a previous version
# - enhancement: extends functionality but does not break or fix existing behavior
# - feature: new functionality
# - known-issue: problems that we are aware of in a given version
# - security: impacts on the security of a product or a user’s deployment.
# - upgrade: important information for someone upgrading from a prior version
# - other: does not fit into any of the other categories
kind: bug-fix

# Change summary; an 80ish-character description of the change.
summary: Ensure the self-monitoring configuration accounts for the runtime that components actually run in.

# Long description; in case the summary is not enough to describe the change,
# this field accommodates a description without length limits.
# NOTE: This field will be rendered only for breaking-change and known-issue kinds at the moment.
#description:

# Affected component; usually one of "elastic-agent", "fleet-server", "filebeat", "metricbeat", "auditbeat", "all", etc.
component: elastic-agent

# PR URL; optional; the PR number that added the changeset.
# If not present, it is automatically filled by the tooling, which finds the PR where this changelog fragment was added.
# NOTE: the tooling supports backports, so it's able to fill the original PR number instead of the backport PR number.
# Please provide it if you are adding a fragment for a different PR.
#pr: https://github.com/owner/repo/1234

# Issue URL; optional; the GitHub issue related to this changeset (either closes or is part of).
# If not present, it is automatically filled by the tooling with the issue linked to the PR number.
#issue: https://github.com/owner/repo/1234
3 changes: 2 additions & 1 deletion internal/pkg/agent/application/application.go
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,7 @@ func New(
cfg.Settings.DownloadConfig.OS(),
cfg.Settings.MonitoringConfig,
agentInfo,
log,
)

runtime, err := runtime.NewManager(
Expand All @@ -171,7 +172,7 @@ func New(

var configMgr coordinator.ConfigManager
var managed *managedConfigManager
var compModifiers = []coordinator.ComponentsModifier{InjectAPMConfig}
var compModifiers = []component.ComponentsModifier{InjectAPMConfig}
var composableManaged bool
var isManaged bool
var actionAcker acker.Acker
Expand Down
33 changes: 11 additions & 22 deletions internal/pkg/agent/application/coordinator/coordinator.go
Original file line number Diff line number Diff line change
Expand Up @@ -224,10 +224,6 @@ type VarsManager interface {
Watch() <-chan []*transpiler.Vars
}

// ComponentsModifier is a function that takes the computed components model and modifies it before
// passing it into the components runtime manager.
type ComponentsModifier func(comps []component.Component, cfg map[string]interface{}) ([]component.Component, error)

// managerShutdownTimeout is how long the coordinator will wait during shutdown
// to receive termination states from its managers.
// Note: The current timeout (5s) is shorter than the default stop timeout for
Expand Down Expand Up @@ -267,7 +263,7 @@ type Coordinator struct {
otelCfg *confmap.Conf

caps capabilities.Capabilities
modifiers []ComponentsModifier
modifiers []component.ComponentsModifier

// The current state of the Coordinator. This value and its subfields are
// safe to read directly from within the main Coordinator goroutine.
Expand Down Expand Up @@ -439,7 +435,7 @@ func New(
otelMgr OTelManager,
fleetAcker acker.Acker,
initialUpgradeDetails *details.Details,
modifiers ...ComponentsModifier,
modifiers ...component.ComponentsModifier,
) *Coordinator {
var fleetState cproto.State
var fleetMessage string
Expand Down Expand Up @@ -1812,17 +1808,10 @@ func (c *Coordinator) updateManagersWithConfig(model *component.Model) {
func (c *Coordinator) splitModelBetweenManagers(model *component.Model) (runtimeModel *component.Model, otelModel *component.Model) {
var otelComponents, runtimeComponents []component.Component
for _, comp := range model.Components {
c.maybeOverrideRuntimeForComponent(&comp)
switch comp.RuntimeManager {
case component.OtelRuntimeManager:
otelComponents = append(otelComponents, comp)
case component.ProcessRuntimeManager:
// Hack to fix https://github.com/elastic/elastic-agent/issues/11169
// TODO: Remove this after https://github.com/elastic/elastic-agent/issues/10220 is resolved
if comp.ID == "prometheus/metrics-monitoring" {
c.logger.Warnf("The Otel prometheus metrics monitoring input can't run in a beats process, skipping")
continue
}
runtimeComponents = append(runtimeComponents, comp)
default:
// this should be impossible if we parse the configuration correctly
Expand All @@ -1844,7 +1833,7 @@ func (c *Coordinator) splitModelBetweenManagers(model *component.Model) (runtime
// Normally, we use the runtime set in the component itself via the configuration, but
// we may also fall back to the process runtime if the otel runtime is unsupported for
// some reason. One example is the output using unsupported config options.
func (c *Coordinator) maybeOverrideRuntimeForComponent(comp *component.Component) {
func maybeOverrideRuntimeForComponent(logger *logger.Logger, comp *component.Component) {
if comp.RuntimeManager == component.ProcessRuntimeManager {
// do nothing, the process runtime can handle any component
return
Expand All @@ -1853,7 +1842,7 @@ func (c *Coordinator) maybeOverrideRuntimeForComponent(comp *component.Component
// check if the component is actually supported
err := translate.VerifyComponentIsOtelSupported(comp)
if err != nil {
c.logger.Warnf("otel runtime is not supported for component %s, switching to process runtime, reason: %v", comp.ID, err)
logger.Warnf("otel runtime is not supported for component %s, switching to process runtime, reason: %v", comp.ID, err)
comp.RuntimeManager = component.ProcessRuntimeManager
}
}
Expand Down Expand Up @@ -1943,8 +1932,15 @@ func (c *Coordinator) generateComponentModel() (err error) {
existingCompState[comp.Component.ID] = comp.State.Pid
}

// otelRuntimeModifier is a components modifier that runs
// maybeOverrideRuntimeForComponent on every component, logging through the
// coordinator's logger. The cfg argument is unused and no error is ever
// returned.
otelRuntimeModifier := func(comps []component.Component, cfg map[string]interface{}) ([]component.Component, error) {
// Index into the slice so the helper receives a pointer to the actual
// element; any change it makes to the component is visible in the
// returned slice.
for i := range comps {
maybeOverrideRuntimeForComponent(c.logger, &comps[i])
}
return comps, nil
}
comps, err := c.specs.ToComponents(
cfg,
append(c.modifiers, otelRuntimeModifier),
configInjector,
c.state.LogLevel,
c.agentInfo,
Expand All @@ -1957,13 +1953,6 @@ func (c *Coordinator) generateComponentModel() (err error) {
// Filter any disallowed inputs/outputs from the components
comps = c.filterByCapabilities(comps)

for _, modifier := range c.modifiers {
comps, err = modifier(comps, cfg)
if err != nil {
return fmt.Errorf("failed to modify components: %w", err)
}
}

// If we made it this far, update our internal derived values and
// return with no error
c.derivedConfig = cfg
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ import (

"github.com/elastic/elastic-agent-client/v7/pkg/client"
"github.com/elastic/elastic-agent-libs/transport/tlscommon"
"github.com/elastic/elastic-agent/internal/pkg/agent/application/coordinator"
"github.com/elastic/elastic-agent/pkg/component"
"github.com/elastic/elastic-agent/pkg/core/logger"
)
Expand Down Expand Up @@ -50,7 +49,7 @@ func (tlsCache) MakeKey(keyPassPath, certPath, keyPath string) string {
// "revision": 1,
// "type": "endpoint"
// }
func EndpointSignedComponentModifier() coordinator.ComponentsModifier {
func EndpointSignedComponentModifier() component.ComponentsModifier {
return func(comps []component.Component, cfg map[string]interface{}) ([]component.Component, error) {
const signedKey = "signed"

Expand Down Expand Up @@ -83,7 +82,7 @@ func EndpointSignedComponentModifier() coordinator.ComponentsModifier {
// 'key_passphrase_path'.
// It does so, ONLY for the client TLS configuration for mTLS used with
// fleet-server.
func EndpointTLSComponentModifier(log *logger.Logger) coordinator.ComponentsModifier {
func EndpointTLSComponentModifier(log *logger.Logger) component.ComponentsModifier {
return newEndpointTLSComponentModifier(log, &tlsCache{mu: &sync.Mutex{}})
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ import (
"github.com/elastic/elastic-agent-client/v7/pkg/client"
"github.com/elastic/elastic-agent-client/v7/pkg/proto"
"github.com/elastic/elastic-agent-libs/testing/certutil"
"github.com/elastic/elastic-agent/internal/pkg/agent/application/coordinator"
"github.com/elastic/elastic-agent/internal/pkg/testutils/fipsutils"
"github.com/elastic/elastic-agent/pkg/core/logger/loggertest"

Expand All @@ -44,7 +43,7 @@ func TestEndpointComponentModifier(t *testing.T) {

tests := map[string][]struct {
name string
compModifier coordinator.ComponentsModifier
compModifier component.ComponentsModifier
comps []component.Component
cfg map[string]interface{}
wantComps []component.Component
Expand Down
4 changes: 2 additions & 2 deletions internal/pkg/agent/application/fleet_server_bootstrap.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ var injectFleetServerInput = config.MustNewConfigFrom(map[string]interface{}{

// FleetServerComponentModifier modifies the comps to inject extra information from the policy into
// the Fleet Server component and units needed to run Fleet Server correctly.
func FleetServerComponentModifier(serverCfg *configuration.FleetServerConfig) coordinator.ComponentsModifier {
func FleetServerComponentModifier(serverCfg *configuration.FleetServerConfig) component.ComponentsModifier {
return func(comps []component.Component, _ map[string]interface{}) ([]component.Component, error) {
for i, comp := range comps {
if comp.InputSpec != nil && comp.InputSpec.InputType == fleetServer && comp.Err == nil {
Expand Down Expand Up @@ -118,7 +118,7 @@ func addBootstrapCfg(dst map[string]interface{}, es *configuration.Elasticsearch

// InjectFleetConfigComponentModifier The modifier that injects the fleet configuration for the components
// that need to be able to connect to fleet server.
func InjectFleetConfigComponentModifier(fleetCfg *configuration.FleetAgentConfig, agentInfo info.Agent) coordinator.ComponentsModifier {
func InjectFleetConfigComponentModifier(fleetCfg *configuration.FleetAgentConfig, agentInfo info.Agent) component.ComponentsModifier {
return func(comps []component.Component, cfg map[string]interface{}) ([]component.Component, error) {
hostsStr := fleetCfg.Client.GetHosts()
fleetHosts := make([]interface{}, 0, len(hostsStr))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ import (
"golang.org/x/net/http/httpproxy"

"github.com/elastic/elastic-agent-client/v7/pkg/client"
"github.com/elastic/elastic-agent/internal/pkg/agent/application/coordinator"
"github.com/elastic/elastic-agent/pkg/component"
)

Expand All @@ -20,7 +19,7 @@ import (
// The URL used is the HTTPS_PROXY env var. If that's not set the HTTP_PROXY env var is used.
// If there are no env vars set, or the unit's config has `proxy_disable: true`, nothing is injected
// If the output config has `proxy_url: ""`, it will not be overwritten.
func InjectProxyEndpointModifier() coordinator.ComponentsModifier {
func InjectProxyEndpointModifier() component.ComponentsModifier {
return func(comps []component.Component, _ map[string]interface{}) ([]component.Component, error) {
for i, comp := range comps {
if comp.InputSpec != nil && comp.InputSpec.InputType == endpoint {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -915,4 +915,7 @@ inputs:
type: system/metrics
use_output: monitoring
outputs:
monitoring: {}
monitoring:
hosts:
- localhost:9200
type: elasticsearch
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@ agent:
enabled: false
metrics: true
inputs:
- id: filestream-monitoring-agent
- _runtime_experimental: process
id: filestream-monitoring-agent
name: filestream-monitoring-agent
streams:
- close:
Expand Down Expand Up @@ -110,7 +111,8 @@ inputs:
type: filestream
type: filestream
use_output: monitoring
- data_stream:
- _runtime_experimental: process
data_stream:
namespace: default
id: metrics-monitoring-beats
name: metrics-monitoring-beats
Expand Down Expand Up @@ -357,9 +359,18 @@ inputs:
binary: metricbeat
id: prometheus/metrics-monitoring
target: component
- drop_fields:
fields:
- beat.stats.cgroup
- beat.stats.cpu
- beat.stats.handles
- beat.stats.memstats
- beat.stats.runtime
ignore_missing: true
type: beat/metrics
use_output: monitoring
- data_stream:
- _runtime_experimental: process
data_stream:
namespace: default
id: metrics-monitoring-agent
name: metrics-monitoring-agent
Expand Down Expand Up @@ -875,61 +886,6 @@ inputs:
binary: metricbeat
id: http/metrics-monitoring
target: component
- data_stream:
dataset: elastic_agent.elastic_agent
namespace: default
type: metrics
failure_threshold: 5
hosts:
- placeholder
id: metrics-monitoring-metricbeat-1
index: metrics-elastic_agent.elastic_agent-default
metricsets:
- json
namespace: agent
path: /stats
period: 1m0s
processors:
- add_fields:
fields:
dataset: elastic_agent.elastic_agent
target: event
- add_fields:
fields:
id: ""
process: metricbeat
snapshot: false
version: placeholder
target: elastic_agent
- add_fields:
fields:
id: ""
target: agent
- copy_fields:
fail_on_error: false
fields:
- from: http.agent.beat.cpu
to: system.process.cpu
- from: http.agent.beat.memstats.memory_sys
to: system.process.memory.size
- from: http.agent.beat.handles
to: system.process.fd
- from: http.agent.beat.cgroup
to: system.process.cgroup
- from: http.agent.apm-server
to: apm-server
- from: http.filebeat_input
to: filebeat_input
ignore_missing: true
- drop_fields:
fields:
- http
ignore_missing: true
- add_fields:
fields:
binary: metricbeat
id: prometheus/metrics-monitoring
target: component
type: http/metrics
use_output: monitoring
- _runtime_experimental: otel
Expand Down Expand Up @@ -1099,4 +1055,7 @@ inputs:
type: system/metrics
use_output: monitoring
outputs:
monitoring: {}
monitoring:
hosts:
- localhost:9200
type: elasticsearch
Loading