Skip to content

Commit

Permalink
operator: fix errors/warnings metric.
Browse files Browse the repository at this point in the history
This was broken during the transition of pkg/metrics to integrate with Hive, where the relevant operator metrics were never initialized.
This adds an init func specific to the operator and cleans up the "flush" logic used as a workaround for errors/warnings emitted prior to the agent starting (in the case of the operator).

Addresses: #29525

Signed-off-by: Tom Hadlaw <tom.hadlaw@isovalent.com>
  • Loading branch information
tommyp1ckles committed Mar 12, 2024
1 parent 1907334 commit 941c700
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 6 deletions.
4 changes: 4 additions & 0 deletions operator/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -103,5 +103,9 @@ func registerMetricsManager(p params) {
Registry.MustRegister(metric.(prometheus.Collector))
}

metrics.InitOperatorMetrics()
Registry.MustRegister(metrics.ErrorsWarnings)
metrics.FlushLoggingMetrics()

p.Lifecycle.Append(mm)
}
7 changes: 7 additions & 0 deletions pkg/metrics/logging_hook.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,13 @@ import (

// metricsInitialized is a signalling channel. FlushLoggingMetrics closes it
// once and then replaces it with nil.
var metricsInitialized = make(chan struct{})

// FlushLoggingMetrics closes the metricsInitialized channel and then clears
// the variable, so a repeated call is a no-op rather than a double close.
//
// NOTE(review): the nil check and the close are not synchronized; this
// assumes a single caller during startup — confirm against callers.
func FlushLoggingMetrics() {
	if metricsInitialized == nil {
		// Already flushed; closing again would panic.
		return
	}
	close(metricsInitialized)
	metricsInitialized = nil
}

// LoggingHook is a hook for logrus which counts error and warning messages as a
// Prometheus metric.
type LoggingHook struct {
Expand Down
21 changes: 15 additions & 6 deletions pkg/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -981,12 +981,7 @@ func NewLegacyMetrics() *LegacyMetrics {
Help: "Number of services events labeled by action type",
}, []string{LabelAction}),

ErrorsWarnings: metric.NewCounterVec(metric.CounterOpts{
ConfigName: Namespace + "_errors_warnings_total",
Namespace: Namespace,
Name: "errors_warnings_total",
Help: "Number of total errors in cilium-agent instances",
}, []string{"level", "subsystem"}),
ErrorsWarnings: newErrorsWarningsMetric(),

ControllerRuns: metric.NewCounterVec(metric.CounterOpts{
ConfigName: Namespace + "_controllers_runs_total",
Expand Down Expand Up @@ -1413,6 +1408,20 @@ func NewLegacyMetrics() *LegacyMetrics {
return lm
}

// InitOperatorMetrics sets up the legacy metrics the operator needs during
// its own initialization. At present that is only ErrorsWarnings, which is
// replaced with a newly constructed errors/warnings counter vector.
//
// No registry call is made here.
func InitOperatorMetrics() {
	errorsWarnings := newErrorsWarningsMetric()
	ErrorsWarnings = errorsWarnings
}

// newErrorsWarningsMetric constructs the "errors_warnings_total" counter
// vector under Namespace, partitioned by the "level" and "subsystem" labels.
func newErrorsWarningsMetric() metric.Vec[metric.Counter] {
	labelNames := []string{"level", "subsystem"}
	opts := metric.CounterOpts{
		ConfigName: Namespace + "_errors_warnings_total",
		Namespace:  Namespace,
		Name:       "errors_warnings_total",
		Help:       "Number of total errors in cilium-agent instances",
	}
	return metric.NewCounterVec(opts, labelNames)
}

// GaugeWithThreshold is a prometheus gauge that registers itself with
// prometheus if over a threshold value and unregisters when under.
type GaugeWithThreshold struct {
Expand Down

0 comments on commit 941c700

Please sign in to comment.