Skip to content

Commit

Permalink
tetragon: Add events buffer queue metrics
Browse files Browse the repository at this point in the history
Add the following metric counters for the events buffer queue:

  ringbufqueuemetrics.Received - number of records received from the perf event reader goroutine
  ringbufqueuemetrics.Lost     - number of records lost because the RB Queue channel was full

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
  • Loading branch information
olsajiri authored and jrfastab committed Sep 6, 2023
1 parent 4d30ef1 commit 9ed09fa
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 0 deletions.
2 changes: 2 additions & 0 deletions pkg/metrics/config/initmetrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (
pfmetrics "github.com/cilium/tetragon/pkg/metrics/policyfilter"
"github.com/cilium/tetragon/pkg/metrics/processexecmetrics"
"github.com/cilium/tetragon/pkg/metrics/ringbufmetrics"
"github.com/cilium/tetragon/pkg/metrics/ringbufqueuemetrics"
"github.com/cilium/tetragon/pkg/metrics/syscallmetrics"
"github.com/cilium/tetragon/pkg/metrics/watchermetrics"
"github.com/cilium/tetragon/pkg/observer"
Expand All @@ -33,6 +34,7 @@ func InitAllMetrics(registry *prometheus.Registry) {
pfmetrics.InitMetrics(registry)
processexecmetrics.InitMetrics(registry)
ringbufmetrics.InitMetrics(registry)
ringbufqueuemetrics.InitMetrics(registry)
syscallmetrics.InitMetrics(registry)
watchermetrics.InitMetrics(registry)
observer.InitMetrics(registry)
Expand Down
29 changes: 29 additions & 0 deletions pkg/metrics/ringbufqueuemetrics/ringbufqueuemetrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
// SPDX-License-Identifier: Apache-2.0
// Copyright Authors of Tetragon

package ringbufqueuemetrics

import (
"github.com/cilium/tetragon/pkg/metrics/consts"
"github.com/prometheus/client_golang/prometheus"
)

// newQueueCounter builds an unlabelled counter in the Tetragon metrics
// namespace with the given metric name and help text.
func newQueueCounter(name, help string) prometheus.Counter {
	opts := prometheus.CounterOpts{
		Namespace: consts.MetricsNamespace,
		Name:      name,
		Help:      help,
	}
	return prometheus.NewCounter(opts)
}

// Received counts the events that the observer took off the ring buffer
// events queue and handed over for processing.
var Received = newQueueCounter(
	"ringbuf_queue_received_total",
	"The total number of Tetragon events ring buffer queue received.",
)

// Lost counts the events that the observer dropped because the ring buffer
// events queue channel was full.
var Lost = newQueueCounter(
	"ringbuf_queue_lost_total",
	"The total number of Tetragon events ring buffer queue lost.",
)

// InitMetrics registers the ring buffer queue counters (Received and Lost)
// with the given Prometheus registry. Registration goes through
// MustRegister, so a failure to register either collector panics.
func InitMetrics(registry *prometheus.Registry) {
	// MustRegister is variadic and registers collectors in argument order.
	registry.MustRegister(Received, Lost)
}
3 changes: 3 additions & 0 deletions pkg/observer/observer.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
"github.com/cilium/tetragon/pkg/metrics/errormetrics"
"github.com/cilium/tetragon/pkg/metrics/opcodemetrics"
"github.com/cilium/tetragon/pkg/metrics/ringbufmetrics"
"github.com/cilium/tetragon/pkg/metrics/ringbufqueuemetrics"
"github.com/cilium/tetragon/pkg/option"
"github.com/cilium/tetragon/pkg/reader/notify"
"github.com/cilium/tetragon/pkg/sensors"
Expand Down Expand Up @@ -242,6 +243,7 @@ func (k *Observer) RunEvents(stopCtx context.Context, ready func()) error {
case eventsQueue <- &record:
default:
// eventsQueue channel is full, drop the event
ringbufqueuemetrics.Lost.Inc()
}
k.recvCntr++
ringbufmetrics.PerfEventReceived.Inc()
Expand All @@ -263,6 +265,7 @@ func (k *Observer) RunEvents(stopCtx context.Context, ready func()) error {
select {
case event := <-eventsQueue:
k.receiveEvent(event.RawSample)
ringbufqueuemetrics.Received.Inc()
case <-stopCtx.Done():
k.log.WithError(stopCtx.Err()).Infof("Listening for events completed.")
k.log.Debugf("Unprocessed events in RB queue: %d", len(eventsQueue))
Expand Down

0 comments on commit 9ed09fa

Please sign in to comment.