Skip to content

Commit

Permalink
ipsec: Expose XFRM config counts as metrics
Browse files Browse the repository at this point in the history
This commit adds new Prometheus metrics for the number of XFRM states
and policies for each direction. These can be used to alert on XFRM
leaks. We had one such leak in the past for XFRM out policies, which led
to performance degradation when node churn was high.

Signed-off-by: Paul Chaignon <paul.chaignon@gmail.com>
  • Loading branch information
pchaigno authored and joestringer committed Oct 19, 2023
1 parent ac529da commit 6db3b8f
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 4 deletions.
2 changes: 2 additions & 0 deletions Documentation/observability/metrics.rst
Original file line number Diff line number Diff line change
Expand Up @@ -348,6 +348,8 @@ Name Labels
============================================= ================================================== ========== ===========================================================
``ipsec_xfrm_error`` ``error``, ``type`` Enabled Total number of xfrm errors
``ipsec_keys`` Enabled Number of keys in use
``ipsec_xfrm_states`` ``direction`` Enabled Number of XFRM states
``ipsec_xfrm_policies`` ``direction`` Enabled Number of XFRM policies
============================================= ================================================== ========== ===========================================================

eBPF
Expand Down
40 changes: 36 additions & 4 deletions pkg/datapath/linux/ipsec/xfrm_collector.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,13 +36,19 @@ const (
labelErrorAcquire = "acquire"
labelErrorBundleGeneration = "bundle_generation"
labelErrorBundleCheck = "bundle_check"

labelDir = "direction"

labelDirIn = "in"
labelDirOut = "out"
labelDirFwd = "fwd"
)

// xfrmCollector holds the Prometheus descriptors for the IPsec XFRM metrics
// emitted by this collector. Each descriptor is declared exactly once;
// duplicate field names are a compile error in Go.
type xfrmCollector struct {
	// xfrmErrorDesc describes the XFRM errors metric.
	xfrmErrorDesc *prometheus.Desc

	// nbKeysDesc describes the number of IPsec keys in use.
	nbKeysDesc *prometheus.Desc

	// nbXFRMStatesDesc describes the number of XFRM states, labeled by
	// direction.
	nbXFRMStatesDesc *prometheus.Desc

	// nbXFRMPolsDesc describes the number of XFRM policies, labeled by
	// direction.
	nbXFRMPolsDesc *prometheus.Desc
}

func NewXFRMCollector() prometheus.Collector {
Expand All @@ -57,12 +63,24 @@ func NewXFRMCollector() prometheus.Collector {
"Number of IPsec keys in use",
[]string{}, nil,
),
nbXFRMStatesDesc: prometheus.NewDesc(
prometheus.BuildFQName(metrics.Namespace, subsystem, "xfrm_states"),
"Number of XFRM states",
[]string{labelDir}, nil,
),
nbXFRMPolsDesc: prometheus.NewDesc(
prometheus.BuildFQName(metrics.Namespace, subsystem, "xfrm_policies"),
"Number of XFRM policies",
[]string{labelDir}, nil,
),
}
}

// Describe sends the descriptor of every metric held by the collector to ch,
// in the order: XFRM errors, keys, XFRM states, XFRM policies.
func (x *xfrmCollector) Describe(ch chan<- *prometheus.Desc) {
	descriptors := []*prometheus.Desc{
		x.xfrmErrorDesc,
		x.nbKeysDesc,
		x.nbXFRMStatesDesc,
		x.nbXFRMPolsDesc,
	}
	for _, desc := range descriptors {
		ch <- desc
	}
}

func (x *xfrmCollector) collectErrors(ch chan<- prometheus.Metric) {
Expand Down Expand Up @@ -111,6 +129,20 @@ func (x *xfrmCollector) collectConfigStats(ch chan<- prometheus.Metric) {
}
nbKeys := ipsec.CountUniqueIPsecKeys(states)
ch <- prometheus.MustNewConstMetric(x.nbKeysDesc, prometheus.GaugeValue, float64(nbKeys))

nbStatesIn, nbStatesOut := ipsec.CountXfrmStatesByDir(states)
ch <- prometheus.MustNewConstMetric(x.nbXFRMStatesDesc, prometheus.GaugeValue, float64(nbStatesIn), labelDirIn)
ch <- prometheus.MustNewConstMetric(x.nbXFRMStatesDesc, prometheus.GaugeValue, float64(nbStatesOut), labelDirOut)

policies, err := netlink.XfrmPolicyList(netlink.FAMILY_ALL)
if err != nil {
log.WithError(err).Error("Failed to retrieve XFRM policies to compute Prometheus metrics")
return
}
nbPolIn, nbPolOut, nbPolFwd := ipsec.CountXfrmPoliciesByDir(policies)
ch <- prometheus.MustNewConstMetric(x.nbXFRMPolsDesc, prometheus.GaugeValue, float64(nbPolIn), labelDirIn)
ch <- prometheus.MustNewConstMetric(x.nbXFRMPolsDesc, prometheus.GaugeValue, float64(nbPolOut), labelDirOut)
ch <- prometheus.MustNewConstMetric(x.nbXFRMPolsDesc, prometheus.GaugeValue, float64(nbPolFwd), labelDirFwd)
}

func (x *xfrmCollector) Collect(ch chan<- prometheus.Metric) {
Expand Down

0 comments on commit 6db3b8f

Please sign in to comment.