Skip to content

Commit

Permalink
ipsec: Expose XFRM config counts as metrics
Browse files Browse the repository at this point in the history
This commit adds new Prometheus metrics for the number of XFRM states
and policies for each direction. These can be used to alert on XFRM
leaks. We had one such leak in the past for XFRM out policies, which led
to performance degradation when node churn was high.

Signed-off-by: Paul Chaignon <paul.chaignon@gmail.com>
  • Loading branch information
pchaigno committed Oct 9, 2023
1 parent 81bb71e commit 69902f1
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 8 deletions.
28 changes: 23 additions & 5 deletions pkg/datapath/linux/ipsec/xfrm_collector.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,9 @@ const (
)

type xfrmCollector struct {
xfrmStatFunc func() (procfs.XfrmStat, error)
xfrmStateFunc func() ([]netlink.XfrmState, error)
xfrmStatFunc func() (procfs.XfrmStat, error)
xfrmStateFunc func() ([]netlink.XfrmState, error)
xfrmPolicyFunc func() ([]netlink.XfrmPolicy, error)

// XFRM errors
xfrmErrorDesc *prometheus.Desc
Expand All @@ -62,13 +63,16 @@ type xfrmCollector struct {
// NewXFRMCollector builds the XFRM prometheus.Collector using the default
// data sources: procfs.NewXfrmStat for the XFRM statistics, and the netlink
// XfrmStateList / XfrmPolicyList calls (with netlink.FAMILY_ALL) for the
// configured XFRM states and policies.
func NewXFRMCollector() prometheus.Collector {
	// Source of the XFRM states, queried for all address families.
	listStates := func() ([]netlink.XfrmState, error) {
		return netlink.XfrmStateList(netlink.FAMILY_ALL)
	}
	// Source of the XFRM policies, queried for all address families.
	listPolicies := func() ([]netlink.XfrmPolicy, error) {
		return netlink.XfrmPolicyList(netlink.FAMILY_ALL)
	}
	return newXFRMCollector(procfs.NewXfrmStat, listStates, listPolicies)
}

func newXFRMCollector(statFn func() (procfs.XfrmStat, error), xfrmStateFn func() ([]netlink.XfrmState, error)) prometheus.Collector {
func newXFRMCollector(statFn func() (procfs.XfrmStat, error), xfrmStateFn func() ([]netlink.XfrmState, error), xfrmPolicyFn func() ([]netlink.XfrmPolicy, error)) prometheus.Collector {
return &xfrmCollector{
xfrmStatFunc: statFn,
xfrmStateFunc: xfrmStateFn,
xfrmStatFunc: statFn,
xfrmStateFunc: xfrmStateFn,
xfrmPolicyFunc: xfrmPolicyFn,

xfrmErrorDesc: prometheus.NewDesc(
prometheus.BuildFQName(metrics.Namespace, subsystem, "xfrm_error"),
Expand Down Expand Up @@ -134,6 +138,20 @@ func (x *xfrmCollector) collectConfigStats(ch chan<- prometheus.Metric) {
}
nbKeys := ipsec.CountUniqueIPsecKeys(states)
ch <- prometheus.MustNewConstMetric(x.xfrmConfigStatDesc, prometheus.GaugeValue, float64(nbKeys), labelStatKeys)

nbStatesIn, nbStatesOut := ipsec.CountXfrmStatesByDir(states)
ch <- prometheus.MustNewConstMetric(x.xfrmConfigStatDesc, prometheus.GaugeValue, float64(nbStatesIn), labelStatStateIn)
ch <- prometheus.MustNewConstMetric(x.xfrmConfigStatDesc, prometheus.GaugeValue, float64(nbStatesOut), labelStatStateOut)

policies, err := x.xfrmPolicyFunc()
if err != nil {
log.WithError(err).Error("Error while getting XFRM policies")
return
}
nbPolIn, nbPolOut, nbPolFwd := ipsec.CountXfrmPoliciesByDir(policies)
ch <- prometheus.MustNewConstMetric(x.xfrmConfigStatDesc, prometheus.GaugeValue, float64(nbPolIn), labelStatPolIn)
ch <- prometheus.MustNewConstMetric(x.xfrmConfigStatDesc, prometheus.GaugeValue, float64(nbPolOut), labelStatPolOut)
ch <- prometheus.MustNewConstMetric(x.xfrmConfigStatDesc, prometheus.GaugeValue, float64(nbPolFwd), labelStatePolFwd)
}

func (x *xfrmCollector) Collect(ch chan<- prometheus.Metric) {
Expand Down
22 changes: 19 additions & 3 deletions pkg/datapath/linux/ipsec/xfrm_collector_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,11 @@ cilium_ipsec_xfrm_error{error="template_mismatched",type="inbound"} 0
# HELP cilium_ipsec_xfrm_stats Statistics on IPsec configuration
# TYPE cilium_ipsec_xfrm_stats gauge
cilium_ipsec_xfrm_stats{item="keys"} 0
cilium_ipsec_xfrm_stats{item="policy_fwd"} 0
cilium_ipsec_xfrm_stats{item="policy_in"} 0
cilium_ipsec_xfrm_stats{item="policy_out"} 0
cilium_ipsec_xfrm_stats{item="state_in"} 0
cilium_ipsec_xfrm_stats{item="state_out"} 0
`
someErrorMetric = `
# HELP cilium_ipsec_xfrm_error Total number of xfrm errors
Expand Down Expand Up @@ -118,6 +123,11 @@ cilium_ipsec_xfrm_error{error="template_mismatched",type="inbound"} 11
# HELP cilium_ipsec_xfrm_stats Statistics on IPsec configuration
# TYPE cilium_ipsec_xfrm_stats gauge
cilium_ipsec_xfrm_stats{item="keys"} 0
cilium_ipsec_xfrm_stats{item="policy_fwd"} 0
cilium_ipsec_xfrm_stats{item="policy_in"} 0
cilium_ipsec_xfrm_stats{item="policy_out"} 0
cilium_ipsec_xfrm_stats{item="state_in"} 0
cilium_ipsec_xfrm_stats{item="state_out"} 0
`
)

Expand All @@ -126,6 +136,7 @@ func (x *XFRMCollectorTest) Test_xfrmCollector_Collect(c *C) {
name string
statsFn func() (procfs.XfrmStat, error)
xfrmStateFn func() ([]netlink.XfrmState, error)
xfrmPolicyFn func() ([]netlink.XfrmPolicy, error)
expectedMetric string
expectedCount int
}{
Expand All @@ -137,6 +148,9 @@ func (x *XFRMCollectorTest) Test_xfrmCollector_Collect(c *C) {
xfrmStateFn: func() ([]netlink.XfrmState, error) {
return nil, fmt.Errorf("error due to some reason")
},
xfrmPolicyFn: func() ([]netlink.XfrmPolicy, error) {
return nil, fmt.Errorf("error due to some reason")
},
expectedCount: 0,
expectedMetric: "",
},
Expand All @@ -146,7 +160,8 @@ func (x *XFRMCollectorTest) Test_xfrmCollector_Collect(c *C) {
return procfs.XfrmStat{}, nil
},
xfrmStateFn: func() ([]netlink.XfrmState, error) { return nil, nil },
expectedCount: 29,
xfrmPolicyFn: func() ([]netlink.XfrmPolicy, error) { return nil, nil },
expectedCount: 34,
expectedMetric: noErrorMetric,
},
{
Expand All @@ -155,14 +170,15 @@ func (x *XFRMCollectorTest) Test_xfrmCollector_Collect(c *C) {
return sampleStats, nil
},
xfrmStateFn: func() ([]netlink.XfrmState, error) { return nil, nil },
expectedCount: 29,
xfrmPolicyFn: func() ([]netlink.XfrmPolicy, error) { return nil, nil },
expectedCount: 34,
expectedMetric: someErrorMetric,
},
}

for _, tt := range tests {
c.Log("Test : ", tt.name)
collector := newXFRMCollector(tt.statsFn, tt.xfrmStateFn)
collector := newXFRMCollector(tt.statsFn, tt.xfrmStateFn, tt.xfrmPolicyFn)

// perform static checks such as prometheus naming convention, number of labels matching, etc
lintProblems, err := testutil.CollectAndLint(collector)
Expand Down

0 comments on commit 69902f1

Please sign in to comment.