Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ipam: when a CiliumNode is removed, delete node label from metrics. #27713

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
10 changes: 10 additions & 0 deletions pkg/ipam/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,15 @@ func (p *prometheusMetrics) SetIPNeeded(node string, usage int) {
p.NeededIPs.WithLabelValues(node).Set(float64(usage))
}

// DeleteNode removes all per-node metrics for a particular node (i.e. those labeled with "target_node").
// This is to ensure that when a Node/CiliumNode delete event happens that the operator will no longer report
// metrics for that node.
func (p *prometheusMetrics) DeleteNode(node string) {
p.AvailableIPs.DeleteLabelValues(node)
p.UsedIPs.DeleteLabelValues(node)
p.NeededIPs.DeleteLabelValues(node)
}

type triggerMetrics struct {
total prometheus.Counter
folds prometheus.Gauge
Expand Down Expand Up @@ -349,6 +358,7 @@ func (m *NoOpMetrics) SetIPNeeded(node string, n int)
func (m *NoOpMetrics) PoolMaintainerTrigger() trigger.MetricsObserver { return &NoOpMetricsObserver{} }
func (m *NoOpMetrics) K8sSyncTrigger() trigger.MetricsObserver { return &NoOpMetricsObserver{} }
func (m *NoOpMetrics) ResyncTrigger() trigger.MetricsObserver { return &NoOpMetricsObserver{} }
func (m *NoOpMetrics) DeleteNode(n string) {}

func merge(slices ...[]float64) []float64 {
result := make([]float64, 1)
Expand Down
24 changes: 24 additions & 0 deletions pkg/ipam/metrics/mock/mock.go
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,30 @@ func (m *mockMetrics) SetIPNeeded(s string, n int) {
m.mutex.Unlock()
}

func (m *mockMetrics) GetPerNodeMetrics(n string) (*int, *int, *int) {
m.mutex.Lock()
defer m.mutex.Unlock()
var avail, used, needed *int
if c, ok := m.nodeIPAvailable[n]; ok {
avail = &c
}
if c, ok := m.nodeIPUsed[n]; ok {
used = &c
}
if c, ok := m.nodeIPNeeded[n]; ok {
needed = &c
}
return avail, used, needed
}

func (m *mockMetrics) DeleteNode(n string) {
m.mutex.Lock()
defer m.mutex.Unlock()
delete(m.nodeIPAvailable, n)
delete(m.nodeIPUsed, n)
delete(m.nodeIPNeeded, n)
}

func (m *mockMetrics) PoolMaintainerTrigger() trigger.MetricsObserver {
return nil
}
Expand Down
4 changes: 4 additions & 0 deletions pkg/ipam/node_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,7 @@ type MetricsNodeAPI interface {
SetIPAvailable(node string, cap int)
SetIPUsed(node string, used int)
SetIPNeeded(node string, needed int)
DeleteNode(node string)
}

// nodeMap is a mapping of node names to ENI nodes
Expand Down Expand Up @@ -383,6 +384,9 @@ func (n *NodeManager) Delete(resource *v2.CiliumNode) {
n.mutex.Lock()

if node, ok := n.nodes[resource.Name]; ok {
// Stop target_node metrics related to this node being emitted.
n.metricsAPI.DeleteNode(node.name)

if node.poolMaintainer != nil {
node.poolMaintainer.Shutdown()
}
Expand Down
32 changes: 30 additions & 2 deletions pkg/ipam/node_manager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -207,8 +207,6 @@ func (e *IPAMSuite) TestNodeManagerGet(c *check.C) {
c.Assert(err, check.IsNil)
c.Assert(mngr, check.Not(check.IsNil))

// instances.Resync(context.TODO())

node1 := newCiliumNode("node1", 0, 0, 0)
mngr.Upsert(node1)

Expand All @@ -220,6 +218,36 @@ func (e *IPAMSuite) TestNodeManagerGet(c *check.C) {
c.Assert(mngr.Get("node2"), check.IsNil)
}

func (e *IPAMSuite) TestNodeManagerDelete(c *check.C) {
am := newAllocationImplementationMock()
c.Assert(am, check.Not(check.IsNil))
metrics := metricsmock.NewMockMetrics()
mngr, err := NewNodeManager(am, k8sapi, metrics, 10, false, false)
c.Assert(err, check.IsNil)
c.Assert(mngr, check.Not(check.IsNil))

node1 := newCiliumNode("node-foo", 0, 0, 0)
mngr.Upsert(node1)

c.Assert(mngr.Get("node-foo"), check.Not(check.IsNil))
c.Assert(mngr.Get("node2"), check.IsNil)

mngr.Resync(context.Background(), time.Now())
avail, used, needed := metrics.GetPerNodeMetrics("node-foo")
c.Assert(avail, check.Not(check.IsNil))
c.Assert(used, check.Not(check.IsNil))
c.Assert(needed, check.Not(check.IsNil))
mngr.Delete(node1)
// Following a node Delete, we expect the per-node metrics for that Node to be
// deleted.
avail, used, needed = metrics.GetPerNodeMetrics("node-foo")
c.Assert(avail, check.IsNil)
c.Assert(used, check.IsNil)
c.Assert(needed, check.IsNil)
c.Assert(mngr.Get("node-foo"), check.IsNil)
c.Assert(mngr.Get("node2"), check.IsNil)
}

type k8sMock struct{}

func (k *k8sMock) Update(origNode, orig *v2.CiliumNode) (*v2.CiliumNode, error) {
Expand Down