-
Notifications
You must be signed in to change notification settings - Fork 2.7k
/
metrics.go
155 lines (136 loc) · 4.27 KB
/
metrics.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
// SPDX-License-Identifier: Apache-2.0
// Copyright Authors of Cilium
package metrics
import (
"context"
"net/netip"
"strconv"
"github.com/prometheus/client_golang/prometheus"
"github.com/sirupsen/logrus"
"github.com/cilium/cilium/pkg/bgpv1/agent"
"github.com/cilium/cilium/pkg/bgpv1/types"
"github.com/cilium/cilium/pkg/hive/cell"
"github.com/cilium/cilium/pkg/metrics"
"github.com/cilium/cilium/pkg/option"
"github.com/cilium/cilium/pkg/time"
)
// Label names attached to the BGP Control Plane metrics, plus the subsystem
// component used when building the fully-qualified metric names.
const (
// LabelVRouter is the label carrying the local ASN of the peer's router,
// formatted as a base-10 string.
LabelVRouter = "vrouter"
// LabelNeighbor is the label carrying the peer's "address:port" string.
LabelNeighbor = "neighbor"
// LabelAfi is the label carrying the Afi value of a peer's address family.
LabelAfi = "afi"
// LabelSafi is the label carrying the Safi value of a peer's address family.
LabelSafi = "safi"
// metricsSubsystem is the subsystem passed to prometheus.BuildFQName for
// every metric described in this file.
metricsSubsystem = "bgp_control_plane"
)
// collector holds the descriptors of the BGP Control Plane metrics together
// with the dependencies needed to gather them. Its Describe method sends the
// descriptors and its Collect method sends the per-peer metric values.
type collector struct {
// SessionState describes the per-peer session state gauge (Up = 1, Down = 0).
SessionState *prometheus.Desc
// TotalAdvertisedRoutes describes the per-peer, per-address-family count of
// routes advertised to the peer.
TotalAdvertisedRoutes *prometheus.Desc
// TotalReceivedRoutes describes the per-peer, per-address-family count of
// routes received from the peer.
TotalReceivedRoutes *prometheus.Desc
// in carries the logger, configuration, registry and router manager that
// were handed to RegisterCollector.
in collectorIn
}
// collectorIn bundles the dependencies consumed by RegisterCollector and by
// the collector it registers.
type collectorIn struct {
// NOTE(review): embedding cell.In presumably marks this as a hive parameter
// struct whose fields are injected — confirm against pkg/hive/cell.
cell.In
// Logger reports failures encountered while collecting metrics.
Logger logrus.FieldLogger
// DaemonConfig is consulted for EnableBGPControlPlane before registering.
DaemonConfig *option.DaemonConfig
// Registry is the metrics registry the collector gets registered with.
Registry *metrics.Registry
// RouterManager is queried through GetPeers on every collection.
RouterManager agent.BGPRouterManager
}
// RegisterCollector installs the BGP Control Plane metrics collector into the
// metrics registry supplied through in. It does nothing when the BGP Control
// Plane is disabled in the daemon configuration.
//
// cell.Metric is intentionally not used here: collectors provided through
// cell.Metric need to implement prometheus.Collector per metric, which is not
// optimal for this case. All metrics can be derived from a single call to
// RouterManager.GetPeers(), and calling it once per metric would be wasteful.
// A raw Collector is therefore registered through MustRegister instead. This
// may be worth revisiting in the future.
func RegisterCollector(in collectorIn) {
	// No collector is provided while the BGP control plane is turned off.
	if !in.DaemonConfig.EnableBGPControlPlane {
		return
	}

	// describe builds a descriptor that shares the common namespace and
	// subsystem and carries no constant labels.
	describe := func(name, help string, labels ...string) *prometheus.Desc {
		fqName := prometheus.BuildFQName(metrics.Namespace, metricsSubsystem, name)
		return prometheus.NewDesc(fqName, help, labels, nil)
	}

	bgpCollector := &collector{in: in}
	bgpCollector.SessionState = describe(
		"session_state",
		"Current state of the BGP session with the peer, Up = 1 or Down = 0",
		LabelVRouter, LabelNeighbor,
	)
	bgpCollector.TotalAdvertisedRoutes = describe(
		"advertised_routes",
		"Number of routes advertised to the peer",
		LabelVRouter, LabelNeighbor, LabelAfi, LabelSafi,
	)
	bgpCollector.TotalReceivedRoutes = describe(
		"received_routes",
		"Number of routes received from the peer",
		LabelVRouter, LabelNeighbor, LabelAfi, LabelSafi,
	)

	in.Registry.MustRegister(bgpCollector)
}
// Describe sends the descriptor of every metric this collector can emit on
// ch, in the order session state, advertised routes, received routes.
func (c *collector) Describe(ch chan<- *prometheus.Desc) {
	descriptors := []*prometheus.Desc{
		c.SessionState,
		c.TotalAdvertisedRoutes,
		c.TotalReceivedRoutes,
	}
	for _, desc := range descriptors {
		ch <- desc
	}
}
// Collect queries the router manager for the current BGP peers and sends, for
// each non-nil peer, one session state gauge plus one advertised-routes and
// one received-routes gauge per non-nil address family on ch.
//
// If the peers cannot be retrieved, an error is logged and nothing is sent.
// A peer whose address cannot be parsed is skipped with a warning so that the
// missing series can be diagnosed from the logs.
func (c *collector) Collect(ch chan<- prometheus.Metric) {
	// We defensively set a 5 sec timeout here. When the underlying router
	// is not responsive, we cannot make progress. 5 sec is chosen to be
	// long enough that it should never be hit in normal cases. We should
	// revisit this timeout when the metrics collection starts to involve
	// network communication.
	ctx, cancel := context.WithTimeout(context.Background(), time.Second*5)
	peers, err := c.in.RouterManager.GetPeers(ctx)
	// Release the context right away; it is not needed while sending metrics.
	cancel()
	if err != nil {
		c.in.Logger.WithError(err).Error("Failed to retrieve BGP peer information. Metrics is not collected.")
		return
	}

	for _, peer := range peers {
		if peer == nil {
			continue
		}

		vrouterLabel := strconv.FormatInt(peer.LocalAsn, 10)

		addr, err := netip.ParseAddr(peer.PeerAddress)
		if err != nil {
			// Without a valid address the neighbor label cannot be built, so
			// the peer is skipped. Report it instead of dropping it silently.
			c.in.Logger.
				WithError(err).
				WithField(LabelVRouter, vrouterLabel).
				WithField("peerAddress", peer.PeerAddress).
				Warn("Failed to parse BGP peer address. Metrics for this peer are not collected.")
			continue
		}

		// NOTE(review): the conversion truncates if PeerPort is outside the
		// uint16 range — confirm the value is validated upstream.
		neighborLabel := netip.AddrPortFrom(addr, uint16(peer.PeerPort)).String()

		// Collect session state metrics. The zero value already encodes
		// "down"; only an established session is reported as up.
		var up float64
		if peer.SessionState == types.SessionEstablished.String() {
			up = 1
		}

		ch <- prometheus.MustNewConstMetric(
			c.SessionState,
			prometheus.GaugeValue,
			up,
			vrouterLabel,
			neighborLabel,
		)

		// Collect route metrics per address family
		for _, family := range peer.Families {
			if family == nil {
				continue
			}

			ch <- prometheus.MustNewConstMetric(
				c.TotalAdvertisedRoutes,
				prometheus.GaugeValue,
				float64(family.Advertised),
				vrouterLabel,
				neighborLabel,
				family.Afi,
				family.Safi,
			)

			ch <- prometheus.MustNewConstMetric(
				c.TotalReceivedRoutes,
				prometheus.GaugeValue,
				float64(family.Received),
				vrouterLabel,
				neighborLabel,
				family.Afi,
				family.Safi,
			)
		}
	}
}