Skip to content

Commit

Permalink
update status metric
Browse files Browse the repository at this point in the history
todo: update docs and tests after initial testing

Signed-off-by: jshr-w <shjayaraman@microsoft.com>
  • Loading branch information
jshr-w committed Jun 18, 2024
1 parent 3a934d5 commit 2633c20
Show file tree
Hide file tree
Showing 4 changed files with 43 additions and 24 deletions.
27 changes: 11 additions & 16 deletions pkg/health/client/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -164,19 +164,12 @@ func GetPathConnectivityStatusType(cp *models.PathStatus) ConnectivityStatusType
return status
}

func SummarizePathConnectivityStatusType(cps []*models.PathStatus) ConnectivityStatusType {
status := ConnStatusReachable
// SummarizePathConnectivityStatusType returns an array, indexed by ConnectivityStatusType, with [# of reachable paths, # of unreachable paths, # of unknown paths].
func SummarizePathConnectivityStatusType(cps []*models.PathStatus) [3]int {
status := [3]int{0, 0, 0}
for _, cp := range cps {
switch GetPathConnectivityStatusType(cp) {
case ConnStatusUnreachable:
// If any status is unreachable, return it immediately.
return ConnStatusUnreachable
case ConnStatusUnknown:
// If the status is unknown, prepare to return it. It's
// going to be returned if there is no unreachable
// status in next iterations.
status = ConnStatusUnknown
}
cst := GetPathConnectivityStatusType(cp)
status[cst]++
}
return status
}
Expand Down Expand Up @@ -324,10 +317,12 @@ func formatNodeStatus(w io.Writer, node *models.NodeStatus, printAll, succinct,
}
ips = append(ips, addr.IP)
}
fmt.Fprintf(w, " %s%s\t%s\t%s\t%s\n", node.Name,
localStr, strings.Join(ips, ","),
SummarizePathConnectivityStatusType(GetAllHostAddresses(node)).String(),
SummarizePathConnectivityStatusType(GetAllEndpointAddresses(node)).String())
hostStatuses := SummarizePathConnectivityStatusType(GetAllHostAddresses(node))
endpointStatuses := SummarizePathConnectivityStatusType(GetAllEndpointAddresses(node))
fmt.Fprintf(w, " %s%s\t%s\t%s:Host %d/%d\tEP %d/%d\n", node.Name,
localStr, strings.Join(ips, ","), ConnStatusReachable,
hostStatuses[ConnStatusReachable], len(GetAllHostAddresses(node)),
endpointStatuses[ConnStatusReachable], len(GetAllEndpointAddresses(node)))
}
} else {
fmt.Fprintf(w, " %s%s:\n", node.Name, localStr)
Expand Down
32 changes: 24 additions & 8 deletions pkg/health/server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -195,18 +195,34 @@ func (s *Server) collectNodeConnectivityMetrics() {
nodePathSecondaryAddress := healthClientPkg.GetHostSecondaryAddresses(n)

endpointPathStatus := n.HealthEndpoint
isEndpointReachable := healthClientPkg.SummarizePathConnectivityStatusType(healthClientPkg.GetAllEndpointAddresses(n)) == healthClientPkg.ConnStatusReachable
isNodeReachable := healthClientPkg.SummarizePathConnectivityStatusType(healthClientPkg.GetAllHostAddresses(n)) == healthClientPkg.ConnStatusReachable
isEndpointReachable := healthClientPkg.SummarizePathConnectivityStatusType(healthClientPkg.GetAllEndpointAddresses(n))
isNodeReachable := healthClientPkg.SummarizePathConnectivityStatusType(healthClientPkg.GetAllHostAddresses(n))

// Aggregated status for endpoint connectivity
// Aggregated statuses for endpoint connectivity
metrics.NodeConnectivityStatus.WithLabelValues(
localClusterName, localNodeName, metrics.LabelPeerEndpoint).
Set(metrics.BoolToFloat64(isEndpointReachable))
localClusterName, localNodeName, metrics.LabelPeerEndpoint, metrics.LabelReachable).
Set(float64(isEndpointReachable[healthClientPkg.ConnStatusReachable]))

// Aggregated status for node connectivity
metrics.NodeConnectivityStatus.WithLabelValues(
localClusterName, localNodeName, metrics.LabelPeerNode).
Set(metrics.BoolToFloat64(isNodeReachable))
localClusterName, localNodeName, metrics.LabelPeerEndpoint, metrics.LabelUnreachable).
Set(float64(isEndpointReachable[healthClientPkg.ConnStatusUnreachable]))

// Unknown endpoint paths get their own status label; publishing them under
// LabelUnreachable would overwrite the unreachable count set just above.
metrics.NodeConnectivityStatus.WithLabelValues(
localClusterName, localNodeName, metrics.LabelPeerEndpoint, metrics.LabelUnknown).
Set(float64(isEndpointReachable[healthClientPkg.ConnStatusUnknown]))

// Aggregated statuses for node connectivity. These use LabelPeerNode so the
// node counts do not overwrite the endpoint series published above.
metrics.NodeConnectivityStatus.WithLabelValues(
localClusterName, localNodeName, metrics.LabelPeerNode, metrics.LabelReachable).
Set(float64(isNodeReachable[healthClientPkg.ConnStatusReachable]))

metrics.NodeConnectivityStatus.WithLabelValues(
localClusterName, localNodeName, metrics.LabelPeerNode, metrics.LabelUnreachable).
Set(float64(isNodeReachable[healthClientPkg.ConnStatusUnreachable]))

metrics.NodeConnectivityStatus.WithLabelValues(
localClusterName, localNodeName, metrics.LabelPeerNode, metrics.LabelUnknown).
Set(float64(isNodeReachable[healthClientPkg.ConnStatusUnknown]))

// HTTP endpoint primary
collectConnectivityMetric(endpointPathStatus.PrimaryAddress.HTTP, localClusterName, localNodeName,
Expand Down
1 change: 1 addition & 0 deletions pkg/health/server/server_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import (
"github.com/cilium/cilium/pkg/metrics/metric"
)

// TODO(jshr-w): update this fixture and the tests that use it for the per-status connectivity metric.
var sampleSingleClusterConnectivity = &healthReport{
nodes: []*healthModels.NodeStatus{
{
Expand Down
7 changes: 7 additions & 0 deletions pkg/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,12 @@ const (
LabelAddressType = "address_type"
LabelAddressTypePrimary = "primary"
LabelAddressTypeSecondary = "secondary"

// LabelConnectivityStatus is the metric label for the connectivity status; its value is one of LabelReachable, LabelUnreachable or LabelUnknown.
LabelConnectivityStatus = "connectivity_status"
LabelReachable = "reachable"
LabelUnreachable = "unreachable"
LabelUnknown = "unknown"
)

var (
Expand Down Expand Up @@ -1304,6 +1310,7 @@ func NewLegacyMetrics() *LegacyMetrics {
LabelSourceCluster,
LabelSourceNodeName,
LabelType,
LabelConnectivityStatus,
}),

NodeConnectivityLatency: metric.NewHistogramVec(metric.HistogramOpts{
Expand Down

0 comments on commit 2633c20

Please sign in to comment.