forked from kiali/kiali
/
health.go
126 lines (106 loc) · 4.19 KB
/
health.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
package models
import (
"regexp"
"github.com/prometheus/common/model"
)
// NamespaceAppHealth maps an app name to that app's health.
// It is a distinct map type (not a Go type alias) whose values are pointers,
// so a lookup for a missing app name yields nil.
type NamespaceAppHealth map[string]*AppHealth
// NamespaceServiceHealth maps a service name to that service's health.
// It is a distinct map type (not a Go type alias) whose values are pointers,
// so a lookup for a missing service name yields nil.
type NamespaceServiceHealth map[string]*ServiceHealth
// NamespaceWorkloadHealth maps a workload name to that workload's health.
// It is a distinct map type (not a Go type alias) whose values are pointers,
// so a lookup for a missing workload name yields nil.
type NamespaceWorkloadHealth map[string]*WorkloadHealth
// ServiceHealth contains aggregated health from various sources, for a given service.
type ServiceHealth struct {
// Requests holds the request error statistics; serialized under the JSON key "requests".
Requests RequestHealth `json:"requests"`
}
// AppHealth contains aggregated health from various sources, for a given app.
type AppHealth struct {
// WorkloadStatuses holds the replica status entries reported for the app
// (presumably one per workload backing it; confirm against the caller).
// Serialized under the JSON key "workloadStatuses".
WorkloadStatuses []WorkloadStatus `json:"workloadStatuses"`
// Requests holds the request error statistics; serialized under the JSON key "requests".
Requests RequestHealth `json:"requests"`
}
// Patterns used to decide whether the status label of a metric sample denotes
// an error. They are compiled once at package initialization; MustCompile is
// used so that an invalid pattern fails loudly at startup instead of leaving a
// nil *regexp.Regexp behind that would panic on first use.
var (
	// grpcErrorRegexp matches the gRPC status codes 1 through 16.
	grpcErrorRegexp = regexp.MustCompile(`^[1-9]$|^1[0-6]$`)
	// httpErrorRegexp matches the HTTP status codes 4xx and 5xx.
	httpErrorRegexp = regexp.MustCompile(`^[4-5]\d\d$`)
)
// NewEmptyRequestHealth returns a RequestHealth into which no sample has been
// aggregated yet: the internal rate sums are zero and every error ratio is -1.
func NewEmptyRequestHealth() RequestHealth {
	// -1 is the value the ratios carry while there is no request traffic.
	const noData = -1

	var health RequestHealth
	health.ErrorRatio = noData
	health.InboundErrorRatio = noData
	health.OutboundErrorRatio = noData
	return health
}
// EmptyAppHealth returns an AppHealth that has no workload statuses and whose
// request health is in its empty state.
func EmptyAppHealth() AppHealth {
	health := AppHealth{Requests: NewEmptyRequestHealth()}
	// Non-nil, zero-length slice: encoding/json writes it as [] rather than null.
	health.WorkloadStatuses = make([]WorkloadStatus, 0)
	return health
}
// EmptyServiceHealth returns a ServiceHealth whose request health is in its
// empty state.
func EmptyServiceHealth() ServiceHealth {
	var health ServiceHealth
	health.Requests = NewEmptyRequestHealth()
	return health
}
// WorkloadHealth contains aggregated health from various sources, for a given workload.
type WorkloadHealth struct {
// WorkloadStatus holds the workload's replica counts; serialized under the JSON key "workloadStatus".
WorkloadStatus WorkloadStatus `json:"workloadStatus"`
// Requests holds the request error statistics; serialized under the JSON key "requests".
Requests RequestHealth `json:"requests"`
}
// WorkloadStatus gives
// - the number of desired replicas defined in the Spec of a controller
// - the number of current replicas that match the selector of a controller
// - the number of available replicas for a given workload
// In healthy scenarios all three values should be equal.
// When something goes wrong, differing values can indicate an unhealthy situation.
// For example:
// desired = 1, current = 10, available = 0 would mean that a user scaled a workload down from 10 to 1,
// but during the operation the 10 pods showed problems, so no pod is available/ready
// while the user still sees 10 pods under the workload.
type WorkloadStatus struct {
// Name is serialized under the JSON key "name" (presumably the workload's name; confirm against the caller).
Name string `json:"name"`
// DesiredReplicas is the replica count requested in the controller's Spec.
DesiredReplicas int32 `json:"desiredReplicas"`
// CurrentReplicas is the number of replicas currently matching the controller's selector.
CurrentReplicas int32 `json:"currentReplicas"`
// AvailableReplicas is the number of replicas that are available.
AvailableReplicas int32 `json:"availableReplicas"`
}
// RequestHealth holds several stats about recent request errors.
//
// The unexported fields are running sums fed by AggregateInbound and
// AggregateOutbound; being unexported, they are not part of the JSON output.
// The exported ratios are recomputed from those sums on every aggregation and
// are -1 while the corresponding request sum is zero.
type RequestHealth struct {
// inboundErrorRate is the sum of the values of inbound samples classified as errors.
inboundErrorRate float64
// outboundErrorRate is the sum of the values of outbound samples classified as errors.
outboundErrorRate float64
// inboundRequestRate is the sum of the values of all inbound samples.
inboundRequestRate float64
// outboundRequestRate is the sum of the values of all outbound samples.
outboundRequestRate float64
// ErrorRatio is (inbound + outbound errors) / (inbound + outbound requests), or -1 without requests.
ErrorRatio float64 `json:"errorRatio"`
// InboundErrorRatio is inbound errors / inbound requests, or -1 without inbound requests.
InboundErrorRatio float64 `json:"inboundErrorRatio"`
// OutboundErrorRatio is outbound errors / outbound requests, or -1 without outbound requests.
OutboundErrorRatio float64 `json:"outboundErrorRatio"`
}
// AggregateInbound folds the given metric sample into the inbound request and
// error sums, then refreshes the inbound error ratio and the global error ratio.
func (in *RequestHealth) AggregateInbound(sample *model.Sample) {
	requests, failures, ratio := &in.inboundRequestRate, &in.inboundErrorRate, &in.InboundErrorRatio
	aggregate(sample, requests, failures, ratio)
	in.updateGlobalErrorRatio()
}
// AggregateOutbound folds the given metric sample into the outbound request and
// error sums, then refreshes the outbound error ratio and the global error ratio.
func (in *RequestHealth) AggregateOutbound(sample *model.Sample) {
	requests, failures, ratio := &in.outboundRequestRate, &in.outboundErrorRate, &in.OutboundErrorRatio
	aggregate(sample, requests, failures, ratio)
	in.updateGlobalErrorRatio()
}
// updateGlobalErrorRatio recomputes ErrorRatio from the combined inbound and
// outbound sums. ErrorRatio is set to -1 when the combined request sum is zero.
func (in *RequestHealth) updateGlobalErrorRatio() {
	totalRequests := in.inboundRequestRate + in.outboundRequestRate
	if totalRequests == 0 {
		in.ErrorRatio = -1
		return
	}
	totalErrors := in.inboundErrorRate + in.outboundErrorRate
	in.ErrorRatio = totalErrors / totalRequests
}
// aggregate adds the sample's value to *requestRate, also adds it to *errorRate
// when the sample's status label denotes an error, and then recomputes
// *errorRatio from the updated sums.
//
// When the "request_protocol" label is "grpc", the status is read from the
// "grpc_response_status" label and checked against grpcErrorRegexp; otherwise
// it is read from "response_code" and checked against httpErrorRegexp.
//
// *errorRatio is set to -1 while *requestRate is zero. A nil sample is ignored
// and leaves all three values untouched.
func aggregate(sample *model.Sample, requestRate, errorRate, errorRatio *float64) {
	if sample == nil {
		// Nothing to add; avoid dereferencing a nil sample.
		return
	}

	rate := float64(sample.Value)
	*requestRate += rate

	// The pattern is deliberately not named "regexp", which would shadow the
	// imported regexp package for the rest of the function.
	responseCode, errorPattern := sample.Metric["response_code"], httpErrorRegexp
	if string(sample.Metric["request_protocol"]) == "grpc" {
		responseCode, errorPattern = sample.Metric["grpc_response_status"], grpcErrorRegexp
	}
	if errorPattern.MatchString(string(responseCode)) {
		*errorRate += rate
	}

	if *requestRate == 0 {
		*errorRatio = -1
		return
	}
	*errorRatio = *errorRate / *requestRate
}