-
Notifications
You must be signed in to change notification settings - Fork 2
/
custom_metrics.go
186 lines (160 loc) · 7.85 KB
/
custom_metrics.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
package metrics
import (
"time"
"github.com/equinor/radix-operator/pkg/apis/utils"
"github.com/equinor/radix-operator/pkg/apis/defaults"
v1 "github.com/equinor/radix-operator/pkg/apis/radix/v1"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
)
// Prometheus collectors published by this package. Every collector except
// recTimeBucket is created through promauto; recTimeBucket is created with
// prometheus.NewHistogramVec and registered separately in init().
var (
// nrCrQueued counts custom resource queue events per "cr_type". The
// CustomResourceAdded*/CustomResourceUpdated* functions in this file increment
// it with "operation" set to "add" or "update" and with "skipped"/"requeued"
// set to "true" or "false".
nrCrQueued = promauto.NewCounterVec(prometheus.CounterOpts{
Name: "radix_operator_cr_queued",
Help: "The total number of radix custom resources added, updated or deleted in queue",
}, []string{"cr_type", "operation", "skipped", "requeued"})
// nrCrDeleted counts deleted custom resources per "cr_type"; incremented by
// CustomResourceDeleted.
nrCrDeleted = promauto.NewCounterVec(prometheus.CounterOpts{
Name: "radix_operator_cr_deleted",
Help: "The total number of radix custom resources deleted",
}, []string{"cr_type"})
// nrErrors counts operator errors per "cr_type", "err_type" and "method";
// incremented by OperatorError.
nrErrors = promauto.NewCounterVec(prometheus.CounterOpts{
Name: "radix_operator_errors",
Help: "The total number of radix operator errors",
}, []string{"cr_type", "err_type", "method"})
// nrCrDeQueued counts custom resources removed from the queue per "cr_type";
// incremented by CustomResourceRemovedFromQueue.
nrCrDeQueued = promauto.NewCounterVec(prometheus.CounterOpts{
Name: "radix_operator_cr_de_queued",
Help: "The total number of radix custom resources removed from queue",
}, []string{"cr_type"})
// recTimeBucket is a histogram of reconciliation durations per "cr_type",
// bucketed by DefaultBuckets(). AddDurationOfReconciliation observes values in
// seconds. It is not created via promauto, so init() registers it.
recTimeBucket = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Name: "radix_operator_reconciliation_duration_seconds_hist",
Help: "Request duration seconds bucket",
Buckets: DefaultBuckets(),
},
[]string{"cr_type"},
)
// radixRequestedCPU is set by RequestedResources to the requested CPU of a
// component, expressed via Quantity.MilliValue().
radixRequestedCPU = promauto.NewGaugeVec(prometheus.GaugeOpts{
Name: "radix_operator_requested_cpu",
Help: "Requested cpu in millicore by environment and component",
}, []string{"application", "environment", "component", "wbs"})
// radixRequestedMemory is set by RequestedResources to the requested memory of
// a component, expressed via Quantity.ScaledValue(resource.Mega).
radixRequestedMemory = promauto.NewGaugeVec(prometheus.GaugeOpts{
Name: "radix_operator_requested_memory",
Help: "Requested memory in megabyte by environment and component. 1Mi = 1024 * 1024 bytes > 1MB = 1000000 bytes (ref https://simple.wikipedia.org/wiki/Mebibyte)",
}, []string{"application", "environment", "component", "wbs"})
// radixRequestedReplicas is set by RequestedResources to the replica count
// returned by the component's GetNrOfReplicas().
radixRequestedReplicas = promauto.NewGaugeVec(prometheus.GaugeOpts{
Name: "radix_operator_requested_replicas",
Help: "Requested replicas by environment and component",
}, []string{"application", "environment", "component", "wbs"})
// radixJobProcessed counts radix jobs per application, pipeline type, status
// and pipeline image. InitiateRadixJobStatusChanged creates the series with
// Add(0); RadixJobStatusChanged increments the series for the job's current
// status condition.
radixJobProcessed = promauto.NewCounterVec(prometheus.CounterOpts{
Name: "radix_operator_radix_job_processed",
Help: "The number of radix jobs processed with status",
}, []string{"application", "pipeline_type", "status", "pipeline_image"})
)
// init registers recTimeBucket with the default Prometheus registerer. The
// other collectors in this file are built with promauto; recTimeBucket is
// built with prometheus.NewHistogramVec and therefore has to be registered
// explicitly. MustRegister panics if the registration fails.
func init() {
	prometheus.DefaultRegisterer.MustRegister(recTimeBucket)
}
// RequestedResources publishes, for every component in rd, the requested CPU,
// the requested memory and the number of replicas as gauge values labelled
// with application, environment, component and the WBS taken from rr.
//
// Nothing is recorded when rr or rd is nil, or when rd has the condition
// DeploymentInactive. A component that does not carry its own CPU or memory
// request falls back to the operator default for that resource; when no
// default is available either, the zero quantity is reported.
func RequestedResources(rr *v1.RadixRegistration, rd *v1.RadixDeployment) {
	if rr == nil || rd == nil {
		return
	}
	if rd.Status.Condition == v1.DeploymentInactive {
		return
	}

	fallbackCPU := defaults.GetDefaultCPURequest()
	fallbackMemory := defaults.GetDefaultMemoryRequest()

	for _, component := range rd.Spec.Components {
		requests := utils.GetResourceRequirements(&component).Requests
		replicas := float64(component.GetNrOfReplicas())

		// A missing map entry yields the zero Quantity, which is then replaced
		// by the default when one is configured.
		cpu, hasCPU := requests[corev1.ResourceCPU]
		if !hasCPU && fallbackCPU != nil {
			cpu = *fallbackCPU
		}
		memory, hasMemory := requests[corev1.ResourceMemory]
		if !hasMemory && fallbackMemory != nil {
			memory = *fallbackMemory
		}

		// All three gauges share the same label set.
		labels := prometheus.Labels{
			"application": rd.Spec.AppName,
			"environment": rd.Spec.Environment,
			"component":   component.Name,
			"wbs":         rr.Spec.WBS,
		}
		radixRequestedCPU.With(labels).Set(float64(cpu.MilliValue()))
		radixRequestedMemory.With(labels).Set(float64(memory.ScaledValue(resource.Mega)))
		radixRequestedReplicas.With(labels).Set(replicas)
	}
}
// InitiateRadixJobStatusChanged creates the radixJobProcessed series for rj
// with value 0 for each of the statuses JobWaiting, JobQueued, JobRunning,
// JobFailed, JobStopped and JobSucceeded, by adding 0 to every one of them.
// It does nothing when rj is nil.
func InitiateRadixJobStatusChanged(rj *v1.RadixJob) {
	if rj == nil {
		return
	}

	statuses := []string{
		string(v1.JobWaiting),
		string(v1.JobQueued),
		string(v1.JobRunning),
		string(v1.JobFailed),
		string(v1.JobStopped),
		string(v1.JobSucceeded),
	}
	for _, status := range statuses {
		radixJobProcessed.With(prometheus.Labels{
			"application":    rj.Spec.AppName,
			"pipeline_type":  string(rj.Spec.PipeLineType),
			"status":         status,
			"pipeline_image": rj.Spec.PipelineImage,
		}).Add(0)
	}
}
// RadixJobStatusChanged increments the radixJobProcessed counter for rj,
// using the job's current status condition as the "status" label. It does
// nothing when rj is nil.
func RadixJobStatusChanged(rj *v1.RadixJob) {
	if rj == nil {
		return
	}

	labels := prometheus.Labels{
		"application":    rj.Spec.AppName,
		"pipeline_type":  string(rj.Spec.PipeLineType),
		"status":         string(rj.Status.Condition),
		"pipeline_image": rj.Spec.PipelineImage,
	}
	radixJobProcessed.With(labels).Inc()
}
// DefaultBuckets returns the default histogram bucket boundaries. Every call
// returns a freshly allocated slice, so callers may modify the result.
func DefaultBuckets() []float64 {
	buckets := []float64{
		0.03, 0.1, 0.3,
		1, 2, 3, 5, 8,
		15, 23,
	}
	return buckets
}
// CustomResourceAdded Increments metric to count the number of cr added
func CustomResourceAdded(kind string) {
nrCrQueued.With(prometheus.Labels{"cr_type": kind, "operation": "add", "skipped": "false", "requeued": "false"}).Inc()
}
// CustomResourceUpdated Increments metric to count the number of cr updated
func CustomResourceUpdated(kind string) {
nrCrQueued.With(prometheus.Labels{"cr_type": kind, "operation": "update", "skipped": "false", "requeued": "false"}).Inc()
}
// CustomResourceUpdatedAndRequeued Increments metric to count the number of cr updated due to update to child
func CustomResourceUpdatedAndRequeued(kind string) {
nrCrQueued.With(prometheus.Labels{"cr_type": kind, "operation": "update", "skipped": "false", "requeued": "true"}).Inc()
}
// CustomResourceAddedButSkipped Increments metric to count the number of cr added and ignored
func CustomResourceAddedButSkipped(kind string) {
nrCrQueued.With(prometheus.Labels{"cr_type": kind, "operation": "add", "skipped": "true", "requeued": "false"}).Inc()
}
// CustomResourceUpdatedButSkipped Increments metric to count the number of cr updated and ignored
func CustomResourceUpdatedButSkipped(kind string) {
nrCrQueued.With(prometheus.Labels{"cr_type": kind, "operation": "update", "skipped": "true", "requeued": "false"}).Inc()
}
// CustomResourceDeleted Increments metric to count the number of cr deleted
func CustomResourceDeleted(kind string) {
nrCrDeleted.With(prometheus.Labels{"cr_type": kind}).Inc()
}
// CustomResourceRemovedFromQueue Decrements metric to count the number of cr in queue
func CustomResourceRemovedFromQueue(kind string) {
nrCrDeQueued.With(prometheus.Labels{"cr_type": kind}).Inc()
}
// OperatorError Add error
func OperatorError(kind, method, errorType string) {
nrErrors.With(prometheus.Labels{
"cr_type": kind,
"method": method,
"err_type": errorType,
}).Inc()
}
// AddDurationOfReconciliation records how long a reconciliation of a custom
// resource of the given kind took. The duration is observed in seconds on the
// reconciliation histogram.
func AddDurationOfReconciliation(kind string, duration time.Duration) {
	seconds := duration.Seconds()
	observer := recTimeBucket.WithLabelValues(kind)
	observer.Observe(seconds)
}