-
Notifications
You must be signed in to change notification settings - Fork 793
/
manager_metrics.go
147 lines (133 loc) · 5.29 KB
/
manager_metrics.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
package ruler
import (
"github.com/prometheus/client_golang/prometheus"
"github.com/cortexproject/cortex/pkg/util"
)
// ManagerMetrics is a Prometheus collector that reads the metrics exported by
// the Prometheus rules package from each user's registry and re-exports them
// as Cortex metrics.
type ManagerMetrics struct {
	// regs holds the per-user registries the metrics are gathered from.
	regs *util.UserRegistries

	// Descriptors whose only variable label is "user".
	EvalDuration, IterationDuration       *prometheus.Desc
	IterationsMissed, IterationsScheduled *prometheus.Desc

	// Descriptors labelled by both "user" and "rule_group".
	EvalTotal, EvalFailures          *prometheus.Desc
	GroupInterval, GroupLastEvalTime *prometheus.Desc
	GroupLastDuration, GroupRules    *prometheus.Desc
	GroupLastEvalSamples             *prometheus.Desc
}
// NewManagerMetrics builds a ManagerMetrics backed by a new
// util.UserRegistries and one descriptor per exported metric.
func NewManagerMetrics() *ManagerMetrics {
	// perUser builds a descriptor whose only variable label is "user".
	perUser := func(name, help string) *prometheus.Desc {
		return prometheus.NewDesc(name, help, []string{"user"}, nil)
	}
	// perGroup builds a descriptor labelled by both "user" and "rule_group".
	perGroup := func(name, help string) *prometheus.Desc {
		return prometheus.NewDesc(name, help, []string{"user", "rule_group"}, nil)
	}

	metrics := &ManagerMetrics{regs: util.NewUserRegistries()}

	metrics.EvalDuration = perUser(
		"cortex_prometheus_rule_evaluation_duration_seconds",
		"The duration for a rule to execute.",
	)
	metrics.IterationDuration = perUser(
		"cortex_prometheus_rule_group_duration_seconds",
		"The duration of rule group evaluations.",
	)
	metrics.IterationsMissed = perUser(
		"cortex_prometheus_rule_group_iterations_missed_total",
		"The total number of rule group evaluations missed due to slow rule group evaluation.",
	)
	metrics.IterationsScheduled = perUser(
		"cortex_prometheus_rule_group_iterations_total",
		"The total number of scheduled rule group evaluations, whether executed or missed.",
	)

	metrics.EvalTotal = perGroup(
		"cortex_prometheus_rule_evaluations_total",
		"The total number of rule evaluations.",
	)
	metrics.EvalFailures = perGroup(
		"cortex_prometheus_rule_evaluation_failures_total",
		"The total number of rule evaluation failures.",
	)
	metrics.GroupInterval = perGroup(
		"cortex_prometheus_rule_group_interval_seconds",
		"The interval of a rule group.",
	)
	metrics.GroupLastEvalTime = perGroup(
		"cortex_prometheus_rule_group_last_evaluation_timestamp_seconds",
		"The timestamp of the last rule group evaluation in seconds.",
	)
	metrics.GroupLastDuration = perGroup(
		"cortex_prometheus_rule_group_last_duration_seconds",
		"The duration of the last rule group evaluation.",
	)
	metrics.GroupRules = perGroup(
		"cortex_prometheus_rule_group_rules",
		"The number of rules.",
	)
	// NOTE(review): unlike its siblings, this exported name has no
	// "rule_group_" segment. It is kept as-is because renaming it would change
	// the exported series name — confirm whether that is intentional.
	metrics.GroupLastEvalSamples = perGroup(
		"cortex_prometheus_last_evaluation_samples",
		"The number of samples returned during the last rule group evaluation.",
	)

	return metrics
}
// AddUserRegistry adds a user-specific Prometheus registry.
// It hands reg to the underlying util.UserRegistries under the given user;
// Collect builds its per-user output from that same set of registries.
func (m *ManagerMetrics) AddUserRegistry(user string, reg *prometheus.Registry) {
m.regs.AddUserRegistry(user, reg)
}
// RemoveUserRegistry removes the user-specific Prometheus registry.
// Collect relies on this to drop a user's metrics; if the same user is added
// again later, their metrics start from 0.
func (m *ManagerMetrics) RemoveUserRegistry(user string) {
// NOTE(review): the meaning of the `true` argument is defined by
// util.UserRegistries.RemoveUserRegistry, which is not visible here —
// presumably it requests a hard removal with no retained values; confirm.
m.regs.RemoveUserRegistry(user, true)
}
// Describe implements prometheus.Collector by sending every descriptor held
// by ManagerMetrics to out, in declaration order.
func (m *ManagerMetrics) Describe(out chan<- *prometheus.Desc) {
	descs := [...]*prometheus.Desc{
		m.EvalDuration,
		m.IterationDuration,
		m.IterationsMissed,
		m.IterationsScheduled,
		m.EvalTotal,
		m.EvalFailures,
		m.GroupInterval,
		m.GroupLastEvalTime,
		m.GroupLastDuration,
		m.GroupRules,
		m.GroupLastEvalSamples,
	}
	for _, d := range descs {
		out <- d
	}
}
// Collect implements prometheus.Collector. It builds the metric families of
// all user registries and, for each Cortex descriptor, sends the aggregate of
// the matching source metric from the Prometheus rules package to out.
func (m *ManagerMetrics) Collect(out chan<- prometheus.Metric) {
	// mapping pairs a Cortex descriptor with the source metric it is fed from.
	type mapping struct {
		desc   *prometheus.Desc
		source string
	}

	families := m.regs.BuildMetricFamiliesPerUser()

	// WARNING: every metric emitted here must be "Per User". That is what
	// makes it possible to actually *remove* the metrics of a given user
	// (see RemoveUserRegistry). If the same user is re-added later, all of
	// their metrics start from 0 again, which is fine.

	summaries := []mapping{
		{m.EvalDuration, "prometheus_rule_evaluation_duration_seconds"},
		{m.IterationDuration, "prometheus_rule_group_duration_seconds"},
	}
	for _, s := range summaries {
		families.SendSumOfSummariesPerUser(out, s.desc, s.source)
	}

	counters := []mapping{
		{m.IterationsMissed, "prometheus_rule_group_iterations_missed_total"},
		{m.IterationsScheduled, "prometheus_rule_group_iterations_total"},
	}
	for _, c := range counters {
		families.SendSumOfCountersPerUser(out, c.desc, c.source)
	}

	groupCounters := []mapping{
		{m.EvalTotal, "prometheus_rule_evaluations_total"},
		{m.EvalFailures, "prometheus_rule_evaluation_failures_total"},
	}
	for _, c := range groupCounters {
		families.SendSumOfCountersPerUserWithLabels(out, c.desc, c.source, "rule_group")
	}

	groupGauges := []mapping{
		{m.GroupInterval, "prometheus_rule_group_interval_seconds"},
		{m.GroupLastEvalTime, "prometheus_rule_group_last_evaluation_timestamp_seconds"},
		{m.GroupLastDuration, "prometheus_rule_group_last_duration_seconds"},
		{m.GroupRules, "prometheus_rule_group_rules"},
		{m.GroupLastEvalSamples, "prometheus_rule_group_last_evaluation_samples"},
	}
	for _, g := range groupGauges {
		families.SendSumOfGaugesPerUserWithLabels(out, g.desc, g.source, "rule_group")
	}
}