Skip to content

Commit a1fc64d

Browse files
authored
Switched pkg/alertmanager metrics to promauto (#2310)
Signed-off-by: Marco Pracucci <marco@pracucci.com>
1 parent a68f440 commit a1fc64d

File tree

4 files changed: 96 additions and 85 deletions.

integration/asserts.go

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -32,7 +32,7 @@ var (
3232
Querier: []string{},
3333
QueryFrontend: []string{"cortex_frontend", "cortex_query_frontend"},
3434
TableManager: []string{},
35-
AlertManager: []string{},
35+
AlertManager: []string{"cortex_alertmanager"},
3636
Ruler: []string{},
3737
}
3838

pkg/alertmanager/alertmanager_metrics_test.go

Lines changed: 26 additions & 56 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@ import (
66

77
"github.com/prometheus/alertmanager/types"
88
"github.com/prometheus/client_golang/prometheus"
9+
"github.com/prometheus/client_golang/prometheus/promauto"
910
"github.com/prometheus/client_golang/prometheus/testutil"
1011
"github.com/stretchr/testify/require"
1112
)
@@ -220,48 +221,37 @@ type nflogMetrics struct {
220221
func newNflogMetrics(r prometheus.Registerer) *nflogMetrics {
221222
m := &nflogMetrics{}
222223

223-
m.gcDuration = prometheus.NewSummary(prometheus.SummaryOpts{
224+
m.gcDuration = promauto.With(r).NewSummary(prometheus.SummaryOpts{
224225
Name: "alertmanager_nflog_gc_duration_seconds",
225226
Help: "Duration of the last notification log garbage collection cycle.",
226227
Objectives: map[float64]float64{},
227228
})
228-
m.snapshotDuration = prometheus.NewSummary(prometheus.SummaryOpts{
229+
m.snapshotDuration = promauto.With(r).NewSummary(prometheus.SummaryOpts{
229230
Name: "alertmanager_nflog_snapshot_duration_seconds",
230231
Help: "Duration of the last notification log snapshot.",
231232
Objectives: map[float64]float64{},
232233
})
233-
m.snapshotSize = prometheus.NewGauge(prometheus.GaugeOpts{
234+
m.snapshotSize = promauto.With(r).NewGauge(prometheus.GaugeOpts{
234235
Name: "alertmanager_nflog_snapshot_size_bytes",
235236
Help: "Size of the last notification log snapshot in bytes.",
236237
})
237-
m.queriesTotal = prometheus.NewCounter(prometheus.CounterOpts{
238+
m.queriesTotal = promauto.With(r).NewCounter(prometheus.CounterOpts{
238239
Name: "alertmanager_nflog_queries_total",
239240
Help: "Number of notification log queries were received.",
240241
})
241-
m.queryErrorsTotal = prometheus.NewCounter(prometheus.CounterOpts{
242+
m.queryErrorsTotal = promauto.With(r).NewCounter(prometheus.CounterOpts{
242243
Name: "alertmanager_nflog_query_errors_total",
243244
Help: "Number notification log received queries that failed.",
244245
})
245-
m.queryDuration = prometheus.NewHistogram(prometheus.HistogramOpts{
246+
m.queryDuration = promauto.With(r).NewHistogram(prometheus.HistogramOpts{
246247
Name: "alertmanager_nflog_query_duration_seconds",
247248
Help: "Duration of notification log query evaluation.",
248249
})
249-
m.propagatedMessagesTotal = prometheus.NewCounter(prometheus.CounterOpts{
250+
m.propagatedMessagesTotal = promauto.With(r).NewCounter(prometheus.CounterOpts{
250251
Name: "alertmanager_nflog_gossip_messages_propagated_total",
251252
Help: "Number of received gossip messages that have been further gossiped.",
252253
})
253254

254-
if r != nil {
255-
r.MustRegister(
256-
m.gcDuration,
257-
m.snapshotDuration,
258-
m.snapshotSize,
259-
m.queriesTotal,
260-
m.queryErrorsTotal,
261-
m.queryDuration,
262-
m.propagatedMessagesTotal,
263-
)
264-
}
265255
return m
266256
}
267257

@@ -282,66 +272,52 @@ type silenceMetrics struct {
282272
func newSilenceMetrics(r prometheus.Registerer) *silenceMetrics {
283273
m := &silenceMetrics{}
284274

285-
m.gcDuration = prometheus.NewSummary(prometheus.SummaryOpts{
275+
m.gcDuration = promauto.With(r).NewSummary(prometheus.SummaryOpts{
286276
Name: "alertmanager_silences_gc_duration_seconds",
287277
Help: "Duration of the last silence garbage collection cycle.",
288278
Objectives: map[float64]float64{},
289279
})
290-
m.snapshotDuration = prometheus.NewSummary(prometheus.SummaryOpts{
280+
m.snapshotDuration = promauto.With(r).NewSummary(prometheus.SummaryOpts{
291281
Name: "alertmanager_silences_snapshot_duration_seconds",
292282
Help: "Duration of the last silence snapshot.",
293283
Objectives: map[float64]float64{},
294284
})
295-
m.snapshotSize = prometheus.NewGauge(prometheus.GaugeOpts{
285+
m.snapshotSize = promauto.With(r).NewGauge(prometheus.GaugeOpts{
296286
Name: "alertmanager_silences_snapshot_size_bytes",
297287
Help: "Size of the last silence snapshot in bytes.",
298288
})
299-
m.queriesTotal = prometheus.NewCounter(prometheus.CounterOpts{
289+
m.queriesTotal = promauto.With(r).NewCounter(prometheus.CounterOpts{
300290
Name: "alertmanager_silences_queries_total",
301291
Help: "How many silence queries were received.",
302292
})
303-
m.queryErrorsTotal = prometheus.NewCounter(prometheus.CounterOpts{
293+
m.queryErrorsTotal = promauto.With(r).NewCounter(prometheus.CounterOpts{
304294
Name: "alertmanager_silences_query_errors_total",
305295
Help: "How many silence received queries did not succeed.",
306296
})
307-
m.queryDuration = prometheus.NewHistogram(prometheus.HistogramOpts{
297+
m.queryDuration = promauto.With(r).NewHistogram(prometheus.HistogramOpts{
308298
Name: "alertmanager_silences_query_duration_seconds",
309299
Help: "Duration of silence query evaluation.",
310300
})
311-
m.propagatedMessagesTotal = prometheus.NewCounter(prometheus.CounterOpts{
301+
m.propagatedMessagesTotal = promauto.With(r).NewCounter(prometheus.CounterOpts{
312302
Name: "alertmanager_silences_gossip_messages_propagated_total",
313303
Help: "Number of received gossip messages that have been further gossiped.",
314304
})
315-
m.silencesActive = prometheus.NewGauge(prometheus.GaugeOpts{
305+
m.silencesActive = promauto.With(r).NewGauge(prometheus.GaugeOpts{
316306
Name: "alertmanager_silences",
317307
Help: "How many silences by state.",
318308
ConstLabels: prometheus.Labels{"state": string(types.SilenceStateActive)},
319309
})
320-
m.silencesPending = prometheus.NewGauge(prometheus.GaugeOpts{
310+
m.silencesPending = promauto.With(r).NewGauge(prometheus.GaugeOpts{
321311
Name: "alertmanager_silences",
322312
Help: "How many silences by state.",
323313
ConstLabels: prometheus.Labels{"state": string(types.SilenceStatePending)},
324314
})
325-
m.silencesExpired = prometheus.NewGauge(prometheus.GaugeOpts{
315+
m.silencesExpired = promauto.With(r).NewGauge(prometheus.GaugeOpts{
326316
Name: "alertmanager_silences",
327317
Help: "How many silences by state.",
328318
ConstLabels: prometheus.Labels{"state": string(types.SilenceStateExpired)},
329319
})
330320

331-
if r != nil {
332-
r.MustRegister(
333-
m.gcDuration,
334-
m.snapshotDuration,
335-
m.snapshotSize,
336-
m.queriesTotal,
337-
m.queryErrorsTotal,
338-
m.queryDuration,
339-
m.silencesActive,
340-
m.silencesPending,
341-
m.silencesExpired,
342-
m.propagatedMessagesTotal,
343-
)
344-
}
345321
return m
346322
}
347323

@@ -354,17 +330,17 @@ type notifyMetrics struct {
354330

355331
func newNotifyMetrics(r prometheus.Registerer) *notifyMetrics {
356332
m := &notifyMetrics{
357-
numNotifications: prometheus.NewCounterVec(prometheus.CounterOpts{
333+
numNotifications: promauto.With(r).NewCounterVec(prometheus.CounterOpts{
358334
Namespace: "alertmanager",
359335
Name: "notifications_total",
360336
Help: "The total number of attempted notifications.",
361337
}, []string{"integration"}),
362-
numFailedNotifications: prometheus.NewCounterVec(prometheus.CounterOpts{
338+
numFailedNotifications: promauto.With(r).NewCounterVec(prometheus.CounterOpts{
363339
Namespace: "alertmanager",
364340
Name: "notifications_failed_total",
365341
Help: "The total number of failed notifications.",
366342
}, []string{"integration"}),
367-
notificationLatencySeconds: prometheus.NewHistogramVec(prometheus.HistogramOpts{
343+
notificationLatencySeconds: promauto.With(r).NewHistogramVec(prometheus.HistogramOpts{
368344
Namespace: "alertmanager",
369345
Name: "notification_latency_seconds",
370346
Help: "The latency of notifications in seconds.",
@@ -376,7 +352,6 @@ func newNotifyMetrics(r prometheus.Registerer) *notifyMetrics {
376352
m.numFailedNotifications.WithLabelValues(integration)
377353
m.notificationLatencySeconds.WithLabelValues(integration)
378354
}
379-
r.MustRegister(m.numNotifications, m.numFailedNotifications, m.notificationLatencySeconds)
380355
return m
381356
}
382357

@@ -385,15 +360,12 @@ type markerMetrics struct {
385360
}
386361

387362
func newMarkerMetrics(r prometheus.Registerer) *markerMetrics {
388-
m := &markerMetrics{
389-
alerts: prometheus.NewGaugeVec(prometheus.GaugeOpts{
363+
return &markerMetrics{
364+
alerts: promauto.With(r).NewGaugeVec(prometheus.GaugeOpts{
390365
Name: "alertmanager_alerts",
391366
Help: "How many alerts by state.",
392367
}, []string{"state"}),
393368
}
394-
395-
r.MustRegister(m.alerts)
396-
return m
397369
}
398370

399371
// Copied from github.com/alertmanager/api/metrics/metrics.go
@@ -404,19 +376,17 @@ type apiMetrics struct {
404376
}
405377

406378
func newAPIMetrics(version string, r prometheus.Registerer) *apiMetrics {
407-
numReceivedAlerts := prometheus.NewCounterVec(prometheus.CounterOpts{
379+
numReceivedAlerts := promauto.With(r).NewCounterVec(prometheus.CounterOpts{
408380
Name: "alertmanager_alerts_received_total",
409381
Help: "The total number of received alerts.",
410382
ConstLabels: prometheus.Labels{"version": version},
411383
}, []string{"status"})
412-
numInvalidAlerts := prometheus.NewCounter(prometheus.CounterOpts{
384+
numInvalidAlerts := promauto.With(r).NewCounter(prometheus.CounterOpts{
413385
Name: "alertmanager_alerts_invalid_total",
414386
Help: "The total number of received alerts that were invalid.",
415387
ConstLabels: prometheus.Labels{"version": version},
416388
})
417-
if r != nil {
418-
r.MustRegister(numReceivedAlerts, numInvalidAlerts)
419-
}
389+
420390
return &apiMetrics{
421391
firing: numReceivedAlerts.WithLabelValues("firing"),
422392
resolved: numReceivedAlerts.WithLabelValues("resolved"),

pkg/alertmanager/multitenant.go

Lines changed: 35 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,7 @@ import (
1919
"github.com/prometheus/alertmanager/cluster"
2020
amconfig "github.com/prometheus/alertmanager/config"
2121
"github.com/prometheus/client_golang/prometheus"
22+
"github.com/prometheus/client_golang/prometheus/promauto"
2223
"github.com/weaveworks/common/user"
2324

2425
"github.com/cortexproject/cortex/pkg/alertmanager/alerts"
@@ -70,20 +71,10 @@ const (
7071
)
7172

7273
var (
73-
totalConfigs = prometheus.NewGaugeVec(prometheus.GaugeOpts{
74-
Namespace: "cortex",
75-
Name: "alertmanager_configs",
76-
Help: "How many configs the multitenant alertmanager knows about.",
77-
}, []string{"status"})
7874
statusTemplate *template.Template
7975
)
8076

8177
func init() {
82-
// Ensure the metric values are initialized.
83-
totalConfigs.WithLabelValues(configStatusInvalid).Set(0)
84-
totalConfigs.WithLabelValues(configStatusValid).Set(0)
85-
86-
prometheus.MustRegister(totalConfigs)
8778
statusTemplate = template.Must(template.New("statusPage").Funcs(map[string]interface{}{
8879
"state": func(enabled bool) string {
8980
if enabled {
@@ -133,6 +124,24 @@ func (cfg *MultitenantAlertmanagerConfig) RegisterFlags(f *flag.FlagSet) {
133124
cfg.Store.RegisterFlags(f)
134125
}
135126

127+
type multitenantAlertmanagerMetrics struct {
128+
totalConfigs *prometheus.GaugeVec
129+
}
130+
131+
func newMultitenantAlertmanagerMetrics(reg prometheus.Registerer) *multitenantAlertmanagerMetrics {
132+
m := &multitenantAlertmanagerMetrics{}
133+
134+
m.totalConfigs = promauto.With(reg).NewGaugeVec(prometheus.GaugeOpts{
135+
Namespace: "cortex",
136+
Name: "alertmanager_configs",
137+
Help: "How many configs the multitenant alertmanager knows about.",
138+
}, []string{"status"})
139+
m.totalConfigs.WithLabelValues(configStatusInvalid).Set(0)
140+
m.totalConfigs.WithLabelValues(configStatusValid).Set(0)
141+
142+
return m
143+
}
144+
136145
// A MultitenantAlertmanager manages Alertmanager instances for multiple
137146
// organizations.
138147
type MultitenantAlertmanager struct {
@@ -153,8 +162,9 @@ type MultitenantAlertmanager struct {
153162
alertmanagersMtx sync.Mutex
154163
alertmanagers map[string]*Alertmanager
155164

156-
logger log.Logger
157-
metrics *alertmanagerMetrics
165+
logger log.Logger
166+
alertmanagerMetrics *alertmanagerMetrics
167+
multitenantMetrics *multitenantAlertmanagerMetrics
158168

159169
peer *cluster.Peer
160170
}
@@ -213,18 +223,19 @@ func NewMultitenantAlertmanager(cfg *MultitenantAlertmanagerConfig, logger log.L
213223

214224
func createMultitenantAlertmanager(cfg *MultitenantAlertmanagerConfig, fallbackConfig []byte, peer *cluster.Peer, store AlertStore, logger log.Logger, registerer prometheus.Registerer) *MultitenantAlertmanager {
215225
am := &MultitenantAlertmanager{
216-
cfg: cfg,
217-
fallbackConfig: string(fallbackConfig),
218-
cfgs: map[string]alerts.AlertConfigDesc{},
219-
alertmanagers: map[string]*Alertmanager{},
220-
metrics: newAlertmanagerMetrics(),
221-
peer: peer,
222-
store: store,
223-
logger: log.With(logger, "component", "MultiTenantAlertmanager"),
226+
cfg: cfg,
227+
fallbackConfig: string(fallbackConfig),
228+
cfgs: map[string]alerts.AlertConfigDesc{},
229+
alertmanagers: map[string]*Alertmanager{},
230+
alertmanagerMetrics: newAlertmanagerMetrics(),
231+
multitenantMetrics: newMultitenantAlertmanagerMetrics(registerer),
232+
peer: peer,
233+
store: store,
234+
logger: log.With(logger, "component", "MultiTenantAlertmanager"),
224235
}
225236

226237
if registerer != nil {
227-
registerer.MustRegister(am.metrics)
238+
registerer.MustRegister(am.alertmanagerMetrics)
228239
}
229240

230241
am.Service = services.NewTimerService(am.cfg.PollInterval, am.starting, am.iteration, am.stopping)
@@ -320,8 +331,8 @@ func (am *MultitenantAlertmanager) syncConfigs(cfgs map[string]alerts.AlertConfi
320331
level.Info(am.logger).Log("msg", "deactivated per-tenant alertmanager", "user", user)
321332
}
322333
}
323-
totalConfigs.WithLabelValues(configStatusInvalid).Set(float64(invalid))
324-
totalConfigs.WithLabelValues(configStatusValid).Set(float64(len(am.cfgs) - invalid))
334+
am.multitenantMetrics.totalConfigs.WithLabelValues(configStatusInvalid).Set(float64(invalid))
335+
am.multitenantMetrics.totalConfigs.WithLabelValues(configStatusValid).Set(float64(len(am.cfgs) - invalid))
325336
}
326337

327338
func (am *MultitenantAlertmanager) transformConfig(userID string, amConfig *amconfig.Config) (*amconfig.Config, error) {
@@ -437,7 +448,6 @@ func (am *MultitenantAlertmanager) setConfig(cfg alerts.AlertConfigDesc) error {
437448

438449
func (am *MultitenantAlertmanager) newAlertmanager(userID string, amConfig *amconfig.Config) (*Alertmanager, error) {
439450
reg := prometheus.NewRegistry()
440-
am.metrics.addUserRegistry(userID, reg)
441451
newAM, err := New(&Config{
442452
UserID: userID,
443453
DataDir: am.cfg.DataDir,
@@ -455,7 +465,7 @@ func (am *MultitenantAlertmanager) newAlertmanager(userID string, amConfig *amco
455465
return nil, fmt.Errorf("unable to apply initial config for user %v: %v", userID, err)
456466
}
457467

458-
am.metrics.addUserRegistry(userID, reg)
468+
am.alertmanagerMetrics.addUserRegistry(userID, reg)
459469
return newAM, nil
460470
}
461471

0 commit comments

Comments
 (0)