@@ -104,6 +104,12 @@ func TestAlertmanagerMetricsStore(t *testing.T) {
104104 # HELP cortex_alertmanager_nflog_snapshot_size_bytes Size of the last notification log snapshot in bytes.
105105 # TYPE cortex_alertmanager_nflog_snapshot_size_bytes gauge
106106 cortex_alertmanager_nflog_snapshot_size_bytes 111
107+ # HELP cortex_alertmanager_nflog_maintenance_total How many maintenances were executed for the notification log.
108+ # TYPE cortex_alertmanager_nflog_maintenance_total counter
109+ cortex_alertmanager_nflog_maintenance_total 111
110+ # HELP cortex_alertmanager_nflog_maintenance_errors_total How many maintenances were executed for the notification log that failed.
111+ # TYPE cortex_alertmanager_nflog_maintenance_errors_total counter
112+ cortex_alertmanager_nflog_maintenance_errors_total 111
107113 # HELP cortex_alertmanager_notification_latency_seconds The latency of notifications in seconds.
108114 # TYPE cortex_alertmanager_notification_latency_seconds histogram
109115 cortex_alertmanager_notification_latency_seconds_bucket{le="1"} 14
@@ -277,6 +283,12 @@ func TestAlertmanagerMetricsStore(t *testing.T) {
277283 # HELP cortex_alertmanager_silences_snapshot_size_bytes Size of the last silence snapshot in bytes.
278284 # TYPE cortex_alertmanager_silences_snapshot_size_bytes gauge
279285 cortex_alertmanager_silences_snapshot_size_bytes 111
286+ # HELP cortex_alertmanager_silences_maintenance_total How many maintenances were executed for silences.
287+ # TYPE cortex_alertmanager_silences_maintenance_total counter
288+ cortex_alertmanager_silences_maintenance_total 111
289+ # HELP cortex_alertmanager_silences_maintenance_errors_total How many maintenances were executed for silences that failed.
290+ # TYPE cortex_alertmanager_silences_maintenance_errors_total counter
291+ cortex_alertmanager_silences_maintenance_errors_total 111
280292 # HELP cortex_alertmanager_state_fetch_replica_state_failed_total Number of times we have failed to read and merge the full state from another replica.
281293 # TYPE cortex_alertmanager_state_fetch_replica_state_failed_total counter
282294 cortex_alertmanager_state_fetch_replica_state_failed_total 0
@@ -414,6 +426,13 @@ func TestAlertmanagerMetricsRemoval(t *testing.T) {
414426 # TYPE cortex_alertmanager_nflog_snapshot_size_bytes gauge
415427 cortex_alertmanager_nflog_snapshot_size_bytes 111
416428
429+ # HELP cortex_alertmanager_nflog_maintenance_total How many maintenances were executed for the notification log.
430+ # TYPE cortex_alertmanager_nflog_maintenance_total counter
431+ cortex_alertmanager_nflog_maintenance_total 111
432+ # HELP cortex_alertmanager_nflog_maintenance_errors_total How many maintenances were executed for the notification log that failed.
433+ # TYPE cortex_alertmanager_nflog_maintenance_errors_total counter
434+ cortex_alertmanager_nflog_maintenance_errors_total 111
435+
417436 # HELP cortex_alertmanager_notification_latency_seconds The latency of notifications in seconds.
418437 # TYPE cortex_alertmanager_notification_latency_seconds histogram
419438 cortex_alertmanager_notification_latency_seconds_bucket{le="1"} 14
@@ -598,6 +617,14 @@ func TestAlertmanagerMetricsRemoval(t *testing.T) {
598617 # HELP cortex_alertmanager_silences_snapshot_size_bytes Size of the last silence snapshot in bytes.
599618 # TYPE cortex_alertmanager_silences_snapshot_size_bytes gauge
600619 cortex_alertmanager_silences_snapshot_size_bytes 111
620+
621+ # HELP cortex_alertmanager_silences_maintenance_total How many maintenances were executed for silences.
622+ # TYPE cortex_alertmanager_silences_maintenance_total counter
623+ cortex_alertmanager_silences_maintenance_total 111
624+ # HELP cortex_alertmanager_silences_maintenance_errors_total How many maintenances were executed for silences that failed.
625+ # TYPE cortex_alertmanager_silences_maintenance_errors_total counter
626+ cortex_alertmanager_silences_maintenance_errors_total 111
627+
601628 # HELP cortex_alertmanager_state_fetch_replica_state_failed_total Number of times we have failed to read and merge the full state from another replica.
602629 # TYPE cortex_alertmanager_state_fetch_replica_state_failed_total counter
603630 cortex_alertmanager_state_fetch_replica_state_failed_total 0
@@ -715,6 +742,13 @@ func TestAlertmanagerMetricsRemoval(t *testing.T) {
715742 # TYPE cortex_alertmanager_nflog_snapshot_size_bytes gauge
716743 cortex_alertmanager_nflog_snapshot_size_bytes 11
717744
745+ # HELP cortex_alertmanager_nflog_maintenance_total How many maintenances were executed for the notification log.
746+ # TYPE cortex_alertmanager_nflog_maintenance_total counter
747+ cortex_alertmanager_nflog_maintenance_total 111
748+ # HELP cortex_alertmanager_nflog_maintenance_errors_total How many maintenances were executed for the notification log that failed.
749+ # TYPE cortex_alertmanager_nflog_maintenance_errors_total counter
750+ cortex_alertmanager_nflog_maintenance_errors_total 111
751+
718752 # HELP cortex_alertmanager_notification_latency_seconds The latency of notifications in seconds.
719753 # TYPE cortex_alertmanager_notification_latency_seconds histogram
720754 cortex_alertmanager_notification_latency_seconds_bucket{le="1"} 14
@@ -863,6 +897,13 @@ func TestAlertmanagerMetricsRemoval(t *testing.T) {
863897 # TYPE cortex_alertmanager_silences_snapshot_size_bytes gauge
864898 cortex_alertmanager_silences_snapshot_size_bytes 11
865899
900+ # HELP cortex_alertmanager_silences_maintenance_total How many maintenances were executed for silences.
901+ # TYPE cortex_alertmanager_silences_maintenance_total counter
902+ cortex_alertmanager_silences_maintenance_total 111
903+ # HELP cortex_alertmanager_silences_maintenance_errors_total How many maintenances were executed for silences that failed.
904+ # TYPE cortex_alertmanager_silences_maintenance_errors_total counter
905+ cortex_alertmanager_silences_maintenance_errors_total 111
906+
866907 # HELP cortex_alertmanager_state_fetch_replica_state_failed_total Number of times we have failed to read and merge the full state from another replica.
867908 # TYPE cortex_alertmanager_state_fetch_replica_state_failed_total counter
868909 cortex_alertmanager_state_fetch_replica_state_failed_total 0
@@ -913,6 +954,8 @@ func populateAlertmanager(base float64) *prometheus.Registry {
913954 s .silencesActive .Set (base )
914955 s .silencesExpired .Set (base * 2 )
915956 s .silencesPending .Set (base * 3 )
957+ s .silencesMaintenanceTotal .Add (base )
958+ s .silencesMaintenanceErrorsTotal .Add (base )
916959
917960 n := newNflogMetrics (reg )
918961 n .gcDuration .Observe (base )
@@ -922,6 +965,8 @@ func populateAlertmanager(base float64) *prometheus.Registry {
922965 n .queryErrorsTotal .Add (base )
923966 n .queryDuration .Observe (base )
924967 n .propagatedMessagesTotal .Add (base )
968+ n .maintenanceTotal .Add (base )
969+ n .maintenanceErrorsTotal .Add (base )
925970
926971 nm := newNotifyMetrics (reg )
927972 for i , integration := range integrations {
@@ -967,6 +1012,8 @@ type nflogMetrics struct {
9671012 queryErrorsTotal prometheus.Counter
9681013 queryDuration prometheus.Histogram
9691014 propagatedMessagesTotal prometheus.Counter
1015+ maintenanceTotal prometheus.Counter
1016+ maintenanceErrorsTotal prometheus.Counter
9701017}
9711018
9721019func newNflogMetrics (r prometheus.Registerer ) * nflogMetrics {
@@ -1002,22 +1049,32 @@ func newNflogMetrics(r prometheus.Registerer) *nflogMetrics {
10021049 Name : "alertmanager_nflog_gossip_messages_propagated_total" ,
10031050 Help : "Number of received gossip messages that have been further gossiped." ,
10041051 })
1052+ m .maintenanceTotal = promauto .With (r ).NewCounter (prometheus.CounterOpts {
1053+ Name : "alertmanager_nflog_maintenance_total" ,
1054+ Help : "How many maintenances were executed for the notification log." ,
1055+ })
1056+ m .maintenanceErrorsTotal = promauto .With (r ).NewCounter (prometheus.CounterOpts {
1057+ Name : "alertmanager_nflog_maintenance_errors_total" ,
1058+ Help : "How many maintenances were executed for the notification log that failed." ,
1059+ })
10051060
10061061 return m
10071062}
10081063
10091064// Copied from github.com/prometheus/alertmanager/silence/silence.go
10101065type silenceMetrics struct {
1011- gcDuration prometheus.Summary
1012- snapshotDuration prometheus.Summary
1013- snapshotSize prometheus.Gauge
1014- queriesTotal prometheus.Counter
1015- queryErrorsTotal prometheus.Counter
1016- queryDuration prometheus.Histogram
1017- silencesActive prometheus.Gauge
1018- silencesPending prometheus.Gauge
1019- silencesExpired prometheus.Gauge
1020- propagatedMessagesTotal prometheus.Counter
1066+ gcDuration prometheus.Summary
1067+ snapshotDuration prometheus.Summary
1068+ snapshotSize prometheus.Gauge
1069+ queriesTotal prometheus.Counter
1070+ queryErrorsTotal prometheus.Counter
1071+ queryDuration prometheus.Histogram
1072+ silencesActive prometheus.Gauge
1073+ silencesPending prometheus.Gauge
1074+ silencesExpired prometheus.Gauge
1075+ propagatedMessagesTotal prometheus.Counter
1076+ silencesMaintenanceTotal prometheus.Counter // registered by newSilenceMetrics as "alertmanager_silences_maintenance_total"
1077+ silencesMaintenanceErrorsTotal prometheus.Counter // registered by newSilenceMetrics as "alertmanager_silences_maintenance_errors_total"
10211078}
10221079
10231080func newSilenceMetrics (r prometheus.Registerer ) * silenceMetrics {
@@ -1068,6 +1125,14 @@ func newSilenceMetrics(r prometheus.Registerer) *silenceMetrics {
10681125 Help : "How many silences by state." ,
10691126 ConstLabels : prometheus.Labels {"state" : string (types .SilenceStateExpired )},
10701127 })
1128+ m .silencesMaintenanceTotal = promauto .With (r ).NewCounter (prometheus.CounterOpts {
1129+ Name : "alertmanager_silences_maintenance_total" ,
1130+ Help : "How many maintenances were executed for silences." ,
1131+ })
1132+ m .silencesMaintenanceErrorsTotal = promauto .With (r ).NewCounter (prometheus.CounterOpts {
1133+ Name : "alertmanager_silences_maintenance_errors_total" ,
1134+ Help : "How many maintenances were executed for silences that failed." ,
1135+ })
10711136
10721137 return m
10731138}
0 commit comments