forked from grafana/loki
/
metrics.go
152 lines (141 loc) · 6.24 KB
/
metrics.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
package ingester
import (
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
"github.com/frelon/loki/v2/pkg/validation"
)
// ingesterMetrics groups the Prometheus collectors that newIngesterMetrics
// creates for the ingester: checkpoint, WAL, WAL-recovery, limiter and
// auto-forget metrics.
type ingesterMetrics struct {
// Checkpoint deletions that failed.
checkpointDeleteFail prometheus.Counter
// Checkpoint deletions attempted.
checkpointDeleteTotal prometheus.Counter
// Checkpoint creations that failed.
checkpointCreationFail prometheus.Counter
// Checkpoint creations attempted.
checkpointCreationTotal prometheus.Counter
// Time taken to create a checkpoint (summary with 0.5/0.9/0.99 objectives).
checkpointDuration prometheus.Summary
// Bytes written to disk for checkpointing.
checkpointLoggedBytesTotal prometheus.Counter
// WAL write failures due to a full disk.
walDiskFullFailures prometheus.Counter
// Whether the WAL is replaying.
walReplayActive prometheus.Gauge
// Time taken to replay the checkpoint and the WAL.
walReplayDuration prometheus.Gauge
// WAL segment entries discarded during replay, labelled by validation.ReasonLabel.
walReplaySamplesDropped *prometheus.CounterVec
// WAL segment bytes discarded during replay, labelled by validation.ReasonLabel.
walReplayBytesDropped *prometheus.CounterVec
// WAL corruptions encountered, labelled by "type".
walCorruptionsTotal *prometheus.CounterVec
// Bytes written to disk for WAL records.
walLoggedBytesTotal prometheus.Counter
// WAL records logged.
walRecordsLogged prometheus.Counter
// Streams recovered from the WAL.
recoveredStreamsTotal prometheus.Counter
// Chunks recovered from the WAL checkpoints.
recoveredChunksTotal prometheus.Counter
// Entries recovered from the WAL.
recoveredEntriesTotal prometheus.Counter
// Entries discarded during WAL replay because they already exist in checkpoints.
duplicateEntriesTotal prometheus.Counter
// Bytes recovered from the WAL.
recoveredBytesTotal prometheus.Counter
// Bytes in use by the WAL recovery process; written through
// setRecoveryBytesInUse, which never reports a negative value.
recoveryBytesInUse prometheus.Gauge
// Whether WAL replay is in a flushing phase due to backpressure.
recoveryIsFlushing prometheus.Gauge
// Whether the ingester's limiter is enabled.
limiterEnabled prometheus.Gauge
// Ingesters automatically forgotten.
autoForgetUnhealthyIngestersTotal prometheus.Counter
}
// setRecoveryBytesInUse publishes v on the recoveryBytesInUse gauge, treating
// any negative value as zero so the reported figure never drops below 0.
// TODO(owen-d): we can gain some efficiency by having the flusher never update this after recovery ends.
func (m *ingesterMetrics) setRecoveryBytesInUse(v int64) {
	// Start from the floor and only raise it for strictly positive inputs.
	reported := 0.0
	if v > 0 {
		reported = float64(v)
	}
	m.recoveryBytesInUse.Set(reported)
}
// Kinds of WAL data.
// NOTE(review): presumably the values used for the "type" label on
// walCorruptionsTotal — confirm against the call sites.
const (
	walTypeCheckpoint = "checkpoint"
	walTypeSegment    = "segment"
)

// duplicateReason names the "duplicate" reason.
// NOTE(review): presumably a validation.ReasonLabel value for the WAL replay
// discard counters — confirm against the call sites.
const duplicateReason = "duplicate"
// newIngesterMetrics builds the ingester's metric set. Every collector is
// created through a single promauto factory bound to r, in the same order as
// the fields are listed below.
func newIngesterMetrics(r prometheus.Registerer) *ingesterMetrics {
	factory := promauto.With(r)

	// Local constructors for the three option shapes that only vary by
	// name, help text and (for vectors) a single label name.
	counter := func(name, help string) prometheus.Counter {
		return factory.NewCounter(prometheus.CounterOpts{Name: name, Help: help})
	}
	gauge := func(name, help string) prometheus.Gauge {
		return factory.NewGauge(prometheus.GaugeOpts{Name: name, Help: help})
	}
	counterVec := func(name, help, label string) *prometheus.CounterVec {
		return factory.NewCounterVec(prometheus.CounterOpts{Name: name, Help: help}, []string{label})
	}

	return &ingesterMetrics{
		// WAL write / replay health.
		walDiskFullFailures: counter(
			"loki_ingester_wal_disk_full_failures_total",
			"Total number of wal write failures due to full disk.",
		),
		walReplayActive: gauge(
			"loki_ingester_wal_replay_active",
			"Whether the WAL is replaying",
		),
		walReplayDuration: gauge(
			"loki_ingester_wal_replay_duration_seconds",
			"Time taken to replay the checkpoint and the WAL.",
		),
		walReplaySamplesDropped: counterVec(
			"loki_ingester_wal_discarded_samples_total",
			"WAL segment entries discarded during replay",
			validation.ReasonLabel,
		),
		walReplayBytesDropped: counterVec(
			"loki_ingester_wal_discarded_bytes_total",
			"WAL segment bytes discarded during replay",
			validation.ReasonLabel,
		),
		walCorruptionsTotal: counterVec(
			"loki_ingester_wal_corruptions_total",
			"Total number of WAL corruptions encountered.",
			"type",
		),

		// Checkpoint lifecycle.
		checkpointDeleteFail: counter(
			"loki_ingester_checkpoint_deletions_failed_total",
			"Total number of checkpoint deletions that failed.",
		),
		checkpointDeleteTotal: counter(
			"loki_ingester_checkpoint_deletions_total",
			"Total number of checkpoint deletions attempted.",
		),
		checkpointCreationFail: counter(
			"loki_ingester_checkpoint_creations_failed_total",
			"Total number of checkpoint creations that failed.",
		),
		checkpointCreationTotal: counter(
			"loki_ingester_checkpoint_creations_total",
			"Total number of checkpoint creations attempted.",
		),
		// The only summary in the set, so it is spelled out in full.
		checkpointDuration: factory.NewSummary(prometheus.SummaryOpts{
			Name:       "loki_ingester_checkpoint_duration_seconds",
			Help:       "Time taken to create a checkpoint.",
			Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001},
		}),

		// Volume written to disk.
		walRecordsLogged: counter(
			"loki_ingester_wal_records_logged_total",
			"Total number of WAL records logged.",
		),
		checkpointLoggedBytesTotal: counter(
			"loki_ingester_checkpoint_logged_bytes_total",
			"Total number of bytes written to disk for checkpointing.",
		),
		walLoggedBytesTotal: counter(
			"loki_ingester_wal_logged_bytes_total",
			"Total number of bytes written to disk for WAL records.",
		),

		// WAL recovery.
		recoveredStreamsTotal: counter(
			"loki_ingester_wal_recovered_streams_total",
			"Total number of streams recovered from the WAL.",
		),
		recoveredChunksTotal: counter(
			"loki_ingester_wal_recovered_chunks_total",
			"Total number of chunks recovered from the WAL checkpoints.",
		),
		recoveredEntriesTotal: counter(
			"loki_ingester_wal_recovered_entries_total",
			"Total number of entries recovered from the WAL.",
		),
		duplicateEntriesTotal: counter(
			"loki_ingester_wal_duplicate_entries_total",
			"Entries discarded during WAL replay due to existing in checkpoints.",
		),
		recoveredBytesTotal: counter(
			"loki_ingester_wal_recovered_bytes_total",
			"Total number of bytes recovered from the WAL.",
		),
		recoveryBytesInUse: gauge(
			"loki_ingester_wal_bytes_in_use",
			"Total number of bytes in use by the WAL recovery process.",
		),
		recoveryIsFlushing: gauge(
			"loki_ingester_wal_replay_flushing",
			"Whether the wal replay is in a flushing phase due to backpressure",
		),

		// Limiter and ring housekeeping.
		limiterEnabled: gauge(
			"loki_ingester_limiter_enabled",
			"Whether the ingester's limiter is enabled",
		),
		autoForgetUnhealthyIngestersTotal: counter(
			"loki_ingester_autoforget_unhealthy_ingesters_total",
			"Total number of ingesters automatically forgotten",
		),
	}
}