/
metrics.go
224 lines (181 loc) · 6.65 KB
/
metrics.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
package telemetry
import (
"bytes"
"encoding/json"
"fmt"
"net/http"
"time"
"github.com/hashicorp/go-metrics"
"github.com/hashicorp/go-metrics/datadog"
metricsprom "github.com/hashicorp/go-metrics/prometheus"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/expfmt"
)
// globalLabels defines the set of global labels that will be applied to all
// metrics emitted using the telemetry package function wrappers.
var globalLabels = []metrics.Label{}
// Metrics supported format types.
const (
FormatDefault = ""
FormatPrometheus = "prometheus"
FormatText = "text"
MetricSinkInMem = "mem"
MetricSinkStatsd = "statsd"
MetricSinkDogsStatsd = "dogstatsd"
)
// DisplayableSink is an interface that defines a method for displaying metrics.
type DisplayableSink interface {
DisplayMetrics(resp http.ResponseWriter, req *http.Request) (any, error)
}
// Config defines the configuration options for application telemetry.
type Config struct {
// Prefixed with keys to separate services
ServiceName string `mapstructure:"service-name"`
// Enabled enables the application telemetry functionality. When enabled,
// an in-memory sink is also enabled by default. Operators may also enabled
// other sinks such as Prometheus.
Enabled bool `mapstructure:"enabled"`
// Enable prefixing gauge values with hostname
EnableHostname bool `mapstructure:"enable-hostname"`
// Enable adding hostname to labels
EnableHostnameLabel bool `mapstructure:"enable-hostname-label"`
// Enable adding service to labels
EnableServiceLabel bool `mapstructure:"enable-service-label"`
// PrometheusRetentionTime, when positive, enables a Prometheus metrics sink.
// It defines the retention duration in seconds.
PrometheusRetentionTime int64 `mapstructure:"prometheus-retention-time"`
// GlobalLabels defines a global set of name/value label tuples applied to all
// metrics emitted using the wrapper functions defined in telemetry package.
//
// Example:
// [["chain_id", "cosmoshub-1"]]
GlobalLabels [][]string `mapstructure:"global-labels"`
// MetricsSink defines the type of metrics backend to use.
MetricsSink string `mapstructure:"metrics-sink" default:"mem"`
// StatsdAddr defines the address of a statsd server to send metrics to.
// Only utilized if MetricsSink is set to "statsd" or "dogstatsd".
StatsdAddr string `mapstructure:"statsd-addr"`
// DatadogHostname defines the hostname to use when emitting metrics to
// Datadog. Only utilized if MetricsSink is set to "dogstatsd".
DatadogHostname string `mapstructure:"datadog-hostname"`
}
// Metrics defines a wrapper around application telemetry functionality. It allows
// metrics to be gathered at any point in time. When creating a Metrics object,
// internally, a global metrics is registered with a set of sinks as configured
// by the operator. In addition to the sinks, when a process gets a SIGUSR1, a
// dump of formatted recent metrics will be sent to STDERR.
type Metrics struct {
sink metrics.MetricSink
prometheusEnabled bool
}
// GatherResponse is the response type of registered metrics
type GatherResponse struct {
Metrics []byte
ContentType string
}
// New creates a new instance of Metrics
func New(cfg Config) (_ *Metrics, rerr error) {
if !cfg.Enabled {
return nil, nil
}
if numGlobalLabels := len(cfg.GlobalLabels); numGlobalLabels > 0 {
parsedGlobalLabels := make([]metrics.Label, numGlobalLabels)
for i, gl := range cfg.GlobalLabels {
parsedGlobalLabels[i] = NewLabel(gl[0], gl[1])
}
globalLabels = parsedGlobalLabels
}
metricsConf := metrics.DefaultConfig(cfg.ServiceName)
metricsConf.EnableHostname = cfg.EnableHostname
metricsConf.EnableHostnameLabel = cfg.EnableHostnameLabel
var (
sink metrics.MetricSink
err error
)
switch cfg.MetricsSink {
case MetricSinkStatsd:
sink, err = metrics.NewStatsdSink(cfg.StatsdAddr)
case MetricSinkDogsStatsd:
sink, err = datadog.NewDogStatsdSink(cfg.StatsdAddr, cfg.DatadogHostname)
default:
memSink := metrics.NewInmemSink(10*time.Second, time.Minute)
sink = memSink
inMemSig := metrics.DefaultInmemSignal(memSink)
defer func() {
if rerr != nil {
inMemSig.Stop()
}
}()
}
if err != nil {
return nil, err
}
m := &Metrics{sink: sink}
fanout := metrics.FanoutSink{sink}
if cfg.PrometheusRetentionTime > 0 {
m.prometheusEnabled = true
prometheusOpts := metricsprom.PrometheusOpts{
Expiration: time.Duration(cfg.PrometheusRetentionTime) * time.Second,
}
promSink, err := metricsprom.NewPrometheusSinkFrom(prometheusOpts)
if err != nil {
return nil, err
}
fanout = append(fanout, promSink)
}
if _, err := metrics.NewGlobal(metricsConf, fanout); err != nil {
return nil, err
}
return m, nil
}
// Gather collects all registered metrics and returns a GatherResponse where the
// metrics are encoded depending on the type. Metrics are either encoded via
// Prometheus or JSON if in-memory.
func (m *Metrics) Gather(format string) (GatherResponse, error) {
switch format {
case FormatPrometheus:
return m.gatherPrometheus()
case FormatText:
return m.gatherGeneric()
case FormatDefault:
return m.gatherGeneric()
default:
return GatherResponse{}, fmt.Errorf("unsupported metrics format: %s", format)
}
}
// gatherPrometheus collects Prometheus metrics and returns a GatherResponse.
// If Prometheus metrics are not enabled, it returns an error.
func (m *Metrics) gatherPrometheus() (GatherResponse, error) {
if !m.prometheusEnabled {
return GatherResponse{}, fmt.Errorf("prometheus metrics are not enabled")
}
metricsFamilies, err := prometheus.DefaultGatherer.Gather()
if err != nil {
return GatherResponse{}, fmt.Errorf("failed to gather prometheus metrics: %w", err)
}
buf := &bytes.Buffer{}
defer buf.Reset()
e := expfmt.NewEncoder(buf, expfmt.FmtText)
for _, mf := range metricsFamilies {
if err := e.Encode(mf); err != nil {
return GatherResponse{}, fmt.Errorf("failed to encode prometheus metrics: %w", err)
}
}
return GatherResponse{ContentType: string(expfmt.FmtText), Metrics: buf.Bytes()}, nil
}
// gatherGeneric collects generic metrics and returns a GatherResponse.
func (m *Metrics) gatherGeneric() (GatherResponse, error) {
gm, ok := m.sink.(DisplayableSink)
if !ok {
return GatherResponse{}, fmt.Errorf("non in-memory metrics sink does not support generic format")
}
summary, err := gm.DisplayMetrics(nil, nil)
if err != nil {
return GatherResponse{}, fmt.Errorf("failed to gather in-memory metrics: %w", err)
}
content, err := json.Marshal(summary)
if err != nil {
return GatherResponse{}, fmt.Errorf("failed to encode in-memory metrics: %w", err)
}
return GatherResponse{ContentType: "application/json", Metrics: content}, nil
}