/
metricset.go
368 lines (307 loc) · 12.8 KB
/
metricset.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
// or more contributor license agreements. Licensed under the Elastic License;
// you may not use this file except in compliance with the Elastic License.
package metrics
import (
	"context"
	"errors"
	"fmt"
	"sort"
	"strings"
	"time"

	monitoring "cloud.google.com/go/monitoring/apiv3/v2"
	"github.com/golang/protobuf/ptypes/duration"
	"google.golang.org/api/iterator"
	"google.golang.org/api/option"
	"google.golang.org/genproto/googleapis/api/metric"
	monitoringpb "google.golang.org/genproto/googleapis/monitoring/v3"

	"github.com/elastic/beats/v7/libbeat/common/cfgwarn"
	"github.com/elastic/beats/v7/metricbeat/mb"
	"github.com/elastic/beats/v7/x-pack/metricbeat/module/gcp"
	"github.com/elastic/elastic-agent-libs/logp"
	"github.com/elastic/elastic-agent-libs/mapstr"
)
const (
	// MetricsetName is the name of this Metricset. It is used to register the
	// metricset under the gcp module and as the name of the requester logger.
	MetricsetName = "metrics"
)
// init registers the MetricSet with the central registry as soon as the program
// starts. The New function will be called later to instantiate an instance of
// the MetricSet for each host defined in the module's configuration. After the
// MetricSet has been created then Fetch will begin to be called periodically.
func init() {
	// MustAddMetricSet follows the Must convention: it panics if the
	// metricset cannot be registered (e.g. duplicate registration).
	mb.Registry.MustAddMetricSet(gcp.ModuleName, MetricsetName, New)
}
// MetricSet holds any configuration or state information. It must implement
// the mb.MetricSet interface. And this is best achieved by embedding
// mb.BaseMetricSet because it implements all of the required mb.MetricSet
// interface methods except for Fetch.
type MetricSet struct {
	mb.BaseMetricSet
	// config holds the module-level configuration (project, zone/region, credentials).
	config config
	// metricsMeta maps a fully-prefixed metric type to its sample period and
	// ingest delay, as fetched once at startup by metricDescriptor.
	metricsMeta map[string]metricMeta
	// requester issues the actual Stackdriver ListTimeSeries requests.
	requester *metricsRequester
	// MetricsConfig holds the per-service metric configurations unpacked from
	// the "metrics" config block.
	MetricsConfig []metricsConfig `config:"metrics" validate:"nonzero,required"`
}
// metricsConfig holds a configuration specific for metrics metricset.
type metricsConfig struct {
	ServiceName string `config:"service" validate:"required"`
	// ServiceMetricPrefix allows to specify the prefix string for MetricTypes
	// Stackdriver requires metrics to be prefixed with a common prefix.
	// This prefix changes based on the services the metrics belongs to.
	ServiceMetricPrefix string `config:"service_metric_prefix"`
	MetricTypes []string `config:"metric_types" validate:"required"`
	Aligner string `config:"aligner"`
}

// prefix returns the service metric prefix, falling back to the Google Cloud
// monitoring service prefix when not specified.
// The prefix is normalized to always end with '/'.
func (mc metricsConfig) prefix() string {
	p := mc.ServiceMetricPrefix
	if p == "" {
		// NOTE: fallback to Google Cloud prefix for backward compatibility
		// Prefix <service>.googleapis.com/ works only for Google Cloud metrics
		// List: https://cloud.google.com/monitoring/api/metrics_gcp
		p = mc.ServiceName + ".googleapis.com/"
	}
	// The trailing slash is considered part of the prefix; normalize so every
	// caller can rely on it being present.
	if strings.HasSuffix(p, "/") {
		return p
	}
	return p + "/"
}

// AddPrefixTo adds the required service metric prefix to the given metric
func (mc metricsConfig) AddPrefixTo(metric string) string {
	return mc.prefix() + metric
}

// RemovePrefixFrom removes service metric prefix from the given metric
func (mc metricsConfig) RemovePrefixFrom(metric string) string {
	return strings.TrimPrefix(metric, mc.prefix())
}
// metricMeta holds the per-metric-type metadata obtained from the
// ListMetricDescriptors API (see metricDescriptor): how often the metric is
// sampled and how long ingestion lags behind collection.
type metricMeta struct {
	samplePeriod time.Duration
	ingestDelay  time.Duration
}
// config holds the module-level configuration for the metrics metricset.
// Exactly one of CredentialsFilePath or CredentialsJSON must be set (enforced
// in New).
type config struct {
	Zone                string `config:"zone"`
	Region              string `config:"region"`
	ProjectID           string `config:"project_id" validate:"required"`
	ExcludeLabels       bool   `config:"exclude_labels"`
	CredentialsFilePath string `config:"credentials_file_path"`
	CredentialsJSON     string `config:"credentials_json"`
	// opt carries the client options derived from the credentials config;
	// populated by New and passed to the monitoring client.
	opt []option.ClientOption
	// period is the module collection period converted to a protobuf Duration;
	// populated by New.
	period *duration.Duration
}
// New creates a new instance of the MetricSet. New is responsible for unpacking
// any MetricSet specific configuration options if there are any.
// It validates the credentials and period configuration, fetches the metric
// descriptors (sample period / ingest delay) once, and builds the requester
// used by Fetch.
func New(base mb.BaseMetricSet) (mb.MetricSet, error) {
	cfgwarn.Beta("The gcp '%s' metricset is beta.", MetricsetName)

	m := &MetricSet{BaseMetricSet: base}
	if err := base.Module().UnpackConfig(&m.config); err != nil {
		return nil, err
	}

	// The "metrics" block is unpacked into a dedicated struct so the
	// per-service metricsConfig entries get their own validation.
	metricsConfigs := struct {
		Metrics []metricsConfig `config:"metrics" validate:"nonzero,required"`
	}{}
	if err := base.Module().UnpackConfig(&metricsConfigs); err != nil {
		return nil, err
	}
	m.MetricsConfig = metricsConfigs.Metrics

	// Exactly one credentials source must be configured.
	// BUGFIX: return nil instead of the half-built MetricSet on error;
	// a non-nil result alongside a non-nil error is misleading to callers.
	switch {
	case m.config.CredentialsFilePath != "" && m.config.CredentialsJSON != "":
		return nil, errors.New("both credentials_file_path and credentials_json specified, you must use only one of them")
	case m.config.CredentialsFilePath != "":
		m.config.opt = []option.ClientOption{option.WithCredentialsFile(m.config.CredentialsFilePath)}
	case m.config.CredentialsJSON != "":
		m.config.opt = []option.ClientOption{option.WithCredentialsJSON([]byte(m.config.CredentialsJSON))}
	default:
		return nil, errors.New("no credentials_file_path or credentials_json specified")
	}

	// NOTE(review): period is presumably consumed by the requester as the
	// aggregation window of the ListTimeSeries requests — confirm in requester.
	m.config.period = &duration.Duration{
		Seconds: int64(m.Module().Config().Period.Seconds()),
	}

	if err := validatePeriodForGCP(m.Module().Config().Period); err != nil {
		return nil, err
	}

	// Get ingest delay and sample period for each metric type
	ctx := context.Background()
	client, err := monitoring.NewMetricClient(ctx, m.config.opt...)
	if err != nil {
		return nil, fmt.Errorf("error creating Stackdriver client: %w", err)
	}

	m.metricsMeta, err = m.metricDescriptor(ctx, client)
	if err != nil {
		return nil, fmt.Errorf("error calling metricDescriptor function: %w", err)
	}

	m.requester = &metricsRequester{
		config: m.config,
		client: client,
		logger: logp.NewLogger(MetricsetName),
	}

	m.Logger().Warn("extra charges on Google Cloud API requests will be generated by this metricset")

	return m, nil
}
// Fetch methods implements the data gathering and data conversion to the right
// format. It publishes the event which is then forwarded to the output. In case
// of an error set the Error field of mb.Event or simply call report.Error().
func (m *MetricSet) Fetch(ctx context.Context, reporter mb.ReporterV2) (err error) {
	for _, sdc := range m.MetricsConfig {
		m.Logger().Debugf("metrics config: %v", sdc)
		// m.metricsMeta contains all metrics to be collected, not just the one in the current MetricsConfig.
		// this loop filters the metrics in metricsMeta so requester.Metrics can collect only the appropriate
		// ones.
		// See https://github.com/elastic/beats/pull/29514
		metricsToCollect := map[string]metricMeta{}
		for _, v := range sdc.MetricTypes {
			// Compute the prefixed identifier once instead of twice per metric type.
			id := sdc.AddPrefixTo(v)
			metricsToCollect[id] = m.metricsMeta[id]
		}
		responses, err := m.requester.Metrics(ctx, sdc.ServiceName, sdc.Aligner, metricsToCollect)
		if err != nil {
			err = fmt.Errorf("error trying to get metrics for project '%s' and zone '%s' or region '%s': %w", m.config.ProjectID, m.config.Zone, m.config.Region, err)
			m.Logger().Error(err)
			return err
		}
		events, err := m.eventMapping(ctx, responses, sdc)
		if err != nil {
			err = fmt.Errorf("eventMapping failed: %w", err)
			m.Logger().Error(err)
			return err
		}
		m.Logger().Debugf("Total %d of events are created for service name = %s and metric type = %s.", len(events), sdc.ServiceName, sdc.MetricTypes)
		for _, event := range events {
			reporter.Event(event)
		}
	}
	return nil
}
// eventMapping converts the raw time series returned by the requester into
// metricbeat events. Unless exclude_labels is set, a per-service metadata
// service is created to enrich events; otherwise the generic Stackdriver
// metadata service (with a nil time series) is used. Points are grouped by
// shared labels/timestamp, and each group becomes one event.
func (m *MetricSet) eventMapping(ctx context.Context, tss []timeSeriesWithAligner, sdc metricsConfig) ([]mb.Event, error) {
	e := newIncomingFieldExtractor(m.Logger(), sdc)
	var gcpService = gcp.NewStackdriverMetadataServiceForTimeSeries(nil)
	var err error
	if !m.config.ExcludeLabels {
		if gcpService, err = NewMetadataServiceForConfig(m.config, sdc.ServiceName); err != nil {
			return nil, fmt.Errorf("error trying to create metadata service: %w", err)
		}
	}
	tsGrouped := m.timeSeriesGrouped(ctx, gcpService, tss, e)
	// Create a single event for each group of data points that share common
	// attributes such as labels and timestamp.
	events := make([]mb.Event, 0)
	for _, groupedEvents := range tsGrouped {
		// Timestamp and labels are taken from the first point of the group;
		// by construction all points in a group share them.
		event := mb.Event{
			Timestamp: groupedEvents[0].Timestamp,
			ModuleFields: mapstr.M{
				"labels": groupedEvents[0].Labels,
			},
			MetricSetFields: mapstr.M{},
		}
		for _, singleEvent := range groupedEvents {
			_, _ = event.MetricSetFields.Put(singleEvent.Key, singleEvent.Value)
		}
		// Compute events additionally get ECS host.* fields derived from the
		// instance metrics; other services use the ECS fields as-is.
		if sdc.ServiceName == "compute" {
			event.RootFields = addHostFields(groupedEvents)
		} else {
			event.RootFields = groupedEvents[0].ECS
		}
		events = append(events, event)
	}
	return events, nil
}
// validatePeriodForGCP returns nil if the Period in the module config is in the accepted threshold
func validatePeriodForGCP(d time.Duration) error {
	// Periods below the Stackdriver sampling rate would produce empty results.
	if d.Seconds() >= gcp.MonitoringMetricsSamplingRate {
		return nil
	}
	return fmt.Errorf("period in Google Cloud config file cannot be set to less than %d seconds", gcp.MonitoringMetricsSamplingRate)
}
// Validate metrics related config: when an aligner is configured it must be
// one of the aligners supported by the gcp module. An empty aligner is valid
// (a default is applied downstream).
func (mc *metricsConfig) Validate() error {
	if mc.Aligner == "" {
		return nil
	}
	if _, ok := gcp.AlignersMapToGCP[mc.Aligner]; !ok {
		// Build the list of supported aligners only when needed, and sort it
		// so the error message is deterministic (map iteration order is random).
		gcpAlignerNames := make([]string, 0, len(gcp.AlignersMapToGCP))
		for k := range gcp.AlignersMapToGCP {
			gcpAlignerNames = append(gcpAlignerNames, k)
		}
		sort.Strings(gcpAlignerNames)
		return fmt.Errorf("the given aligner is not supported, please specify one of %s as aligner", gcpAlignerNames)
	}
	return nil
}
// metricDescriptor calls ListMetricDescriptorsRequest API to get metric metadata
// (sample period and ingest delay) of each given metric type.
// On error it returns the metadata collected so far together with the error.
func (m *MetricSet) metricDescriptor(ctx context.Context, client *monitoring.MetricClient) (map[string]metricMeta, error) {
	metricsWithMeta := make(map[string]metricMeta)
	// The request is reused across iterations; only Filter changes per metric type.
	req := &monitoringpb.ListMetricDescriptorsRequest{
		Name: "projects/" + m.config.ProjectID,
	}
	for _, sdc := range m.MetricsConfig {
		for _, mt := range sdc.MetricTypes {
			id := sdc.AddPrefixTo(mt)
			req.Filter = fmt.Sprintf(`metric.type = starts_with("%s")`, id)
			it := client.ListMetricDescriptors(ctx, req)
			// Idiomatic iterator loop: check for iterator.Done first, then
			// real errors, then process the descriptor.
			for {
				out, err := it.Next()
				if errors.Is(err, iterator.Done) {
					break
				}
				if err != nil {
					err = fmt.Errorf("could not make ListMetricDescriptors request for metric type %s: %w", mt, err)
					m.Logger().Error(err)
					return metricsWithMeta, err
				}
				metricsWithMeta = m.getMetadata(out, metricsWithMeta)
			}
			// NOTE: if a metric is not added to the metricsWithMeta map is not collected subsequently.
			// Such a case is an error, as the configuration is explicitly requesting a metric that the beat
			// is not able to collect, so we provide a logging statement for this behaviour.
			if _, ok := metricsWithMeta[id]; !ok {
				m.Logger().Errorf("%s metric descriptor is empty, this metric will not be collected", mt)
			}
		}
	}
	return metricsWithMeta, nil
}
// getMetadata records the sample period and ingest delay of the given metric
// descriptor into metricsWithMeta (keyed by metric type) and returns the map.
// Descriptors without metadata get a 60s sample period and zero ingest delay.
func (m *MetricSet) getMetadata(out *metric.MetricDescriptor, metricsWithMeta map[string]metricMeta) map[string]metricMeta {
	// Defaults: 60 second sample period, no ingest delay.
	sample := 60 * time.Second
	delay := time.Duration(0)

	if md := out.Metadata; md != nil {
		if md.SamplePeriod != nil {
			m.Logger().Debugf("For metric type %s: sample period = %s", out.Type, md.SamplePeriod)
			sample = time.Duration(md.SamplePeriod.Seconds) * time.Second
		}
		if md.IngestDelay != nil {
			m.Logger().Debugf("For metric type %s: ingest delay = %s", out.Type, md.IngestDelay)
			delay = time.Duration(md.IngestDelay.Seconds) * time.Second
		}
	}

	metricsWithMeta[out.Type] = metricMeta{samplePeriod: sample, ingestDelay: delay}
	return metricsWithMeta
}
// addHostFields derives ECS host.* root fields for compute instances from the
// grouped data points: host identity (host.id/host.name) from the cloud
// instance metadata, plus selected instance metrics mapped to ECS host metrics.
func addHostFields(groupedEvents []KeyValuePoint) mapstr.M {
	hostRootFields := groupedEvents[0].ECS
	// add host.id and host.name
	if hostID, err := groupedEvents[0].ECS.GetValue("cloud.instance.id"); err == nil {
		_, _ = hostRootFields.Put("host.id", hostID)
	}
	if hostName, err := groupedEvents[0].ECS.GetValue("cloud.instance.name"); err == nil {
		_, _ = hostRootFields.Put("host.name", hostName)
	}
	// BUGFIX: in ECS, traffic sent from the host is egress and traffic
	// received by the host is ingress; the previous table had the two
	// directions swapped for both bytes and packets.
	hostFieldTable := map[string]string{
		"instance.cpu.utilization.value":                "host.cpu.usage",
		"instance.network.sent_bytes_count.value":       "host.network.egress.bytes",
		"instance.network.received_bytes_count.value":   "host.network.ingress.bytes",
		"instance.network.sent_packets_count.value":     "host.network.egress.packets",
		"instance.network.received_packets_count.value": "host.network.ingress.packets",
		"instance.disk.read_bytes_count.value":          "host.disk.read.bytes",
		"instance.disk.write_bytes_count.value":         "host.disk.write.bytes",
	}
	for _, singleEvent := range groupedEvents {
		if hostMetricName, ok := hostFieldTable[singleEvent.Key]; ok {
			_, _ = hostRootFields.Put(hostMetricName, singleEvent.Value)
		}
	}
	return hostRootFields
}