Skip to content

Commit

Permalink
fix(inputs.cloudwatch): Option to produce dense metrics (#15317)
Browse files Browse the repository at this point in the history
  • Loading branch information
powersj committed May 15, 2024
1 parent c3c6189 commit 1a00a48
Show file tree
Hide file tree
Showing 4 changed files with 132 additions and 17 deletions.
67 changes: 57 additions & 10 deletions plugins/inputs/cloudwatch/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,12 +50,12 @@ See the [CONFIGURATION.md][CONFIGURATION.md] for more details.
# role_session_name = ""
# profile = ""
# shared_credential_file = ""
## If you are using CloudWatch cross-account observability, you can
## set IncludeLinkedAccounts to true in a monitoring account

## If you are using CloudWatch cross-account observability, you can
## set IncludeLinkedAccounts to true in a monitoring account
## and collect metrics from the linked source accounts
# include_linked_accounts = false

## Endpoint to make request against, the correct endpoint is automatically
## determined and this option should only be set if you wish to override the
## default.
Expand Down Expand Up @@ -102,6 +102,13 @@ See the [CONFIGURATION.md][CONFIGURATION.md] for more details.
## Metric Statistic Namespaces (required)
namespaces = ["AWS/ELB"]

## Metric Format
## This determines the format of the produced metrics. 'sparse', the default,
## will produce a unique field for each statistic. 'dense' will report each
## statistic in a field named after it (e.g. sum) and add a metric_name tag
## holding the AWS metric name. See the plugin README for examples.
# metric_format = "sparse"

## Maximum requests per second. Note that the global default AWS rate limit
## is 50 reqs/sec, so if you define multiple namespaces, these should add up
## to a maximum of 50.
Expand Down Expand Up @@ -212,15 +219,53 @@ but will output five metrics timestamped one minute apart.
## Metrics

Each CloudWatch Namespace monitored records a measurement with fields for each
available Metric Statistic. Namespace and Metrics are represented in [snake
available Metric Statistic. Namespace and Metrics are represented in [snake
case](https://en.wikipedia.org/wiki/Snake_case)

### Sparse Metrics

By default, metrics generated by this plugin are sparse. Use the `metric_format`
option to override this setting.

Sparse metrics produce a set of fields for every AWS Metric.

- cloudwatch_{namespace}
- Fields
- {metric}_sum (metric Sum value)
- {metric}_average (metric Average value)
- {metric}_minimum (metric Minimum value)
- {metric}_maximum (metric Maximum value)
- {metric}_sample_count (metric SampleCount value)

For example:

```text
cloudwatch_aws_usage,class=None,resource=GetSecretValue,service=Secrets\ Manager,type=API call_count_maximum=1,call_count_minimum=1,call_count_sum=8,call_count_sample_count=8,call_count_average=1 1715097720000000000
```

### Dense Metrics

Dense metrics are generated when `metric_format` is set to `dense`.

Dense metrics reuse the same set of fields for every AWS Metric and
distinguish between AWS Metrics using a tag called `metric_name` that holds
the AWS Metric name:

- cloudwatch_{namespace}
- {metric}_sum (metric Sum value)
- {metric}_average (metric Average value)
- {metric}_minimum (metric Minimum value)
- {metric}_maximum (metric Maximum value)
- {metric}_sample_count (metric SampleCount value)
- Tags
- metric_name (AWS Metric name)
- Fields
- sum (metric Sum value)
- average (metric Average value)
- minimum (metric Minimum value)
- maximum (metric Maximum value)
- sample_count (metric SampleCount value)

For example:

```text
cloudwatch_aws_usage,class=None,resource=GetSecretValue,service=Secrets\ Manager,metric_name=call_count,type=API sum=6,sample_count=6,average=1,maximum=1,minimum=1 1715097840000000000
```

### Tags

Expand Down Expand Up @@ -274,6 +319,8 @@ aws cloudwatch get-metric-data \

## Example Output

See the discussion above about sparse vs dense metrics for more details.

```text
cloudwatch_aws_elb,load_balancer_name=p-example,region=us-east-1 latency_average=0.004810798017284538,latency_maximum=0.1100282669067383,latency_minimum=0.0006084442138671875,latency_sample_count=4029,latency_sum=19.382705211639404 1459542420000000000
```
Expand Down
29 changes: 26 additions & 3 deletions plugins/inputs/cloudwatch/cloudwatch.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"fmt"
"net"
"net/http"
"regexp"
"strconv"
"strings"
"sync"
Expand Down Expand Up @@ -48,8 +49,8 @@ type CloudWatch struct {
RecentlyActive string `toml:"recently_active"`
BatchSize int `toml:"batch_size"`
IncludeLinkedAccounts bool `toml:"include_linked_accounts"`

Log telegraf.Logger `toml:"-"`
MetricFormat string `toml:"metric_format"`
Log telegraf.Logger `toml:"-"`

client cloudwatchClient
statFilter filter.Filter
Expand Down Expand Up @@ -98,6 +99,14 @@ func (c *CloudWatch) Init() error {
c.Namespaces = append(c.Namespaces, c.Namespace)
}

switch c.MetricFormat {
case "":
c.MetricFormat = "sparse"
case "dense", "sparse":
default:
return fmt.Errorf("invalid metric_format: %s", c.MetricFormat)
}

err := c.initializeCloudWatch()
if err != nil {
return err
Expand Down Expand Up @@ -462,7 +471,21 @@ func (c *CloudWatch) aggregateMetrics(
tags["region"] = c.Region

for i := range result.Values {
grouper.Add(namespace, tags, result.Timestamps[i], *result.Label, result.Values[i])
if c.MetricFormat == "dense" {
// Remove the IDs from the result ID to get the statistic type
// e.g. "average" from "average_0_0"
re := regexp.MustCompile(`_\d+_\d+$`)
statisticType := re.ReplaceAllString(*result.Id, "")

// Remove the statistic type from the label to get the AWS Metric name
// e.g. "CPUUtilization" from "CPUUtilization_average"
re = regexp.MustCompile(`_?` + regexp.QuoteMeta(statisticType) + `$`)
tags["metric_name"] = re.ReplaceAllString(*result.Label, "")

grouper.Add(namespace, tags, result.Timestamps[i], statisticType, result.Values[i])
} else {
grouper.Add(namespace, tags, result.Timestamps[i], *result.Label, result.Values[i])
}
}
}
}
Expand Down
38 changes: 38 additions & 0 deletions plugins/inputs/cloudwatch/cloudwatch_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,44 @@ func TestGather(t *testing.T) {
acc.AssertContainsTaggedFields(t, "cloudwatch_aws_elb", fields, tags)
}

// TestGatherDenseMetric gathers with metric_format set to "dense" and asserts
// that the cloudwatch_aws_elb measurement carries one field per statistic
// (minimum, maximum, average, sum, sample_count) together with a metric_name
// tag of "latency".
func TestGatherDenseMetric(t *testing.T) {
	interval := config.Duration(time.Minute)
	plugin := &CloudWatch{
		CredentialConfig: internalaws.CredentialConfig{
			Region: "us-east-1",
		},
		Namespace:    "AWS/ELB",
		Delay:        interval,
		Period:       interval,
		RateLimit:    200,
		BatchSize:    500,
		MetricFormat: "dense",
		Log:          testutil.Logger{},
	}
	require.NoError(t, plugin.Init())

	// The client is assigned only after Init has returned, so Gather runs
	// against mockGatherCloudWatchClient.
	plugin.client = &mockGatherCloudWatchClient{}

	var acc testutil.Accumulator
	require.NoError(t, acc.GatherError(plugin.Gather))

	// In dense format the field keys are the bare statistic names.
	expectedFields := map[string]interface{}{
		"minimum":      0.1,
		"maximum":      0.3,
		"average":      0.2,
		"sum":          123.0,
		"sample_count": 100.0,
	}
	expectedTags := map[string]string{
		"region":             "us-east-1",
		"load_balancer_name": "p-example1",
		"metric_name":        "latency",
	}

	require.True(t, acc.HasMeasurement("cloudwatch_aws_elb"))
	acc.AssertContainsTaggedFields(t, "cloudwatch_aws_elb", expectedFields, expectedTags)
}

func TestMultiAccountGather(t *testing.T) {
duration, _ := time.ParseDuration("1m")
internalDuration := config.Duration(duration)
Expand Down
15 changes: 11 additions & 4 deletions plugins/inputs/cloudwatch/sample.conf
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,12 @@
# role_session_name = ""
# profile = ""
# shared_credential_file = ""
## If you are using CloudWatch cross-account observability, you can
## set IncludeLinkedAccounts to true in a monitoring account

## If you are using CloudWatch cross-account observability, you can
## set IncludeLinkedAccounts to true in a monitoring account
## and collect metrics from the linked source accounts
# include_linked_accounts = false

## Endpoint to make request against, the correct endpoint is automatically
## determined and this option should only be set if you wish to override the
## default.
Expand Down Expand Up @@ -73,6 +73,13 @@
## Metric Statistic Namespaces (required)
namespaces = ["AWS/ELB"]

## Metric Format
## This determines the format of the produced metrics. 'sparse', the default,
## will produce a unique field for each statistic. 'dense' will report each
## statistic in a field named after it (e.g. sum) and add a metric_name tag
## holding the AWS metric name. See the plugin README for examples.
# metric_format = "sparse"

## Maximum requests per second. Note that the global default AWS rate limit
## is 50 reqs/sec, so if you define multiple namespaces, these should add up
## to a maximum of 50.
Expand Down

0 comments on commit 1a00a48

Please sign in to comment.