Skip to content

Commit

Permalink
Merge pull request #94 from cloudflare/OBS-996
Browse files Browse the repository at this point in the history
Simplify configuration
  • Loading branch information
prymitive committed Dec 18, 2021
2 parents 507517a + 60bd0db commit db87474
Show file tree
Hide file tree
Showing 27 changed files with 323 additions and 403 deletions.
20 changes: 20 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,25 @@
# Changelog

## v0.5.0

### Added

- Added `alerts/for` check that will look for invalid `for` values in alerting rules.
This check is enabled by default.

### Changed

- `comparison` check is now enabled by default and require no configuration.
Remove `comparison{ ... }` blocks from pint config file when upgrading.
- `template` check is now enabled by default and require no configuration.
Remove `template{ ... }` blocks from pint config file when upgrading.
- `rate` check is now enabled by default for all configured Prometheus servers.
Remove `rate{ ... }` blocks from pint config file when upgrading.
- `series` check is now enabled by default for all configured Prometheus servers.
Remove `series{ ... }` blocks from pint config file when upgrading.
- `vector_matching` check is now enabled by default for all configured Prometheus servers.
Remove `vector_matching{ ... }` blocks from pint config file when upgrading.

## v0.4.4

### Added
Expand Down
4 changes: 1 addition & 3 deletions cmd/pint/tests/0007_alerts.txt
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ rules/0002.yml:11: using $value in labels will generate a new alert on every val
rules/0002.yml:12: using .Value in labels will generate a new alert on every value change, move it to annotations (alerts/template)
val: '{{ .Value|humanizeDuration }}'

level=info msg="Problems found" Bug=10 Warning=3
level=info msg="Problems found" Bug=5 Fatal=4 Warning=4
level=fatal msg="Fatal error" error="problems found"
-- rules/0001.yml --
- alert: Always
Expand Down Expand Up @@ -101,6 +101,4 @@ rule {
value = "critical|warning|info"
required = true
}
comparison {}
template {}
}
9 changes: 6 additions & 3 deletions cmd/pint/tests/0011_ignore_rules.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,13 @@ rules/1.yaml:16: job label is required and should be preserved when aggregating
rules/1.yaml:22: syntax error: unexpected right parenthesis ')' (promql/syntax)
expr: sum(errors_total) by )

rules/1.yaml:33: alert query doesn't have any condition, it will always fire if the metric exists (promql/comparison)
expr: sum(errors_total) without(job)

rules/1.yaml:33: job label is required and should be preserved when aggregating "^.+$" rules, remove job from without() (promql/without)
expr: sum(errors_total) without(job)

level=info msg="Problems found" Fatal=2 Warning=2
level=info msg="Problems found" Fatal=2 Warning=3
level=fatal msg="Fatal error" error="problems found"
-- rules/1.yaml --
- record: disabled
Expand Down Expand Up @@ -45,11 +48,11 @@ level=fatal msg="Fatal error" error="problems found"

- alert: disabled
# pint disable promql/without(job:true)
expr: sum(errors_total) without(job)
expr: sum(errors_total) without(job) > 0

- alert: disabled
# pint disable promql/without
expr: sum(errors_total) without(job)
expr: sum(errors_total) without(job) > 0

- alert: active
expr: sum(errors_total) without(job)
Expand Down
3 changes: 0 additions & 3 deletions cmd/pint/tests/0012_issue_20.txt
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,4 @@ rule {
step = "1m"
resolve = "5m"
}
# Check if '{{ $value }}'/'{{ .Value }}' is used in labels
# https://www.robustperception.io/dont-put-the-value-in-alert-labels
template {}
}
7 changes: 5 additions & 2 deletions cmd/pint/tests/0018_match_alerting.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,12 @@ cmp stderr stderr.txt
level=info msg="Loading configuration file" path=.pint.hcl
level=info msg="File parsed" path=rules/0001.yml rules=2
level=debug msg="Found recording rule" lines=1-2 path=rules/0001.yml record=colo:recording
level=debug msg="Configured checks for rule" enabled=["promql/syntax"] path=rules/0001.yml rule=colo:recording
level=debug msg="Configured checks for rule" enabled=["promql/syntax","alerts/for","promql/comparison","alerts/template"] path=rules/0001.yml rule=colo:recording
level=debug msg="Found alerting rule" alert=colo:alerting lines=4-5 path=rules/0001.yml
level=debug msg="Configured checks for rule" enabled=["promql/syntax","promql/without(job:true)","promql/by(job:true)"] path=rules/0001.yml rule=colo:alerting
level=debug msg="Configured checks for rule" enabled=["promql/syntax","alerts/for","promql/comparison","alerts/template","promql/without(job:true)","promql/by(job:true)"] path=rules/0001.yml rule=colo:alerting
rules/0001.yml:5: alert query doesn't have any condition, it will always fire if the metric exists (promql/comparison)
expr: sum(bar) without(job)

rules/0001.yml:5: job label is required and should be preserved when aggregating "^.+$" rules, remove job from without() (promql/without)
expr: sum(bar) without(job)

Expand Down
6 changes: 3 additions & 3 deletions cmd/pint/tests/0019_match_recording.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@ cmp stderr stderr.txt
level=info msg="Loading configuration file" path=.pint.hcl
level=info msg="File parsed" path=rules/0001.yml rules=2
level=debug msg="Found recording rule" lines=1-2 path=rules/0001.yml record=colo:recording
level=debug msg="Configured checks for rule" enabled=["promql/syntax","promql/without(job:true)","promql/by(job:true)"] path=rules/0001.yml rule=colo:recording
level=debug msg="Configured checks for rule" enabled=["promql/syntax","alerts/for","promql/comparison","alerts/template","promql/without(job:true)","promql/by(job:true)"] path=rules/0001.yml rule=colo:recording
level=debug msg="Found alerting rule" alert=colo:alerting lines=4-5 path=rules/0001.yml
level=debug msg="Configured checks for rule" enabled=["promql/syntax"] path=rules/0001.yml rule=colo:alerting
level=debug msg="Configured checks for rule" enabled=["promql/syntax","alerts/for","promql/comparison","alerts/template"] path=rules/0001.yml rule=colo:alerting
rules/0001.yml:2: job label is required and should be preserved when aggregating "^.+$" rules, remove job from without() (promql/without)
expr: sum(foo) without(job)

Expand All @@ -17,7 +17,7 @@ rules/0001.yml:2: job label is required and should be preserved when aggregating
expr: sum(foo) without(job)

- alert: "colo:alerting"
expr: sum(bar) without(job)
expr: sum(bar) without(job) > 0

-- .pint.hcl --
rule {
Expand Down
7 changes: 5 additions & 2 deletions cmd/pint/tests/0020_ignore_kind.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,15 @@ cmp stderr stderr.txt
level=info msg="Loading configuration file" path=.pint.hcl
level=info msg="File parsed" path=rules/0001.yml rules=2
level=debug msg="Found recording rule" lines=1-2 path=rules/0001.yml record=colo:recording
level=debug msg="Configured checks for rule" enabled=["promql/syntax","promql/without(job:true)","promql/by(job:true)"] path=rules/0001.yml rule=colo:recording
level=debug msg="Configured checks for rule" enabled=["promql/syntax","alerts/for","promql/comparison","alerts/template","promql/without(job:true)","promql/by(job:true)"] path=rules/0001.yml rule=colo:recording
level=debug msg="Found alerting rule" alert=colo:alerting lines=4-5 path=rules/0001.yml
level=debug msg="Configured checks for rule" enabled=["promql/syntax"] path=rules/0001.yml rule=colo:alerting
level=debug msg="Configured checks for rule" enabled=["promql/syntax","alerts/for","promql/comparison","alerts/template"] path=rules/0001.yml rule=colo:alerting
rules/0001.yml:2: job label is required and should be preserved when aggregating "^.+$" rules, remove job from without() (promql/without)
expr: sum(foo) without(job)

rules/0001.yml:5: alert query doesn't have any condition, it will always fire if the metric exists (promql/comparison)
expr: sum(bar) without(job)

-- rules/0001.yml --
- record: "colo:recording"
expr: sum(foo) without(job)
Expand Down
100 changes: 13 additions & 87 deletions docs/CONFIGURATION.md
Original file line number Diff line number Diff line change
Expand Up @@ -337,31 +337,27 @@ For `irate()` function:
- If duration is less than 2x `scrape_interval` it will report a bug.
- If duration is between 2x and 3x `scrape_interval` it will report a warning.

Syntax:

```JS
rate {}
```
This check is enabled by default for all configured Prometheus servers.

Example:

```JS
prometheus "prod" {
uri = "https://prometheus-prod.example.com"
timeout = "60s"
paths = [
"rules/prod/.*",
"rules/common/.*",
]
}

prometheus "dev" {
uri = "https://prometheus-dev.example.com"
timeout = "30s"
}

rule {
match {
kind = "recording"
}

rate {}
paths = [
"rules/dev/.*",
"rules/common/.*",
]
}
```

Expand Down Expand Up @@ -419,27 +415,7 @@ like `error_count > 10`, so we only get `error_count` series if the value
is above 10. If we would remove `> 10` part query would always return `error_count`
and so it would always trigger an alert.

Syntax:

```JS
comparison {
severity = "bug|warning|info"
}
```

- `severity` - set custom severity for reported issues, defaults to a bug.

Example:

```
rule {
match {
kind = "alerting"
}
comparison {}
}
```

This check is enabled by default and doesn't require any configuration.

## Cost

Expand Down Expand Up @@ -515,15 +491,7 @@ Let's say we have a rule this query: `sum(my_metric{foo="bar"}) > 10`.
This checks would query all configured server for the existence of
`my_metric{foo="bar"}` series and report a warning if it's missing.

Syntax:

```JS
series {
severity = "bug|warning|info"
}
```

- `severity` - set custom severity for reported issues, defaults to a warning.
This check is enabled by default for all configured Prometheus servers.

Example:

Expand All @@ -537,14 +505,6 @@ prometheus "prod" {
uri = "https://prometheus-prod.example.com"
timeout = "30s"
}

rule {
match {
kind = "recording"
}

series {}
}
```

## Reject
Expand Down Expand Up @@ -622,29 +582,7 @@ the value of any annotation can change between alert evaluations.
See [this blog post](https://www.robustperception.io/dont-put-the-value-in-alert-labels)
for more details.

Syntax:

```JS
template {
severity = "bug|warning|info"
}
```

- `severity` - set custom severity for reported issues, defaults to a bug.

Example:

```JS
rule {
match {
kind = "alerting"
}

template {
severity = "fatal"
}
}
```
This check is enabled by default and doesn't require any configuration.

## Vector Matching

Expand Down Expand Up @@ -681,15 +619,7 @@ http_errors / ignoring(instance) cluster:http_errors
This check aims to find all queries that using vector matching where both sides
of the query have different sets of labels causing no results to be returned.

Syntax:

```JS
vector_matching {
severity = "bug|warning|info"
}
```

- `severity` - set custom severity for reported issues, defaults to a warning.
This check is enabled by default for all configured Prometheus servers.

Example:

Expand All @@ -703,10 +633,6 @@ prometheus "prod" {
uri = "https://prometheus-prod.example.com"
timeout = "30s"
}

rule {
vector_matching {}
}
```

# Ignoring selected lines or files
Expand Down
3 changes: 0 additions & 3 deletions docs/examples/config.hcl
Original file line number Diff line number Diff line change
Expand Up @@ -64,9 +64,6 @@ rule {
step = "1m"
resolve = "5m"
}

# Check if templates used in annotations and labels are valid.
template {}
}

rule {
Expand Down
54 changes: 54 additions & 0 deletions internal/checks/alerts_for.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
package checks

import (
"fmt"

"github.com/cloudflare/pint/internal/parser"
"github.com/prometheus/common/model"
)

const (
AlertForCheckName = "alerts/for"
)

func NewAlertsForCheck() AlertsForChecksFor {
return AlertsForChecksFor{}
}

type AlertsForChecksFor struct {
}

func (c AlertsForChecksFor) String() string {
return AlertForCheckName
}

func (c AlertsForChecksFor) Check(rule parser.Rule) (problems []Problem) {
if rule.AlertingRule == nil || rule.AlertingRule.For == nil {
return
}

d, err := model.ParseDuration(rule.AlertingRule.For.Value.Value)
if err != nil {
problems = append(problems, Problem{
Fragment: rule.AlertingRule.For.Value.Value,
Lines: rule.AlertingRule.For.Lines(),
Reporter: AlertForCheckName,
Text: fmt.Sprintf("invalid duration: %s", err),
Severity: Bug,
})
return
}

if d == 0 {
problems = append(problems, Problem{
Fragment: rule.AlertingRule.For.Value.Value,
Lines: rule.AlertingRule.For.Lines(),
Reporter: AlertForCheckName,
Text: fmt.Sprintf("%q is the default value of %q, consider removing this line",
rule.AlertingRule.For.Value.Value, rule.AlertingRule.For.Key.Value),
Severity: Information,
})
}

return
}
Loading

0 comments on commit db87474

Please sign in to comment.