Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[v10.2.x] Unified Alerting: Set max_attempts to 1 by default #79103

Merged
merged 1 commit into from Dec 5, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 2 additions & 2 deletions conf/defaults.ini
Expand Up @@ -1125,8 +1125,8 @@ execute_alerts = true
# The timeout string is a possibly signed sequence of decimal numbers, followed by a unit suffix (ms, s, m, h, d), e.g. 30s or 1m.
evaluation_timeout = 30s

# Number of times we'll attempt to evaluate an alert rule before giving up on that evaluation. This option has a legacy version in the `[alerting]` section that takes precedence.
max_attempts = 3
# Number of times we'll attempt to evaluate an alert rule before giving up on that evaluation. The default value is 1.
max_attempts = 1

# Minimum interval to enforce between rule evaluations. Rules will be adjusted if they are less than this value or if they are not a multiple of the scheduler interval (10s). Higher values can help with resource management as we'll schedule fewer evaluations over time. This option has a legacy version in the `[alerting]` section that takes precedence.
# The interval string is a possibly signed sequence of decimal numbers, followed by a unit suffix (ms, s, m, h, d), e.g. 30s or 1m.
Expand Down
4 changes: 2 additions & 2 deletions conf/sample.ini
Expand Up @@ -1073,8 +1073,8 @@
# The timeout string is a possibly signed sequence of decimal numbers, followed by a unit suffix (ms, s, m, h, d), e.g. 30s or 1m.
;evaluation_timeout = 30s

# Number of times we'll attempt to evaluate an alert rule before giving up on that evaluation. This option has a legacy version in the `[alerting]` section that takes precedence.
;max_attempts = 3
# Number of times we'll attempt to evaluate an alert rule before giving up on that evaluation. The default value is 1.
;max_attempts = 1

# Minimum interval to enforce between rule evaluations. Rules will be adjusted if they are less than this value or if they are not a multiple of the scheduler interval (10s). Higher values can help with resource management as we'll schedule fewer evaluations over time. This option has a legacy version in the `[alerting]` section that takes precedence.
# The interval string is a possibly signed sequence of decimal numbers, followed by a unit suffix (ms, s, m, h, d), e.g. 30s or 1m.
Expand Down
2 changes: 1 addition & 1 deletion docs/sources/setup-grafana/configure-grafana/_index.md
Expand Up @@ -1582,7 +1582,7 @@ The timeout string is a possibly signed sequence of decimal numbers, followed by

### max_attempts

Sets a maximum number of times we'll attempt to evaluate an alert rule before giving up on that evaluation. The default value is `3`. This option has a [legacy version in the alerting section]({{< relref "#max_attempts-1" >}}) that takes precedence.
Sets a maximum number of times we'll attempt to evaluate an alert rule before giving up on that evaluation. The default value is `1`.

### min_interval

Expand Down
12 changes: 2 additions & 10 deletions pkg/setting/setting_unified_alerting.go
Expand Up @@ -47,7 +47,7 @@ const (
evaluatorDefaultEvaluationTimeout = 30 * time.Second
schedulerDefaultAdminConfigPollInterval = time.Minute
schedulereDefaultExecuteAlerts = true
schedulerDefaultMaxAttempts = 3
schedulerDefaultMaxAttempts = 1
schedulerDefaultLegacyMinInterval = 1
screenshotsDefaultCapture = false
screenshotsDefaultCaptureTimeout = 10 * time.Second
Expand Down Expand Up @@ -288,15 +288,7 @@ func (cfg *Cfg) ReadUnifiedAlertingSettings(iniFile *ini.File) error {
}
uaCfg.EvaluationTimeout = uaEvaluationTimeout

uaMaxAttempts := ua.Key("max_attempts").MustInt64(schedulerDefaultMaxAttempts)
if uaMaxAttempts == schedulerDefaultMaxAttempts { // unified option or equals the default
legacyMaxAttempts := alerting.Key("max_attempts").MustInt64(schedulerDefaultMaxAttempts)
if legacyMaxAttempts != schedulerDefaultMaxAttempts {
cfg.Logger.Warn("falling back to legacy setting of 'max_attempts'; please use the configuration option in the `unified_alerting` section if Grafana 8 alerts are enabled.")
}
uaMaxAttempts = legacyMaxAttempts
}
uaCfg.MaxAttempts = uaMaxAttempts
uaCfg.MaxAttempts = ua.Key("max_attempts").MustInt64(schedulerDefaultMaxAttempts)

uaCfg.BaseInterval = SchedulerBaseInterval

Expand Down
9 changes: 4 additions & 5 deletions pkg/setting/setting_unified_alerting_test.go
Expand Up @@ -110,20 +110,19 @@ func TestUnifiedAlertingSettings(t *testing.T) {
desc: "when the unified options equal the defaults, it should apply the legacy ones",
unifiedAlertingOptions: map[string]string{
"admin_config_poll_interval": "120s",
"max_attempts": strconv.FormatInt(schedulerDefaultMaxAttempts, 10),
"min_interval": SchedulerBaseInterval.String(),
"execute_alerts": strconv.FormatBool(schedulereDefaultExecuteAlerts),
"evaluation_timeout": evaluatorDefaultEvaluationTimeout.String(),
},
alertingOptions: map[string]string{
"max_attempts": "12",
"max_attempts": "1",
"min_interval_seconds": "120",
"execute_alerts": "true",
"evaluation_timeout_seconds": "160",
},
verifyCfg: func(t *testing.T, cfg Cfg) {
require.Equal(t, 120*time.Second, cfg.UnifiedAlerting.AdminConfigPollInterval)
require.Equal(t, int64(12), cfg.UnifiedAlerting.MaxAttempts)
require.Equal(t, int64(1), cfg.UnifiedAlerting.MaxAttempts)
require.Equal(t, 120*time.Second, cfg.UnifiedAlerting.MinInterval)
require.Equal(t, true, cfg.UnifiedAlerting.ExecuteAlerts)
require.Equal(t, 160*time.Second, cfg.UnifiedAlerting.EvaluationTimeout)
Expand Down Expand Up @@ -164,14 +163,14 @@ func TestUnifiedAlertingSettings(t *testing.T) {
"evaluation_timeout": "invalid",
},
alertingOptions: map[string]string{
"max_attempts": "12",
"max_attempts": "1",
"min_interval_seconds": "120",
"execute_alerts": "false",
"evaluation_timeout_seconds": "160",
},
verifyCfg: func(t *testing.T, cfg Cfg) {
require.Equal(t, alertmanagerDefaultConfigPollInterval, cfg.UnifiedAlerting.AdminConfigPollInterval)
require.Equal(t, int64(12), cfg.UnifiedAlerting.MaxAttempts)
require.Equal(t, int64(1), cfg.UnifiedAlerting.MaxAttempts)
require.Equal(t, 120*time.Second, cfg.UnifiedAlerting.MinInterval)
require.Equal(t, false, cfg.UnifiedAlerting.ExecuteAlerts)
require.Equal(t, 160*time.Second, cfg.UnifiedAlerting.EvaluationTimeout)
Expand Down