Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[v8.1.x] Alerting: Fix alert flapping in the internal alertmanager #38829

Merged
merged 1 commit into from
Sep 2, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 3 additions & 1 deletion pkg/services/ngalert/state/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ import (
"github.com/grafana/grafana/pkg/services/ngalert/store"
)

var ResendDelay = 30 * time.Second

type Manager struct {
log log.Logger
metrics *metrics.Metrics
Expand All @@ -33,7 +35,7 @@ func NewManager(logger log.Logger, metrics *metrics.Metrics, ruleStore store.Rul
manager := &Manager{
cache: newCache(logger, metrics),
quit: make(chan struct{}),
ResendDelay: 1 * time.Minute, // TODO: make this configurable
ResendDelay: ResendDelay, // TODO: make this configurable
log: logger,
metrics: metrics,
ruleStore: ruleStore,
Expand Down
20 changes: 10 additions & 10 deletions pkg/services/ngalert/state/manager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ func TestProcessEvalResults(t *testing.T) {
},
},
StartsAt: evaluationTime,
EndsAt: evaluationTime.Add(20 * time.Second),
EndsAt: evaluationTime.Add(state.ResendDelay * 3),
LastEvaluationTime: evaluationTime,
EvaluationDuration: evaluationDuration,
Annotations: map[string]string{"annotation": "test"},
Expand Down Expand Up @@ -275,7 +275,7 @@ func TestProcessEvalResults(t *testing.T) {
},
},
StartsAt: evaluationTime.Add(1 * time.Minute),
EndsAt: evaluationTime.Add(1 * time.Minute).Add(time.Duration(20) * time.Second),
EndsAt: evaluationTime.Add(1 * time.Minute).Add(state.ResendDelay * 3),
LastEvaluationTime: evaluationTime.Add(1 * time.Minute),
EvaluationDuration: evaluationDuration,
Annotations: map[string]string{"annotation": "test"},
Expand Down Expand Up @@ -351,7 +351,7 @@ func TestProcessEvalResults(t *testing.T) {
},
},
StartsAt: evaluationTime.Add(80 * time.Second),
EndsAt: evaluationTime.Add(80 * time.Second).Add(1 * time.Minute),
EndsAt: evaluationTime.Add(80 * time.Second).Add(state.ResendDelay * 3),
LastEvaluationTime: evaluationTime.Add(80 * time.Second),
EvaluationDuration: evaluationDuration,
Annotations: map[string]string{"annotation": "test"},
Expand Down Expand Up @@ -414,7 +414,7 @@ func TestProcessEvalResults(t *testing.T) {
},
},
StartsAt: evaluationTime.Add(10 * time.Second),
EndsAt: evaluationTime.Add(10 * time.Second).Add(1 * time.Minute),
EndsAt: evaluationTime.Add(10 * time.Second).Add(state.ResendDelay * 3),
LastEvaluationTime: evaluationTime.Add(10 * time.Second),
EvaluationDuration: evaluationDuration,
Annotations: map[string]string{"annotation": "test"},
Expand Down Expand Up @@ -477,7 +477,7 @@ func TestProcessEvalResults(t *testing.T) {
},
},
StartsAt: evaluationTime,
EndsAt: evaluationTime.Add(1 * time.Minute),
EndsAt: evaluationTime.Add(state.ResendDelay * 3),
LastEvaluationTime: evaluationTime.Add(10 * time.Second),
EvaluationDuration: evaluationDuration,
Annotations: map[string]string{"annotation": "test"},
Expand Down Expand Up @@ -540,7 +540,7 @@ func TestProcessEvalResults(t *testing.T) {
},
},
StartsAt: evaluationTime.Add(10 * time.Second),
EndsAt: evaluationTime.Add(10 * time.Second).Add(20 * time.Second),
EndsAt: evaluationTime.Add(10 * time.Second).Add(state.ResendDelay * 3),
LastEvaluationTime: evaluationTime.Add(10 * time.Second),
EvaluationDuration: evaluationDuration,
Annotations: map[string]string{"annotation": "test"},
Expand Down Expand Up @@ -603,7 +603,7 @@ func TestProcessEvalResults(t *testing.T) {
},
},
StartsAt: evaluationTime.Add(10 * time.Second),
EndsAt: evaluationTime.Add(10 * time.Second).Add(20 * time.Second),
EndsAt: evaluationTime.Add(10 * time.Second).Add(state.ResendDelay * 3),
LastEvaluationTime: evaluationTime.Add(10 * time.Second),
EvaluationDuration: evaluationDuration,
Annotations: map[string]string{"annotation": "test"},
Expand Down Expand Up @@ -666,7 +666,7 @@ func TestProcessEvalResults(t *testing.T) {
},
},
StartsAt: evaluationTime.Add(10 * time.Second),
EndsAt: evaluationTime.Add(10 * time.Second).Add(20 * time.Second),
EndsAt: evaluationTime.Add(10 * time.Second).Add(state.ResendDelay * 3),
LastEvaluationTime: evaluationTime.Add(10 * time.Second),
EvaluationDuration: evaluationDuration,
Annotations: map[string]string{"annotation": "test"},
Expand Down Expand Up @@ -730,7 +730,7 @@ func TestProcessEvalResults(t *testing.T) {
},
},
StartsAt: evaluationTime.Add(10 * time.Second),
EndsAt: evaluationTime.Add(10 * time.Second).Add(1 * time.Minute),
EndsAt: evaluationTime.Add(10 * time.Second).Add(state.ResendDelay * 3),
LastEvaluationTime: evaluationTime.Add(10 * time.Second),
EvaluationDuration: evaluationDuration,
Annotations: map[string]string{"annotation": "test"},
Expand Down Expand Up @@ -794,7 +794,7 @@ func TestProcessEvalResults(t *testing.T) {
},
},
StartsAt: evaluationTime.Add(10 * time.Second),
EndsAt: evaluationTime.Add(10 * time.Second).Add(1 * time.Minute),
EndsAt: evaluationTime.Add(10 * time.Second).Add(state.ResendDelay * 3),
LastEvaluationTime: evaluationTime.Add(10 * time.Second),
EvaluationDuration: evaluationDuration,
Annotations: map[string]string{"annotation": "test"},
Expand Down
15 changes: 9 additions & 6 deletions pkg/services/ngalert/state/state.go
Original file line number Diff line number Diff line change
Expand Up @@ -151,12 +151,15 @@ func (a *State) TrimResults(alertRule *ngModels.AlertRule) {
a.Results = newResults
}

// setEndsAt sets the ending timestamp of the alert.
// The internal Alertmanager will use this time to know when it should automatically resolve the alert
// in case it hasn't received additional alerts. Under regular operations the scheduler will continue to send the
// alert with an updated EndsAt, if the alert is resolved then a last alert is sent with EndsAt = last evaluation time.
func (a *State) setEndsAt(alertRule *ngModels.AlertRule, result eval.Result) {
if int64(alertRule.For.Seconds()) > alertRule.IntervalSeconds {
// For is set and longer than IntervalSeconds
a.EndsAt = result.EvaluatedAt.Add(alertRule.For)
} else {
// For is not set or is less than or equal to IntervalSeconds
a.EndsAt = result.EvaluatedAt.Add(time.Duration(alertRule.IntervalSeconds*2) * time.Second)
ends := ResendDelay
if alertRule.IntervalSeconds > int64(ResendDelay.Seconds()) {
ends = time.Duration(alertRule.IntervalSeconds)
}

a.EndsAt = result.EvaluatedAt.Add(ends * 3)
}
81 changes: 36 additions & 45 deletions pkg/services/ngalert/state/state_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,8 @@ import (
"testing"
"time"

ngmodels "github.com/grafana/grafana/pkg/services/ngalert/models"

"github.com/grafana/grafana/pkg/services/ngalert/eval"
ngmodels "github.com/grafana/grafana/pkg/services/ngalert/models"

"github.com/stretchr/testify/assert"
)
Expand Down Expand Up @@ -114,87 +113,79 @@ func TestSetEndsAt(t *testing.T) {
testCases := []struct {
name string
expected time.Time
testState *State
testRule *ngmodels.AlertRule
testResult eval.Result
}{
{
name: "For: unset Interval: 10s EndsAt should be evaluation time + 2X IntervalSeconds",
expected: evaluationTime.Add(20 * time.Second),
testState: &State{},
name: "less than resend delay: for=unset,interval=10s - endsAt = resendDelay * 3",
expected: evaluationTime.Add(ResendDelay * 3),
testRule: &ngmodels.AlertRule{
IntervalSeconds: 10,
},
testResult: eval.Result{
EvaluatedAt: evaluationTime,
},
},
{
name: "For: 0s Interval: 10s EndsAt should be evaluation time + 2X IntervalSeconds",
expected: evaluationTime.Add(20 * time.Second),
testState: &State{},
name: "less than resend delay: for=0s,interval=10s - endsAt = resendDelay * 3",
expected: evaluationTime.Add(ResendDelay * 3),
testRule: &ngmodels.AlertRule{
For: 0 * time.Second,
IntervalSeconds: 10,
},
testResult: eval.Result{
EvaluatedAt: evaluationTime,
},
},
{
name: "For: 1s Interval: 10s EndsAt should be evaluation time + 2X IntervalSeconds",
expected: evaluationTime.Add(20 * time.Second),
testState: &State{},
name: "less than resend delay: for=10s,interval=10s - endsAt = resendDelay * 3",
expected: evaluationTime.Add(ResendDelay * 3),
testRule: &ngmodels.AlertRule{
For: 0 * time.Second,
For: 10 * time.Second,
IntervalSeconds: 10,
},
testResult: eval.Result{
EvaluatedAt: evaluationTime,
},
},
{
name: "For: 10s Interval: 10s EndsAt should be evaluation time + 2X IntervalSeconds",
expected: evaluationTime.Add(20 * time.Second),
testState: &State{},
name: "less than resend delay: for=10s,interval=20s - endsAt = resendDelay * 3",
expected: evaluationTime.Add(ResendDelay * 3),
testRule: &ngmodels.AlertRule{
For: 10 * time.Second,
IntervalSeconds: 10,
},
testResult: eval.Result{
EvaluatedAt: evaluationTime,
IntervalSeconds: 20,
},
},
{
name: "For: 11s Interval: 10s EndsAt should be evaluation time + For duration",
expected: evaluationTime.Add(11 * time.Second),
testState: &State{},
name: "more than resend delay: for=unset,interval=1m - endsAt = interval * 3",
expected: evaluationTime.Add(60 * 3),
testRule: &ngmodels.AlertRule{
For: 11 * time.Second,
IntervalSeconds: 10,
IntervalSeconds: 60,
},
testResult: eval.Result{
EvaluatedAt: evaluationTime,
},
{
name: "more than resend delay: for=0s,interval=1m - endsAt = resendDelay * 3",
expected: evaluationTime.Add(60 * 3),
testRule: &ngmodels.AlertRule{
For: 0 * time.Second,
IntervalSeconds: 60,
},
},
{
name: "For: 20s Interval: 10s EndsAt should be evaluation time + For duration",
expected: evaluationTime.Add(20 * time.Second),
testState: &State{},
name: "more than resend delay: for=1m,interval=5m - endsAt = interval * 3",
expected: evaluationTime.Add(300 * 3),
testRule: &ngmodels.AlertRule{
For: 20 * time.Second,
IntervalSeconds: 10,
For: 60 * time.Second,
IntervalSeconds: 300,
},
testResult: eval.Result{
EvaluatedAt: evaluationTime,
},
{
name: "more than resend delay: for=5m,interval=1m - endsAt = interval * 3",
expected: evaluationTime.Add(60 * 3),
testRule: &ngmodels.AlertRule{
For: 300 * time.Second,
IntervalSeconds: 60,
},
},
}

for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
tc.testState.setEndsAt(tc.testRule, tc.testResult)
assert.Equal(t, tc.expected, tc.testState.EndsAt)
s := &State{}
r := eval.Result{EvaluatedAt: evaluationTime}
s.setEndsAt(tc.testRule, r)
assert.Equal(t, tc.expected, s.EndsAt)
})
}
}