Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[v9.5.x] Alerting: Fix state manager to not keep datasource_uid and ref_id labels in state after Error #77391

Merged
merged 2 commits into from
Nov 13, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
21 changes: 21 additions & 0 deletions pkg/services/ngalert/state/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,27 @@ func (st *Manager) setNextState(ctx context.Context, alertRule *ngModels.AlertRu
// Add the instance to the log context to help correlate log lines for a state
logger = logger.New("instance", result.Instance)

// if the current state is Error but the result is different, then we need o clean up the extra labels
// that were added after the state key was calculated
// https://github.com/grafana/grafana/blob/1df4d332c982dc5e394201bb2ef35b442727ce63/pkg/services/ngalert/state/state.go#L298-L311
// Usually, it happens in the case of classic conditions when the evalResult does not have labels.
//
// This is temporary change to make sure that the labels are not persistent in the state after it was in Error state
// TODO yuri. Remove it in https://github.com/grafana/grafana/pull/68142
if currentState.State == eval.Error && result.State != eval.Error {
// This is possible because state was updated after the CacheID was calculated.
_, curOk := currentState.Labels["ref_id"]
_, resOk := result.Instance["ref_id"]
if curOk && !resOk {
delete(currentState.Labels, "ref_id")
}
_, curOk = currentState.Labels["datasource_uid"]
_, resOk = result.Instance["datasource_uid"]
if curOk && !resOk {
delete(currentState.Labels, "datasource_uid")
}
}

switch result.State {
case eval.Normal:
logger.Debug("Setting next state", "handler", "resultNormal")
Expand Down
83 changes: 83 additions & 0 deletions pkg/services/ngalert/state/manager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2251,6 +2251,89 @@ func TestProcessEvalResults(t *testing.T) {
},
},
},
{
desc: "classic condition, execution Error as Error (alerting -> query error -> alerting)",
alertRule: &models.AlertRule{
OrgID: 1,
Title: "test_title",
UID: "test_alert_rule_uid",
NamespaceUID: "test_namespace_uid",
Annotations: map[string]string{},
Labels: map[string]string{"label": "test"},
Data: []models.AlertQuery{
{
RefID: "A",
DatasourceUID: "test-datasource-uid",
},
},
IntervalSeconds: 10,
ExecErrState: models.ErrorErrState,
},
expectedAnnotations: 2,
evalResults: []eval.Results{
{
eval.Result{
Instance: data.Labels{},
State: eval.Alerting,
EvaluatedAt: evaluationTime,
EvaluationDuration: evaluationDuration,
},
eval.Result{
Instance: data.Labels{},
State: eval.Error,
Error: expr.QueryError{
RefID: "A",
Err: errors.New("this is an error"),
},
EvaluatedAt: evaluationTime.Add(10 * time.Second),
EvaluationDuration: evaluationDuration,
},
eval.Result{
Instance: data.Labels{},
State: eval.Alerting,
EvaluatedAt: evaluationTime.Add(20 * time.Second),
EvaluationDuration: evaluationDuration,
},
},
},
expectedStates: map[string]*state.State{
`[["__alert_rule_namespace_uid__","test_namespace_uid"],["__alert_rule_uid__","test_alert_rule_uid"],["alertname","test_title"],["label","test"]]`: {
AlertRuleUID: "test_alert_rule_uid",
OrgID: 1,
CacheID: `[["__alert_rule_namespace_uid__","test_namespace_uid"],["__alert_rule_uid__","test_alert_rule_uid"],["alertname","test_title"],["label","test"]]`,
Labels: data.Labels{
"__alert_rule_namespace_uid__": "test_namespace_uid",
"__alert_rule_uid__": "test_alert_rule_uid",
"alertname": "test_title",
"label": "test",
},
Values: make(map[string]float64),
State: eval.Alerting,
Results: []state.Evaluation{
{
EvaluationTime: evaluationTime,
EvaluationState: eval.Alerting,
Values: make(map[string]*float64),
},
{
EvaluationTime: evaluationTime.Add(10 * time.Second),
EvaluationState: eval.Error,
Values: make(map[string]*float64),
},
{
EvaluationTime: evaluationTime.Add(20 * time.Second),
EvaluationState: eval.Alerting,
Values: make(map[string]*float64),
},
},
StartsAt: evaluationTime.Add(20 * time.Second),
EndsAt: evaluationTime.Add(110 * time.Second),
LastEvaluationTime: evaluationTime.Add(20 * time.Second),
EvaluationDuration: evaluationDuration,
Annotations: map[string]string{},
},
},
},
}

for _, tc := range testCases {
Expand Down