Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[v9.4.x] Alerting: Allow pausing alerts from provisioning #62492

Merged
merged 1 commit into from Jan 30, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
13 changes: 7 additions & 6 deletions conf/provisioning/alerting/sample.yaml
Expand Up @@ -11,7 +11,7 @@ apiVersion: 1
# folder: my_first_folder
# # <duration, required> interval of the rule group evaluation
# interval: 60s
# # <list, required> list of rules that are part of the rule group
# # <list, required> list of rules that are part of the rule group
# rules:
# # <string, required> unique identifier for the rule
# - uid: my_id_1
Expand Down Expand Up @@ -53,7 +53,7 @@ apiVersion: 1
# # <string> state of the alert rule when no data is returned
# # possible values: "NoData", "Alerting", "OK", default = NoData
# noDataState: Alerting
# # <string> state of the alert rule when the query execution
# # <string> state of the alert rule when the query execution
# # fails - possible values: "Error", "Alerting", "OK"
# # default = Alerting
# executionErrorState: Alerting
Expand All @@ -62,10 +62,11 @@ apiVersion: 1
# # <map<string, string>> map of strings to attach arbitrary custom data
# annotations:
# some_key: some_value
# # <map<string, string>> map of strings to filter and
# # <map<string, string>> map of strings to filter and
# # route alerts
# labels:
# team: sre_team_1
# isPaused: false

# # List of alert rule UIDs that should be deleted
# deleteRules:
Expand Down Expand Up @@ -103,7 +104,7 @@ apiVersion: 1
# # <list<string>> The labels by which incoming alerts are grouped together. For example,
# # multiple alerts coming in for cluster=A and alertname=LatencyHigh would
# # be batched into a single group.
# #
# #
# # To aggregate by all possible labels, use the special value '...' as
# # the sole label name, for example:
# # group_by: ['...']
Expand All @@ -127,7 +128,7 @@ apiVersion: 1
# mute_time_intervals:
# - abc
# # <duration> How long to initially wait to send a notification for a group
# # of alerts. Allows more initial alerts to be collected for the same group.
# # of alerts. Allows more initial alerts to be collected for the same group.
# # (Usually ~0s to few minutes), default = 30s
# group_wait: 30s
# # <duration> How long to wait before sending a notification about new alerts that
Expand All @@ -138,7 +139,7 @@ apiVersion: 1
# # been sent successfully for an alert. (Usually ~3h or more), default = 4h
# repeat_interval: 4h
# # <list> Zero or more child routes
# routes:
# routes:
# ...

# # List of orgIds that should be reset to the default policy
Expand Down
12 changes: 6 additions & 6 deletions pkg/services/ngalert/api/api_provisioning_test.go

Large diffs are not rendered by default.

15 changes: 11 additions & 4 deletions pkg/services/ngalert/api/tooling/api.json
Expand Up @@ -194,6 +194,9 @@
"for": {
"$ref": "#/definitions/Duration"
},
"isPaused": {
"type": "boolean"
},
"labels": {
"additionalProperties": {
"type": "string"
Expand Down Expand Up @@ -2276,6 +2279,10 @@
"format": "int64",
"type": "integer"
},
"isPaused": {
"example": false,
"type": "boolean"
},
"labels": {
"additionalProperties": {
"type": "string"
Expand Down Expand Up @@ -3277,6 +3284,7 @@
"type": "object"
},
"URL": {
"description": "The general form represented is:\n\n[scheme:][//[userinfo@]host][/]path[?query][#fragment]\n\nURLs that do not start with a slash after the scheme are interpreted as:\n\nscheme:opaque[?query][#fragment]\n\nNote that the Path field is stored in decoded form: /%47%6f%2f becomes /Go/.\nA consequence is that it is impossible to tell which slashes in the Path were\nslashes in the raw URL and which were %2f. This distinction is rarely important,\nbut when it is, the code should use RawPath, an optional field which only gets\nset if the default encoding is different from Path.\n\nURL's String method uses the EscapedPath method to obtain the path. See the\nEscapedPath method for more details.",
"properties": {
"ForceQuery": {
"type": "boolean"
Expand Down Expand Up @@ -3312,7 +3320,7 @@
"$ref": "#/definitions/Userinfo"
}
},
"title": "URL is a custom URL type that allows validation at configuration load time.",
"title": "A URL represents a parsed URL (technically, a URI reference).",
"type": "object"
},
"Userinfo": {
Expand Down Expand Up @@ -3616,7 +3624,6 @@
"type": "object"
},
"gettableAlert": {
"description": "GettableAlert gettable alert",
"properties": {
"annotations": {
"$ref": "#/definitions/labelSet"
Expand Down Expand Up @@ -3672,13 +3679,13 @@
"type": "object"
},
"gettableAlerts": {
"description": "GettableAlerts gettable alerts",
"items": {
"$ref": "#/definitions/gettableAlert"
},
"type": "array"
},
"gettableSilence": {
"description": "GettableSilence gettable silence",
"properties": {
"comment": {
"description": "comment",
Expand Down Expand Up @@ -3733,6 +3740,7 @@
"type": "array"
},
"integration": {
"description": "Integration integration",
"properties": {
"lastNotifyAttempt": {
"description": "A timestamp indicating the last attempt to deliver a notification regardless of the outcome.\nFormat: date-time",
Expand Down Expand Up @@ -3876,7 +3884,6 @@
"type": "array"
},
"postableSilence": {
"description": "PostableSilence postable silence",
"properties": {
"comment": {
"description": "comment",
Expand Down
Expand Up @@ -134,6 +134,8 @@ type ProvisionedAlertRule struct {
Labels map[string]string `json:"labels,omitempty"`
// readonly: true
Provenance models.Provenance `json:"provenance,omitempty"`
// example: false
IsPaused bool `json:"isPaused"`
}

func (a *ProvisionedAlertRule) UpstreamModel() (models.AlertRule, error) {
Expand All @@ -152,6 +154,7 @@ func (a *ProvisionedAlertRule) UpstreamModel() (models.AlertRule, error) {
For: time.Duration(a.For),
Annotations: a.Annotations,
Labels: a.Labels,
IsPaused: a.IsPaused,
}, nil
}

Expand All @@ -172,6 +175,7 @@ func NewAlertRule(rule models.AlertRule, provenance models.Provenance) Provision
Annotations: rule.Annotations,
Labels: rule.Labels,
Provenance: provenance,
IsPaused: rule.IsPaused,
}
}

Expand Down
57 changes: 34 additions & 23 deletions pkg/services/ngalert/api/tooling/post.json
Expand Up @@ -194,6 +194,9 @@
"for": {
"$ref": "#/definitions/Duration"
},
"isPaused": {
"type": "boolean"
},
"labels": {
"additionalProperties": {
"type": "string"
Expand All @@ -219,19 +222,7 @@
"type": "string"
}
},
"title": "AlertRuleExport is the provisioned export of models.AlertRule.",
"type": "object"
},
"AlertRuleFileExport": {
"properties": {
"groups": {
"items": {
"$ref": "#/definitions/AlertRuleGroupExport"
},
"type": "array"
}
},
"title": "AlertRuleFileExport is the provisioned export of multiple models.AlertRuleGroup.",
"title": "AlertRuleExport is the provisioned file export of models.AlertRule.",
"type": "object"
},
"AlertRuleGroup": {
Expand Down Expand Up @@ -277,7 +268,7 @@
"type": "array"
}
},
"title": "AlertRuleGroupExport is the provisioned export of models.AlertRuleGroup.",
"title": "AlertRuleGroupExport is the provisioned file export of AlertRuleGroupV1.",
"type": "object"
},
"AlertRuleGroupMetadata": {
Expand All @@ -289,6 +280,22 @@
},
"type": "object"
},
"AlertingFileExport": {
"properties": {
"apiVersion": {
"format": "int64",
"type": "integer"
},
"groups": {
"items": {
"$ref": "#/definitions/AlertRuleGroupExport"
},
"type": "array"
}
},
"title": "AlertingFileExport is the full provisioned file export.",
"type": "object"
},
"AlertingRule": {
"description": "adapted from cortex",
"properties": {
Expand Down Expand Up @@ -2272,6 +2279,10 @@
"format": "int64",
"type": "integer"
},
"isPaused": {
"example": false,
"type": "boolean"
},
"labels": {
"additionalProperties": {
"type": "string"
Expand Down Expand Up @@ -3510,6 +3521,7 @@
"type": "object"
},
"alertGroups": {
"description": "AlertGroups alert groups",
"items": {
"$ref": "#/definitions/alertGroup"
},
Expand Down Expand Up @@ -3675,7 +3687,6 @@
"type": "array"
},
"gettableSilence": {
"description": "GettableSilence gettable silence",
"properties": {
"comment": {
"description": "comment",
Expand Down Expand Up @@ -3724,13 +3735,13 @@
"type": "object"
},
"gettableSilences": {
"description": "GettableSilences gettable silences",
"items": {
"$ref": "#/definitions/gettableSilence"
},
"type": "array"
},
"integration": {
"description": "Integration integration",
"properties": {
"lastNotifyAttempt": {
"description": "A timestamp indicating the last attempt to deliver a notification regardless of the outcome.\nFormat: date-time",
Expand Down Expand Up @@ -3874,6 +3885,7 @@
"type": "array"
},
"postableSilence": {
"description": "PostableSilence postable silence",
"properties": {
"comment": {
"description": "comment",
Expand Down Expand Up @@ -3911,7 +3923,6 @@
"type": "object"
},
"receiver": {
"description": "Receiver receiver",
"properties": {
"active": {
"description": "active",
Expand Down Expand Up @@ -5772,9 +5783,9 @@
],
"responses": {
"200": {
"description": "AlertRuleFileExport",
"description": "AlertingFileExport",
"schema": {
"$ref": "#/definitions/AlertRuleFileExport"
"$ref": "#/definitions/AlertingFileExport"
}
},
"404": {
Expand Down Expand Up @@ -5908,9 +5919,9 @@
],
"responses": {
"200": {
"description": "AlertRuleExport",
"description": "AlertingFileExport",
"schema": {
"$ref": "#/definitions/AlertRuleExport"
"$ref": "#/definitions/AlertingFileExport"
}
},
"404": {
Expand Down Expand Up @@ -6157,9 +6168,9 @@
],
"responses": {
"200": {
"description": "AlertRuleGroupExport",
"description": "AlertingFileExport",
"schema": {
"$ref": "#/definitions/AlertRuleGroupExport"
"$ref": "#/definitions/AlertingFileExport"
}
},
"404": {
Expand Down
14 changes: 12 additions & 2 deletions pkg/services/ngalert/api/tooling/spec.json
Expand Up @@ -2806,6 +2806,9 @@
"for": {
"$ref": "#/definitions/Duration"
},
"isPaused": {
"type": "boolean"
},
"labels": {
"type": "object",
"additionalProperties": {
Expand Down Expand Up @@ -4903,6 +4906,10 @@
"type": "integer",
"format": "int64"
},
"isPaused": {
"type": "boolean",
"example": false
},
"labels": {
"type": "object",
"additionalProperties": {
Expand Down Expand Up @@ -5892,8 +5899,9 @@
}
},
"URL": {
"description": "The general form represented is:\n\n[scheme:][//[userinfo@]host][/]path[?query][#fragment]\n\nURLs that do not start with a slash after the scheme are interpreted as:\n\nscheme:opaque[?query][#fragment]\n\nNote that the Path field is stored in decoded form: /%47%6f%2f becomes /Go/.\nA consequence is that it is impossible to tell which slashes in the Path were\nslashes in the raw URL and which were %2f. This distinction is rarely important,\nbut when it is, the code should use RawPath, an optional field which only gets\nset if the default encoding is different from Path.\n\nURL's String method uses the EscapedPath method to obtain the path. See the\nEscapedPath method for more details.",
"type": "object",
"title": "URL is a custom URL type that allows validation at configuration load time.",
"title": "A URL represents a parsed URL (technically, a URI reference).",
"properties": {
"ForceQuery": {
"type": "boolean"
Expand Down Expand Up @@ -6129,6 +6137,7 @@
"$ref": "#/definitions/alertGroup"
},
"alertGroups": {
"description": "AlertGroups alert groups",
"type": "array",
"items": {
"$ref": "#/definitions/alertGroup"
Expand Down Expand Up @@ -6297,7 +6306,6 @@
"$ref": "#/definitions/gettableAlerts"
},
"gettableSilence": {
"description": "GettableSilence gettable silence",
"type": "object",
"required": [
"comment",
Expand Down Expand Up @@ -6347,6 +6355,7 @@
"$ref": "#/definitions/gettableSilence"
},
"gettableSilences": {
"description": "GettableSilences gettable silences",
"type": "array",
"items": {
"$ref": "#/definitions/gettableSilence"
Expand Down Expand Up @@ -6498,6 +6507,7 @@
}
},
"postableSilence": {
"description": "PostableSilence postable silence",
"type": "object",
"required": [
"comment",
Expand Down
6 changes: 5 additions & 1 deletion pkg/services/ngalert/schedule/schedule.go
Expand Up @@ -474,7 +474,11 @@ func (sch *schedule) ruleRoutine(grafanaCtx context.Context, key ngmodels.AlertR
isPaused := ctx.rule.IsPaused
// fetch latest alert rule version
if currentRuleVersion != newVersion {
if currentRuleVersion > 0 { // do not clean up state if the eval loop has just started.
// Do not clean up state if the eval loop has just started, unless the rule is already paused.
// A paused rule can still have state lingering in the DB: for example, when a rule that has
// state is paused via file provisioning on a stateful Grafana instance. Without a reset here,
// that state would not be cleaned up until the next alert rule update.
if currentRuleVersion > 0 || isPaused {
logger.Debug("Got a new version of alert rule. Clear up the state and refresh extra labels", "version", currentRuleVersion, "newVersion", newVersion)
resetState(grafanaCtx, isPaused)
}
Expand Down