Skip to content
This repository was archived by the owner on Jul 12, 2023. It is now read-only.
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 37 additions & 3 deletions terraform/alerting/alerts.tf
Original file line number Diff line number Diff line change
Expand Up @@ -236,12 +236,13 @@ resource "google_monitoring_alert_policy" "StackdriverExportFailed" {
resource "google_monitoring_alert_policy" "fast_burn" {
project = var.verification-server-project
display_name = "FastErrorBudgetBurn"
combiner = "OR"
combiner = "AND"
enabled = "true"
# create only if using GCLB, which means there's an SLO created
count = var.https-forwarding-rule == "" ? 0 : 1

conditions {
display_name = "2% burn in 1 hour"
display_name = "Fast burn over last hour"
condition_threshold {
filter = <<-EOT
select_slo_burn_rate("projects/${var.verification-server-project}/services/verification-server/serviceLevelObjectives/availability-slo", "3600s")
Expand All @@ -256,6 +257,22 @@ resource "google_monitoring_alert_policy" "fast_burn" {
}
}

# Short-window condition of the fast-burn alert policy: checks the burn rate
# of the availability SLO over a 300s (5 minute) lookback against the same
# 3.36 threshold value computed below.
conditions {
display_name = "Fast burn over last 5 minutes"
condition_threshold {
# Burn rate of the verification-server availability SLO; the second
# argument ("300s") is the lookback period passed to select_slo_burn_rate.
filter = <<-EOT
select_slo_burn_rate("projects/${var.verification-server-project}/services/verification-server/serviceLevelObjectives/availability-slo", "300s")
EOT
# "0s" presumably means no sustained-violation period is required before
# the condition is considered met — confirm against the provider docs.
duration = "0s"
comparison = "COMPARISON_GT"
# burn rate = budget consumed * period / alerting window = .02 * (7 * 24 * 60)/60 = 3.36
# NOTE(review): the formula above divides by a 60-minute alerting window,
# while this filter looks back only 300s. Presumably the threshold is
# deliberately shared with the 1-hour condition of this policy — confirm.
threshold_value = 3.36
trigger {
# Minimum number of time series that must exceed the threshold.
count = 1
}
}
}

documentation {
content = "${local.playbook_prefix}/FastErrorBudgetBurn.md"
mime_type = "text/markdown"
Expand All @@ -276,8 +293,9 @@ resource "google_monitoring_alert_policy" "slow_burn" {
enabled = "true"
# create only if using GCLB, which means there's an SLO created
count = var.https-forwarding-rule == "" ? 0 : 1

conditions {
display_name = "5% burn in 6 hour"
display_name = "Slow burn over last 6 hours"
condition_threshold {
filter = <<-EOT
select_slo_burn_rate("projects/${var.verification-server-project}/services/verification-server/serviceLevelObjectives/availability-slo", "21600s")
Expand All @@ -292,6 +310,22 @@ resource "google_monitoring_alert_policy" "slow_burn" {
}
}

# Short-window condition of the slow-burn alert policy: checks the burn rate
# of the availability SLO over an 1800s (30 minute) lookback against the same
# 1.4 threshold value computed below.
conditions {
display_name = "Slow burn over last 30 minutes"
condition_threshold {
# Burn rate of the verification-server availability SLO; the second
# argument ("1800s") is the lookback period passed to select_slo_burn_rate.
filter = <<-EOT
select_slo_burn_rate("projects/${var.verification-server-project}/services/verification-server/serviceLevelObjectives/availability-slo", "1800s")
EOT
# "0s" presumably means no sustained-violation period is required before
# the condition is considered met — confirm against the provider docs.
duration = "0s"
comparison = "COMPARISON_GT"
# burn rate = budget consumed * period / alerting window = .05 * (7 * 24 * 60)/360 = 1.4
# NOTE(review): the formula above divides by a 360-minute (6 hour) alerting
# window, while this filter looks back only 1800s. Presumably the threshold
# is deliberately shared with the 6-hour condition of this policy — confirm.
threshold_value = 1.4
trigger {
# Minimum number of time series that must exceed the threshold.
count = 1
}
}
}

documentation {
content = "${local.playbook_prefix}/SlowErrorBudgetBurn.md"
mime_type = "text/markdown"
Expand Down