Skip to content

Commit

Permalink
Add support for auto-discovering services with honorLabels
Browse files Browse the repository at this point in the history
  • Loading branch information
wpjunior committed Oct 7, 2019
1 parent 98f76a3 commit 416ff3c
Show file tree
Hide file tree
Showing 3 changed files with 168 additions and 34 deletions.
81 changes: 47 additions & 34 deletions slo/slo.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ type SLO struct {
Name string `yaml:"name"`
Objectives Objectives

HonorLabels bool `yaml:"honorLabels"`

TrafficRateRecord ExprBlock `yaml:"trafficRateRecord"`
ErrorRateRecord ExprBlock `yaml:"errorRateRecord"`
LatencyRecord ExprBlock `yaml:"latencyRecord"`
Expand Down Expand Up @@ -86,44 +88,55 @@ func (slo SLO) GenerateGroupRules() []rulefmt.RuleGroup {
}

for _, bucket := range sample.Buckets {
if slo.TrafficRateRecord.Expr != "" {
trafficRateRecord := rulefmt.Rule{
Record: "slo:service_traffic:ratio_rate_" + bucket,
Expr: slo.TrafficRateRecord.ComputeExpr(bucket, ""),
Labels: map[string]string{
"service": slo.Name,
},
}
ruleGroup.Rules = append(ruleGroup.Rules, trafficRateRecord)
}

errorRateRecord := rulefmt.Rule{
Record: "slo:service_errors_total:ratio_rate_" + bucket,
Expr: slo.ErrorRateRecord.ComputeExpr(bucket, ""),
Labels: map[string]string{
"service": slo.Name,
},
}

ruleGroup.Rules = append(ruleGroup.Rules, errorRateRecord)

for _, latencyBucket := range slo.Objectives.Latency {
latencyRateRecord := rulefmt.Rule{
Record: "slo:service_latency:ratio_rate_" + bucket,
Expr: slo.LatencyRecord.ComputeExpr(bucket, latencyBucket.LE),
Labels: map[string]string{
"service": slo.Name,
"le": latencyBucket.LE,
},
}

ruleGroup.Rules = append(ruleGroup.Rules, latencyRateRecord)
}

ruleGroup.Rules = append(ruleGroup.Rules, slo.generateRules(bucket)...)
}

rules = append(rules, ruleGroup)
}

return rules
}

// generateRules builds the recording rules for a single rate window (bucket).
//
// It emits, in order:
//   - a traffic-rate rule, only when TrafficRateRecord.Expr is non-empty;
//   - an error-rate rule (always);
//   - one latency rule per entry in Objectives.Latency, labelled with that
//     entry's "le" value.
//
// When HonorLabels is false every rule is labelled with "service": slo.Name.
// When HonorLabels is true the "service" label is left out of every rule,
// including the latency rules, so that a "service" label produced by the
// expression itself is not overwritten by the rule's static labels.
func (slo SLO) generateRules(bucket string) []rulefmt.Rule {
	// At most one traffic rule, one error rule and one rule per latency objective.
	rules := make([]rulefmt.Rule, 0, 2+len(slo.Objectives.Latency))

	// newLabels returns a fresh label map for each rule (rules must not share
	// a map) and applies the HonorLabels policy in a single place so all rule
	// kinds stay consistent.
	newLabels := func() map[string]string {
		labels := map[string]string{}
		if !slo.HonorLabels {
			labels["service"] = slo.Name
		}
		return labels
	}

	if slo.TrafficRateRecord.Expr != "" {
		rules = append(rules, rulefmt.Rule{
			Record: "slo:service_traffic:ratio_rate_" + bucket,
			Expr:   slo.TrafficRateRecord.ComputeExpr(bucket, ""),
			Labels: newLabels(),
		})
	}

	rules = append(rules, rulefmt.Rule{
		Record: "slo:service_errors_total:ratio_rate_" + bucket,
		Expr:   slo.ErrorRateRecord.ComputeExpr(bucket, ""),
		Labels: newLabels(),
	})

	for _, latencyBucket := range slo.Objectives.Latency {
		labels := newLabels()
		// "le" is always set: it distinguishes the latency objectives, which
		// all share the same record name for a given bucket.
		labels["le"] = latencyBucket.LE

		rules = append(rules, rulefmt.Rule{
			Record: "slo:service_latency:ratio_rate_" + bucket,
			Expr:   slo.LatencyRecord.ComputeExpr(bucket, latencyBucket.LE),
			Labels: labels,
		})
	}

	return rules
}
117 changes: 117 additions & 0 deletions slo/slo_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,123 @@ func TestSLOGenerateGroupRules(t *testing.T) {
})
}

// TestSLOGenerateGroupRulesWithAutoDiscovery verifies that an SLO configured
// with HonorLabels produces three rule groups (short, medium, daily) whose
// traffic and error recording rules carry an empty label set.
func TestSLOGenerateGroupRulesWithAutoDiscovery(t *testing.T) {
	subject := &SLO{
		Name:        "auto-discover-services",
		HonorLabels: true,
		TrafficRateRecord: ExprBlock{
			Expr: "sum(rate(http_total[$window])) by (service)",
		},
		ErrorRateRecord: ExprBlock{
			AlertMethod: "multi-window",
			Expr:        "sum(rate(http_errors[$window])) by (service)/sum(rate(http_total[$window])) by (service)",
		},
	}

	// rulesFor returns the expected traffic/error rule pair for every given
	// window, in the order the windows are listed.
	rulesFor := func(windows ...string) []rulefmt.Rule {
		expected := []rulefmt.Rule{}
		for _, w := range windows {
			expected = append(expected,
				rulefmt.Rule{
					Record: "slo:service_traffic:ratio_rate_" + w,
					Expr:   "sum(rate(http_total[" + w + "])) by (service)",
					Labels: map[string]string{},
				},
				rulefmt.Rule{
					Record: "slo:service_errors_total:ratio_rate_" + w,
					Expr:   "sum(rate(http_errors[" + w + "])) by (service)/sum(rate(http_total[" + w + "])) by (service)",
					Labels: map[string]string{},
				},
			)
		}
		return expected
	}

	want := []rulefmt.RuleGroup{
		{
			Name:     "slo:auto-discover-services:short",
			Interval: model.Duration(time.Second * 30),
			Rules:    rulesFor("5m", "30m", "1h"),
		},
		{
			Name:     "slo:auto-discover-services:medium",
			Interval: model.Duration(time.Second * 120),
			Rules:    rulesFor("2h", "6h"),
		},
		{
			Name:     "slo:auto-discover-services:daily",
			Interval: model.Duration(time.Second * 300),
			Rules:    rulesFor("1d", "3d"),
		},
	}

	got := subject.GenerateGroupRules()
	assert.Len(t, got, 3)

	// Compare group by group so a failure points at the offending group.
	for i := range want {
		assert.Equal(t, want[i], got[i])
	}
}

func TestSLOGenerateAlertRules(t *testing.T) {
slo := &SLO{
Name: "my-team.my-service.payment",
Expand Down
4 changes: 4 additions & 0 deletions slo_example.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@ slos:
link: https://grafana.myservice.com/URL
slack_channel: '_team_a'

trafficRateRecord:
expr: |
sum (rate(http_requests_total{job="service-a"}[$window]))
errorRateRecord:
alertMethod: multi-window
expr: |
Expand Down

0 comments on commit 416ff3c

Please sign in to comment.