Skip to content

Commit

Permalink
Initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
wpjunior committed Aug 5, 2019
1 parent 5373e9e commit a3f0f49
Show file tree
Hide file tree
Showing 628 changed files with 299,203 additions and 0 deletions.
33 changes: 33 additions & 0 deletions algorithms/algorithms.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
package algoritms

import (
"fmt"

"github.com/prometheus/prometheus/pkg/labels"
)

// MultiBurnRateForPage builds the PromQL expression for the paging alert.
//
// The expression is the disjunction of two window pairs, each of which
// requires both a long and a short window to satisfy the comparison:
//
//	(<metric>:ratio_rate_1h <op> 14.4*value and <metric>:ratio_rate_5m  <op> 14.4*value)
//	or
//	(<metric>:ratio_rate_6h <op> 6*value    and <metric>:ratio_rate_30m <op> 6*value)
//
// metric is the recording-rule prefix, lbs is rendered as the label selector
// appended to every series name, operator is the PromQL comparison operator
// and value is the error budget expressed as a ratio (e.g. 0.001).
func MultiBurnRateForPage(metric string, lbs labels.Labels, operator string, value float64) string {
	selector := lbs.String()

	// Render the budget with up to six significant digits. A fixed "%.3f"
	// rounds budgets below 0.0005 (objectives of 99.95% and above) to 0.001
	// or 0.000, which silently changes or zeroes the alert threshold.
	threshold := fmt.Sprintf("%.6g", value)

	return fmt.Sprintf(
		"(%[1]s:ratio_rate_1h%[2]s %[3]s (14.4 * %[4]s) and %[1]s:ratio_rate_5m%[2]s %[3]s (14.4 * %[4]s))"+
			" or "+
			"(%[1]s:ratio_rate_6h%[2]s %[3]s (6 * %[4]s) and %[1]s:ratio_rate_30m%[2]s %[3]s (6 * %[4]s))",
		metric, selector, operator, threshold,
	)
}

// MultiBurnRateForTicket builds the PromQL expression for the ticket alert.
//
// The expression is the disjunction of two window pairs, each of which
// requires both a long and a short window to satisfy the comparison:
//
//	(<metric>:ratio_rate_1d <op> 3*value and <metric>:ratio_rate_2h <op> 3*value)
//	or
//	(<metric>:ratio_rate_3d <op> value   and <metric>:ratio_rate_6h <op> value)
//
// metric is the recording-rule prefix, lbs is rendered as the label selector
// appended to every series name, operator is the PromQL comparison operator
// and value is the error budget expressed as a ratio (e.g. 0.001).
func MultiBurnRateForTicket(metric string, lbs labels.Labels, operator string, value float64) string {
	selector := lbs.String()

	// Render the budget with up to six significant digits. A fixed "%.3f"
	// rounds budgets below 0.0005 (objectives of 99.95% and above) to 0.001
	// or 0.000, which silently changes or zeroes the alert threshold.
	threshold := fmt.Sprintf("%.6g", value)

	return fmt.Sprintf(
		"(%[1]s:ratio_rate_1d%[2]s %[3]s (3 * %[4]s) and %[1]s:ratio_rate_2h%[2]s %[3]s (3 * %[4]s))"+
			" or "+
			"(%[1]s:ratio_rate_3d%[2]s %[3]s %[4]s and %[1]s:ratio_rate_6h%[2]s %[3]s %[4]s)",
		metric, selector, operator, threshold,
	)
}
11 changes: 11 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
module github.com/globocom/slo-generator

go 1.12

require (
github.com/opentracing/opentracing-go v1.1.0 // indirect
github.com/prometheus/common v0.6.0
github.com/prometheus/prometheus v0.0.0-20190525122359-d20e84d0fb64
github.com/prometheus/tsdb v0.10.0 // indirect
gopkg.in/yaml.v2 v2.2.2
)
313 changes: 313 additions & 0 deletions go.sum

Large diffs are not rendered by default.

176 changes: 176 additions & 0 deletions main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
package main

import (
"flag"
"log"
"os"
"strings"

algorithms "github.com/globocom/slo-generator/algorithms"
"github.com/prometheus/common/model"
"github.com/prometheus/prometheus/pkg/labels"
"github.com/prometheus/prometheus/pkg/rulefmt"
yaml "gopkg.in/yaml.v2"
)

// Sample describes one group of recording rules: the group's name suffix,
// the evaluation interval of the group, and the rate windows ("buckets")
// for which a recording rule is generated.
type Sample struct {
	// Name is appended to the rule group name; Interval is a duration
	// string parsed when the rule group is built.
	Name, Interval string
	// Buckets holds the window sizes substituted into the rate expressions.
	Buckets []string
}

// defaultSamples is the fixed table of recording-rule groups generated for
// every SLO: each entry pairs an evaluation interval with the rate windows
// recorded at that interval.
var defaultSamples = []Sample{
	{Name: "short", Interval: "30s", Buckets: []string{"5m", "30m", "1h"}},
	{Name: "medium", Interval: "2m", Buckets: []string{"2h", "6h"}},
	{Name: "daily", Interval: "5m", Buckets: []string{"1d", "3d"}},
}

// SLOSpec is the root of the SLO description file: a list of SLO entries
// stored under the top-level `slos` key.
type SLOSpec struct {
	SLOS []SLO `yaml:"slos"`
}

// ExprBlock holds an expression template read from the `expr` key. The
// template may contain the literal placeholder "$window".
type ExprBlock struct {
	Expr string `yaml:"expr"`
}

// ComputeExpr returns the template with every occurrence of the "$window"
// placeholder replaced by window. The stored template is left untouched.
func (b *ExprBlock) ComputeExpr(window string) string {
	const placeholder = "$window"
	return strings.Replace(b.Expr, placeholder, window, -1)
}

// SLO is one entry of the SLO description file. GenerateGroupRules turns it
// into recording and alerting rule groups.
type SLO struct {
// Name identifies the service; it is used in rule group names, alert names
// and as the value of the "service" label.
Name string `yaml:"name"`
// Algorithm is loaded from the file but is not consulted by the code
// visible in this file.
Algorithm string `yaml:"algorithm"`
// AvailabilityObjectivePercent is the availability target as a percentage
// (the example file uses 99 and 99.9); the alert threshold is derived from
// 1 - AvailabilityObjectivePercent/100.
AvailabilityObjectivePercent float64 `yaml:"availabilityObjectivePercent"`
// LatencyObjectiveBuckets is loaded from the file but is not consulted by
// the code visible in this file.
LatencyObjectiveBuckets map[float64]string `yaml:"latencyObjectiveBuckets"`
// ErrorRateRecord is the expression template used for the error-ratio
// recording rules; "$window" is replaced by each rate window.
ErrorRateRecord ExprBlock `yaml:"errorRateRecord"`
// LatencyRecord is loaded from the file but is not consulted by the code
// visible in this file.
LatencyRecord ExprBlock `yaml:"latencyRecord"`
// Annotations are attached unchanged to every generated alert rule.
Annotations map[string]string `yaml:"annotations"`
}

// GenerateGroupRules builds every Prometheus rule group for this SLO.
//
// One recording group is produced per entry of defaultSamples; it contains
// one `slo:service_errors_total:ratio_rate_<window>` rule per window, each
// labelled with service=<slo.Name>. A final "<name>_alert" group holds the
// page and ticket alerts, which compare those recorded error ratios with
// multiples of the error budget (1 - AvailabilityObjectivePercent/100).
//
// The process is terminated via log.Fatal if a sample interval cannot be
// parsed.
func (slo SLO) GenerateGroupRules() []rulefmt.RuleGroup {
	const errorsMetric = "slo:service_errors_total"

	rules := make([]rulefmt.RuleGroup, 0, len(defaultSamples)+1)

	for _, sample := range defaultSamples {
		interval, err := model.ParseDuration(sample.Interval)
		if err != nil {
			log.Fatal(err)
		}

		ruleGroup := rulefmt.RuleGroup{
			Name:     "slo:" + slo.Name + "_" + sample.Name,
			Interval: interval,
			Rules:    make([]rulefmt.Rule, 0, len(sample.Buckets)),
		}

		for _, bucket := range sample.Buckets {
			ruleGroup.Rules = append(ruleGroup.Rules, rulefmt.Rule{
				Record: errorsMetric + ":ratio_rate_" + bucket,
				Expr:   slo.ErrorRateRecord.ComputeExpr(bucket),
				Labels: map[string]string{
					"service": slo.Name,
				},
			})
		}

		rules = append(rules, ruleGroup)
	}

	// Values shared by both alerts.
	serviceLabels := labels.New(labels.Label{Name: "service", Value: slo.Name})
	errorBudget := 1 - slo.AvailabilityObjectivePercent/100

	// The recorded series are error ratios, so an alert must fire when the
	// ratio is ABOVE the burn-rate threshold. Comparing with "<" would fire
	// while the service is healthy and stay silent during an outage.
	const operator = ">"

	alertingGroup := rulefmt.RuleGroup{
		Name: "slo:" + slo.Name + "_alert",
		Rules: []rulefmt.Rule{
			// alerting page
			{
				Alert:       "slo:" + slo.Name + ".errors.page",
				Expr:        algorithms.MultiBurnRateForPage(errorsMetric, serviceLabels, operator, errorBudget),
				Annotations: slo.Annotations,
			},
			// alerting ticket
			{
				Alert:       "slo:" + slo.Name + ".errors.ticket",
				Expr:        algorithms.MultiBurnRateForTicket(errorsMetric, serviceLabels, operator, errorBudget),
				Annotations: slo.Annotations,
			},
		},
	}

	return append(rules, alertingGroup)
}

// main reads the SLO description given by -slo.path, generates the rule
// groups for every SLO it contains and writes them as YAML to the file given
// by -rule.output. Both flags are required; any failure terminates the
// process through log.Fatal.
func main() {
	var sloPath, ruleOutput string
	flag.StringVar(&sloPath, "slo.path", "", "A YML file describing SLOs")
	flag.StringVar(&ruleOutput, "rule.output", "", "Output to describe a prometheus rules")

	flag.Parse()

	if sloPath == "" {
		log.Fatal("slo.path is a required param")
	}

	if ruleOutput == "" {
		log.Fatal("rule.output is a required param")
	}

	f, err := os.Open(sloPath)
	if err != nil {
		log.Fatal(err)
	}

	spec := &SLOSpec{}
	err = yaml.NewDecoder(f).Decode(spec)
	// The input is only read, so a failing Close cannot lose data; release
	// the descriptor before acting on the decode result.
	_ = f.Close()
	if err != nil {
		log.Fatal(err)
	}

	ruleGroups := &rulefmt.RuleGroups{
		Groups: []rulefmt.RuleGroup{},
	}

	for _, slo := range spec.SLOS {
		ruleGroups.Groups = append(ruleGroups.Groups, slo.GenerateGroupRules()...)
	}

	targetFile, err := os.Create(ruleOutput)
	if err != nil {
		log.Fatal(err)
	}

	// Close explicitly instead of deferring: log.Fatal skips deferred calls,
	// and a Close error on a written file can mean the rules never reached
	// disk, so it must be reported rather than dropped.
	if err := yaml.NewEncoder(targetFile).Encode(ruleGroups); err != nil {
		_ = targetFile.Close()
		log.Fatal(err)
	}
	if err := targetFile.Close(); err != nil {
		log.Fatal(err)
	}

	log.Printf("generated a SLO record in %q", ruleOutput)
}
45 changes: 45 additions & 0 deletions slo_example.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
slos:
- name: myteam-a.service-a
algorithm: multiwindow
availabilityObjectivePercent: 99
latencyObjectiveBuckets:
95: 5s # 95% < 5s
97: 10s # 97% < 10s

annotations:
message: Service A Error Budget consumption
link: https://grafana.myservice.com/URL
slack_channel: '_team_a'

errorRateRecord:
expr: |
sum (rate(http_requests_total{job="service-a", status="5xx"}[$window])) /
sum (rate(http_requests_total{job="service-a"}[$window]))
latencyRecord:
expr: |
sum (rate(http_request_duration_seconds_bucket{job="service-a", le="{{ $latencyBucket }}"}[$window])) /
sum (rate(http_requests_total{job="service-a"}[$window]))
- name: myteam-b.service-b
algorithm: multiwindow
availabilityObjectivePercent: 99.9
latencyObjectiveBuckets:
90: 50ms # 90% < 50ms
95: 100ms # 95% < 100ms

annotations:
message: Service B Error Budget consumption
link:
slack_channel: '_team_b'

errorRateRecord:
expr: |
sum (rate(http_requests_total{job="service-b", status="5xx"}[$window])) /
sum (rate(http_requests_total{job="service-b"}[$window]))
latencyRecord:
expr: |
sum (rate(http_request_duration_seconds_bucket{job="service-b", le="{{ $latencyBucket }}"}[$window])) /
sum (rate(http_requests_total{job="service-b"}[$window]))
20 changes: 20 additions & 0 deletions vendor/github.com/beorn7/perks/LICENSE

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit a3f0f49

Please sign in to comment.