-
Notifications
You must be signed in to change notification settings - Fork 24
/
alert-query.go
100 lines (89 loc) · 2.98 KB
/
alert-query.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
package prometheus
import (
"context"
"time"
prometheusv1 "github.com/prometheus/client_golang/api/prometheus/v1"
prometheusmodel "github.com/prometheus/common/model"
"google.golang.org/protobuf/proto"
"github.com/fluxninja/aperture/v2/pkg/jobs"
)
// AlertCallback is the signature of the callbacks invoked when the alert
// becomes active or becomes inactive. It receives the job context and the
// caller-supplied callback arguments; the returned message and error are
// saved by the alert query and returned from each subsequent evaluation.
type AlertCallback func(context.Context, ...interface{}) (proto.Message, error)
// alertState is the evaluation state an alertQuery is currently in.
type alertState int64
const (
// inactive is the zero value, so it is the state a newly created alertQuery starts in.
inactive alertState = iota
// pending is the state in which the query result must stay active for
// forDuration before the alert is marked active.
pending
// active means the alert-active callback has been invoked and the
// alert-inactive callback has not been invoked since.
active
)
// alertQuery holds the state carried between successive evaluations of one alert query.
type alertQuery struct {
// activeAt is the time at which the query result was first observed active
// while the alert was inactive.
activeAt time.Time
// savedError and savedDetails hold the result of the most recent callback
// invocation; execute returns them on every run.
savedError error
savedDetails proto.Message
// alertActiveCallback is invoked when the alert transitions to active.
alertActiveCallback AlertCallback
// alertInactiveCallback is invoked when an active alert transitions back to inactive.
alertInactiveCallback AlertCallback
// state is the current alert state; its zero value is inactive.
state alertState
// forDuration is how long the query result must stay active before a pending
// alert is marked active. When it is zero the alert is marked active as soon
// as the result is active.
forDuration time.Duration
}
// NewAlertQueryJob builds a job callback that evaluates a PromQL query as an alert,
// using semantics similar to Prometheus alert rules.
//
// alertActiveCallback is invoked when the alert becomes active and
// alertInactiveCallback when it becomes inactive again; forDuration is the hold
// period handed to the alert state tracker. errorCallback is forwarded to
// NewPromQueryJob together with the query, endTimestamp, promAPI, enforcer,
// timeout and cbArgs.
//
// The returned callback is the one produced by NewPromQueryJob and is compatible
// with scheduler BasicJob.
func NewAlertQueryJob(
	query string,
	endTimestamp time.Time,
	promAPI prometheusv1.API,
	enforcer *PrometheusEnforcer,
	timeout time.Duration,
	forDuration time.Duration,
	alertActiveCallback,
	alertInactiveCallback AlertCallback,
	errorCallback PromErrorCallback,
	cbArgs ...interface{},
) jobs.JobCallback {
	// The tracker starts in the zero (inactive) state and lives as long as the job callback.
	tracker := new(alertQuery)
	tracker.forDuration = forDuration
	tracker.alertActiveCallback = alertActiveCallback
	tracker.alertInactiveCallback = alertInactiveCallback

	return NewPromQueryJob(
		query,
		endTimestamp,
		promAPI,
		enforcer,
		timeout,
		tracker.execute,
		errorCallback,
		cbArgs...,
	)
}
// execute is the PromQL result callback of the alert query. It derives whether
// the alert condition currently holds from value, advances the alert state
// machine, and invokes the active/inactive callbacks on transitions.
//
// The condition holds when value is a scalar or a string, or a vector or matrix
// with at least one element; any other value (including nil) means it does not.
//
// State transitions:
//   - inactive -> active:   condition holds and forDuration is not positive.
//   - inactive -> pending:  condition holds and forDuration is positive.
//   - pending  -> active:   condition has held for at least forDuration.
//   - pending  -> inactive: condition stopped holding before forDuration elapsed.
//   - active   -> inactive: condition stopped holding.
//
// alertActiveCallback runs on every transition to active and
// alertInactiveCallback on the active -> inactive transition; both receive
// jobCtxt and cbArgs. The result of the most recent callback invocation is
// saved and returned on every call, including calls that cause no transition.
func (aq *alertQuery) execute(jobCtxt context.Context, value prometheusmodel.Value, cbArgs ...interface{}) (proto.Message, error) {
	activeNow := false
	switch v := value.(type) {
	case *prometheusmodel.Scalar, *prometheusmodel.String:
		// A scalar or string result always counts as an active condition.
		activeNow = true
	case prometheusmodel.Vector:
		activeNow = v.Len() > 0
	case prometheusmodel.Matrix:
		activeNow = v.Len() > 0
	}

	switch aq.state {
	case inactive:
		if !activeNow {
			break
		}
		// Remember when the condition started holding so the pending state can
		// measure the hold period against it.
		aq.activeAt = time.Now()
		if aq.forDuration <= 0 {
			// No hold period requested: transition straight to active.
			aq.state = active
			aq.savedDetails, aq.savedError = aq.alertActiveCallback(jobCtxt, cbArgs...)
		} else {
			// Without entering pending here the alert could never become active
			// when a hold period is configured.
			aq.state = pending
		}
	case pending:
		if !activeNow {
			// Condition cleared before the hold period elapsed; no callback is
			// invoked because the alert never became active.
			aq.state = inactive
			break
		}
		// Make sure it is active for forDuration before marking as active.
		if time.Since(aq.activeAt) >= aq.forDuration {
			aq.state = active
			aq.savedDetails, aq.savedError = aq.alertActiveCallback(jobCtxt, cbArgs...)
		}
	case active:
		if !activeNow {
			aq.state = inactive
			aq.savedDetails, aq.savedError = aq.alertInactiveCallback(jobCtxt, cbArgs...)
		}
	}

	return aq.savedDetails, aq.savedError
}