-
Notifications
You must be signed in to change notification settings - Fork 665
/
service.go
133 lines (113 loc) · 3.37 KB
/
service.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
package health
import (
"encoding/json"
"sync"
"time"
"github.com/ava-labs/avalanchego/utils/constants"
"github.com/ava-labs/avalanchego/utils/logging"
"github.com/prometheus/client_golang/prometheus"
health "github.com/AppsFlyer/go-sundheit"
)
// Compile-time assertion that *service satisfies the Service interface.
var _ Service = (*service)(nil)
// Service performs health checks. Other components register health checks
// with Service, which then performs them.
type Service interface {
// RegisterCheck registers the health check [checkFn] under [name].
// A non-nil error means the check could not be registered.
RegisterCheck(name string, checkFn Check) error
// RegisterMonotonicCheck registers [checkFn] under [name] as a monotonic
// check: after it passes once, it always reports healthy without executing
// any logic.
RegisterMonotonicCheck(name string, checkFn Check) error
// Results returns the go-sundheit results of the health checks.
// NOTE(review): the map is presumably keyed by check name and the bool is
// presumably the overall health status; confirm against the go-sundheit
// Health.Results documentation.
Results() (map[string]health.Result, bool)
}
// NewService builds a new [Service] whose registered health checks run every
// [checkFreq].
//
// The metrics are created by newMetrics from [log], [namespace] and
// [registry]; if that fails, the error is returned and no Service is created.
// A listener holding [log] and the metrics is attached to the underlying
// health checker so that status changes of checks are reported.
func NewService(checkFreq time.Duration, log logging.Logger, namespace string, registry prometheus.Registerer) (Service, error) {
	checker := health.New()

	m, err := newMetrics(log, namespace, registry)
	if err != nil {
		return nil, err
	}

	// The listener reports when a check changes status.
	listener := &checkListener{
		log:     log,
		checks:  make(map[string]bool),
		metrics: m,
	}
	checker.WithCheckListener(listener)

	svc := &service{
		Health:    checker,
		checkFreq: checkFreq,
	}
	return svc, nil
}
// service implements Service on top of a go-sundheit health checker.
type service struct {
// Embedded go-sundheit checker that performs the underlying health checks.
health.Health
// Time between health checks; used as the ExecutionPeriod of every check
// registered through RegisterCheck and RegisterMonotonicCheck.
checkFreq time.Duration
}
// RegisterCheck adds a health check named [name] that calls [checkFn] to
// evaluate health. The check is scheduled to run every [s.checkFreq] after the
// default initial delay. The error of the underlying health checker's
// registration is returned as is.
func (s *service) RegisterCheck(name string, checkFn Check) error {
	cfg := health.Config{
		Check:           &check{name: name, checkFn: checkFn},
		ExecutionPeriod: s.checkFreq,
		InitialDelay:    constants.DefaultHealthCheckInitialDelay,
	}
	return s.Health.RegisterCheck(&cfg)
}
// RegisterMonotonicCheck adds a health check named [name] that, after it
// passes once, always returns healthy without executing any logic. Until then
// it calls [checkFn] to evaluate health. The check is scheduled to run every
// [s.checkFreq] after the default initial delay. The error of the underlying
// health checker's registration is returned as is.
func (s *service) RegisterMonotonicCheck(name string, checkFn Check) error {
	monotonic := &monotonicCheck{
		check: check{name: name, checkFn: checkFn},
	}
	cfg := health.Config{
		Check:           monotonic,
		ExecutionPeriod: s.checkFreq,
		InitialDelay:    constants.DefaultHealthCheckInitialDelay,
	}
	return s.Health.RegisterCheck(&cfg)
}
// checkListener receives the OnCheckStarted/OnCheckCompleted callbacks of the
// health checker it is attached to. It logs check results and notifies
// [metrics] when a check changes status.
type checkListener struct {
// log receives the messages about started and completed checks
log logging.Logger
// lock ensures that updates and reads to [checks] are atomic
lock sync.Mutex
// checks maps name -> is healthy, as of the last completed run of that check
checks map[string]bool
// metrics is notified through healthy()/unHealthy() on status changes
metrics *metrics
}
// OnCheckStarted logs, at debug level, that the health check [name] is
// starting to run.
func (c *checkListener) OnCheckStarted(name string) {
	logger := c.log
	logger.Debug("starting to run %s", name)
}
// OnCheckCompleted records the outcome of the health check [name], logs it and
// updates the health metrics.
//
// It is called concurrently with multiple health checks, so reads and writes
// of [c.checks] are guarded by [c.lock].
//
// Logging and metrics behave as follows:
//   - first result seen for [name]: logged at debug level; unHealthy() is
//     called on the metrics if the check is unhealthy.
//   - same status as the previous result: logged at debug level; the metrics
//     are left untouched.
//   - status changed: logged at info level (became healthy) or warn level
//     (became unhealthy), and healthy()/unHealthy() is called on the metrics.
//
// The JSON encoding of [result] is only used in the log messages. If encoding
// fails, the error is logged and a placeholder is printed instead, but the
// tracked health state and the metrics are still updated so that a result
// which cannot be encoded does not leave them stale.
func (c *checkListener) OnCheckCompleted(name string, result health.Result) {
	resultStr := "<result could not be encoded>"
	if resultJSON, err := json.Marshal(result); err != nil {
		// Don't return here: the encoding is cosmetic, the state update is not.
		c.log.Error("failed to encode result of %q: %s", name, err)
	} else {
		resultStr = string(resultJSON)
	}

	isHealthy := result.IsHealthy()

	// Only the map access needs the lock; logging and metrics happen outside.
	c.lock.Lock()
	previouslyHealthy, exists := c.checks[name]
	c.checks[name] = isHealthy
	c.lock.Unlock()

	// A check that is unhealthy the first time it reports counts as failing.
	if !exists && !isHealthy {
		c.metrics.unHealthy()
	}

	// No status transition: either the first result or an unchanged status.
	if !exists || isHealthy == previouslyHealthy {
		if isHealthy {
			c.log.Debug("%q returned healthy with: %s", name, resultStr)
		} else {
			c.log.Debug("%q returned unhealthy with: %s", name, resultStr)
		}
		return
	}

	// The status flipped since the previous result.
	if isHealthy {
		c.log.Info("%q became healthy with: %s", name, resultStr)
		c.metrics.healthy()
	} else {
		c.log.Warn("%q became unhealthy with: %s", name, resultStr)
		c.metrics.unHealthy()
	}
}