-
Notifications
You must be signed in to change notification settings - Fork 2.5k
/
health.go
286 lines (242 loc) · 7.98 KB
/
health.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
package health
import (
"context"
"encoding/json"
"errors"
"fmt"
"net/http"
"sync"
"time"
"github.com/distribution/distribution/v3/internal/dcontext"
"github.com/distribution/distribution/v3/registry/api/errcode"
)
// Registers global /debug/health api endpoint, creates default registry
func init() {
DefaultRegistry = NewRegistry()
http.HandleFunc("/debug/health", StatusHandler)
}
// A Registry is a collection of checks. Most applications will use the global
// registry defined in DefaultRegistry. However, unit tests may need to create
// separate registries to isolate themselves from other tests.
type Registry struct {
mu sync.RWMutex
registeredChecks map[string]Checker
}
// NewRegistry creates a new registry. This isn't necessary for normal use of
// the package, but may be useful for unit tests so individual tests have their
// own set of checks.
func NewRegistry() *Registry {
return &Registry{
registeredChecks: make(map[string]Checker),
}
}
// DefaultRegistry is the default registry where checks are registered. It is
// the registry used by the HTTP handler.
var DefaultRegistry *Registry
// Checker is the interface for a Health Checker
type Checker interface {
// Check returns nil if the service is okay.
Check(context.Context) error
}
// CheckFunc is a convenience type to create functions that implement
// the Checker interface
type CheckFunc func(context.Context) error
// Check Implements the Checker interface to allow for any func() error method
// to be passed as a Checker
func (cf CheckFunc) Check(ctx context.Context) error {
return cf(ctx)
}
// Updater implements a health check that is explicitly set.
type Updater interface {
Checker
// Update updates the current status of the health check.
Update(status error)
}
// updater implements Checker and Updater, providing an asynchronous Update
// method.
// This allows us to have a Checker that returns the Check() call immediately
// not blocking on a potentially expensive check.
type updater struct {
mu sync.Mutex
status error
}
// Check implements the Checker interface
func (u *updater) Check(context.Context) error {
u.mu.Lock()
defer u.mu.Unlock()
return u.status
}
// Update implements the Updater interface, allowing asynchronous access to
// the status of a Checker.
func (u *updater) Update(status error) {
u.mu.Lock()
defer u.mu.Unlock()
u.status = status
}
// NewStatusUpdater returns a new updater
func NewStatusUpdater() Updater {
return &updater{}
}
// thresholdUpdater implements Checker and Updater, providing an asynchronous Update
// method.
// This allows us to have a Checker that returns the Check() call immediately
// not blocking on a potentially expensive check.
type thresholdUpdater struct {
mu sync.Mutex
status error
threshold int
count int
}
// Check implements the Checker interface
func (tu *thresholdUpdater) Check(context.Context) error {
tu.mu.Lock()
defer tu.mu.Unlock()
if tu.count >= tu.threshold || errors.As(tu.status, new(pollingTerminatedErr)) {
return tu.status
}
return nil
}
// thresholdUpdater implements the Updater interface, allowing asynchronous
// access to the status of a Checker.
func (tu *thresholdUpdater) Update(status error) {
tu.mu.Lock()
defer tu.mu.Unlock()
if status == nil {
tu.count = 0
} else if tu.count < tu.threshold {
tu.count++
}
tu.status = status
}
// NewThresholdStatusUpdater returns a new thresholdUpdater
func NewThresholdStatusUpdater(t int) Updater {
if t > 0 {
return &thresholdUpdater{threshold: t}
}
return NewStatusUpdater()
}
type pollingTerminatedErr struct{ Err error }
func (e pollingTerminatedErr) Error() string {
return fmt.Sprintf("health: check is not polled: %v", e.Err)
}
func (e pollingTerminatedErr) Unwrap() error { return e.Err }
// Poll periodically polls the checker c at interval and updates the updater u
// with the result. The checker is called with ctx as the context. When ctx is
// done, Poll updates the updater with ctx.Err() and returns.
func Poll(ctx context.Context, u Updater, c Checker, interval time.Duration) {
t := time.NewTicker(interval)
defer t.Stop()
for {
select {
case <-ctx.Done():
u.Update(pollingTerminatedErr{Err: ctx.Err()})
return
case <-t.C:
u.Update(c.Check(ctx))
}
}
}
// CheckStatus returns a map with all the current health check errors
func (registry *Registry) CheckStatus(ctx context.Context) map[string]string { // TODO(stevvooe) this needs a proper type
registry.mu.RLock()
defer registry.mu.RUnlock()
statusKeys := make(map[string]string)
for k, v := range registry.registeredChecks {
err := v.Check(ctx)
if err != nil {
statusKeys[k] = err.Error()
}
}
return statusKeys
}
// CheckStatus returns a map with all the current health check errors from the
// default registry.
func CheckStatus(ctx context.Context) map[string]string {
return DefaultRegistry.CheckStatus(ctx)
}
// Register associates the checker with the provided name.
func (registry *Registry) Register(name string, check Checker) {
if registry == nil {
registry = DefaultRegistry
}
registry.mu.Lock()
defer registry.mu.Unlock()
_, ok := registry.registeredChecks[name]
if ok {
panic("Check already exists: " + name)
}
registry.registeredChecks[name] = check
}
// Register associates the checker with the provided name in the default
// registry.
func Register(name string, check Checker) {
DefaultRegistry.Register(name, check)
}
// RegisterFunc allows the convenience of registering a checker directly from
// an arbitrary func(context.Context) error.
func (registry *Registry) RegisterFunc(name string, check CheckFunc) {
registry.Register(name, check)
}
// RegisterFunc allows the convenience of registering a checker in the default
// registry directly from an arbitrary func(context.Context) error.
func RegisterFunc(name string, check CheckFunc) {
DefaultRegistry.RegisterFunc(name, check)
}
// StatusHandler returns a JSON blob with all the currently registered Health Checks
// and their corresponding status.
// Returns 503 if any Error status exists, 200 otherwise
func StatusHandler(w http.ResponseWriter, r *http.Request) {
if r.Method == http.MethodGet {
checks := CheckStatus(r.Context())
status := http.StatusOK
// If there is an error, return 503
if len(checks) != 0 {
status = http.StatusServiceUnavailable
}
statusResponse(w, r, status, checks)
} else {
http.NotFound(w, r)
}
}
// Handler returns a handler that will return 503 response code if the health
// checks have failed. If everything is okay with the health checks, the
// handler will pass through to the provided handler. Use this handler to
// disable a web application when the health checks fail.
func Handler(handler http.Handler) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
checks := CheckStatus(r.Context())
if len(checks) != 0 {
// NOTE(milosgajdos): disable errcheck as the error is
// accessible via /debug/health
// nolint:errcheck
errcode.ServeJSON(w, errcode.ErrorCodeUnavailable.
WithDetail("health check failed: please see /debug/health"))
return
}
handler.ServeHTTP(w, r) // pass through
})
}
// statusResponse completes the request with a response describing the health
// of the service.
func statusResponse(w http.ResponseWriter, r *http.Request, status int, checks map[string]string) {
p, err := json.Marshal(checks)
if err != nil {
dcontext.GetLogger(r.Context()).Errorf("error serializing health status: %v", err)
p, err = json.Marshal(struct {
ServerError string `json:"server_error"`
}{
ServerError: "Could not parse error message",
})
status = http.StatusInternalServerError
if err != nil {
dcontext.GetLogger(r.Context()).Errorf("error serializing health status failure message: %v", err)
return
}
}
w.Header().Set("Content-Type", "application/json")
w.Header().Set("Content-Length", fmt.Sprint(len(p)))
w.WriteHeader(status)
if _, err := w.Write(p); err != nil {
dcontext.GetLogger(r.Context()).Errorf("error writing health status response body: %v", err)
}
}