forked from knative/serving
-
Notifications
You must be signed in to change notification settings - Fork 0
/
prober.go
276 lines (232 loc) · 6.89 KB
/
prober.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
/*
Copyright 2019 The Knative Authors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// prober.go provides probers that continuously poll route domains and report their observed success rate.
package test
import (
"fmt"
"net/http"
"sync"
"testing"
pkgTest "github.com/knative/pkg/test"
"github.com/knative/pkg/test/spoof"
"golang.org/x/sync/errgroup"
)
// Prober is the interface for a prober, which checks the result of the probes when stopped.
type Prober interface {
	// SLI returns the raw counts behind the "service level indicator" for the
	// prober: the total number of probes observed and how many of them failed.
	// The observed success rate is derived from these as (total - failures) / total.
	// The contract allows an implementation to panic if the prober has not been stopped.
	// NOTE(review): the prober implementation in this file does not panic when
	// called before Stop; it returns the counts observed so far. Confirm which
	// behavior is intended.
	SLI() (total int64, failures int64)
	// Stop terminates the prober, returning any observed errors.
	// Implementations may choose to put additional requirements on
	// the prober, which may cause this to block (e.g. a minimum number
	// of probes to achieve a population suitable for SLI measurement).
	Stop() error
}
// prober is the Prober implementation that tracks the outcome of the probes
// sent to a single domain.
type prober struct {
	// These shouldn't change after creation
	// t is the test handle used to log failed probes.
	t *testing.T
	// domain is the probed domain; it is only used in log messages here.
	domain string
	// minimumProbes is the request count at which minDoneCh gets closed.
	minimumProbes int64
	// m guards access to these fields
	m sync.RWMutex
	// requests counts the responses handled before the prober was stopped.
	requests int64
	// failures counts the handled responses whose status was not http.StatusOK.
	failures int64
	// stopped is set by Stop; once set, handleResponse reports "done" and
	// stops counting responses.
	stopped bool
	// This channel is used to send errors encountered probing the domain.
	errCh chan error
	// This channel is simply closed when minimumProbes has been satisfied.
	minDoneCh chan struct{}
}
// prober implements Prober
var _ Prober = (*prober)(nil)
// SLI implements Prober.
//
// It takes a snapshot, under the read lock, of the number of probes handled so
// far and how many of those failed, and returns them as (total, failures).
func (p *prober) SLI() (int64, int64) {
	p.m.RLock()
	total, failed := p.requests, p.failures
	p.m.RUnlock()
	return total, failed
}
// Stop implements Prober.
//
// It returns the first probing error if one is (or becomes) available,
// otherwise it blocks until the minimum number of probes has been reached and
// returns nil. In every case the prober is marked as stopped on the way out so
// that handleResponse stops counting and signals the poller to finish.
func (p *prober) Stop() error {
	// Flag the prober as stopped only once we are done waiting, so probes keep
	// being counted while we block below.
	defer func() {
		p.m.Lock()
		p.stopped = true
		p.m.Unlock()
	}()

	// Give an already-pending error priority: a single select over both
	// channels would pick randomly if both were ready.
	select {
	case probeErr := <-p.errCh:
		return probeErr
	default:
	}

	// Nothing pending: wait for whichever comes first, an error or the
	// minimum probe count being satisfied (result stays nil in that case).
	var result error
	select {
	case result = <-p.errCh:
	case <-p.minDoneCh:
	}
	return result
}
// handleResponse is the per-response callback handed to the polling client.
//
// While the prober is running it counts the response, records (and logs) a
// failure when the status is not 200 OK, and closes minDoneCh at the moment
// the request count reaches minimumProbes. It returns (true, nil) once the
// prober has been stopped and (false, nil) otherwise; it never returns an error.
func (p *prober) handleResponse(response *spoof.Response) (bool, error) {
	p.m.Lock()
	defer p.m.Unlock()

	// After Stop, report "done" and leave the counters untouched.
	if p.stopped {
		return true, nil
	}

	p.requests++

	if got, want := response.StatusCode, http.StatusOK; got != want {
		p.t.Logf("%q status = %d, want: %d", p.domain, got, want)
		p.t.Logf("response: %s", response)
		p.failures++
	}

	// Equality (not >=) guarantees the channel is closed exactly once.
	if p.minimumProbes == p.requests {
		close(p.minDoneCh)
	}

	// Returning (false, nil) causes SpoofingClient.Poll to retry.
	return false, nil
}
// ProberManager is the interface for spawning probers, and checking their results.
type ProberManager interface {
	// The ProberManager should expose a way to collectively reason about spawned
	// probes as a sort of aggregating Prober.
	Prober
	// Spawn creates a new Prober for the given domain.
	// The manager implementation in this file returns the already-spawned
	// Prober when the same domain is requested again.
	Spawn(domain string) Prober
	// Foreach iterates over the probers spawned by this ProberManager, invoking
	// the callback once per prober with the domain it was spawned for.
	Foreach(func(domain string, p Prober))
}
// manager is the ProberManager implementation; it keeps one Prober per domain.
type manager struct {
	// Should not change after creation
	// t is the test handle used for logging.
	t *testing.T
	// clients supplies the KubeClient used to build each spoofing client.
	clients *Clients
	// minProbes is the minimum probe count handed to every spawned prober.
	minProbes int64
	// m guards probes.
	m sync.RWMutex
	// probes holds the spawned probers keyed by domain.
	probes map[string]Prober
}
// manager implements ProberManager
var _ ProberManager = (*manager)(nil)
// Spawn implements ProberManager
//
// If a prober already exists for domain it is returned as-is. Otherwise a new
// prober is registered and a goroutine is started that polls http://<domain>,
// feeding every response to the prober's handleResponse. Any error from
// creating the client, building the request, or polling is logged and sent on
// the prober's errCh, where Stop picks it up.
//
// When the manager was created with a non-positive minimum probe count, the
// prober's minimum is considered satisfied immediately, so Stop does not block
// waiting for probes.
func (m *manager) Spawn(domain string) Prober {
	m.m.Lock()
	defer m.m.Unlock()

	if p, ok := m.probes[domain]; ok {
		return p
	}

	m.t.Logf("Starting Route prober for route domain %s.", domain)
	p := &prober{
		t:             m.t,
		domain:        domain,
		minimumProbes: m.minProbes,
		// Buffered so the goroutine below can deliver its single error and
		// exit even if nobody is waiting in Stop yet.
		errCh:     make(chan error, 1),
		minDoneCh: make(chan struct{}),
	}
	// handleResponse closes minDoneCh only when the request count, which is
	// already 1 after the first probe, equals minimumProbes. A minimum of zero
	// or less would therefore never be reached and Stop would block forever.
	// Mark it as satisfied up front; handleResponse can never match it later,
	// so the channel is still closed exactly once.
	if p.minimumProbes <= 0 {
		close(p.minDoneCh)
	}
	m.probes[domain] = p

	go func() {
		client, err := pkgTest.NewSpoofingClient(m.clients.KubeClient, m.t.Logf, domain,
			ServingFlags.ResolvableDomain)
		if err != nil {
			m.t.Logf("NewSpoofingClient() = %v", err)
			p.errCh <- err
			return
		}

		// RequestTimeout is set to 0 to make the polling infinite.
		client.RequestTimeout = 0
		req, err := http.NewRequest(http.MethodGet, fmt.Sprintf("http://%s", domain), nil)
		if err != nil {
			m.t.Logf("NewRequest() = %v", err)
			p.errCh <- err
			return
		}

		// We keep polling the domain and accumulate success rates
		// to ultimately establish the SLI and compare to the SLO.
		_, err = client.Poll(req, p.handleResponse)
		if err != nil {
			// SLO violations are not reflected as errors. They are
			// captured and calculated internally.
			m.t.Logf("Poll() = %v", err)
			p.errCh <- err
		}
	}()
	return p
}
// Stop implements ProberManager.
//
// It stops every spawned prober concurrently and waits for all of them,
// returning the error reported by the error group (nil when every prober
// stopped cleanly). The manager's write lock is held for the whole duration.
func (m *manager) Stop() error {
	m.m.Lock()
	defer m.m.Unlock()

	m.t.Log("Stopping all probers")

	var stoppers errgroup.Group
	for _, p := range m.probes {
		stoppers.Go(p.Stop)
	}
	return stoppers.Wait()
}
// SLI implements Prober.
//
// It sums the totals and failures reported by every spawned prober while
// holding the manager's read lock. With no spawned probers it returns (0, 0).
func (m *manager) SLI() (total int64, failures int64) {
	m.m.RLock()
	defer m.m.RUnlock()

	for _, p := range m.probes {
		probed, failed := p.SLI()
		total, failures = total+probed, failures+failed
	}
	return total, failures
}
// Foreach implements ProberManager.
//
// The callback f is invoked once per spawned prober with the domain it was
// spawned for, in map iteration order (i.e. unspecified). The manager's read
// lock is held while f runs, so f must not call Spawn or Stop on this same
// manager: both take the write lock and would deadlock.
func (m *manager) Foreach(f func(domain string, p Prober)) {
	m.m.RLock()
	defer m.m.RUnlock()

	for name, spawned := range m.probes {
		f(name, spawned)
	}
}
// NewProberManager creates a new manager for probes.
//
// t is used for logging, clients provides the Kubernetes client used when
// probers are spawned, and minProbes is the minimum probe count handed to every
// prober the manager spawns. The returned manager starts with no probers.
func NewProberManager(t *testing.T, clients *Clients, minProbes int64) ProberManager {
	mgr := &manager{probes: map[string]Prober{}}
	mgr.t = t
	mgr.clients = clients
	mgr.minProbes = minProbes
	return mgr
}
// RunRouteProber starts a single Prober of the given domain.
//
// Note that the value returned is the ProberManager that owns the spawned
// prober, not the prober itself; its Stop and SLI aggregate over that one prober.
func RunRouteProber(t *testing.T, clients *Clients, domain string) Prober {
	// Default to 10 probes
	const defaultMinProbes = 10

	mgr := NewProberManager(t, clients, defaultMinProbes)
	mgr.Spawn(domain)
	return mgr
}
// AssertProberDefault is a helper for stopping the Prober and checking its SLI
// against the default SLO, which requires perfect responses.
// This takes `testing.T` so that it may be used in `defer`.
//
// Both a Stop error and an SLO violation are reported with t.Errorf, so the
// test is marked failed but keeps running; the SLO is checked even when Stop
// returned an error.
func AssertProberDefault(t *testing.T, p Prober) {
	t.Helper()

	stopErr := p.Stop()
	if stopErr != nil {
		t.Errorf("Stop() = %v", stopErr)
	}

	// Default to 100% correct (typically used in conjunction with the low probe count above)
	const perfectSLO = 1.0
	sloErr := CheckSLO(perfectSLO, t.Name(), p)
	if sloErr != nil {
		t.Errorf("CheckSLO() = %v", sloErr)
	}
}
// CheckSLO compares the SLI of the given prober against the SLO, erroring if too low.
//
// slo is the minimum acceptable success rate (1.0 means every probe must
// succeed), name is only used to label the error message, and p supplies the
// total/failure counts via SLI. It returns nil when the observed success rate
// is at least slo, and an error when the rate is lower or when no probes were
// recorded at all.
func CheckSLO(slo float64, name string, p Prober) error {
	total, failures := p.SLI()

	// With no probes the rate below would be 0/0 = NaN, and NaN compares false
	// against everything, so the SLO check would silently pass without a
	// single observation. Report that explicitly instead.
	if total <= 0 {
		return fmt.Errorf("SLI for %q is undefined (no probes recorded), wanted >= %f", name, slo)
	}

	successRate := float64(total-failures) / float64(total)
	if successRate < slo {
		return fmt.Errorf("SLI for %q = %f, wanted >= %f", name, successRate, slo)
	}
	return nil
}