pool.go (forked from bblfsh/bblfshd)

package daemon

import (
	"context"
	"fmt"
	"math"
	"runtime"
	"sync"
	"sync/atomic"
	"time"

	"github.com/bblfsh/bblfshd/daemon/protocol"
	"github.com/sirupsen/logrus"
	"gopkg.in/bblfsh/sdk.v1/sdk/server"
	"gopkg.in/src-d/go-errors.v1"
)

const (
	// DefaultPoolTimeout is the time a request to the DriverPool can wait
	// before getting a driver assigned.
	DefaultPoolTimeout = 5 * time.Second

	// MaxPoolTimeout is the maximum time allowed to wait for a driver to be
	// assigned.
	MaxPoolTimeout = 5 * time.Minute
)

var (
	// DefaultMaxInstancesPerDriver is the maximum number of instances of
	// the same driver which can be launched following the default
	// scaling policy (see DefaultScalingPolicy()).
	DefaultMaxInstancesPerDriver = runtime.NumCPU()

	// ErrPoolClosed is returned when an operation is attempted on a closed pool.
	ErrPoolClosed = errors.NewKind("driver pool already closed")
	// ErrPoolTimeout is returned when no driver becomes available in time.
	ErrPoolTimeout = errors.NewKind("timeout, all drivers are busy")
	// ErrInvalidPoolTimeout is returned when the requested timeout exceeds
	// MaxPoolTimeout; the %%v placeholder is filled with the offending value by New().
	ErrInvalidPoolTimeout = errors.NewKind(fmt.Sprintf("invalid timeout: %%v, max. timeout allowed %s", MaxPoolTimeout))
	// ErrNegativeInstances is returned when a negative instance count is requested.
	ErrNegativeInstances = errors.NewKind("cannot set instances to negative number")
)

// DriverPool controls a pool of drivers and balances requests among them,
// ensuring each driver does not get concurrent requests. The number of driver
// instances in the driver pool is controlled by a ScalingPolicy.
type DriverPool struct {
	// ScalingPolicy is the scaling policy used to scale the instances up or down.
	ScalingPolicy ScalingPolicy
	// Timeout is the time to wait until a driver instance is available.
	Timeout time.Duration
	// Logger is used during the life of the driver pool.
	Logger server.Logger

	// factory function used to spawn new driver instances.
	factory FactoryFunction
	// queue holds all the driver instances.
	queue *driverQueue
	// index holds a pointer to the current driver instances by id.
	index sync.Map
	// close channel is used to synchronize the Stop() call with the
	// scaling() goroutine. Once Stop() starts, a struct{} is sent on the
	// close channel, and once scaling() finishes it closes the channel.
	close   chan struct{}
	closed  bool
	running bool

	// stats hold different metrics about the state of the pool.
	stats struct {
		instances atomicInt // instances wanted
		waiting   atomicInt // requests waiting for a driver
		success   atomicInt // requests executed successfully
		errors    atomicInt // requests errored
		exited    atomicInt // drivers exited unexpectedly
	}
}

// FactoryFunction is a factory function that creates new Driver instances.
type FactoryFunction func() (Driver, error)

// NewDriverPool creates a new DriverPool. It takes as a parameter a
// FactoryFunction, used to instantiate new drivers; the pool must be
// started with Start before use.
func NewDriverPool(factory FactoryFunction) *DriverPool {
	return &DriverPool{
		ScalingPolicy: DefaultScalingPolicy(),
		Timeout:       DefaultPoolTimeout,
		Logger:        logrus.New(),

		factory: factory,
		close:   make(chan struct{}),
		queue:   newDriverQueue(),
	}
}
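
// A minimal usage sketch. The driver constructor below is hypothetical and
// only for illustration; any FactoryFunction returning a ready Driver works:
//
//	pool := NewDriverPool(func() (Driver, error) {
//		return newMockDriver() // hypothetical helper, not part of this package
//	})
//	if err := pool.Start(); err != nil {
//		// handle error
//	}
//	defer pool.Stop()
//
//	err := pool.Execute(func(d Driver) error {
//		// use the assigned driver; it is never shared concurrently
//		return nil
//	}, 0) // 0 falls back to DefaultPoolTimeout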

// Start starts the driver pool, launching the initial set of driver
// instances dictated by the scaling policy and the background scaling
// goroutine.
func (dp *DriverPool) Start() error {
	target := dp.ScalingPolicy.Scale(0, 0)
	if err := dp.setInstances(target); err != nil {
		_ = dp.setInstances(0)
		return err
	}

	dp.running = true
	go dp.scaling()
	return nil
}

// setInstances changes the number of running driver instances. Instances
// will be started or stopped as necessary to satisfy the new instance count.
// It blocks until all the required instances are started or stopped.
func (dp *DriverPool) setInstances(target int) error {
	if target < 0 {
		return ErrNegativeInstances.New()
	}

	n := target - dp.stats.instances.Value()
	if n > 0 {
		return dp.add(n)
	} else if n < 0 {
		return dp.del(-n)
	}

	return nil
}
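
// For example, if the pool currently wants 2 instances, setInstances(5)
// starts 3 more drivers via add(3), while setInstances(0) stops both via del(2).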

// add starts n new driver instances using the factory function and makes
// them available in the queue.
func (dp *DriverPool) add(n int) error {
	for i := 0; i < n; i++ {
		d, err := dp.factory()
		if err != nil {
			return err
		}

		dp.index.Store(d.ID(), d)
		dp.queue.Put(d)
		dp.stats.instances.Add(1)
	}

	return nil
}

// del takes n driver instances out of the queue and stops them.
func (dp *DriverPool) del(n int) error {
	for i := 0; i < n; i++ {
		d, more := dp.queue.Get()
		if !more {
			return ErrPoolClosed.New()
		}

		if err := dp.remove(d); err != nil {
			return err
		}
	}

	return nil
}

// remove stops the given driver and removes it from the index.
func (dp *DriverPool) remove(d Driver) error {
	dp.stats.instances.Add(-1)
	if err := d.Stop(); err != nil {
		return err
	}

	dp.index.Delete(d.ID())
	return nil
}

// scaling runs in its own goroutine, periodically re-evaluating the number
// of instances until the pool is closed.
func (dp *DriverPool) scaling() {
	ticker := time.NewTicker(time.Millisecond * 100)
	defer ticker.Stop()

	for {
		select {
		case <-dp.close:
			close(dp.close)
			return
		case <-ticker.C:
			dp.doScaling()
		}
	}
}

// doScaling asks the scaling policy for the desired number of instances,
// given the current total and the effective load (waiting requests minus
// idle drivers), and resizes the pool accordingly.
func (dp *DriverPool) doScaling() {
	total := dp.stats.instances.Value()
	ready := dp.queue.Size()
	load := dp.stats.waiting.Value()

	s := dp.ScalingPolicy.Scale(total, load-ready)
	if s == total {
		return
	}

	dp.Logger.Debugf("scaling driver pool from %d instance(s) to %d instance(s)", total, s)
	if err := dp.setInstances(s); err != nil {
		dp.Logger.Errorf("error re-scaling pool: %s", err)
	}
}

// Function is a function to be executed using a given driver.
type Function func(d Driver) error

// FunctionCtx is a function to be executed using a given driver and context.
type FunctionCtx func(ctx context.Context, d Driver) error

// Execute executes the given Function in the first available driver instance.
// It gets a driver from the pool and forwards the request to it. If all drivers
// are busy, it will return an error after the timeout passes. If the DriverPool
// is closed, an error will be returned.
func (dp *DriverPool) Execute(c Function, timeout time.Duration) error {
	if timeout > MaxPoolTimeout {
		return ErrInvalidPoolTimeout.New(timeout)
	}
	if timeout == 0 {
		timeout = DefaultPoolTimeout
	}

	ctx, cancel := context.WithTimeout(context.Background(), timeout)
	defer cancel()

	return dp.ExecuteCtx(ctx, func(_ context.Context, d Driver) error {
		return c(d)
	})
}
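
// For example, with fn being any Function value, Execute(fn, 0) runs fn with
// the default 5-second pool timeout, while Execute(fn, 10*time.Minute) is
// rejected with ErrInvalidPoolTimeout because it exceeds MaxPoolTimeout.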

// ExecuteCtx executes the given FunctionCtx in the first available driver
// instance. It gets a driver from the pool and forwards the request to it. If
// all drivers are busy, it will return an error after the timeout passes. If
// the DriverPool is closed, an error will be returned.
func (dp *DriverPool) ExecuteCtx(ctx context.Context, c FunctionCtx) error {
	if deadline, ok := ctx.Deadline(); !ok {
		// no deadline on the caller's context: apply the default pool timeout
		var cancel func()
		ctx, cancel = context.WithTimeout(ctx, DefaultPoolTimeout)
		defer cancel()
	} else if timeout := time.Until(deadline); timeout > MaxPoolTimeout {
		// cap overly long deadlines at the maximum allowed pool timeout
		var cancel func()
		ctx, cancel = context.WithTimeout(ctx, MaxPoolTimeout-time.Second/2)
		defer cancel()
	}

	d, err := dp.getDriver(ctx)
	if err != nil {
		return err
	}

	status, err := d.Status()
	if err != nil {
		return err
	}

	if status != protocol.Running {
		// the assigned driver exited; drop it from the pool and retry the
		// request with another instance
		defer func() {
			dp.stats.exited.Add(1)
			dp.Logger.Debugf("removing stopped driver")
			if err := dp.remove(d); err != nil {
				dp.Logger.Errorf("error removing stopped driver: %s", err)
			}
		}()

		return dp.ExecuteCtx(ctx, c)
	}

	defer dp.queue.Put(d)
	if err := c(ctx, d); err != nil {
		dp.stats.errors.Add(1)
		return err
	}

	dp.stats.success.Add(1)
	return nil
}
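
// A sketch of calling ExecuteCtx with a caller-provided deadline; pool is a
// started DriverPool and the closure body is only a placeholder:
//
//	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
//	defer cancel()
//
//	err := pool.ExecuteCtx(ctx, func(ctx context.Context, d Driver) error {
//		// forward the request to the assigned driver, honoring ctx
//		return nil
//	})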

// getDriver blocks until a driver instance is available in the queue or the
// context is done, tracking the request in the waiting counter meanwhile.
func (dp *DriverPool) getDriver(ctx context.Context) (Driver, error) {
	dp.stats.waiting.Add(1)
	defer dp.stats.waiting.Add(-1)

	d, more, err := dp.queue.GetWithContext(ctx)
	if err != nil {
		dp.stats.errors.Add(1)
		dp.Logger.Warningf("unable to allocate a driver instance: %s", err)
		return nil, err
	}

	if !more {
		return nil, ErrPoolClosed.New()
	}

	return d, nil
}

// Current returns a list of the current instances in the pool; it includes
// the running ones and those being stopped.
func (dp *DriverPool) Current() []Driver {
	var list []Driver
	dp.index.Range(func(_, d interface{}) bool {
		list = append(list, d.(Driver))
		return true
	})

	return list
}

// State returns the current state of the driver pool.
func (dp *DriverPool) State() *protocol.DriverPoolState {
	return &protocol.DriverPoolState{
		Wanted:  dp.stats.instances.Value(),
		Running: len(dp.Current()),
		Waiting: dp.stats.waiting.Value(),
		Success: dp.stats.success.Value(),
		Errors:  dp.stats.errors.Value(),
		Exited:  dp.stats.exited.Value(),
	}
}

// Stop stops the driver pool, including all its underlying driver instances.
func (dp *DriverPool) Stop() error {
	if dp.closed {
		return ErrPoolClosed.New()
	} else if !dp.running {
		return nil
	}

	dp.running = false
	dp.closed = true
	dp.close <- struct{}{}
	<-dp.close

	if err := dp.setInstances(0); err != nil {
		return err
	}

	return dp.queue.Close()
}

// driverQueue is an unbuffered queue of idle driver instances, with an atomic
// counter tracking how many drivers are available or in flight to the channel.
type driverQueue struct {
	c chan Driver
	n *atomicInt
}

func newDriverQueue() *driverQueue {
	return &driverQueue{c: make(chan Driver), n: &atomicInt{}}
}

// Put returns a driver to the queue. The send happens in a goroutine because
// the channel is unbuffered, so Put never blocks the caller.
func (q *driverQueue) Put(d Driver) {
	q.n.Add(1)
	go func() { q.c <- d }()
}

// Get blocks until a driver is available; more is false if the queue was closed.
func (q *driverQueue) Get() (driver Driver, more bool) {
	defer q.n.Add(-1)

	d, more := <-q.c
	return d, more
}

// GetWithContext blocks until a driver is available or the context is done.
// The context must carry a deadline no further away than MaxPoolTimeout.
func (q *driverQueue) GetWithContext(ctx context.Context) (driver Driver, more bool, err error) {
	if deadline, ok := ctx.Deadline(); !ok {
		return nil, true, ErrInvalidPoolTimeout.New(time.Duration(0))
	} else if timeout := time.Until(deadline); timeout > MaxPoolTimeout {
		return nil, true, ErrInvalidPoolTimeout.New(timeout)
	}

	select {
	case d, more := <-q.c:
		q.n.Add(-1)
		return d, more, nil
	case <-ctx.Done():
		return nil, true, ErrPoolTimeout.New()
	}
}

// Size returns the number of drivers currently tracked by the queue.
func (q *driverQueue) Size() int {
	return q.n.Value()
}

func (q *driverQueue) Close() error {
	close(q.c)
	return nil
}

// atomicInt is a small counter safe for concurrent use.
type atomicInt struct {
	val int32
}

func (c *atomicInt) Add(n int) {
	atomic.AddInt32(&c.val, int32(n))
}

func (c *atomicInt) Value() int {
	return int(atomic.LoadInt32(&c.val))
}

// ScalingPolicy specifies whether instances should be started or stopped to
// cope with load.
type ScalingPolicy interface {
	// Scale takes the total number of instances and the load. The load is
	// the number of requests waiting or, if there are none, a negative
	// value indicating how many instances are ready.
	Scale(total, load int) int
}

// DefaultScalingPolicy returns a new instance of the default scaling policy.
// Instances returned by this function should not be reused.
func DefaultScalingPolicy() ScalingPolicy {
	return MovingAverage(10, MinMax(1, DefaultMaxInstancesPerDriver, AIMD(1, 0.5)))
}
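
// The default policy smooths the load over a 10-sample moving average,
// applies AIMD (grow by at most 1, shrink by half of the idle instances) and
// clamps the result between 1 and DefaultMaxInstancesPerDriver. A custom
// composition can be built the same way; the numbers below are only an
// illustration, not a recommended configuration:
//
//	pool := NewDriverPool(factory)
//	pool.ScalingPolicy = MovingAverage(5, MinMax(1, 4, AIMD(2, 0.25)))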

// movingAverage implements ScalingPolicy by averaging the load over a fixed
// window before delegating to the wrapped policy.
type movingAverage struct {
	ScalingPolicy
	loads  []float64
	pos    int
	filled bool
}

// MovingAverage computes a moving average of the load and forwards it to the
// underlying scaling policy. This policy is stateful and not thread-safe; do
// not reuse its instances for multiple pools.
func MovingAverage(window int, p ScalingPolicy) ScalingPolicy {
	return &movingAverage{
		ScalingPolicy: p,
		loads:         make([]float64, window),
		pos:           0,
		filled:        false,
	}
}

func (p *movingAverage) Scale(total, load int) int {
	// record the load in the circular buffer
	p.loads[p.pos] = float64(load)
	p.pos++
	if p.pos >= len(p.loads) {
		p.filled = true
		p.pos = 0
	}

	// average over the filled part of the window only
	maxPos := len(p.loads)
	if !p.filled {
		maxPos = p.pos
	}

	var sum float64
	for i := 0; i < maxPos; i++ {
		sum += p.loads[i]
	}

	avg := sum / float64(maxPos)
	return p.ScalingPolicy.Scale(total, int(avg))
}
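
// A worked example of the averaging, assuming a window of 3 and successive
// loads of 4, 2 and 0: the first call averages only {4} and forwards 4, the
// second averages {4, 2} and forwards 3, and the third averages {4, 2, 0}
// and forwards 2 to the wrapped policy (the average is truncated to an int).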

type minMax struct {
	ScalingPolicy
	Min, Max int
}

// MinMax wraps a ScalingPolicy and applies a minimum and maximum to the number
// of instances.
func MinMax(min, max int, p ScalingPolicy) ScalingPolicy {
	return &minMax{
		Min:           min,
		Max:           max,
		ScalingPolicy: p,
	}
}

func (p *minMax) Scale(total, load int) int {
	s := p.ScalingPolicy.Scale(total, load)
	if s > p.Max {
		return p.Max
	}

	if s < p.Min {
		return p.Min
	}

	return s
}

// aimd implements an additive-increase / multiplicative-decrease policy.
type aimd struct {
	Add int
	Mul float64
}

// AIMD returns a ScalingPolicy of additive increase / multiplicative decrease.
// Increases are of min(add, load). Decreases are of ceil(ready * mul).
func AIMD(add int, mul float64) ScalingPolicy {
	return &aimd{add, mul}
}

func (p *aimd) Scale(total, load int) int {
	if load > 0 {
		// requests are waiting: grow additively, by at most Add
		if load > p.Add {
			return total + p.Add
		}

		return total + load
	}

	// drivers are idle: shrink multiplicatively, never below zero
	res := total - int(math.Ceil(float64(-load)*p.Mul))
	if res < 0 {
		return 0
	}

	return res
}
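
// A worked example with AIMD(1, 0.5): with 4 instances and 3 waiting requests
// (load 3), Scale returns 4 + min(1, 3) = 5; with 4 instances and 3 idle
// drivers (load -3), it returns 4 - ceil(3 * 0.5) = 2.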