package ambex
/**********************************************
* ambex: Ambassador Experimental ADS server
*
* Here's the deal.
*
* In go-control-plane, several different classes manage this stuff:
*
* - The root of the world is a SnapshotCache.
* - import github.com/datawire/ambassador/pkg/envoy-control-plane/cache/v2, then refer
* to cache.SnapshotCache.
* - A collection of internally consistent configuration objects is a
* Snapshot (cache.Snapshot).
* - Snapshots are collected in the SnapshotCache.
* - A given SnapshotCache can hold configurations for multiple Envoys,
* identified by the Envoy 'node ID', which must be configured for the
* Envoy.
* - The SnapshotCache can only hold go-control-plane configuration objects,
* so you have to build these up to hand to the SnapshotCache.
* - The gRPC stuff is handled by a Server.
* import github.com/datawire/ambassador/pkg/envoy-control-plane/server/v2, then refer
* to server.Server.
* - Our runManagementServer (largely ripped off from the go-control-plane
* tests) gets this running. It takes a server.Server (built from the
* SnapshotCache, which is cleverly called a "config" for no reason I
* understand) plus a listen network and address as arguments.
* - _ALL_ the gRPC madness is handled by the Server, with the assistance
* of the methods in a callback object.
* - Once the Server is running, Envoy can open a gRPC stream to it.
* - On connection, Envoy will get handed the most recent Snapshot that
* the Server's SnapshotCache knows about.
* - Whenever a newer Snapshot is added to the SnapshotCache, that Snapshot
* will get sent to the Envoy.
* - We manage the SnapshotCache by loading envoy configuration from
* json and/or protobuf files on disk.
* - By default, when we get a SIGHUP, we reload the configuration.
* - When passed the -watch argument, we reload whenever any file in
* the directory changes.
*/
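//
// A rough sketch of the data flow, assuming a single Envoy whose bootstrap
// sets node.id to "test-id" (the node ID this file passes to SetSnapshot):
//
//   *.json / *.pb files --Decode/Validate--> go-control-plane resources
//     --cache.NewSnapshot--> Snapshot --SetSnapshot("test-id", ...)-->
//     SnapshotCache --server.Server (gRPC ADS stream)--> Envoy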
import (
"context"
"encoding/json"
"flag"
"fmt"
"io/ioutil"
"net"
"os"
"os/signal"
"path/filepath"
"reflect"
"strings"
"syscall"
"github.com/fsnotify/fsnotify"
"github.com/sirupsen/logrus"
"google.golang.org/grpc"
// protobuf library
"github.com/golang/protobuf/jsonpb"
"github.com/golang/protobuf/proto"
"github.com/golang/protobuf/ptypes"
"github.com/golang/protobuf/ptypes/any"
// envoy control plane
ctypes "github.com/datawire/ambassador/pkg/envoy-control-plane/cache/types"
"github.com/datawire/ambassador/pkg/envoy-control-plane/cache/v2"
"github.com/datawire/ambassador/pkg/envoy-control-plane/server/v2"
"github.com/datawire/ambassador/pkg/memory"
// envoy protobuf v2 -- Be sure to import the package of any types that the Python emits a
// "@type" of in the generated config, even if that package is otherwise not used by ambex.
v2 "github.com/datawire/ambassador/pkg/api/envoy/api/v2"
_ "github.com/datawire/ambassador/pkg/api/envoy/api/v2/auth"
core "github.com/datawire/ambassador/pkg/api/envoy/api/v2/core"
_ "github.com/datawire/ambassador/pkg/api/envoy/config/accesslog/v2"
bootstrap "github.com/datawire/ambassador/pkg/api/envoy/config/bootstrap/v2"
_ "github.com/datawire/ambassador/pkg/api/envoy/config/filter/http/buffer/v2"
_ "github.com/datawire/ambassador/pkg/api/envoy/config/filter/http/ext_authz/v2"
_ "github.com/datawire/ambassador/pkg/api/envoy/config/filter/http/gzip/v2"
_ "github.com/datawire/ambassador/pkg/api/envoy/config/filter/http/lua/v2"
_ "github.com/datawire/ambassador/pkg/api/envoy/config/filter/http/rate_limit/v2"
_ "github.com/datawire/ambassador/pkg/api/envoy/config/filter/http/rbac/v2"
_ "github.com/datawire/ambassador/pkg/api/envoy/config/filter/http/router/v2"
_ "github.com/datawire/ambassador/pkg/api/envoy/config/filter/network/http_connection_manager/v2"
_ "github.com/datawire/ambassador/pkg/api/envoy/config/filter/network/tcp_proxy/v2"
discovery "github.com/datawire/ambassador/pkg/api/envoy/service/discovery/v2"
// envoy protobuf v3 -- likewise
_ "github.com/datawire/ambassador/pkg/api/envoy/extensions/filters/http/response_map/v3"
// first-party libraries
"github.com/datawire/dlib/dhttp"
)
const (
localhost = "127.0.0.1"
)
type Args struct {
debug bool
watch bool
adsNetwork string
adsAddress string
dirs []string
}
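// parseArgs parses command-line flags into an Args struct; any remaining
// positional arguments are config directories (defaulting to "."). For
// illustration only (the path here is made up):
//
//   args, _ := parseArgs("-watch", "-ads-listen-address=:9000", "/tmp/envoy-conf")
//   // args.watch == true, args.adsAddress == ":9000", args.dirs == []string{"/tmp/envoy-conf"}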
func parseArgs(rawArgs ...string) (*Args, error) {
var args Args
flagset := flag.NewFlagSet("ambex", flag.ContinueOnError)
flagset.BoolVar(&args.debug, "debug", false, "Use debug logging")
flagset.BoolVar(&args.watch, "watch", false, "Watch for file changes")
// TODO(lukeshu): Consider changing the default here so we don't need to put it in entrypoint.sh
flagset.StringVar(&args.adsNetwork, "ads-listen-network", "tcp", "network for ADS to listen on")
flagset.StringVar(&args.adsAddress, "ads-listen-address", ":18000", "address (on --ads-listen-network) for ADS to listen on")
var legacyAdsPort uint
flagset.UintVar(&legacyAdsPort, "ads", 0, "port number for ADS to listen on (deprecated; use --ads-listen-address=:1234 instead)")
if err := flagset.Parse(rawArgs); err != nil {
return nil, err
}
if legacyAdsPort != 0 {
args.adsAddress = fmt.Sprintf(":%v", legacyAdsPort)
}
args.dirs = flagset.Args()
if len(args.dirs) == 0 {
args.dirs = []string{"."}
}
return &args, nil
}
// Hasher hashes an Envoy node to its node ID, for use as a SnapshotCache key.
type Hasher struct{}
// ID returns the node's ID, or "unknown" if the node is nil.
func (h Hasher) ID(node *core.Node) string {
if node == nil {
return "unknown"
}
return node.Id
}
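// For example, an Envoy whose bootstrap sets node.id to "test-id" is served
// the snapshots stored below via config.SetSnapshot("test-id", ...).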
// end Hasher stuff
// This feels kinda dumb, but we wrap logrus.Logger so that we can also hang
// the go-control-plane callback methods (OnStreamOpen etc., below) off of it.
type logger struct {
*logrus.Logger
}
var log = &logger{
Logger: logrus.StandardLogger(),
}
// runManagementServer starts the gRPC xDS management server listening on the
// given network and address.
func runManagementServer(ctx context.Context, server server.Server, adsNetwork, adsAddress string) {
grpcServer := grpc.NewServer()
lis, err := net.Listen(adsNetwork, adsAddress)
if err != nil {
log.WithError(err).Panic("failed to listen")
}
// register services
discovery.RegisterAggregatedDiscoveryServiceServer(grpcServer, server)
v2.RegisterEndpointDiscoveryServiceServer(grpcServer, server)
v2.RegisterClusterDiscoveryServiceServer(grpcServer, server)
v2.RegisterRouteDiscoveryServiceServer(grpcServer, server)
v2.RegisterListenerDiscoveryServiceServer(grpcServer, server)
log.WithFields(logrus.Fields{"addr": adsNetwork + ":" + adsAddress}).Info("Listening")
go func() {
sc := &dhttp.ServerConfig{
Handler: grpcServer,
}
if err := sc.Serve(ctx, lis); err != nil {
log.WithFields(logrus.Fields{"error": err}).Error("Management server exited")
}
}()
}
// Decoders for unmarshalling our config
var decoders = map[string](func(string, proto.Message) error){
".json": jsonpb.UnmarshalString,
".pb": proto.UnmarshalText,
}
func isDecodable(name string) bool {
if strings.HasPrefix(name, ".") {
return false
}
ext := filepath.Ext(name)
_, ok := decoders[ext]
return ok
}
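// For example, isDecodable("clusters.json") and isDecodable("routes.pb") are
// true, while isDecodable(".hidden.json") (dotfile) and isDecodable("README.md")
// (no registered decoder) are false.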
// Not sure if there is a better way to do this, but we cast to this
// so we can call the generated Validate method.
type Validatable interface {
proto.Message
Validate() error
}
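// Decode reads the named file, unmarshals it (per its file extension) into an
// Any, unpacks that into its concrete protobuf type, and runs the generated
// Validate method before returning the message.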
func Decode(name string) (proto.Message, error) {
any := &any.Any{}
contents, err := ioutil.ReadFile(name)
if err != nil {
return nil, err
}
ext := filepath.Ext(name)
decoder := decoders[ext]
err = decoder(string(contents), any)
if err != nil {
return nil, err
}
var m ptypes.DynamicAny
err = ptypes.UnmarshalAny(any, &m)
if err != nil {
return nil, err
}
var v = m.Message.(Validatable)
err = v.Validate()
if err != nil {
return nil, err
}
log.Infof("Loaded file %s", name)
return v, nil
}
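// Merge overlays from onto to by round-tripping through jsonpb; it panics
// rather than returning an error if either leg of the round trip fails.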
func Merge(to, from proto.Message) {
str, err := (&jsonpb.Marshaler{}).MarshalToString(from)
if err != nil {
panic(err)
}
err = jsonpb.UnmarshalString(str, to)
if err != nil {
panic(err)
}
}
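// Clone deep-copies src by Merge-ing it into a freshly allocated message of
// the same concrete type (a nil input is returned unchanged).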
func Clone(src proto.Message) proto.Message {
in := reflect.ValueOf(src)
if in.IsNil() {
return src
}
out := reflect.New(in.Type().Elem())
dst := out.Interface().(proto.Message)
Merge(dst, src)
return dst
}
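// update re-reads every decodable file under dirs, folds in the current EDS
// endpoint data, and, if the resulting snapshot is self-consistent, sends a
// versioned Update down the updates channel for the ratelimiting logic to apply.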
func update(ctx context.Context, config cache.SnapshotCache, generation *int, dirs []string, edsEndpoints map[string]*v2.ClusterLoadAssignment, updates chan<- Update) {
clusters := []ctypes.Resource{} // v2.Cluster
routes := []ctypes.Resource{} // v2.RouteConfiguration
listeners := []ctypes.Resource{} // v2.Listener
runtimes := []ctypes.Resource{} // discovery.Runtime
var filenames []string
for _, dir := range dirs {
files, err := ioutil.ReadDir(dir)
if err != nil {
log.WithError(err).Warnf("Error listing %v", dir)
continue
}
for _, file := range files {
name := file.Name()
if isDecodable(name) {
filenames = append(filenames, filepath.Join(dir, name))
}
}
}
for _, name := range filenames {
m, e := Decode(name)
if e != nil {
log.Warnf("%s: %v", name, e)
continue
}
var dst *[]ctypes.Resource
switch m.(type) {
case *v2.Cluster:
dst = &clusters
case *v2.RouteConfiguration:
dst = &routes
case *v2.Listener:
dst = &listeners
case *discovery.Runtime:
dst = &runtimes
case *bootstrap.Bootstrap:
bs := m.(*bootstrap.Bootstrap)
sr := bs.StaticResources
for _, lst := range sr.Listeners {
// When the RouteConfiguration is embedded in the listener, it will cause envoy to
// go through a complete drain cycle whenever there is a routing change and that
// will potentially disrupt in-flight requests. By converting all listeners to use
// RDS rather than inlining their routing configuration, we significantly reduce the
// set of circumstances where the listener definition itself changes, and this in
// turn reduces the set of circumstances where envoy has to go through that drain
// process and disrupt in-flight requests.
rdsListener, routeConfigs, err := ListenerToRdsListener(lst)
if err != nil {
log.Errorf("Error converting listener to RDS: %+v", err)
listeners = append(listeners, Clone(lst).(ctypes.Resource))
continue
}
listeners = append(listeners, rdsListener)
for _, rc := range routeConfigs {
// These routes will get included in the configuration snapshot created below.
routes = append(routes, rc)
}
}
for _, cls := range sr.Clusters {
clusters = append(clusters, Clone(cls).(ctypes.Resource))
}
continue
default:
log.Warnf("Unrecognized resource %s: %v", name, e)
continue
}
*dst = append(*dst, m.(ctypes.Resource))
}
// The configuration data that reaches us here arrives via two parallel paths that race each
// other. The endpoint data comes in real time directly from the golang watcher in the entrypoint
// package. The cluster configuration comes from the python code. Either one can win which means
// we might at times see endpoint data with no corresponding cluster and we might also see
// clusters with no corresponding endpoint data. Both of these circumstances should be
// transient.
//
// To produce a consistent configuration we do an outer join operation on the endpoint and
// cluster configuration that we have at this moment. If there is no endpoint information for a
// given cluster, we will synthesize an empty ClusterLoadAssignment.
//
// Note that a cluster not existing is very different to envoy than a cluster existing but
// having an empty ClusterLoadAssignment. When envoy first discovers clusters it goes through a
// warmup process to be sure the cluster is properly bootstrapped before routing traffic to
// it. See here for more details:
//
// https://www.envoyproxy.io/docs/envoy/latest/intro/arch_overview/upstream/cluster_manager.html?highlight=cluster%20warming
//
// For this reason if there is no endpoint data for the cluster we will synthesize an empty
// ClusterLoadAssignment rather than filtering out the cluster. This avoids triggering the
// warmup sequence in scenarios where the endpoint data for a cluster is really flapping into
// and out of existence. In that circumstance we want to faithfully relay to envoy that the
// cluster exists but currently has no endpoints.
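// For example (illustrative): if clusters contains an EDS cluster named "foo"
// but edsEndpoints has no "foo" key, the join is expected to emit an empty
// ClusterLoadAssignment for "foo" rather than dropping the cluster.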
endpoints := JoinEdsClusters(ctx, clusters, edsEndpoints)
// Create a new configuration snapshot from everything we have just loaded from disk.
version := fmt.Sprintf("v%d", *generation)
*generation++
snapshot := cache.NewSnapshot(
version,
endpoints,
clusters,
routes,
listeners,
runtimes)
if err := snapshot.Consistent(); err != nil {
bs, _ := json.Marshal(snapshot)
log.Errorf("Snapshot inconsistency: %v: %s", err, bs)
return
}
// This used to just directly update envoy. Since we want ratelimiting, we now send an
// Update object down the channel with a function that knows how to do the update if/when
// the ratelimiting logic decides.
//
// We also need to pay attention to contexts here so we can shut down properly. If we didn't
// have the context portion, the ratelimit goroutine could shut down first and we could end
// up blocking here and never shutting down.
select {
case updates <- Update{version, func() error {
err := config.SetSnapshot("test-id", snapshot)
if err != nil {
return fmt.Errorf("Snapshot error %q for %+v", err, snapshot)
} else {
return nil
}
}}:
case <-ctx.Done():
}
}
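// warn logs err if it is non-nil and reports whether it did, so callers can
// write e.g.: if !warn(err) { log.Info("success") }.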
func warn(err error) bool {
if err != nil {
log.Warn(err)
return true
} else {
return false
}
}
// OnStreamOpen is called once an xDS stream is open with a stream ID and the type URL (or "" for ADS).
func (l logger) OnStreamOpen(_ context.Context, sid int64, stype string) error {
l.Infof("Stream open[%v]: %v", sid, stype)
return nil
}
// OnStreamClosed is called immediately prior to closing an xDS stream with a stream ID.
func (l logger) OnStreamClosed(sid int64) {
l.Infof("Stream closed[%v]", sid)
}
// OnStreamRequest is called once a request is received on a stream.
func (l logger) OnStreamRequest(sid int64, req *v2.DiscoveryRequest) error {
l.Infof("Stream request[%v]: %v", sid, req)
return nil
}
// OnStreamResponse is called immediately prior to sending a response on a stream.
func (l logger) OnStreamResponse(sid int64, req *v2.DiscoveryRequest, res *v2.DiscoveryResponse) {
l.Infof("Stream response[%v]: %v -> %v", sid, req, res)
}
// OnFetchRequest is called for each Fetch request.
func (l logger) OnFetchRequest(_ context.Context, r *v2.DiscoveryRequest) error {
l.Infof("Fetch request: %v", r)
return nil
}
// OnFetchResponse is called immediately prior to sending a response.
func (l logger) OnFetchResponse(req *v2.DiscoveryRequest, res *v2.DiscoveryResponse) {
l.Infof("Fetch response: %v -> %v", req, res)
}
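// Main is the exported entrypoint: it starts the memory usage watcher and
// delegates to Main2 with a fresh (never-written) endpoints channel.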
func Main(ctx context.Context, Version string, rawArgs ...string) error {
usage := memory.GetMemoryUsage()
go usage.Watch(ctx)
return Main2(ctx, Version, usage.PercentUsed, make(chan *Endpoints), rawArgs...)
}
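// Main2 is the testable core of Main: it parses rawArgs, starts the gRPC
// management server, and then loops applying configuration updates until the
// context is cancelled or an interrupt/termination signal arrives.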
func Main2(ctx context.Context, Version string, getUsage MemoryGetter, endpointsCh <-chan *Endpoints,
rawArgs ...string) error {
args, err := parseArgs(rawArgs...)
if err != nil {
return err
}
if args.debug {
log.SetLevel(logrus.DebugLevel)
} else {
log.SetLevel(logrus.WarnLevel)
}
log.Infof("Ambex %s starting...", Version)
watcher, err := fsnotify.NewWatcher()
if err != nil {
log.WithError(err).Panic()
}
defer watcher.Close()
if args.watch {
for _, d := range args.dirs {
warn(watcher.Add(d))
}
}
// The golang signal package does not block when it writes to the channel. We therefore need a
// nonzero buffer for the channel to minimize the possibility that we miss out on a signal that
// comes in while we are doing work and not reading from the channel. Since we are subscribing
// to multiple signals there is also the possibility that even with buffering, too many of one
// kind of signal can fill up the buffer and cause us to drop an occurrence of the other types of
// signal. To minimize the chance of that happening we will choose a buffer size of 100. That
// may well be overkill, but better to not have to consider the possibility that we lose a
// signal.
ch := make(chan os.Signal, 100)
signal.Notify(ch, syscall.SIGHUP, os.Interrupt, syscall.SIGTERM)
ctx, cancel := context.WithCancel(ctx)
defer cancel()
config := cache.NewSnapshotCache(true, Hasher{}, log)
srv := server.NewServer(ctx, config, log)
runManagementServer(ctx, srv, args.adsNetwork, args.adsAddress)
pid := os.Getpid()
file := "ambex.pid"
if !warn(ioutil.WriteFile(file, []byte(fmt.Sprintf("%v", pid)), 0644)) {
log.WithFields(logrus.Fields{"pid": pid, "file": file}).Info("Wrote PID")
}
updates := make(chan Update)
envoyUpdaterDone := make(chan struct{})
go func() {
defer close(envoyUpdaterDone)
err := Updater(ctx, updates, getUsage)
if err != nil {
// Panic will get reported more usefully by entrypoint.go's exit code than logging the
// error.
panic(err)
}
}()
generation := 0
edsEndpoints := map[string]*v2.ClusterLoadAssignment{}
update(ctx, config, &generation, args.dirs, edsEndpoints, updates)
OUTER:
for {
select {
case sig := <-ch:
switch sig {
case syscall.SIGHUP:
update(ctx, config, &generation, args.dirs, edsEndpoints, updates)
case os.Interrupt, syscall.SIGTERM:
break OUTER
}
case eps := <-endpointsCh:
edsEndpoints = eps.ToMap_v2()
update(ctx, config, &generation, args.dirs, edsEndpoints, updates)
case <-watcher.Events:
update(ctx, config, &generation, args.dirs, edsEndpoints, updates)
case err := <-watcher.Errors:
log.WithError(err).Warn("Watcher error")
case <-ctx.Done():
break OUTER
}
}
<-envoyUpdaterDone
log.Info("Done")
return nil
}