forked from hashicorp/serf
/
command.go
599 lines (524 loc) · 18.3 KB
/
command.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
package agent
import (
"flag"
"fmt"
"github.com/armon/go-metrics"
"github.com/hashicorp/logutils"
"github.com/hashicorp/memberlist"
"github.com/hashicorp/serf/serf"
"github.com/mitchellh/cli"
"io"
"log"
"net"
"os"
"os/signal"
"strings"
"syscall"
"time"
)
// gracefulTimeout controls how long we wait before forcefully terminating
var gracefulTimeout = 3 * time.Second
// Command is a Command implementation that runs a Serf agent.
// The command will not end unless a shutdown message is sent on the
// ShutdownCh. If two messages are sent on the ShutdownCh it will forcibly
// exit.
type Command struct {
Ui cli.Ui
ShutdownCh <-chan struct{}
args []string
scriptHandler *ScriptEventHandler
logFilter *logutils.LevelFilter
}
// readConfig is responsible for setup of our configuration using
// the command line and any file configs
func (c *Command) readConfig() *Config {
var cmdConfig Config
var configFiles []string
var tags []string
cmdFlags := flag.NewFlagSet("agent", flag.ContinueOnError)
cmdFlags.Usage = func() { c.Ui.Output(c.Help()) }
cmdFlags.StringVar(&cmdConfig.BindAddr, "bind", "", "address to bind listeners to")
cmdFlags.StringVar(&cmdConfig.AdvertiseAddr, "advertise", "", "address to advertise to cluster")
cmdFlags.Var((*AppendSliceValue)(&configFiles), "config-file",
"json file to read config from")
cmdFlags.Var((*AppendSliceValue)(&configFiles), "config-dir",
"directory of json files to read")
cmdFlags.StringVar(&cmdConfig.EncryptKey, "encrypt", "", "encryption key")
cmdFlags.Var((*AppendSliceValue)(&cmdConfig.EventHandlers), "event-handler",
"command to execute when events occur")
cmdFlags.Var((*AppendSliceValue)(&cmdConfig.StartJoin), "join",
"address of agent to join on startup")
cmdFlags.BoolVar(&cmdConfig.ReplayOnJoin, "replay", false,
"replay events for startup join")
cmdFlags.StringVar(&cmdConfig.LogLevel, "log-level", "", "log level")
cmdFlags.StringVar(&cmdConfig.NodeName, "node", "", "node name")
cmdFlags.IntVar(&cmdConfig.Protocol, "protocol", -1, "protocol version")
cmdFlags.StringVar(&cmdConfig.Role, "role", "", "role name")
cmdFlags.StringVar(&cmdConfig.RPCAddr, "rpc-addr", "",
"address to bind RPC listener to")
cmdFlags.StringVar(&cmdConfig.Profile, "profile", "", "timing profile to use (lan, wan, local)")
cmdFlags.StringVar(&cmdConfig.SnapshotPath, "snapshot", "", "path to the snapshot file")
cmdFlags.Var((*AppendSliceValue)(&tags), "tag",
"tag pair, specified as key=value")
cmdFlags.StringVar(&cmdConfig.Discover, "discover", "", "mDNS discovery name")
cmdFlags.StringVar(&cmdConfig.Interface, "iface", "", "interface to bind to")
if err := cmdFlags.Parse(c.args); err != nil {
return nil
}
// Parse any command line tag values
cmdConfig.Tags = make(map[string]string)
for _, tag := range tags {
parts := strings.SplitN(tag, "=", 2)
if len(parts) != 2 {
c.Ui.Error(fmt.Sprintf("Invalid tag '%s' provided", tag))
return nil
}
cmdConfig.Tags[parts[0]] = parts[1]
}
config := DefaultConfig()
if len(configFiles) > 0 {
fileConfig, err := ReadConfigPaths(configFiles)
if err != nil {
c.Ui.Error(err.Error())
return nil
}
config = MergeConfig(config, fileConfig)
}
config = MergeConfig(config, &cmdConfig)
if config.NodeName == "" {
hostname, err := os.Hostname()
if err != nil {
c.Ui.Error(fmt.Sprintf("Error determining hostname: %s", err))
return nil
}
config.NodeName = hostname
}
eventScripts := config.EventScripts()
for _, script := range eventScripts {
if !script.Valid() {
c.Ui.Error(fmt.Sprintf("Invalid event script: %s", script.String()))
return nil
}
}
// Check for a valid interface
if _, err := config.NetworkInterface(); err != nil {
c.Ui.Error(fmt.Sprintf("Invalid network interface: %s", err))
return nil
}
// Backward compatibility hack for 'Role'
if config.Role != "" {
c.Ui.Output("Deprecation warning: 'Role' has been replaced with 'Tags'")
config.Tags["role"] = config.Role
}
return config
}
// setupAgent is used to create the agent we use
func (c *Command) setupAgent(config *Config, logOutput io.Writer) *Agent {
bindIP, bindPort, err := config.AddrParts(config.BindAddr)
if err != nil {
c.Ui.Error(fmt.Sprintf("Invalid bind address: %s", err))
return nil
}
// Check if we have an interface
if iface, _ := config.NetworkInterface(); iface != nil {
addrs, err := iface.Addrs()
if err != nil {
c.Ui.Error(fmt.Sprintf("Failed to get interface addresses: %s", err))
return nil
}
if len(addrs) == 0 {
c.Ui.Error(fmt.Sprintf("Interface '%s' has no addresses", config.Interface))
return nil
}
// If there is no bind IP, pick an address
if bindIP == "0.0.0.0" {
found := false
for _, a := range addrs {
addr, ok := a.(*net.IPNet)
if !ok {
continue
}
// Skip self-assigned IPs
if addr.IP.IsLinkLocalUnicast() {
continue
}
// Found an IP
found = true
bindIP = addr.IP.String()
c.Ui.Output(fmt.Sprintf("Using interface '%s' address '%s'",
config.Interface, bindIP))
// Update the configuration
bindAddr := &net.TCPAddr{
IP: net.ParseIP(bindIP),
Port: bindPort,
}
config.BindAddr = bindAddr.String()
break
}
if !found {
c.Ui.Error(fmt.Sprintf("Failed to find usable address for interface '%s'", config.Interface))
return nil
}
} else {
// If there is a bind IP, ensure it is available
found := false
for _, a := range addrs {
addr, ok := a.(*net.IPNet)
if !ok {
continue
}
if addr.IP.String() == bindIP {
found = true
break
}
}
if !found {
c.Ui.Error(fmt.Sprintf("Interface '%s' has no '%s' address",
config.Interface, bindIP))
return nil
}
}
}
var advertiseIP string
var advertisePort int
if config.AdvertiseAddr != "" {
advertiseIP, advertisePort, err = config.AddrParts(config.AdvertiseAddr)
if err != nil {
c.Ui.Error(fmt.Sprintf("Invalid advertise address: %s", err))
return nil
}
}
encryptKey, err := config.EncryptBytes()
if err != nil {
c.Ui.Error(fmt.Sprintf("Invalid encryption key: %s", err))
return nil
}
serfConfig := serf.DefaultConfig()
switch config.Profile {
case "lan":
serfConfig.MemberlistConfig = memberlist.DefaultLANConfig()
case "wan":
serfConfig.MemberlistConfig = memberlist.DefaultWANConfig()
case "local":
serfConfig.MemberlistConfig = memberlist.DefaultLocalConfig()
default:
c.Ui.Error(fmt.Sprintf("Unknown profile: %s", config.Profile))
return nil
}
serfConfig.MemberlistConfig.BindAddr = bindIP
serfConfig.MemberlistConfig.BindPort = bindPort
serfConfig.MemberlistConfig.AdvertiseAddr = advertiseIP
serfConfig.MemberlistConfig.AdvertisePort = advertisePort
serfConfig.MemberlistConfig.SecretKey = encryptKey
serfConfig.NodeName = config.NodeName
serfConfig.Tags = config.Tags
serfConfig.SnapshotPath = config.SnapshotPath
serfConfig.ProtocolVersion = uint8(config.Protocol)
serfConfig.CoalescePeriod = 3 * time.Second
serfConfig.QuiescentPeriod = time.Second
serfConfig.UserCoalescePeriod = 3 * time.Second
serfConfig.UserQuiescentPeriod = time.Second
if config.ReconnectInterval != 0 {
serfConfig.ReconnectInterval = config.ReconnectInterval
}
if config.ReconnectTimeout != 0 {
serfConfig.ReconnectTimeout = config.ReconnectTimeout
}
if config.TombstoneTimeout != 0 {
serfConfig.TombstoneTimeout = config.TombstoneTimeout
}
serfConfig.EnableNameConflictResolution = !config.DisableNameResolution
// Start Serf
c.Ui.Output("Starting Serf agent...")
agent, err := Create(serfConfig, logOutput)
if err != nil {
c.Ui.Error(fmt.Sprintf("Failed to start the Serf agent: %v", err))
return nil
}
return agent
}
// setupLoggers is used to setup the logGate, logWriter, and our logOutput
func (c *Command) setupLoggers(config *Config) (*GatedWriter, *logWriter, io.Writer) {
// Setup logging. First create the gated log writer, which will
// store logs until we're ready to show them. Then create the level
// filter, filtering logs of the specified level.
logGate := &GatedWriter{
Writer: &cli.UiWriter{Ui: c.Ui},
}
c.logFilter = LevelFilter()
c.logFilter.MinLevel = logutils.LogLevel(strings.ToUpper(config.LogLevel))
c.logFilter.Writer = logGate
if !ValidateLevelFilter(c.logFilter.MinLevel, c.logFilter) {
c.Ui.Error(fmt.Sprintf(
"Invalid log level: %s. Valid log levels are: %v",
c.logFilter.MinLevel, c.logFilter.Levels))
return nil, nil, nil
}
// Create a log writer, and wrap a logOutput around it
logWriter := NewLogWriter(512)
logOutput := io.MultiWriter(c.logFilter, logWriter)
return logGate, logWriter, logOutput
}
// startAgent is used to start the agent and IPC
func (c *Command) startAgent(config *Config, agent *Agent,
logWriter *logWriter, logOutput io.Writer) *AgentIPC {
// Add the script event handlers
c.scriptHandler = &ScriptEventHandler{
SelfFunc: func() serf.Member { return agent.Serf().LocalMember() },
Scripts: config.EventScripts(),
Logger: log.New(logOutput, "", log.LstdFlags),
}
agent.RegisterEventHandler(c.scriptHandler)
// Start the agent after the handler is registered
if err := agent.Start(); err != nil {
c.Ui.Error(fmt.Sprintf("Failed to start the Serf agent: %v", err))
return nil
}
// Parse the bind address information
bindIP, bindPort, err := config.AddrParts(config.BindAddr)
bindAddr := &net.TCPAddr{IP: net.ParseIP(bindIP), Port: bindPort}
// Start the discovery layer
if config.Discover != "" {
// Use the advertise addr and port
local := agent.Serf().Memberlist().LocalNode()
// Get the bind interface if any
iface, _ := config.NetworkInterface()
_, err := NewAgentMDNS(agent, logOutput, config.ReplayOnJoin,
config.NodeName, config.Discover, iface, local.Addr, int(local.Port))
if err != nil {
c.Ui.Error(fmt.Sprintf("Error starting mDNS listener: %s", err))
return nil
}
}
// Setup the RPC listener
rpcListener, err := net.Listen("tcp", config.RPCAddr)
if err != nil {
c.Ui.Error(fmt.Sprintf("Error starting RPC listener: %s", err))
return nil
}
// Start the IPC layer
c.Ui.Output("Starting Serf agent RPC...")
ipc := NewAgentIPC(agent, config.RPCAuthKey, rpcListener, logOutput, logWriter)
c.Ui.Output("Serf agent running!")
c.Ui.Info(fmt.Sprintf(" Node name: '%s'", config.NodeName))
c.Ui.Info(fmt.Sprintf(" Bind addr: '%s'", bindAddr.String()))
if config.AdvertiseAddr != "" {
advertiseIP, advertisePort, _ := config.AddrParts(config.AdvertiseAddr)
advertiseAddr := (&net.TCPAddr{IP: net.ParseIP(advertiseIP), Port: advertisePort}).String()
c.Ui.Info(fmt.Sprintf("Advertise addr: '%s'", advertiseAddr))
}
c.Ui.Info(fmt.Sprintf(" RPC addr: '%s'", config.RPCAddr))
c.Ui.Info(fmt.Sprintf(" Encrypted: %#v", config.EncryptKey != ""))
c.Ui.Info(fmt.Sprintf(" Snapshot: %v", config.SnapshotPath != ""))
c.Ui.Info(fmt.Sprintf(" Profile: %s", config.Profile))
if config.Discover != "" {
c.Ui.Info(fmt.Sprintf(" mDNS cluster: %s", config.Discover))
}
return ipc
}
// startupJoin is invoked to handle any joins specified to take place at start time
func (c *Command) startupJoin(config *Config, agent *Agent) error {
if len(config.StartJoin) == 0 {
return nil
}
c.Ui.Output(fmt.Sprintf("Joining cluster...(replay: %v)", config.ReplayOnJoin))
n, err := agent.Join(config.StartJoin, config.ReplayOnJoin)
if err != nil {
return err
}
c.Ui.Info(fmt.Sprintf("Join completed. Synced with %d initial agents", n))
return nil
}
func (c *Command) Run(args []string) int {
c.Ui = &cli.PrefixedUi{
OutputPrefix: "==> ",
InfoPrefix: " ",
ErrorPrefix: "==> ",
Ui: c.Ui,
}
// Parse our configs
c.args = args
config := c.readConfig()
if config == nil {
return 1
}
// Setup the log outputs
logGate, logWriter, logOutput := c.setupLoggers(config)
if logWriter == nil {
return 1
}
/* Setup telemetry
Aggregate on 10 second intervals for 1 minute. Expose the
metrics over stderr when there is a SIGUSR1 received.
*/
inm := metrics.NewInmemSink(10*time.Second, time.Minute)
metrics.DefaultInmemSignal(inm)
metricsConf := metrics.DefaultConfig("serf-agent")
metricsConf.EnableHostname = false
metrics.NewGlobal(metricsConf, inm)
// Setup serf
agent := c.setupAgent(config, logOutput)
if agent == nil {
return 1
}
defer agent.Shutdown()
// Start the agent
ipc := c.startAgent(config, agent, logWriter, logOutput)
if ipc == nil {
return 1
}
defer ipc.Shutdown()
// Join startup nodes if specified
if err := c.startupJoin(config, agent); err != nil {
c.Ui.Error(err.Error())
return 1
}
// Enable log streaming
c.Ui.Info("")
c.Ui.Output("Log data will now stream in as it occurs:\n")
logGate.Flush()
// Wait for exit
return c.handleSignals(config, agent)
}
// handleSignals blocks until we get an exit-causing signal
func (c *Command) handleSignals(config *Config, agent *Agent) int {
signalCh := make(chan os.Signal, 4)
signal.Notify(signalCh, os.Interrupt, syscall.SIGTERM, syscall.SIGHUP)
// Wait for a signal
WAIT:
var sig os.Signal
select {
case s := <-signalCh:
sig = s
case <-c.ShutdownCh:
sig = os.Interrupt
case <-agent.ShutdownCh():
// Agent is already shutdown!
return 0
}
c.Ui.Output(fmt.Sprintf("Caught signal: %v", sig))
// Check if this is a SIGHUP
if sig == syscall.SIGHUP {
config = c.handleReload(config, agent)
goto WAIT
}
// Check if we should do a graceful leave
graceful := false
if sig == os.Interrupt && !config.SkipLeaveOnInt {
graceful = true
} else if sig == syscall.SIGTERM && config.LeaveOnTerm {
graceful = true
}
// Bail fast if not doing a graceful leave
if !graceful {
return 1
}
// Attempt a graceful leave
gracefulCh := make(chan struct{})
c.Ui.Output("Gracefully shutting down agent...")
go func() {
if err := agent.Leave(); err != nil {
c.Ui.Error(fmt.Sprintf("Error: %s", err))
return
}
close(gracefulCh)
}()
// Wait for leave or another signal
select {
case <-signalCh:
return 1
case <-time.After(gracefulTimeout):
return 1
case <-gracefulCh:
return 0
}
}
// handleReload is invoked when we should reload our configs, e.g. SIGHUP
func (c *Command) handleReload(config *Config, agent *Agent) *Config {
c.Ui.Output("Reloading configuration...")
newConf := c.readConfig()
if newConf == nil {
c.Ui.Error(fmt.Sprintf("Failed to reload configs"))
return config
}
// Change the log level
minLevel := logutils.LogLevel(strings.ToUpper(newConf.LogLevel))
if ValidateLevelFilter(minLevel, c.logFilter) {
c.logFilter.SetMinLevel(minLevel)
} else {
c.Ui.Error(fmt.Sprintf(
"Invalid log level: %s. Valid log levels are: %v",
minLevel, c.logFilter.Levels))
// Keep the current log level
newConf.LogLevel = config.LogLevel
}
// Change the event handlers
c.scriptHandler.UpdateScripts(config.EventScripts())
// Update the tags in serf
serf := agent.Serf()
if err := serf.SetTags(newConf.Tags); err != nil {
c.Ui.Error(fmt.Sprintf("Failed to update tags: %v", err))
return newConf
}
return newConf
}
func (c *Command) Synopsis() string {
return "Runs a Serf agent"
}
func (c *Command) Help() string {
helpText := `
Usage: serf agent [options]
Starts the Serf agent and runs until an interrupt is received. The
agent represents a single node in a cluster.
Options:
-bind=0.0.0.0 Address to bind network listeners to
-advertise=0.0.0.0 Address to advertise to the other cluster members
-config-file=foo Path to a JSON file to read configuration from.
This can be specified multiple times.
-config-dir=foo Path to a directory to read configuration files
from. This will read every file ending in ".json"
as configuration in this directory in alphabetical
order.
-discover=cluster Discover is set to enable mDNS discovery of peer. On
networks that support multicast, this can be used to have
peers join each other without an explicit join.
-encrypt=foo Key for encrypting network traffic within Serf.
Must be a base64-encoded 16-byte key.
-event-handler=foo Script to execute when events occur. This can
be specified multiple times. See the event scripts
section below for more info.
-join=addr An initial agent to join with. This flag can be
specified multiple times.
-log-level=info Log level of the agent.
-node=hostname Name of this node. Must be unique in the cluster
-profile=[lan|wan|local] Profile is used to control the timing profiles used in Serf.
The default if not provided is lan.
-protocol=n Serf protocol version to use. This defaults to
the latest version, but can be set back for upgrades.
-role=foo The role of this node, if any. This can be used
by event scripts to differentiate different types
of nodes that may be part of the same cluster.
'-role' is deprecated in favor of '-tag role=foo'.
-rpc-addr=127.0.0.1:7373 Address to bind the RPC listener.
-snapshot=path/to/file The snapshot file is used to store alive nodes and
event information so that Serf can rejoin a cluster
and avoid event replay on restart.
-tag key=value Tag can be specified multiple times to attach multiple
key/value tag pairs to the given node.
Event handlers:
For more information on what event handlers are, please read the
Serf documentation. This section will document how to configure them
on the command-line. There are three methods of specifying an event
handler:
- The value can be a plain script, such as "event.sh". In this case,
Serf will send all events to this script, and you'll be responsible
for differentiating between them based on the SERF_EVENT.
- The value can be in the format of "TYPE=SCRIPT", such as
"member-join=join.sh". With this format, Serf will only send events
of that type to that script.
- The value can be in the format of "user:EVENT=SCRIPT", such as
"user:deploy=deploy.sh". This means that Serf will only invoke this
script in the case of user events named "deploy".
`
return strings.TrimSpace(helpText)
}