forked from hashicorp/nomad
-
Notifications
You must be signed in to change notification settings - Fork 0
/
check.go
143 lines (114 loc) · 3.6 KB
/
check.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
package command
import (
"fmt"
"strconv"
"strings"
"time"
"github.com/posener/complete"
)
// Exit codes returned by the check command. The values follow the Nagios
// plugin convention so the command can be wired into alerting systems.
const (
	// HealthPass signals that every check succeeded.
	HealthPass = 0
	// HealthWarn signals a degraded or indeterminate agent state.
	HealthWarn = 1
	// HealthCritical signals that a check failed.
	HealthCritical = 2
	// HealthUnknown signals that the health could not be evaluated.
	HealthUnknown = 3
)
// AgentCheckCommand implements the "check" command, which reports the health
// of the Nomad agent through its exit code.
type AgentCheckCommand struct {
// Meta is embedded; the methods of this type use its FlagSet, Client and
// AutocompleteFlags helpers as well as the Ui reachable through it.
Meta
}
// Help returns the usage text of the check command. The text is assembled
// from a fixed header, the output of generalOptionsUsage(), and the
// description of the command-specific -min-peers and -min-servers flags;
// leading and trailing whitespace is trimmed before it is returned.
func (c *AgentCheckCommand) Help() string {
helpText := `
Usage: nomad check [options]
Display state of the Nomad agent. The exit code of the command is Nagios
compatible and could be used with alerting systems.
General Options:
` + generalOptionsUsage() + `
Agent Check Options:
-min-peers
Minimum number of peers that a server is expected to know.
-min-servers
Minimum number of servers that a client is expected to know.
`
// The raw string starts and ends with a newline for readability; strip them.
return strings.TrimSpace(helpText)
}
// Synopsis returns the one-line description of the check command.
func (c *AgentCheckCommand) Synopsis() string {
	const synopsis = "Displays health of the local Nomad agent"
	return synopsis
}
// Run parses the command-line flags, queries the agent for its stats, and
// returns a Nagios-compatible exit code describing the agent's health.
//
// An agent that reports "nomad" stats is evaluated as a server against
// -min-peers (default 0). Otherwise, an agent that reports "client" stats is
// evaluated as a client against -min-servers (default 1). If neither set of
// stats is present the result is HealthWarn. Failing to build the API client
// or to query the agent yields HealthCritical, and an invocation whose flags
// cannot be parsed yields HealthUnknown.
func (c *AgentCheckCommand) Run(args []string) int {
	var minPeers, minServers int

	flags := c.Meta.FlagSet("check", FlagSetClient)
	flags.Usage = func() { c.Ui.Output(c.Help()) }
	flags.IntVar(&minPeers, "min-peers", 0, "")
	flags.IntVar(&minServers, "min-servers", 1, "")

	if err := flags.Parse(args); err != nil {
		// A bare 1 would be read as WARNING by a Nagios-style consumer. An
		// invocation that could not be parsed says nothing about the agent,
		// so report UNKNOWN instead.
		return HealthUnknown
	}

	client, err := c.Meta.Client()
	if err != nil {
		c.Ui.Error(fmt.Sprintf("error initializing client: %s", err))
		return HealthCritical
	}

	info, err := client.Agent().Self()
	if err != nil {
		c.Ui.Output(fmt.Sprintf("unable to query agent info: %v", err))
		return HealthCritical
	}

	// Server stats take precedence: when present, only the server check runs.
	if _, ok := info.Stats["nomad"]; ok {
		return c.checkServerHealth(info.Stats, minPeers)
	}

	if clientStats, ok := info.Stats["client"]; ok {
		return c.checkClientHealth(clientStats, minServers)
	}

	// Every other non-passing path explains itself on the UI; do the same
	// here so the warning is not silent.
	c.Ui.Output("agent reported neither server nor client stats")
	return HealthWarn
}
// checkServerHealth evaluates the health of a server agent from its stats.
// It reads the "num_peers" entry of the "raft" stats and reports
// HealthCritical when that value is not an integer or is lower than
// minPeers; otherwise it reports HealthPass.
// TODO Add more rules for determining server health
func (c *AgentCheckCommand) checkServerHealth(info map[string]map[string]string, minPeers int) int {
	// Indexing a missing "raft" entry yields an empty string, which Atoi
	// rejects, so absent stats end up in the error branch below.
	peers, err := strconv.Atoi(info["raft"]["num_peers"])

	switch {
	case err != nil:
		c.Ui.Output(fmt.Sprintf("unable to get known peers: %v", err))
		return HealthCritical
	case peers < minPeers:
		c.Ui.Output(fmt.Sprintf("known peers: %v, is less than expected number of peers: %v", peers, minPeers))
		return HealthCritical
	}

	return HealthPass
}
// checkClientHealth evaluates the health of a client agent from its stats.
// It reads "known_servers" (an integer) plus "heartbeat_ttl" and
// "last_heartbeat" (both parsed as durations) from clientStats. The result
// is HealthCritical when any of those values cannot be parsed, when
// last_heartbeat is greater than heartbeat_ttl, or when fewer than
// minServers servers are known; otherwise it is HealthPass. Each critical
// result is accompanied by one explanatory line on the UI.
func (c *AgentCheckCommand) checkClientHealth(clientStats map[string]string, minServers int) int {
	// critical prints the formatted reason and yields the critical code.
	critical := func(format string, args ...interface{}) int {
		c.Ui.Output(fmt.Sprintf(format, args...))
		return HealthCritical
	}

	servers, err := strconv.Atoi(clientStats["known_servers"])
	if err != nil {
		return critical("unable to get known servers: %v", err)
	}

	ttl, err := time.ParseDuration(clientStats["heartbeat_ttl"])
	if err != nil {
		return critical("unable to parse heartbeat TTL: %v", err)
	}

	sinceHeartbeat, err := time.ParseDuration(clientStats["last_heartbeat"])
	if err != nil {
		return critical("unable to parse last heartbeat: %v", err)
	}

	// The heartbeat comparison is made before the server-count comparison,
	// so an overdue heartbeat is the reason reported when both fail.
	switch {
	case sinceHeartbeat > ttl:
		return critical("last heartbeat was %q time ago, expected heartbeat ttl: %q", sinceHeartbeat, ttl)
	case servers < minServers:
		return critical("known servers: %v, is less than expected number of servers: %v", servers, minServers)
	}

	return HealthPass
}
// AutocompleteFlags returns the flag completions for the check command: the
// completions Meta provides for FlagSetClient merged with the
// command-specific -min-peers and -min-servers flags, both of which accept
// any value.
func (c *AgentCheckCommand) AutocompleteFlags() complete.Flags {
	shared := c.Meta.AutocompleteFlags(FlagSetClient)
	own := complete.Flags{
		"-min-peers":   complete.PredictAnything,
		"-min-servers": complete.PredictAnything,
	}
	return mergeAutocompleteFlags(shared, own)
}
// AutocompleteArgs returns complete.PredictNothing, so no completions are
// offered for positional arguments of the check command.
func (c *AgentCheckCommand) AutocompleteArgs() complete.Predictor {
return complete.PredictNothing
}