-
Notifications
You must be signed in to change notification settings - Fork 0
/
node_drain.go
331 lines (284 loc) · 9.51 KB
/
node_drain.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
package command
import (
"context"
"fmt"
"strings"
"time"
"github.com/hashicorp/nomad/api"
"github.com/hashicorp/nomad/api/contexts"
"github.com/posener/complete"
)
var (
// defaultDrainDuration is the default drain duration if it is not specified
// explicitly
defaultDrainDuration = 1 * time.Hour
)
type NodeDrainCommand struct {
Meta
}
func (c *NodeDrainCommand) Help() string {
helpText := `
Usage: nomad node drain [options] <node>
Toggles node draining on a specified node. It is required
that either -enable or -disable is specified, but not both.
The -self flag is useful to drain the local node.
General Options:
` + generalOptionsUsage() + `
Node Drain Options:
-disable
Disable draining for the specified node.
-enable
Enable draining for the specified node.
-deadline <duration>
Set the deadline by which all allocations must be moved off the node.
Remaining allocations after the deadline are forced removed from the node.
If unspecified, a default deadline of one hour is applied.
-detach
Return immediately instead of entering monitor mode.
-monitor
Enter monitor mode directly without modifying the drain status.
-force
Force remove allocations off the node immediately.
-no-deadline
No deadline allows the allocations to drain off the node without being force
stopped after a certain deadline.
-ignore-system
Ignore system allows the drain to complete without stopping system job
allocations. By default system jobs are stopped last.
-keep-ineligible
Keep ineligible will maintain the node's scheduling ineligibility even if
the drain is being disabled. This is useful when an existing drain is being
cancelled but additional scheduling on the node is not desired.
-self
Set the drain status of the local node.
-yes
Automatic yes to prompts.
`
return strings.TrimSpace(helpText)
}
func (c *NodeDrainCommand) Synopsis() string {
return "Toggle drain mode on a given node"
}
func (c *NodeDrainCommand) AutocompleteFlags() complete.Flags {
return mergeAutocompleteFlags(c.Meta.AutocompleteFlags(FlagSetClient),
complete.Flags{
"-disable": complete.PredictNothing,
"-enable": complete.PredictNothing,
"-deadline": complete.PredictAnything,
"-detach": complete.PredictNothing,
"-force": complete.PredictNothing,
"-no-deadline": complete.PredictNothing,
"-ignore-system": complete.PredictNothing,
"-keep-ineligible": complete.PredictNothing,
"-self": complete.PredictNothing,
"-yes": complete.PredictNothing,
})
}
func (c *NodeDrainCommand) AutocompleteArgs() complete.Predictor {
return complete.PredictFunc(func(a complete.Args) []string {
client, err := c.Meta.Client()
if err != nil {
return nil
}
resp, _, err := client.Search().PrefixSearch(a.Last, contexts.Nodes, nil)
if err != nil {
return []string{}
}
return resp.Matches[contexts.Nodes]
})
}
func (c *NodeDrainCommand) Name() string { return "node-drain" }
func (c *NodeDrainCommand) Run(args []string) int {
var enable, disable, detach, force,
noDeadline, ignoreSystem, keepIneligible,
self, autoYes, monitor bool
var deadline string
flags := c.Meta.FlagSet(c.Name(), FlagSetClient)
flags.Usage = func() { c.Ui.Output(c.Help()) }
flags.BoolVar(&enable, "enable", false, "Enable drain mode")
flags.BoolVar(&disable, "disable", false, "Disable drain mode")
flags.StringVar(&deadline, "deadline", "", "Deadline after which allocations are force stopped")
flags.BoolVar(&detach, "detach", false, "")
flags.BoolVar(&force, "force", false, "Force immediate drain")
flags.BoolVar(&noDeadline, "no-deadline", false, "Drain node with no deadline")
flags.BoolVar(&ignoreSystem, "ignore-system", false, "Do not drain system job allocations from the node")
flags.BoolVar(&keepIneligible, "keep-ineligible", false, "Do not update the nodes scheduling eligibility")
flags.BoolVar(&self, "self", false, "")
flags.BoolVar(&autoYes, "yes", false, "Automatic yes to prompts.")
flags.BoolVar(&monitor, "monitor", false, "Monitor drain status.")
if err := flags.Parse(args); err != nil {
return 1
}
// Check that enable or disable is not set with monitor
if monitor && (enable || disable) {
c.Ui.Error("The -monitor flag cannot be used with the '-enable' or '-disable' flags")
c.Ui.Error(commandErrorText(c))
return 1
}
// Check that we got either enable or disable, but not both.
if (enable && disable) || (!monitor && !enable && !disable) {
c.Ui.Error("Ethier the '-enable' or '-disable' flag must be set, unless using '-monitor'")
c.Ui.Error(commandErrorText(c))
return 1
}
// Check that we got a node ID
args = flags.Args()
if l := len(args); self && l != 0 || !self && l != 1 {
c.Ui.Error("Node ID must be specified if -self isn't being used")
c.Ui.Error(commandErrorText(c))
return 1
}
// Validate a compatible set of flags were set
if disable && (deadline != "" || force || noDeadline || ignoreSystem) {
c.Ui.Error("-disable can't be combined with flags configuring drain strategy")
c.Ui.Error(commandErrorText(c))
return 1
}
if deadline != "" && (force || noDeadline) {
c.Ui.Error("-deadline can't be combined with -force or -no-deadline")
c.Ui.Error(commandErrorText(c))
return 1
}
if force && noDeadline {
c.Ui.Error("-force and -no-deadline are mutually exclusive")
c.Ui.Error(commandErrorText(c))
return 1
}
// Parse the duration
var d time.Duration
if force {
d = -1 * time.Second
} else if noDeadline {
d = 0
} else if deadline != "" {
dur, err := time.ParseDuration(deadline)
if err != nil {
c.Ui.Error(fmt.Sprintf("Failed to parse deadline %q: %v", deadline, err))
return 1
}
if dur <= 0 {
c.Ui.Error("A positive drain duration must be given")
return 1
}
d = dur
} else {
d = defaultDrainDuration
}
// Get the HTTP client
client, err := c.Meta.Client()
if err != nil {
c.Ui.Error(fmt.Sprintf("Error initializing client: %s", err))
return 1
}
// If -self flag is set then determine the current node.
var nodeID string
if !self {
nodeID = args[0]
} else {
var err error
if nodeID, err = getLocalNodeID(client); err != nil {
c.Ui.Error(err.Error())
return 1
}
}
// Check if node exists
if len(nodeID) == 1 {
c.Ui.Error(fmt.Sprintf("Identifier must contain at least two characters."))
return 1
}
nodeID = sanitizeUUIDPrefix(nodeID)
nodes, _, err := client.Nodes().PrefixList(nodeID)
if err != nil {
c.Ui.Error(fmt.Sprintf("Error toggling drain mode: %s", err))
return 1
}
// Return error if no nodes are found
if len(nodes) == 0 {
c.Ui.Error(fmt.Sprintf("No node(s) with prefix or id %q found", nodeID))
return 1
}
if len(nodes) > 1 {
c.Ui.Error(fmt.Sprintf("Prefix matched multiple nodes\n\n%s",
formatNodeStubList(nodes, true)))
return 1
}
// Prefix lookup matched a single node
node, _, err := client.Nodes().Info(nodes[0].ID, nil)
if err != nil {
c.Ui.Error(fmt.Sprintf("Error toggling drain mode: %s", err))
return 1
}
// If monitoring the drain start the montior and return when done
if monitor {
c.Ui.Info(fmt.Sprintf("%s: Monitoring node %q: Ctrl-C to detach monitoring", formatTime(time.Now()), node.ID))
c.monitorDrain(client, context.Background(), node, 0, ignoreSystem)
return 0
}
// Confirm drain if the node was a prefix match.
if nodeID != node.ID && !autoYes {
verb := "enable"
if disable {
verb = "disable"
}
question := fmt.Sprintf("Are you sure you want to %s drain mode for node %q? [y/N]", verb, node.ID)
answer, err := c.Ui.Ask(question)
if err != nil {
c.Ui.Error(fmt.Sprintf("Failed to parse answer: %v", err))
return 1
}
if answer == "" || strings.ToLower(answer)[0] == 'n' {
// No case
c.Ui.Output("Canceling drain toggle")
return 0
} else if strings.ToLower(answer)[0] == 'y' && len(answer) > 1 {
// Non exact match yes
c.Ui.Output("For confirmation, an exact ‘y’ is required.")
return 0
} else if answer != "y" {
c.Ui.Output("No confirmation detected. For confirmation, an exact 'y' is required.")
return 1
}
}
var spec *api.DrainSpec
if enable {
spec = &api.DrainSpec{
Deadline: d,
IgnoreSystemJobs: ignoreSystem,
}
}
// Toggle node draining
meta, err := client.Nodes().UpdateDrain(node.ID, spec, !keepIneligible, nil)
if err != nil {
c.Ui.Error(fmt.Sprintf("Error updating drain specification: %s", err))
return 1
}
if !enable || detach {
if enable {
c.Ui.Output(fmt.Sprintf("Node %q drain strategy set", node.ID))
} else {
c.Ui.Output(fmt.Sprintf("Node %q drain strategy unset", node.ID))
}
}
if enable && !detach {
now := time.Now()
c.Ui.Info(fmt.Sprintf("%s: Ctrl-C to stop monitoring: will not cancel the node drain", formatTime(now)))
c.Ui.Output(fmt.Sprintf("%s: Node %q drain strategy set", formatTime(now), node.ID))
c.monitorDrain(client, context.Background(), node, meta.LastIndex, ignoreSystem)
}
return 0
}
func (c *NodeDrainCommand) monitorDrain(client *api.Client, ctx context.Context, node *api.Node, index uint64, ignoreSystem bool) {
outCh := client.Nodes().MonitorDrain(ctx, node.ID, index, ignoreSystem)
for msg := range outCh {
switch msg.Level {
case api.MonitorMsgLevelInfo:
c.Ui.Info(fmt.Sprintf("%s: %s", formatTime(time.Now()), msg))
case api.MonitorMsgLevelWarn:
c.Ui.Warn(fmt.Sprintf("%s: %s", formatTime(time.Now()), msg))
case api.MonitorMsgLevelError:
c.Ui.Error(fmt.Sprintf("%s: %s", formatTime(time.Now()), msg))
default:
c.Ui.Output(fmt.Sprintf("%s: %s", formatTime(time.Now()), msg))
}
}
}