-
Notifications
You must be signed in to change notification settings - Fork 12
/
root.go
299 lines (262 loc) · 8.67 KB
/
root.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
// Copyright © 2016-2018, 2021 Genome Research Limited
// Author: Sendu Bala <sb10@sanger.ac.uk>.
//
// This file is part of wr.
//
// wr is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// wr is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with wr. If not, see <http://www.gnu.org/licenses/>.
package cmd
// this is the cobra file that enables subcommands and handles command-line args
import (
"context"
"fmt"
"os"
"syscall"
"time"
"github.com/VertebrateResequencing/wr/internal"
"github.com/VertebrateResequencing/wr/jobqueue"
"github.com/sevlyar/go-daemon"
"github.com/spf13/cobra"
"github.com/wtsi-ssg/wr/clog"
)
// maxCloudResourceUsernameLength is the maximum length that cloud username can
// be. It is limited because it will form part of cloudResourceName(), which
// will in turn form hostnames, which have max length 63. cloudResourceName()
// has a fixed prefix of length up to 8, and host names will include a UUID of
// length 36 and a prefix length 1, leaving 18 characters for the username.
const maxCloudResourceUsernameLength = 18
// these variables are accessible by all subcommands.
var deployment string
var config *internal.Config
// these are shared by some of the subcommands.
var addr string
var caFile string
var timeoutint int
var cmdCwd string
// RootCmd represents the base command when called without any subcommands.
var RootCmd = &cobra.Command{
Use: "wr",
Short: "wr is a software workflow management system.",
Long: `wr is a software workflow management system and command runner.
You use it to run the same sequence of commands (a "workflow") on many different
input files (which comprise a "datasource").
Initially, you start the management system, which maintains a queue of the
commands you want to run:
$ wr manager start
Then you add commands you want to run to the queue:
$ wr add
At this point your commands should be running, and you can monitor their
progress with:
$ wr status`,
}
// Execute adds all child commands to the root command and sets flags
// appropriately. This is called by main.main(). It only needs to happen once to
// the rootCmd.
func Execute() {
if err := RootCmd.Execute(); err != nil {
die(err.Error())
}
}
// ExecuteLSF is for treating a call to wr as if `wr lsf xxx` was called, for
// the LSF emulation to work.
func ExecuteLSF(cmd string) {
args := append([]string{"lsf", cmd}, os.Args[1:]...)
command, _, err := RootCmd.Find(args)
if err != nil {
die(err.Error())
}
RootCmd.SetArgs(args)
if err := command.Execute(); err != nil {
die(err.Error())
}
}
func init() {
// set up logging to stderr
clog.ToDefaultAtLevel("info")
// global flags
RootCmd.PersistentFlags().StringVar(&deployment, "deployment", internal.DefaultDeployment(context.Background()),
"use production or development config")
cobra.OnInitialize(initConfig)
}
// initConfig reads in config file and ENV variables if set.
func initConfig() {
config = internal.ConfigLoadFromCurrentDir(context.Background(), deployment)
addr = config.ManagerHost + ":" + config.ManagerPort
caFile = config.ManagerCAFile
}
// token reads and returns the token from the file created when the manager
// starts.
func token() ([]byte, error) {
token, err := os.ReadFile(config.ManagerTokenFile)
if err != nil {
return nil, err
}
return token, nil
}
// realUsername returns the username of the current user.
func realUsername() string {
username, err := internal.Username()
if err != nil {
die("could not get username: %s", err)
}
return username
}
// cloudResourceName returns a user and deployment specific string that can be
// used to name cloud resources so they can be identified as having been created
// by wr. username arg defaults to the real username of the user running wr.
func cloudResourceName(username string) string {
if username == "" {
username = realUsername()
}
var dep string
if config.Deployment == internal.Production {
dep = "prod"
} else {
dep = "dev"
}
return "wr-" + dep + "-" + username
}
// info is a convenience to log a message at the Info level.
func info(msg string, a ...interface{}) {
clog.Info(context.Background(), fmt.Sprintf(msg, a...))
}
// warn is a convenience to log a message at the Warn level.
func warn(msg string, a ...interface{}) {
clog.Warn(context.Background(), fmt.Sprintf(msg, a...))
}
// die is a convenience to log a message at the Error level and exit non zero.
func die(msg string, a ...interface{}) {
clog.Error(context.Background(), fmt.Sprintf(msg, a...))
os.Exit(1)
}
// createWorkingDir ensures the main working directory is available
func createWorkingDir() {
_, err := os.Stat(config.ManagerDir)
if err != nil {
if os.IsNotExist(err) {
// try and create the directory
err = os.MkdirAll(config.ManagerDir, os.ModePerm)
if err != nil {
die("could not create the working directory '%s': %v", config.ManagerDir, err)
}
} else {
die("could not access or create the working directory '%s': %v", config.ManagerDir, err)
}
}
}
// daemonize spawns a child copy of ourselves with the correct deployment (we
// need to be careful because the default deployment depends on current dir, and
// the child is forced to run from /). Supplying extraArgs can override earlier
// args (to eg. re-specify an option with a relative path with an absolute
// path).
func daemonize(pidFile string, umask int, extraArgs ...string) (*os.Process, *daemon.Context) {
args := os.Args
hadDeployment := false
for _, arg := range args {
if arg == "--deployment" {
hadDeployment = true
break
}
}
if !hadDeployment {
args = append(args, "--deployment")
args = append(args, config.Deployment)
}
args = append(args, extraArgs...)
dContext := &daemon.Context{
PidFileName: pidFile,
PidFilePerm: 0644,
WorkDir: "/",
Args: args,
Umask: umask,
}
child, err := dContext.Reborn()
if err != nil {
// try again, deleting the pidFile first
errr := os.Remove(pidFile)
if errr != nil && !os.IsNotExist(errr) {
warn("failed to delete existing pid file: %s", errr)
}
child, err = dContext.Reborn()
if err != nil {
die("failed to daemonize: %s", err)
}
}
return child, dContext
}
// stopdaemon stops the daemon created by daemonize() by sending it SIGTERM and
// checking it really exited
func stopdaemon(pid int, source string) bool {
err := syscall.Kill(pid, syscall.SIGTERM)
if err != nil {
warn("wr manager is running with pid %d according to %s, but failed to send it SIGTERM: %s", pid, source, err)
return false
}
// wait a while for the daemon to gracefully close down
giveupseconds := 120
giveup := time.After(time.Duration(giveupseconds) * time.Second)
ticker := time.NewTicker(50 * time.Millisecond)
stopped := make(chan bool, 1)
go func() {
for {
select {
case <-ticker.C:
err = syscall.Kill(pid, syscall.Signal(0))
if err == nil {
// pid is still running
continue
}
// assume the error was "no such process" *** should I do a string comparison to confirm?
ticker.Stop()
stopped <- true
return
case <-giveup:
ticker.Stop()
stopped <- false
return
}
}
}()
ok := <-stopped
// if it didn't stop, offer to force kill it? That's a bit dangerous...
// just warn for now
if !ok {
warn("wr manager, running with pid %d according to %s, is still running %ds after I sent it a SIGTERM", pid, source, giveupseconds)
}
return ok
}
// sAddr gets a nice manager address to report in logs, preferring hostname,
// falling back on the ip address if that wasn't set
func sAddr(s *jobqueue.ServerInfo) string {
saddr := s.Host
if saddr == "localhost" {
saddr = s.Addr
} else {
saddr += ":" + s.Port
}
return saddr
}
// connect gives you a connected client. Dies on error. Dies if there is no
// token file. Does not die or report any kind of error if an optional bool is
// supplied true.
func connect(wait time.Duration, expectedToBeDown ...bool) *jobqueue.Client {
token, err := token()
if err != nil && !(len(expectedToBeDown) == 1 && expectedToBeDown[0]) {
die("could not read token file; has the manager been started? [%s]", err)
}
jq, err := jobqueue.Connect(config.ManagerHost+":"+config.ManagerPort, caFile, config.ManagerCertDomain, token, wait)
if err != nil && !(len(expectedToBeDown) == 1 && expectedToBeDown[0]) {
die("%s", err)
}
return jq
}