-
Notifications
You must be signed in to change notification settings - Fork 12
/
mount.go
343 lines (300 loc) · 13.3 KB
/
mount.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
// Copyright © 2017, 2018 Genome Research Limited
// Author: Sendu Bala <sb10@sanger.ac.uk>.
//
// This file is part of wr.
//
// wr is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// wr is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with wr. If not, see <http://www.gnu.org/licenses/>.
package cmd
import (
"encoding/json"
"os"
"os/signal"
"strings"
"syscall"
"github.com/VertebrateResequencing/muxfys"
"github.com/VertebrateResequencing/wr/jobqueue"
"github.com/inconshreveable/log15"
"github.com/sb10/l15h"
"github.com/spf13/cobra"
)
// options for this cmd, filled in from the command line flags registered in
// init()
var (
	// mountSimple holds the value of --mounts
	mountSimple string

	// mountJSON holds the value of --mount_json
	mountJSON string

	// mountVerbose holds the value of --verbose
	mountVerbose bool
)
// mountCmd represents the mount command. When run it mounts every mount
// described by --mounts or --mount_json, then blocks until it receives SIGINT
// or SIGTERM, at which point it unmounts them all again.
var mountCmd = &cobra.Command{
	Use:   "mount",
	Short: "Mount an S3 bucket",
	Long: `Test mounting of S3 buckets.
'wr add' can take mount options if your commands need to read from/ write to
S3 buckets. Before supplying these mount options to 'wr add', you can use this
command to test that your mount options work.
You can also use this as a quick, easy and high performance way of mounting an
S3 bucket for general use, but note that it is only designed as a temporary
mount since it won't notice externally altered or added files in directories you
already accessed. It also only allows yourself access to the files.
Since this command doesn't run as a daemon, you'll have to either keep it
running in the foreground and open up a new terminal to actually explore your
mount point, or you'll have to run this command in the background:
$ wr mount -m '...' &
When you're finished with your mount, you must either send SIGTERM to its
process id (using the 'kill' command) or bring 'wr mount' back to the
foreground:
$ fg
And then kill it by hitting ctrl-c.
NB: if you are writing to your mount point, it's very important to kill it
cleanly using one of these methods once you're done, since uploads only occur
when you do this!
For mounting to work, you must be able to carry out fuse mounts, which means
fuse-utils must be installed, and /etc/fuse.conf should have user_allow_other
set. An easy way to enable it is to run:
sudo perl -i -pne 's/#user_allow_other/user_allow_other/;' /etc/fuse.conf
--mounts is a convenience option that lets you specify your mounts in the common
case that you wish the contents of 1 or more remote directories to be accessible
from a single local directory ('mnt' when using this command, the command
working directory when using 'wr add'). For anything more complicated you'll
need to use --mount_json. You can't use both --mounts and --mount_json at once.
The format is a comma-separated list of [c|u][r|w]:[profile@]bucket[/path]
strings. The first character as 'c' means to turn on caching, while 'u' means
uncached. The second character as 'r' means read-only, while 'w' means writeable
(only one of them can have w). After the colon you can optionally specify the
profile name followed by the @ symbol, followed by the required remote bucket
name and ideally the path to the deepest subdirectory that contains the data you
wish to access.
--mount_json is the JSON string for an array of Config objects describing all
your mount parameters.
A JSON array begins and ends with a square bracket, and each item is separated
with a comma.
A JSON object can be written by starting and ending it with curly braces.
Parameter names and their values are put in double quotes (except for numbers,
which are left bare, booleans where you write, unquoted, true or false, and
arrays as described previously), and the pair separated with a colon, and pairs
separated from each other with commas.
For example (all on one line): --mount_json '[{"Mount":"/tmp/wr_mnt","Targets":
[{"Profile":"default","Path":"mybucket/subdir","Write":true}]}]'
The paragraphs below describe all the possible Config object parameters.
Mount is the local directory on which to mount your Target(s). It can be (in)
any directory you're able to write to. If the directory doesn't exist, wr will
try to create it first. Otherwise, it must be empty. If not supplied, defaults
to the subdirectory "mnt" in the current working directory (under 'wr add', if
--cwd_matters has not been set, then instead the actual working directory is
used as the mount point). Note that if specifying multiple Config objects, they
must each have a different Mount (and so only one of them can have Mount
undefined).
CacheBase is the parent directory to use for the CacheDir of any Targets
configured with Cache on, but CacheDir undefined. If CacheBase is also
undefined, the cache directories will be made in the current working directory
(under 'wr add', if --cwd_matters has not been set, then instead the cache
directories will be in a sister directory of the actual working directory).
Retries is the number of retries wr should attempt when it encounters errors in
trying to access your remote S3 bucket. At least 3 is recommended. It defaults
to 10 if not provided.
Verbose is a boolean, which if true, would make wr store timing information on
all remote calls as lines of all job STDERR that use the mount. Errors always
appear there. This has no effect on what you see when using this command to test
your mount; instead use the global -v command line argument to see the same
things.
Targets is an array of Target objects which define what you want to access at
your Mount. It's an array to allow you to multiplex different buckets (or
different subdirectories of the same bucket) so that it looks like all their
data is in the same place, for easier access to files in your mount. You can
only have one of these configured to be writeable. (If you don't want to
multiplex but instead want multiple different mount points, you specify a single
Target in this array, and have multiple Config objects in your top level array.)
The remaining paragraphs describe the possible parameters for Target objects.
Profile is the S3 configuration profile name to use. If not supplied, the value
of the $AWS_DEFAULT_PROFILE or $AWS_PROFILE environment variables is used, and
if those are unset it defaults to "default".
wr will look at a number of standard S3 configuration files and environment
variables to determine the scheme, domain, region and authentication details to
connect to S3 with. All possible sources are checked to fill in any missing
values from more preferred sources.
The preferred file is ~/.s3cfg, since this is the only config file type that
allows the specification of a custom domain. This file is Amazon's s3cmd config
file, described here: http://s3tools.org/kb/item14.htm. wr will look at the
access_key, secret_key, use_https and host_base options under the section with
the given Profile name. If you don't wish to use any other config files or
environment variables, you can add the non-standard region option to this file
if you need to specify a specific region.
The next file checked is the one pointed to by the $AWS_SHARED_CREDENTIALS_FILE
environment variable, or ~/.aws/credentials. This file is described here:
http://docs.aws.amazon.com/cli/latest/userguide/cli-chap-getting-started.html.
wr will look at the aws_access_key_id and aws_secret_access_key options under
the section with the given Profile name.
wr also checks the file pointed to by the $AWS_CONFIG_FILE environment variable,
or ~/.aws/config, described in the previous link. From here the region option is
used from the section with the given Profile name. If you don't wish to use a
~/.s3cfg file but do need to specify a custom domain, you can add the
non-standard host_base and use_https options to this file instead.
As a last resort, ~/.awssecret is checked. This is s3fs's config file, and
consists of a single line with your access key and secret key separated by a
colon.
If set, the environment variables $AWS_ACCESS_KEY_ID, $AWS_SECRET_ACCESS_KEY and
$AWS_DEFAULT_REGION override corresponding options found in any config file.
Path (required) is the name of your S3 bucket, optionally followed URL-style
(separated with forward slashes) by sub-directory names. The highest performance
is gained by specifying the deepest path under your bucket that holds all the
files you wish to access.
Cache is a boolean, which if true, turns on data caching of any data retrieved,
or any data you wish to upload.
CacheDir is the local directory to store cached data. If this parameter is
supplied, Cache is forced true and so doesn't need to be provided. If this
parameter is not supplied but Cache is true, the directory will be a unique
directory in CacheBase, which will get deleted on unmount.
Write is a boolean, which if true, makes the mount point writeable. If you
don't intend to write to a mount, just leave this parameter out. Note that when
not cached, only serial writes are possible.`,
	Run: func(cmd *cobra.Command, args []string) {
		// set up logging: warnings and above by default, info and above when
		// --verbose was supplied
		logLevel := log15.LvlWarn
		if mountVerbose {
			logLevel = log15.LvlInfo
		}
		muxfys.SetLogHandler(log15.LvlFilterHandler(logLevel, l15h.CallerInfoHandler(log15.StderrHandler)))

		var mounted []*muxfys.MuxFys

		// unmountAll unmounts everything mounted so far, logging (but not
		// stopping on) any failure so that the remaining mounts still get
		// their chance to unmount
		unmountAll := func() {
			for _, fs := range mounted {
				if err := fs.Unmount(); err != nil {
					fs.Error("Failed to unmount", "err", err)
				}
			}
		}

		// mount everything; die() is defined elsewhere in this package and is
		// assumed not to return, so before each call we first unmount whatever
		// earlier iterations managed to mount, rather than leaving those
		// mounts behind
		for _, mc := range mountParse(mountJSON, mountSimple) {
			var rcs []*muxfys.RemoteConfig
			for _, mt := range mc.Targets {
				accessorConfig, err := muxfys.S3ConfigFromEnvironment(mt.Profile, mt.Path)
				if err != nil {
					unmountAll()
					die("had a problem reading S3 config values from the environment: %s", err)
				}
				accessor, err := muxfys.NewS3Accessor(accessorConfig)
				if err != nil {
					unmountAll()
					die("had a problem creating an S3 accessor: %s", err)
				}

				rcs = append(rcs, &muxfys.RemoteConfig{
					Accessor:  accessor,
					CacheData: mt.Cache,
					CacheDir:  mt.CacheDir,
					Write:     mt.Write,
				})
			}

			// the documented default of 10 applies when Retries was not
			// supplied (or was not positive)
			retries := 10
			if mc.Retries > 0 {
				retries = mc.Retries
			}

			cfg := &muxfys.Config{
				Mount:     mc.Mount,
				CacheBase: mc.CacheBase,
				Retries:   retries,
				Verbose:   mc.Verbose,
			}

			fs, err := muxfys.New(cfg)
			if err != nil {
				unmountAll()
				die("bad configuration: %s\n", err)
			}

			err = fs.Mount(rcs...)
			if err != nil {
				unmountAll()
				die("could not mount: %s\n", err)
			}

			mounted = append(mounted, fs)
			// (we can't use each fs's UnmountOnDeath() function because they
			// won't wait for each other)
		}

		// with nothing mounted there is nothing to wait for
		if len(mounted) == 0 {
			return
		}

		// wait for death, then unmount everything ourselves
		deathSignals := make(chan os.Signal, 2)
		signal.Notify(deathSignals, os.Interrupt, syscall.SIGTERM)
		<-deathSignals
		unmountAll()
	},
}
// init registers the mount sub-command with the root command and defines its
// command line flags, binding them to this file's option variables.
func init() {
	RootCmd.AddCommand(mountCmd)

	// flags specific to this sub-command; the --mounts description mentions
	// the optional profile so that it agrees with the format given in --help
	mountCmd.Flags().StringVarP(&mountJSON, "mount_json", "j", "", "mount parameters JSON (see --help)")
	mountCmd.Flags().StringVarP(&mountSimple, "mounts", "m", "", "comma-separated list of [c|u][r|w]:[profile@]bucket[/path] (see --help)")
	mountCmd.Flags().BoolVarP(&mountVerbose, "verbose", "v", false, "print timing info on all remote calls")
}
// mountParse accepts the values of --mount_json and --mounts (as described in
// `wr mount -h`), of which exactly one must be non-empty, and returns the
// MountConfigs parsed from whichever one was supplied. It calls die() if
// neither or both were supplied.
func mountParse(jsonString, simpleString string) jobqueue.MountConfigs {
	switch {
	case jsonString == "" && simpleString == "":
		die("--mounts or --mount_json is required")
	case jsonString != "" && simpleString != "":
		die("--mounts and --mount_json are mutually exclusive")
	}

	if jsonString == "" {
		return mountParseSimple(simpleString)
	}
	return mountParseJSON(jsonString)
}
// mountParseJSON decodes a JSON array of mount configurations (as per
// `wr mount --help`) into a MountConfigs holding one MountConfig per mount
// defined. It calls die() if the string cannot be decoded.
func mountParseJSON(jsonString string) jobqueue.MountConfigs {
	var configs jobqueue.MountConfigs
	if err := json.Unmarshal([]byte(jsonString), &configs); err != nil {
		die("had a problem with the provided mount JSON (%s): %s", jsonString, err)
	}
	return configs
}
// mountParseSimple takes a comma-separated list of
// [c|u][r|w]:[profile@]bucket[/path] strings (as per `wr mount -h`) and parses
// it to a single MountConfig with one MountTarget per list item, returned in a
// MountConfigs (to match the output type of mountParseJSON).
//
// Within each item only the first ':' separates the 2 flag characters from the
// rest, and only the first '@' separates the optional profile from the bucket
// and path, so a path may itself contain further ':' or '@' characters.
//
// It calls die() if an item is not in that format, or has an empty
// bucket/path.
func mountParseSimple(simpleString string) jobqueue.MountConfigs {
	var targets []jobqueue.MountTarget
	for _, simple := range strings.Split(simpleString, ",") {
		// split on the first colon only, so colons in the path are kept
		parts := strings.SplitN(simple, ":", 2)
		if len(parts) != 2 || len(parts[0]) != 2 {
			die("'%s' was not in the right format", simple)
		}

		var cache, write bool
		switch parts[0][0] {
		case 'c':
			cache = true
		case 'u':
			cache = false
		default:
			die("'%s' did not start with c or u", simple)
		}
		switch parts[0][1] {
		case 'w':
			write = true
		case 'r':
			write = false
		default:
			die("'%s' did not specify w or r", simple)
		}

		// everything before the first @ (if any) is the profile; the
		// remainder is kept whole instead of being truncated at a later @
		path := parts[1]
		var profile string
		if i := strings.Index(path, "@"); i >= 0 {
			profile = path[:i]
			path = path[i+1:]
		}

		// the bucket name is required, so catch its absence here instead of
		// handing an empty Path on to whatever uses the config
		if path == "" {
			die("'%s' did not specify a bucket", simple)
		}

		targets = append(targets, jobqueue.MountTarget{
			Profile: profile,
			Path:    path,
			Cache:   cache,
			Write:   write,
		})
	}

	var mcs jobqueue.MountConfigs
	return append(mcs, jobqueue.MountConfig{Targets: targets})
}