Skip to content

Commit

Permalink
make opt-in to load host AWS credentials for S3 downloads
Browse files Browse the repository at this point in the history
  • Loading branch information
mlin committed Jul 3, 2020
1 parent 32677fb commit c95fd96
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 17 deletions.
27 changes: 17 additions & 10 deletions WDL/runtime/config_templates/default.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,16 @@
#
# miniwdl loads configuration options in the following priority order:
# 1. command-line arguments
# 2. environment variables MINIWDL__SECTION__KEY (uppercased with double-underscores)
# 2. environment variables MINIWDL__SECTION__KEY (uppercased with double-underscores) **
# 3. custom configuration file (mutually exclusive):
# a) file named on command-line --cfg
# b) file named by environment variable MINIWDL_CFG
# c) miniwdl.cfg in XDG_CONFIG_HOME & XDG_CONFIG_DIRS
# 4. WDL/runtime/config_templates/default.cfg
# c) XDG_CONFIG_HOME/miniwdl.cfg (usually ~/.config/miniwdl.cfg)
# d) XDG_CONFIG_DIRS/miniwdl.cfg (usually /etc/xdg/.config/miniwdl.cfg)
# 4. installed WDL/runtime/config_templates/default.cfg
#
# ** Notice that any option can be set/changed temporarily without a cfg file, by setting the
# environment variable MINIWDL__SECTION__KEY=value


[scheduler]
Expand Down Expand Up @@ -98,6 +102,16 @@ enable_patterns = ["*"]
disable_patterns = ["*.php", "*.aspx"]


[download_awscli]
# If the workflow takes or generates s3:// URIs, load AWS credentials using boto3 on the miniwdl host.
# Note: If running inside EC2, downloader & other tasks might be able to assume an IAM role via the
# instance metadata service, regardless of this setting.
# Recommendation: use only trusted WDL and Docker images, if either host_credentials = true or an
# EC2 instance profile is available.
# Failing all of the above, public S3 URIs can always be used.
host_credentials = false


[call_cache]
# When a task in a workflow is performed, cache the output of the task in a certain directory where it can
# be found later and reused for the same task definition/inputs
Expand All @@ -107,13 +121,6 @@ get = false
dir = ~/.cache/miniwdl


[download_awscli]
# When a s3:// URI is supplied for a File input, attempt to load AWS credentials using boto3 on the
# miniwdl host. If disabled, the downloader task might still get credentials from metadata service
# if running in EC2. Failing that, public S3 objects can be accessed.
host_credentials = true


[plugins]
# Control which plugins are used. Plugins are installed using the Python entry points convention,
# https://packaging.python.org/specifications/entry-points/
Expand Down
8 changes: 3 additions & 5 deletions WDL/runtime/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,12 +206,10 @@ def awscli_downloader(
# make file group-readable to ensure it'll be usable if the docker image runs as non-root
os.chmod(aws_credentials_file.name, os.stat(aws_credentials_file.name).st_mode | 0o40)
inputs["aws_credentials"] = aws_credentials_file.name
logger.getChild("awscli_downloader").info(
"using host's AWS credentials; to disable, configure [download_awscli] host_credentials=false (MINIWDL__DOWNLOAD_AWSCLI__HOST_CREDENTIALS=false)"
)
logger.getChild("awscli_downloader").info("loaded host AWS credentials")
else:
logger.getChild("awscli_downloader").warning(
"no AWS credentials available on host; if needed, install awscli+boto3 and `aws configure`"
logger.getChild("awscli_downloader").info(
"no AWS credentials available via host awscli/boto3; if needed, configure them and set [download_awscli] host_credentials=true. (On EC2: awscli might still assume role from instance metadata service.)"
)

wdl = r"""
Expand Down
6 changes: 4 additions & 2 deletions docs/runner_reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,8 @@ Upon starting, miniwdl looks for a custom configuration file in the following lo

1. File named by `--cfg` command-line argument
2. File named by `MINIWDL_CFG` environment variable
3. `XDG_CONFIG_HOME/miniwdl.cfg` (typically `${HOME}/.config/miniwdl.cfg`)
4. `miniwdl.cfg` in [XDG_CONFIG_DIRS](https://specifications.freedesktop.org/basedir-spec/basedir-spec-latest.html) (typically `/etc/xdg/.config/miniwdl.cfg`)
3. `XDG_CONFIG_HOME/miniwdl.cfg` (usually `~/.config/miniwdl.cfg`)
4. `miniwdl.cfg` in [XDG_CONFIG_DIRS](https://specifications.freedesktop.org/basedir-spec/basedir-spec-latest.html) (usually `/etc/xdg/.config/miniwdl.cfg`)

Miniwdl loads *only the first file found* in this priority order, and merges its options into the defaults; so the file needs only contain selected sections & options to override. For example, the following overrides the default docker image (used when a task doesn't specify `runtime.docker`), leaving other defaults in place:

Expand All @@ -72,6 +72,8 @@ Environment variables following the convention `MINIWDL__SECTION__KEY=VALUE` ove
MINIWDL__TASK_RUNTIME__DEFAULTS='{"docker":"ubuntu:19.10"}'
```

Any option can thus be set/changed temporarily without a configuration file.

`miniwdl run` command-line arguments override the other sources. If in doubt, running with `--debug` logs the effective configuration and sources.

## File download cache
Expand Down

0 comments on commit c95fd96

Please sign in to comment.