# Default configuration for the miniwdl runner; see:
# https://miniwdl.readthedocs.io/en/latest/runner_reference.html#configuration
# for guidance on overriding these defaults with a custom .cfg file and/or environment variables.
# While this file exhibits all available configuration options, custom .cfg files need only specify
# the options to override.
#
# This file is organized into sections & options within each section. Some values take the form of
# a JSON object or array, which may span multiple -indented- lines. Except within JSON, string
# values don't need to be in quotes. Environment variable substitutions ($VAR) are supported.
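#
# For example, a minimal custom .cfg overriding a single option might contain just:
#   [scheduler]
#   task_concurrency = 8
# Equivalently (per the documentation linked above), any option can be overridden with an
# environment variable named MINIWDL__SECTION__KEY, e.g. MINIWDL__SCHEDULER__TASK_CONCURRENCY=8
# (the value 8 here is just an illustration).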
[scheduler]
# Thread pool size bounding how many WDL tasks the runner may attempt to execute concurrently;
# actual concurrency may be less due to CPU/memory resource scheduling. This setting may need to be
# increased on multi-node deployments, but (as a guideline) not beyond ~200, to avoid excessive
# overhead in miniwdl's Python process.
# 0 = default to host `nproc`.
# -@
task_concurrency = 0
# task_concurrency applies to URI download tasks too; however, it may be desirable to limit their
# concurrency further still, since they're exceptionally I/O-intensive. In that case set
# download_concurrency to a nonzero value lower than the effective task_concurrency.
# (New in v1.3.1)
download_concurrency = 0
# Thread pool size bounding how many subworkflow calls the runner may attempt to execute
# concurrently (per level of subworkflow call nesting depth).
# 0 = max(task_concurrency, `nproc`)
# (New in v1.5.4)
subworkflow_concurrency = 0
# container backend; docker_swarm (default), singularity, or as added by plug-ins
container_backend = docker_swarm
# When one task fails, immediately terminate all other running tasks. If disabled, stop launching
# new tasks, but leave those still running to succeed or fail on their own. The latter mode might
# be useful with call caching (see below) to avoid discarding all work done by other tasks.
fail_fast = true
[docker_swarm]
# Docker Swarm Mode is the default container backend. It's included with every docker installation,
# but disabled by default. The following option allows miniwdl to automatically initialize the local
# host as a one-node swarm if needed, by performing
# docker swarm init --listen-addr 127.0.0.1 --advertise-addr 127.0.0.1
# This is typically helpful for single-node use, but should be disabled if a preexisting swarm
# is to be used.
auto_init = true
# Period for polling the status of running containers. Increasing this reduces steady-state load on
# miniwdl and dockerd (when many tasks are running concurrently), but also delays noticing that
# tasks have exited and (with --verbose) the appearance of standard-error logs. Worker threads
# randomize the interval +/- 50% to spread out activity.
polling_period_seconds = 1.5
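# For example, at the default of 1.5, each worker thread polls at a random interval between
# roughly 0.75 and 2.25 seconds.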
# Retry idempotent dockerd requests yielding 5xx status code (after polling_period_seconds)
server_error_retries = 2
# Recognize e.g. `docker_network: "host"` in task runtime sections, and associate respective
# containers with the docker network, if the network name appears in this list -- e.g.
# allow_networks = ["host"]
# (New in v1.5.1)
allow_networks = []
[file_io]
# During startup, require the run directory and input files to reside somewhere within this root
# directory. This constraint is needed in cluster deployments relying on a shared mount. It can
# also be useful on a single node to prevent accidental consumption of a small boot/home volume,
# by confining workflow operations to some spacious scratch/data mount.
root = /
# Allow workflows access to any existing file/directory within the root, rather than (default) only
# those the operator expressly supplied through the command line or JSON inputs. Not recommended,
# for security and portability reasons; but enables workflows that derive input filenames at
# runtime, or receive input filenames through a manifest file. (New in v1.7.0)
allow_any_input = false
# Populate task working directory with writable copies of the input files, instead of mounting them
# in situ & read-only. Needed if tasks want to write/move/rename input files, but costs time and
# disk space. --copy-input-files
copy_input_files = false
# Selectively copy_input_files for those tasks whose names appear in this list. (New in v1.3.1)
copy_input_files_for = []
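# e.g. copy_input_files_for = ["bwa_mem", "merge_vcfs"]  (hypothetical task names)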
# Each successful run directory has an "out/" folder containing (by default) a symbolic link to each
# output file in its original working location. If output_hardlinks is true, then out/ is populated
# with hardlinks instead of symlinks. Beware the potential confusion arising from files with
# multiple hardlinks! See also delete_work, below.
output_hardlinks = false
# Delete task working directory upon completion. The task container's working directory is a
# bind-mounted host directory, so files written into it are left behind after the container is torn
# down. If tasks write large non-output files into their working directory (instead of $TMPDIR as
# they should), then it can be useful to delete them automatically.
# Values:
# false = never delete (default)
# success = delete working directories of succeeded tasks
# failure = delete working directories of failed tasks
# always = delete working directories of both succeeded and failed tasks
# The "success" and "always" settings require output_hardlinks, above, to be true; otherwise,
# output files would be deleted too. Input/output JSON, logs, and stdout/stderr are always retained
# in the task run directory (above the container working directory).
delete_work = false
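# For example, a space-saving combination (illustrative, not a recommendation for every workload)
# would be to set output_hardlinks = true together with delete_work = success.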
# Suggest that each task's temporary directory should reside within the mounted task working
# directory, instead of the storage backing the container's root filesystem. The latter (default)
# is usually preferred because the working directory is more likely to reside on slower network-
# attached storage. But mount_tmpdir may be helpful if tasks use large amounts of scratch space and
# the working directory storage has more capacity.
# The exact effect of this setting depends on the container backend implementation: some may change
# where they mount /tmp, some may merely override TMPDIR inside the container, and others may not
# yet implement it at all. (New in v1.5.4)
mount_tmpdir = false
# Selectively mount_tmpdir for those tasks whose names appear in this list. (New in v1.5.4)
mount_tmpdir_for = []
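# e.g. mount_tmpdir_for = ["sort_bam"]  (hypothetical task name)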
[task_runtime]
# Effective maximum values of runtime.cpu and runtime.memory (bytes); evaluated values may be
# rounded down to these maxima in order to "fit" the available host resources. Warning: tasks may
# deadlock if these are set higher than the resources actually achievable.
# 0 = detect host resources, -1 = do not apply a limit.
# --runtime-cpu-max, --runtime-memory-max
cpu_max = 0
memory_max = 0
# A task's runtime.memory is used as a "reservation" to guide container scheduling, but isn't an
# enforced limit unless memory_limit_multiplier is positive, which sets a hard limit of
# memory_limit_multiplier*runtime.memory. Recommendation: if activating this, disable host swap.
memory_limit_multiplier = 0.0
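# For example (illustrative values): with memory_limit_multiplier = 1.25, a task declaring
# runtime.memory = "8G" is scheduled with an 8G reservation and subject to a 10G hard limit.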
# Defaults which each task's runtime{} section will be merged into. --runtime-defaults
defaults = {
"docker": "ubuntu:20.04"
}
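# With the defaults above, for example, a task declaring only `runtime { cpu: 4 }` effectively
# runs with `docker: "ubuntu:20.04"` as well, since these defaults merge underneath the task's own
# runtime values.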
# Defaults applying to URI download tasks specifically. (New in v1.5.1)
download_defaults = {
"cpu": 2,
"memory": "1G"
}
# Run the command script as the invoking user's uid:gid, instead of the usual root. More secure,
# but interferes with commands that assume root access (e.g. apt-get). --as-me
as_user = false
# Fail the evaluation of ${}/~{} placeholders in task command templates, if the interpolated string
# does not match this POSIX regular expression. The default admits anything (including newlines),
# but applications that must process untrusted inputs might set this more restrictively, as one
# line of defense against code injection into task command scripts. (As this is a blunt tool, it's
# preferable to validate inputs before invoking miniwdl, if possible.)
placeholder_regex = (.|\n)*
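# For example, a (hypothetical) stricter policy admitting only a conservative character set on a
# single line:
#   placeholder_regex = [A-Za-z0-9 ,.:/_=+-]*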
# Set environment variable(s) in all task environments. The JSON object is keyed by environment
# variable name. If a variable is set to null here, then miniwdl passes through the variable from
# its own environment (if defined there). Any other value is used as a string.
# --env (New in v1.2.2)
# Warning: this is a non-standard side channel and relying on it is probably not portable to other
# WDL engines and/or compute platforms. Explicit WDL task inputs are usually better, except for a
# few cases like auth tokens for platform-specific tasks.
env = {}
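# e.g. (hypothetical variable names) pass through the host's AWS region and pin another variable:
#   env = {
#       "AWS_DEFAULT_REGION": null,
#       "PIPELINE_ENV": "prod"
#   }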
# If true, recognize `privileged: true` in task runtime sections and run the respective containers
# with privileged capabilities. Not recommended, for security & portability reasons. (New in v1.4.2)
allow_privileged = false
[download_cache]
# When File or Directory inputs are given URIs to be downloaded, store the downloaded copy in a
# local directory where it can later be found and reused for the same input URI.
put = false
# Enable retrieval of File/Directory input URIs from the local cache
get = false
# Base directory for the local download cache. A relative path will be joined to [file_io] root, or
# use $PWD to reference the miniwdl process working directory.
dir = /tmp/miniwdl_download_cache
# Remove URI query strings for cache key/lookup purposes. By default, downloads from URIs with
# query strings are never cached (neither put nor get).
ignore_query = false
# To be eligible for caching (in addition to above options), a URI must (i) match at least one glob
# pattern in enable_patterns, AND (ii) not match any disable_patterns.
enable_patterns = ["*"]
disable_patterns = ["*.php", "*.aspx"]
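# e.g. to cache only objects under one (hypothetical) S3 bucket:
#   enable_patterns = ["s3://my-reference-bucket/*"]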
# flock files in the cache directory while they're in use by a running workflow. This helps
# external cache-cleaning logic avoid disrupting running workflows, but may be unsuitable for
# network file systems with low limits on the number of outstanding flocks.
# (Opt-out new in v1.3.1)
flock = true
[download_aria2c]
# see: https://github.com/chanzuckerberg/miniwdl/tree/main/tools_image
docker = ghcr.io/miniwdl-ext/miniwdl-tools:Id_sha256_73733b619d46f8a3f75229fb6d0b3f61868d159be5fb0ddc3a04613feab9ad18
[download_awscli]
# If workflow inputs include s3:// URIs (or tasks generate them), load AWS credentials using boto3
# on the miniwdl host.
# Note: if running inside EC2, downloader & other tasks might be able to assume an IAM role via the
# instance metadata service, regardless of this setting. You can set the environment variable
# AWS_EC2_METADATA_DISABLED=true to prevent this (or use iptables to block the endpoint IP).
# Recommendation: use only trusted WDL and Docker images, if either host_credentials = true or an
# EC2 instance profile is available.
# Failing all of the above, public S3 URIs can always be used.
host_credentials = false
# see: https://github.com/chanzuckerberg/miniwdl/tree/main/tools_image
docker = ghcr.io/miniwdl-ext/miniwdl-tools:Id_sha256_73733b619d46f8a3f75229fb6d0b3f61868d159be5fb0ddc3a04613feab9ad18
[download_gsutil]
# current version from https://github.com/GoogleCloudPlatform/cloud-sdk-docker/releases
docker = gcr.io/google.com/cloudsdktool/cloud-sdk:398.0.0-slim
[call_cache]
# Register task outputs for potential reuse by subsequent executions of the same task & inputs
put = false
# Enable use of previously-cached outputs
get = false
# Pluggable implementation: the default stores cache JSON files in a local directory, and checks
# posix mtimes of any local files referenced in the cached inputs/outputs (invalidating the cache
# entry if any referenced files were modified or deleted in the meantime).
backend = dir
# A relative path will be joined to [file_io] root, or use $PWD to reference the miniwdl process
# working directory.
dir = ~/.cache/miniwdl
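# e.g. (illustrative) dir = $PWD/miniwdl_call_cache would keep the cache under the process working
# directory, using the $PWD substitution mentioned above.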
[plugins]
# Control which plugins are used. Plugins are installed using the Python entry points convention,
# https://packaging.python.org/specifications/entry-points/
# Furthermore, for a plugin to be used, its "object reference" must (i) match at least one glob
# pattern in enable_patterns, AND (ii) not match any disable_patterns.
enable_patterns = ["*"]
disable_patterns = ["miniwdl_task_omnibus_example:*"]
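# e.g. to additionally disable a (hypothetical) plugin package in its entirety:
#   disable_patterns = ["miniwdl_task_omnibus_example:*", "my_plugin_pkg:*"]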
[singularity]
# Singularity task runtime -- with `singularity` CLI already set up, set
# [scheduler] container_backend = singularity
#
# singularity executable and any desired global options
exe = ["singularity"]
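# e.g. to add singularity's global --quiet flag (assuming your singularity version supports it):
#   exe = ["singularity", "--quiet"]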
# Configuration options to pass to `singularity run` invocations. Defaults for docker-like
# isolation from the host.
run_options = [
"--containall",
"--no-mount", "hostfs",
"--fakeroot"
]
# Directory in which pulled images are stored, to save image construction time when the same task
# is started again. Empty value -> no image cache used.
image_cache =
[udocker]
# udocker task runtime -- with `udocker` CLI already set up, set
# [scheduler] container_backend = udocker
#
# udocker executable and any desired global options
exe = ["udocker", "--allow-root"]
# Configuration options to pass to `udocker run` invocations
run_options = ["--nobanner", "--rm"]